Diffstat (limited to 'src')
415 files changed, 15862 insertions, 7520 deletions
diff --git a/src/crypto_engines/ipsecmb/ipsecmb.c b/src/crypto_engines/ipsecmb/ipsecmb.c index 9981d738401..3006c5294d5 100644 --- a/src/crypto_engines/ipsecmb/ipsecmb.c +++ b/src/crypto_engines/ipsecmb/ipsecmb.c @@ -861,6 +861,7 @@ crypto_ipsecmb_init (vnet_crypto_engine_registration_t *r) ipsecmb_alg_data_t *ad; ipsecmb_per_thread_data_t *ptd; IMB_MGR *m = 0; + IMB_ARCH arch; if (!clib_cpu_supports_aes ()) return "AES ISA not available on this CPU"; @@ -875,12 +876,8 @@ crypto_ipsecmb_init (vnet_crypto_engine_registration_t *r) clib_memset_u8 (ptd->burst_jobs, 0, sizeof (IMB_JOB) * IMB_MAX_BURST_SIZE); #endif - if (clib_cpu_supports_avx512f ()) - init_mb_mgr_avx512 (ptd->mgr); - else if (clib_cpu_supports_avx2 () && clib_cpu_supports_bmi2 ()) - init_mb_mgr_avx2 (ptd->mgr); - else - init_mb_mgr_sse (ptd->mgr); + + init_mb_mgr_auto (ptd->mgr, &arch); if (ptd == imbm->per_thread_data) m = ptd->mgr; diff --git a/src/crypto_engines/openssl/main.c b/src/crypto_engines/openssl/main.c index f6c2229d3cd..a95c1710a34 100644 --- a/src/crypto_engines/openssl/main.c +++ b/src/crypto_engines/openssl/main.c @@ -49,6 +49,26 @@ static u32 num_threads; _ (null_gmac, AES_192_NULL_GMAC, EVP_aes_192_gcm, 0, 0) \ _ (null_gmac, AES_256_NULL_GMAC, EVP_aes_256_gcm, 0, 0) +#define foreach_openssl_linked_cbc_hmac_op \ + _ (AES_128_CBC_SHA1_TAG12, EVP_aes_128_cbc, EVP_sha1, 12) \ + _ (AES_192_CBC_SHA1_TAG12, EVP_aes_192_cbc, EVP_sha1, 12) \ + _ (AES_256_CBC_SHA1_TAG12, EVP_aes_256_cbc, EVP_sha1, 12) \ + _ (AES_128_CBC_SHA224_TAG14, EVP_aes_128_cbc, EVP_sha224, 14) \ + _ (AES_192_CBC_SHA224_TAG14, EVP_aes_192_cbc, EVP_sha224, 14) \ + _ (AES_256_CBC_SHA224_TAG14, EVP_aes_256_cbc, EVP_sha224, 14) \ + _ (AES_128_CBC_SHA256_TAG16, EVP_aes_128_cbc, EVP_sha256, 16) \ + _ (AES_192_CBC_SHA256_TAG16, EVP_aes_192_cbc, EVP_sha256, 16) \ + _ (AES_256_CBC_SHA256_TAG16, EVP_aes_256_cbc, EVP_sha256, 16) \ + _ (AES_128_CBC_SHA384_TAG24, EVP_aes_128_cbc, EVP_sha384, 24) \ + _ (AES_192_CBC_SHA384_TAG24, EVP_aes_192_cbc, EVP_sha384, 24) \ + _ (AES_256_CBC_SHA384_TAG24, EVP_aes_256_cbc, EVP_sha384, 24) \ + _ (AES_128_CBC_SHA512_TAG32, EVP_aes_128_cbc, EVP_sha512, 32) \ + _ (AES_192_CBC_SHA512_TAG32, EVP_aes_192_cbc, EVP_sha512, 32) \ + _ (AES_256_CBC_SHA512_TAG32, EVP_aes_256_cbc, EVP_sha512, 32) \ + _ (AES_128_CBC_MD5_TAG12, EVP_aes_128_cbc, EVP_md5, 12) \ + _ (AES_192_CBC_MD5_TAG12, EVP_aes_192_cbc, EVP_md5, 12) \ + _ (AES_256_CBC_MD5_TAG12, EVP_aes_256_cbc, EVP_md5, 12) + #define foreach_openssl_chacha20_evp_op \ _ (chacha20_poly1305, CHACHA20_POLY1305, EVP_chacha20_poly1305, 0, 0) \ _ (chacha20_poly1305, CHACHA20_POLY1305_TAG16_AAD0, EVP_chacha20_poly1305, \ @@ -611,6 +631,56 @@ crypto_openssl_key_handler (vnet_crypto_key_op_t kop, foreach_openssl_evp_op; #undef _ +#define _(n, c, m, t) \ + static u32 openssl_ops_enc_##n (vlib_main_t *vm, vnet_crypto_op_t *ops[], \ + u32 n_ops) \ + { \ + for (u32 i = 0; i < n_ops; i++) \ + ops[i]->digest_len = t; \ + openssl_ops_enc_cbc (vm, ops, 0, n_ops, c (), 1, 16); \ + openssl_ops_hmac (vm, ops, 0, n_ops, m ()); \ + return n_ops; \ + } \ + static u32 openssl_ops_dec_##n (vlib_main_t *vm, vnet_crypto_op_t *ops[], \ + u32 n_ops) \ + { \ + for (u32 i = 0; i < n_ops; i++) \ + ops[i]->digest_len = t; \ + openssl_ops_dec_cbc (vm, ops, 0, n_ops, c (), 1, 16); \ + openssl_ops_hmac (vm, ops, 0, n_ops, m ()); \ + return n_ops; \ + } \ + static u32 openssl_ops_enc_chained_##n ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \ + u32 n_ops) \ + { \ + for (u32 i = 0; i < 
n_ops; i++) \ + ops[i]->digest_len = t; \ + openssl_ops_enc_cbc (vm, ops, chunks, n_ops, c (), 1, 16); \ + openssl_ops_hmac (vm, ops, chunks, n_ops, m ()); \ + return n_ops; \ + } \ + static u32 openssl_ops_dec_chained_##n ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \ + u32 n_ops) \ + { \ + for (u32 i = 0; i < n_ops; i++) \ + ops[i]->digest_len = t; \ + openssl_ops_dec_cbc (vm, ops, chunks, n_ops, c (), 1, 16); \ + openssl_ops_hmac (vm, ops, chunks, n_ops, m ()); \ + return n_ops; \ + } \ + static void *openssl_ctx_##n (vnet_crypto_key_t *key, \ + vnet_crypto_key_op_t kop, \ + vnet_crypto_key_index_t idx) \ + { \ + openssl_ctx_cipher (key, kop, idx, c (), 0); \ + openssl_ctx_hmac (key, kop, idx, m ()); \ + return NULL; \ + } +foreach_openssl_linked_cbc_hmac_op +#undef _ + #define _(a, b) \ static u32 openssl_ops_hash_##a (vlib_main_t *vm, vnet_crypto_op_t *ops[], \ u32 n_ops) \ @@ -624,7 +694,7 @@ foreach_openssl_evp_op; return openssl_ops_hash (vm, ops, chunks, n_ops, b ()); \ } -foreach_openssl_hash_op; + foreach_openssl_hash_op; #undef _ #define _(a, b) \ @@ -666,8 +736,12 @@ crypto_openssl_init (vnet_crypto_engine_registration_t *r) foreach_openssl_evp_op; #undef _ +#define _(n, c, m, t) cm->ctx_fn[VNET_CRYPTO_ALG_##n] = openssl_ctx_##n; + foreach_openssl_linked_cbc_hmac_op +#undef _ + #define _(a, b) cm->ctx_fn[VNET_CRYPTO_ALG_HMAC_##a] = openssl_ctx_hmac_##a; - foreach_openssl_hmac_op; + foreach_openssl_hmac_op; #undef _ per_thread_data = r->per_thread_data; @@ -691,17 +765,28 @@ vnet_crypto_engine_op_handlers_t op_handlers[] = { .cfn = openssl_ops_dec_chained_##a }, foreach_openssl_evp_op #undef _ +#define _(n, c, m, t) \ + { \ + .opt = VNET_CRYPTO_OP_##n##_ENC, \ + .fn = openssl_ops_enc_##n, \ + .cfn = openssl_ops_enc_chained_##n, \ + }, \ + { .opt = VNET_CRYPTO_OP_##n##_DEC, \ + .fn = openssl_ops_dec_##n, \ + .cfn = openssl_ops_dec_chained_##n }, + foreach_openssl_linked_cbc_hmac_op +#undef _ #define _(a, b) \ { .opt = VNET_CRYPTO_OP_##a##_HMAC, \ .fn = openssl_ops_hmac_##a, \ .cfn = openssl_ops_hmac_chained_##a }, - foreach_openssl_hmac_op + foreach_openssl_hmac_op #undef _ #define _(a, b) \ { .opt = VNET_CRYPTO_OP_##a##_HASH, \ .fn = openssl_ops_hash_##a, \ .cfn = openssl_ops_hash_chained_##a }, - foreach_openssl_hash_op + foreach_openssl_hash_op #undef _ {} }; diff --git a/src/examples/srv6-sample-localsid/node.c b/src/examples/srv6-sample-localsid/node.c index e3a3259e877..4d727498c03 100644 --- a/src/examples/srv6-sample-localsid/node.c +++ b/src/examples/srv6-sample-localsid/node.c @@ -173,7 +173,7 @@ srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_fram from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); while (n_left_from > 0) { diff --git a/src/plugins/abf/abf_itf_attach.c b/src/plugins/abf/abf_itf_attach.c index 04e5c4c40c2..3e55df52562 100644 --- a/src/plugins/abf/abf_itf_attach.c +++ b/src/plugins/abf/abf_itf_attach.c @@ -681,18 +681,20 @@ VLIB_REGISTER_NODE (abf_ip6_node) = } }; -VNET_FEATURE_INIT (abf_ip4_feat, static) = -{ +VNET_FEATURE_INIT (abf_ip4_feat, static) = { .arc_name = "ip4-unicast", .node_name = "abf-input-ip4", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + "ip4-full-reassembly-feature", + "ip4-sv-reassembly-feature"), }; -VNET_FEATURE_INIT (abf_ip6_feat, static) 
= -{ +VNET_FEATURE_INIT (abf_ip6_feat, static) = { .arc_name = "ip6-unicast", .node_name = "abf-input-ip6", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa", + "ip6-full-reassembly-feature", + "ip6-sv-reassembly-feature"), }; static fib_node_t * diff --git a/src/plugins/acl/elog_acl_trace.h b/src/plugins/acl/elog_acl_trace.h index 0c4f68f7b0f..ae2ef8588ea 100644 --- a/src/plugins/acl/elog_acl_trace.h +++ b/src/plugins/acl/elog_acl_trace.h @@ -19,119 +19,143 @@ /* use like: elog_acl_cond_trace_X1(am, (x < 0), "foobar: %d", "i4", int32_value); */ -#define elog_acl_cond_trace_X1(am, trace_cond, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1) \ -do { \ - if (trace_cond) { \ - CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check); \ - u16 thread_index = os_get_thread_index (); \ - vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ - ELOG_TYPE_DECLARE (e) = \ - { \ - .format = "(%02d) " acl_elog_trace_format_label, \ - .format_args = "i2" acl_elog_trace_format_args, \ - }; \ - CLIB_PACKED(struct \ - { \ - u16 thread; \ - typeof(acl_elog_val1) val1; \ - }) *ed; \ - ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ - ed->thread = thread_index; \ - ed->val1 = acl_elog_val1; \ - } \ -} while (0) - +#define elog_acl_cond_trace_X1(am, trace_cond, acl_elog_trace_format_label, \ + acl_elog_trace_format_args, acl_elog_val1) \ + do \ + { \ + if (trace_cond) \ + { \ + CLIB_UNUSED (struct { \ + u8 available_space[18 - sizeof (acl_elog_val1)]; \ + } * static_check); \ + clib_thread_index_t thread_index = os_get_thread_index (); \ + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED (struct { \ + u16 thread; \ + typeof (acl_elog_val1) val1; \ + }) * \ + ed; \ + ed = \ + ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + } \ + } \ + while (0) /* use like: elog_acl_cond_trace_X2(am, (x<0), "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value); */ -#define elog_acl_cond_trace_X2(am, trace_cond, acl_elog_trace_format_label, acl_elog_trace_format_args, \ - acl_elog_val1, acl_elog_val2) \ -do { \ - if (trace_cond) { \ - CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)]; } *static_check); \ - u16 thread_index = os_get_thread_index (); \ - vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ - ELOG_TYPE_DECLARE (e) = \ - { \ - .format = "(%02d) " acl_elog_trace_format_label, \ - .format_args = "i2" acl_elog_trace_format_args, \ - }; \ - CLIB_PACKED(struct \ - { \ - u16 thread; \ - typeof(acl_elog_val1) val1; \ - typeof(acl_elog_val2) val2; \ - }) *ed; \ - ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ - ed->thread = thread_index; \ - ed->val1 = acl_elog_val1; \ - ed->val2 = acl_elog_val2; \ - } \ -} while (0) - +#define elog_acl_cond_trace_X2(am, trace_cond, acl_elog_trace_format_label, \ + acl_elog_trace_format_args, acl_elog_val1, \ + acl_elog_val2) \ + do \ + { \ + if (trace_cond) \ + { \ + CLIB_UNUSED (struct { \ + u8 available_space[18 - sizeof (acl_elog_val1) - \ + sizeof (acl_elog_val2)]; \ + } * static_check); \ + clib_thread_index_t thread_index = os_get_thread_index (); \ + vlib_worker_thread_t *w = vlib_worker_threads + 
thread_index; \ + ELOG_TYPE_DECLARE (e) = { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED (struct { \ + u16 thread; \ + typeof (acl_elog_val1) val1; \ + typeof (acl_elog_val2) val2; \ + }) * \ + ed; \ + ed = \ + ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + } \ + } \ + while (0) /* use like: elog_acl_cond_trace_X3(am, (x<0), "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value); */ -#define elog_acl_cond_trace_X3(am, trace_cond, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \ - acl_elog_val2, acl_elog_val3) \ -do { \ - if (trace_cond) { \ - CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \ - - sizeof(acl_elog_val3)]; } *static_check); \ - u16 thread_index = os_get_thread_index (); \ - vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ - ELOG_TYPE_DECLARE (e) = \ - { \ - .format = "(%02d) " acl_elog_trace_format_label, \ - .format_args = "i2" acl_elog_trace_format_args, \ - }; \ - CLIB_PACKED(struct \ - { \ - u16 thread; \ - typeof(acl_elog_val1) val1; \ - typeof(acl_elog_val2) val2; \ - typeof(acl_elog_val3) val3; \ - }) *ed; \ - ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ - ed->thread = thread_index; \ - ed->val1 = acl_elog_val1; \ - ed->val2 = acl_elog_val2; \ - ed->val3 = acl_elog_val3; \ - } \ -} while (0) - +#define elog_acl_cond_trace_X3(am, trace_cond, acl_elog_trace_format_label, \ + acl_elog_trace_format_args, acl_elog_val1, \ + acl_elog_val2, acl_elog_val3) \ + do \ + { \ + if (trace_cond) \ + { \ + CLIB_UNUSED (struct { \ + u8 available_space[18 - sizeof (acl_elog_val1) - \ + sizeof (acl_elog_val2) - \ + sizeof (acl_elog_val3)]; \ + } * static_check); \ + clib_thread_index_t thread_index = os_get_thread_index (); \ + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED (struct { \ + u16 thread; \ + typeof (acl_elog_val1) val1; \ + typeof (acl_elog_val2) val2; \ + typeof (acl_elog_val3) val3; \ + }) * \ + ed; \ + ed = \ + ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + ed->val3 = acl_elog_val3; \ + } \ + } \ + while (0) /* use like: elog_acl_cond_trace_X4(am, (x<0), "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value); */ -#define elog_acl_cond_trace_X4(am, trace_cond, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \ - acl_elog_val2, acl_elog_val3, acl_elog_val4) \ -do { \ - if (trace_cond) { \ - CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \ - - sizeof(acl_elog_val3) -sizeof(acl_elog_val4)]; } *static_check); \ - u16 thread_index = os_get_thread_index (); \ - vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ - ELOG_TYPE_DECLARE (e) = \ - { \ - .format = "(%02d) " acl_elog_trace_format_label, \ - .format_args = "i2" acl_elog_trace_format_args, \ - }; \ - CLIB_PACKED(struct \ - { \ - u16 thread; \ - typeof(acl_elog_val1) val1; \ - typeof(acl_elog_val2) val2; \ - typeof(acl_elog_val3) val3; \ - typeof(acl_elog_val4) val4; \ - }) *ed; \ - ed = 
ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ - ed->thread = thread_index; \ - ed->val1 = acl_elog_val1; \ - ed->val2 = acl_elog_val2; \ - ed->val3 = acl_elog_val3; \ - ed->val4 = acl_elog_val4; \ - } \ -} while (0) - +#define elog_acl_cond_trace_X4(am, trace_cond, acl_elog_trace_format_label, \ + acl_elog_trace_format_args, acl_elog_val1, \ + acl_elog_val2, acl_elog_val3, acl_elog_val4) \ + do \ + { \ + if (trace_cond) \ + { \ + CLIB_UNUSED (struct { \ + u8 available_space[18 - sizeof (acl_elog_val1) - \ + sizeof (acl_elog_val2) - \ + sizeof (acl_elog_val3) - \ + sizeof (acl_elog_val4)]; \ + } * static_check); \ + clib_thread_index_t thread_index = os_get_thread_index (); \ + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED (struct { \ + u16 thread; \ + typeof (acl_elog_val1) val1; \ + typeof (acl_elog_val2) val2; \ + typeof (acl_elog_val3) val3; \ + typeof (acl_elog_val4) val4; \ + }) * \ + ed; \ + ed = \ + ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + ed->val3 = acl_elog_val3; \ + ed->val4 = acl_elog_val4; \ + } \ + } \ + while (0) #endif diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index c4a971aada3..f1ea8dfaf0a 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -110,7 +110,7 @@ typedef struct { u8 as_u8[2]; u16 as_u16; } tcp_flags_seen; ; /* +2 bytes = 62 */ - u16 thread_index; /* +2 bytes = 64 */ + clib_thread_index_t thread_index; /* +2 bytes = 64 */ u64 link_enqueue_time; /* 8 byte = 8 */ u32 link_prev_idx; /* +4 bytes = 12 */ u32 link_next_idx; /* +4 bytes = 16 */ @@ -133,7 +133,7 @@ typedef struct { u64 as_u64; struct { u32 session_index; - u16 thread_index; + clib_thread_index_t thread_index; u16 intf_policy_epoch; }; }; @@ -255,119 +255,143 @@ u8 *format_acl_plugin_5tuple (u8 * s, va_list * args); /* use like: elog_acl_maybe_trace_X1(am, "foobar: %d", "i4", int32_value); */ -#define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1) \ -do { \ - if (am->trace_sessions) { \ - CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check); \ - u16 thread_index = os_get_thread_index (); \ - vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ - ELOG_TYPE_DECLARE (e) = \ - { \ - .format = "(%02d) " acl_elog_trace_format_label, \ - .format_args = "i2" acl_elog_trace_format_args, \ - }; \ - CLIB_PACKED(struct \ - { \ - u16 thread; \ - typeof(acl_elog_val1) val1; \ - }) *ed; \ - ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ - ed->thread = thread_index; \ - ed->val1 = acl_elog_val1; \ - } \ -} while (0) - +#define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, \ + acl_elog_trace_format_args, acl_elog_val1) \ + do \ + { \ + if (am->trace_sessions) \ + { \ + CLIB_UNUSED (struct { \ + u8 available_space[18 - sizeof (acl_elog_val1)]; \ + } * static_check); \ + clib_thread_index_t thread_index = os_get_thread_index (); \ + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED (struct { \ + u16 thread; \ + typeof (acl_elog_val1) val1; \ + }) * \ + ed; \ + ed = \ + 
ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + } \ + } \ + while (0) /* use like: elog_acl_maybe_trace_X2(am, "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value); */ -#define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, acl_elog_trace_format_args, \ - acl_elog_val1, acl_elog_val2) \ -do { \ - if (am->trace_sessions) { \ - CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)]; } *static_check); \ - u16 thread_index = os_get_thread_index (); \ - vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ - ELOG_TYPE_DECLARE (e) = \ - { \ - .format = "(%02d) " acl_elog_trace_format_label, \ - .format_args = "i2" acl_elog_trace_format_args, \ - }; \ - CLIB_PACKED(struct \ - { \ - u16 thread; \ - typeof(acl_elog_val1) val1; \ - typeof(acl_elog_val2) val2; \ - }) *ed; \ - ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ - ed->thread = thread_index; \ - ed->val1 = acl_elog_val1; \ - ed->val2 = acl_elog_val2; \ - } \ -} while (0) - +#define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, \ + acl_elog_trace_format_args, acl_elog_val1, \ + acl_elog_val2) \ + do \ + { \ + if (am->trace_sessions) \ + { \ + CLIB_UNUSED (struct { \ + u8 available_space[18 - sizeof (acl_elog_val1) - \ + sizeof (acl_elog_val2)]; \ + } * static_check); \ + clib_thread_index_t thread_index = os_get_thread_index (); \ + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED (struct { \ + u16 thread; \ + typeof (acl_elog_val1) val1; \ + typeof (acl_elog_val2) val2; \ + }) * \ + ed; \ + ed = \ + ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + } \ + } \ + while (0) /* use like: elog_acl_maybe_trace_X3(am, "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value); */ -#define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \ - acl_elog_val2, acl_elog_val3) \ -do { \ - if (am->trace_sessions) { \ - CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \ - - sizeof(acl_elog_val3)]; } *static_check); \ - u16 thread_index = os_get_thread_index (); \ - vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ - ELOG_TYPE_DECLARE (e) = \ - { \ - .format = "(%02d) " acl_elog_trace_format_label, \ - .format_args = "i2" acl_elog_trace_format_args, \ - }; \ - CLIB_PACKED(struct \ - { \ - u16 thread; \ - typeof(acl_elog_val1) val1; \ - typeof(acl_elog_val2) val2; \ - typeof(acl_elog_val3) val3; \ - }) *ed; \ - ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ - ed->thread = thread_index; \ - ed->val1 = acl_elog_val1; \ - ed->val2 = acl_elog_val2; \ - ed->val3 = acl_elog_val3; \ - } \ -} while (0) - +#define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, \ + acl_elog_trace_format_args, acl_elog_val1, \ + acl_elog_val2, acl_elog_val3) \ + do \ + { \ + if (am->trace_sessions) \ + { \ + CLIB_UNUSED (struct { \ + u8 available_space[18 - sizeof (acl_elog_val1) - \ + sizeof (acl_elog_val2) - \ + sizeof (acl_elog_val3)]; \ + } * static_check); \ + clib_thread_index_t thread_index = os_get_thread_index (); \ + vlib_worker_thread_t *w = 
vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED (struct { \ + u16 thread; \ + typeof (acl_elog_val1) val1; \ + typeof (acl_elog_val2) val2; \ + typeof (acl_elog_val3) val3; \ + }) * \ + ed; \ + ed = \ + ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + ed->val3 = acl_elog_val3; \ + } \ + } \ + while (0) /* use like: elog_acl_maybe_trace_X4(am, "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value); */ -#define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \ - acl_elog_val2, acl_elog_val3, acl_elog_val4) \ -do { \ - if (am->trace_sessions) { \ - CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \ - - sizeof(acl_elog_val3) -sizeof(acl_elog_val4)]; } *static_check); \ - u16 thread_index = os_get_thread_index (); \ - vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ - ELOG_TYPE_DECLARE (e) = \ - { \ - .format = "(%02d) " acl_elog_trace_format_label, \ - .format_args = "i2" acl_elog_trace_format_args, \ - }; \ - CLIB_PACKED(struct \ - { \ - u16 thread; \ - typeof(acl_elog_val1) val1; \ - typeof(acl_elog_val2) val2; \ - typeof(acl_elog_val3) val3; \ - typeof(acl_elog_val4) val4; \ - }) *ed; \ - ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ - ed->thread = thread_index; \ - ed->val1 = acl_elog_val1; \ - ed->val2 = acl_elog_val2; \ - ed->val3 = acl_elog_val3; \ - ed->val4 = acl_elog_val4; \ - } \ -} while (0) - +#define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, \ + acl_elog_trace_format_args, acl_elog_val1, \ + acl_elog_val2, acl_elog_val3, acl_elog_val4) \ + do \ + { \ + if (am->trace_sessions) \ + { \ + CLIB_UNUSED (struct { \ + u8 available_space[18 - sizeof (acl_elog_val1) - \ + sizeof (acl_elog_val2) - \ + sizeof (acl_elog_val3) - \ + sizeof (acl_elog_val4)]; \ + } * static_check); \ + clib_thread_index_t thread_index = os_get_thread_index (); \ + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED (struct { \ + u16 thread; \ + typeof (acl_elog_val1) val1; \ + typeof (acl_elog_val2) val2; \ + typeof (acl_elog_val3) val3; \ + typeof (acl_elog_val4) val4; \ + }) * \ + ed; \ + ed = \ + ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + ed->val3 = acl_elog_val3; \ + ed->val4 = acl_elog_val4; \ + } \ + } \ + while (0) #endif diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c index 9c3c662a8f1..b4f86208a71 100644 --- a/src/plugins/acl/hash_lookup.c +++ b/src/plugins/acl/hash_lookup.c @@ -946,31 +946,15 @@ hash_acl_reapply(acl_main_t *am, u32 lc_index, int acl_index) static void make_ip6_address_mask(ip6_address_t *addr, u8 prefix_len) { + ASSERT (prefix_len <= 128); ip6_address_mask_from_width(addr, prefix_len); } - -/* Maybe should be moved into the core somewhere */ -always_inline void -ip4_address_mask_from_width (ip4_address_t * a, u32 width) -{ - int i, byte, bit, bitnum; - ASSERT (width <= 32); - clib_memset (a, 0, sizeof (a[0])); - for (i = 0; i < 
width; i++) - { - bitnum = (7 - (i & 7)); - byte = i / 8; - bit = 1 << bitnum; - a->as_u8[byte] |= bit; - } -} - - static void make_ip4_address_mask(ip4_address_t *addr, u8 prefix_len) { - ip4_address_mask_from_width(addr, prefix_len); + ASSERT (prefix_len <= 32); + ip4_preflen_to_mask (prefix_len, addr); } static void diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h index eb9f0de920f..f39285344b0 100644 --- a/src/plugins/acl/public_inlines.h +++ b/src/plugins/acl/public_inlines.h @@ -268,8 +268,8 @@ fa_acl_match_ip6_addr (ip6_address_t * addr1, ip6_address_t * addr2, } if (prefixlen % 8) { - u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8); - u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8); + u8 b1 = *((u8 *) addr1 + prefixlen / 8); + u8 b2 = *((u8 *) addr2 + prefixlen / 8); u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1)); return (b1 & mask0) == b2; } @@ -715,8 +715,10 @@ acl_plugin_match_5tuple_inline_and_count (void *p_acl_main, u32 lc_index, r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap); } if (PREDICT_TRUE(ret)) { - u16 thread_index = os_get_thread_index (); - vlib_increment_combined_counter(am->combined_acl_counters + *r_acl_match_p, thread_index, *r_rule_match_p, 1, packet_size); + clib_thread_index_t thread_index = os_get_thread_index (); + vlib_increment_combined_counter ( + am->combined_acl_counters + *r_acl_match_p, thread_index, + *r_rule_match_p, 1, packet_size); } return ret; } diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c index 418baef9b6b..10f0e92c808 100644 --- a/src/plugins/acl/sess_mgmt_node.c +++ b/src/plugins/acl/sess_mgmt_node.c @@ -136,16 +136,17 @@ fa_session_get_list_timeout (acl_main_t * am, fa_session_t * sess) } static u64 -acl_fa_get_list_head_expiry_time (acl_main_t * am, - acl_fa_per_worker_data_t * pw, u64 now, - u16 thread_index, int timeout_type) +acl_fa_get_list_head_expiry_time (acl_main_t *am, acl_fa_per_worker_data_t *pw, + u64 now, clib_thread_index_t thread_index, + int timeout_type) { return pw->fa_conn_list_head_expiry_time[timeout_type]; } static int -acl_fa_conn_time_to_check (acl_main_t * am, acl_fa_per_worker_data_t * pw, - u64 now, u16 thread_index, u32 session_index) +acl_fa_conn_time_to_check (acl_main_t *am, acl_fa_per_worker_data_t *pw, + u64 now, clib_thread_index_t thread_index, + u32 session_index) { if (session_index == FA_SESSION_BOGUS_INDEX) return 0; @@ -162,7 +163,8 @@ acl_fa_conn_time_to_check (acl_main_t * am, acl_fa_per_worker_data_t * pw, * return the total number of sessions reclaimed. 
*/ static int -acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now) +acl_fa_check_idle_sessions (acl_main_t *am, clib_thread_index_t thread_index, + u64 now) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; fa_full_session_id_t fsid; @@ -429,7 +431,7 @@ acl_fa_worker_conn_cleaner_process (vlib_main_t * vm, { acl_main_t *am = &acl_main; u64 now = clib_cpu_time_now (); - u16 thread_index = os_get_thread_index (); + clib_thread_index_t thread_index = os_get_thread_index (); acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; int num_expired; elog_acl_maybe_trace_X1 (am, diff --git a/src/plugins/acl/session_inlines.h b/src/plugins/acl/session_inlines.h index edc8a7057ee..c98194005a4 100644 --- a/src/plugins/acl/session_inlines.h +++ b/src/plugins/acl/session_inlines.h @@ -115,16 +115,16 @@ fa_session_get_timeout (acl_main_t * am, fa_session_t * sess) } always_inline fa_session_t * -get_session_ptr_no_check (acl_main_t * am, u16 thread_index, +get_session_ptr_no_check (acl_main_t *am, clib_thread_index_t thread_index, u32 session_index) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; return pool_elt_at_index (pw->fa_sessions_pool, session_index); } - always_inline fa_session_t * -get_session_ptr (acl_main_t * am, u16 thread_index, u32 session_index) +get_session_ptr (acl_main_t *am, clib_thread_index_t thread_index, + u32 session_index) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; @@ -135,7 +135,8 @@ get_session_ptr (acl_main_t * am, u16 thread_index, u32 session_index) } always_inline int -is_valid_session_ptr (acl_main_t * am, u16 thread_index, fa_session_t * sess) +is_valid_session_ptr (acl_main_t *am, clib_thread_index_t thread_index, + fa_session_t *sess) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; return ((sess != 0) @@ -470,10 +471,10 @@ acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index) am->fa_conn_table_max_entries); } - always_inline void -acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, - u32 sw_if_index, u64 now) +acl_fa_try_recycle_session (acl_main_t *am, int is_input, + clib_thread_index_t thread_index, u32 sw_if_index, + u64 now) { /* try to recycle a TCP transient session */ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; diff --git a/src/plugins/adl/ip4_allowlist.c b/src/plugins/adl/ip4_allowlist.c index 4c755725ea7..a44cb51762f 100644 --- a/src/plugins/adl/ip4_allowlist.c +++ b/src/plugins/adl/ip4_allowlist.c @@ -58,7 +58,7 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm, adl_feature_type_t next_index; adl_main_t *cm = &adl_main; vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 allowed_packets; from = vlib_frame_vector_args (frame); diff --git a/src/plugins/adl/ip6_allowlist.c b/src/plugins/adl/ip6_allowlist.c index 5f38484666b..f9d964645c4 100644 --- a/src/plugins/adl/ip6_allowlist.c +++ b/src/plugins/adl/ip6_allowlist.c @@ -58,7 +58,7 @@ VLIB_NODE_FN (ip6_adl_allowlist_node) (vlib_main_t * vm, adl_feature_type_t next_index; adl_main_t *cm = &adl_main; vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 allowed_packets; from = vlib_frame_vector_args (frame); diff --git a/src/plugins/af_packet/af_packet.c 
b/src/plugins/af_packet/af_packet.c index 8cb2af27d7f..f3a1f495fe7 100644 --- a/src/plugins/af_packet/af_packet.c +++ b/src/plugins/af_packet/af_packet.c @@ -30,7 +30,7 @@ #include <vppinfra/linux/sysfs.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vnet/ip/ip.h> #include <vnet/devices/netlink.h> #include <vnet/ethernet/ethernet.h> diff --git a/src/plugins/af_packet/node.c b/src/plugins/af_packet/node.c index 279f11c0183..e60a037b093 100644 --- a/src/plugins/af_packet/node.c +++ b/src/plugins/af_packet/node.c @@ -269,7 +269,7 @@ af_packet_v3_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, u32 block_nr = rx_queue->rx_req->req3.tp_block_nr; u8 *block_start = 0; uword n_trace = vlib_get_trace_count (vm, node); - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); u32 min_bufs = rx_queue->rx_req->req3.tp_frame_size / n_buffer_bytes; u32 num_pkts = 0; @@ -571,7 +571,7 @@ af_packet_v2_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_num = rx_queue->rx_req->req.tp_frame_nr; u8 *block_start = rx_queue->rx_ring[block]; uword n_trace = vlib_get_trace_count (vm, node); - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); u32 min_bufs = rx_queue->rx_req->req.tp_frame_size / n_buffer_bytes; u32 sw_if_index = apif->sw_if_index; diff --git a/src/plugins/af_xdp/device.c b/src/plugins/af_xdp/device.c index 63a276ce51e..8d9496206d2 100644 --- a/src/plugins/af_xdp/device.c +++ b/src/plugins/af_xdp/device.c @@ -24,7 +24,7 @@ #include <linux/limits.h> #include <bpf/bpf.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vlib/pci/pci.h> #include <vppinfra/linux/netns.h> #include <vppinfra/linux/sysfs.h> diff --git a/src/plugins/cnat/cnat_snat_policy.c b/src/plugins/cnat/cnat_snat_policy.c index cd9bfef492a..5f15b7d26c9 100644 --- a/src/plugins/cnat/cnat_snat_policy.c +++ b/src/plugins/cnat/cnat_snat_policy.c @@ -22,7 +22,8 @@ cnat_snat_policy_main_t cnat_snat_policy_main; uword unformat_cnat_snat_interface_map_type (unformat_input_t *input, va_list *args) { - u8 *a = va_arg (*args, u8 *); + cnat_snat_interface_map_type_t *a = + va_arg (*args, cnat_snat_interface_map_type_t *); if (unformat (input, "include-v4")) *a = CNAT_SNAT_IF_MAP_INCLUDE_V4; else if (unformat (input, "include-v6")) @@ -113,7 +114,7 @@ cnat_snat_policy_add_del_if_command_fn (vlib_main_t *vm, vnet_main_t *vnm = vnet_get_main (); int is_add = 1; u32 sw_if_index = ~0; - u32 table = 0; + cnat_snat_interface_map_type_t table = CNAT_SNAT_IF_MAP_INCLUDE_V4; int rv; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) diff --git a/src/plugins/cnat/cnat_types.h b/src/plugins/cnat/cnat_types.h index d229d21adae..37eb62ec981 100644 --- a/src/plugins/cnat/cnat_types.h +++ b/src/plugins/cnat/cnat_types.h @@ -192,7 +192,7 @@ typedef struct cnat_timestamp_mpool_t_ typedef struct cnat_node_ctx_ { f64 now; - u32 thread_index; + clib_thread_index_t thread_index; ip_address_family_t af; u8 do_trace; } cnat_node_ctx_t; diff --git a/src/plugins/crypto_sw_scheduler/main.c b/src/plugins/crypto_sw_scheduler/main.c index dc97ce937d9..bb1505a38cf 100644 --- a/src/plugins/crypto_sw_scheduler/main.c +++ b/src/plugins/crypto_sw_scheduler/main.c @@ -446,7 +446,7 @@ convert_async_crypto_id (vnet_crypto_op_id_t async_op_id, u32 *crypto_op, 
static_always_inline vnet_crypto_async_frame_t * crypto_sw_scheduler_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, - u32 *enqueue_thread_idx) + clib_thread_index_t *enqueue_thread_idx) { crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main; crypto_sw_scheduler_per_thread_data_t *ptd = diff --git a/src/plugins/ct6/ct6.h b/src/plugins/ct6/ct6.h index 0b7deb07839..a6919174d86 100644 --- a/src/plugins/ct6/ct6.h +++ b/src/plugins/ct6/ct6.h @@ -46,7 +46,7 @@ typedef CLIB_PACKED (struct typedef struct { ct6_session_key_t key; - u32 thread_index; + clib_thread_index_t thread_index; u32 next_index; u32 prev_index; u32 hits; @@ -95,7 +95,7 @@ static inline void ct6_lru_remove (ct6_main_t * cmp, ct6_session_t * s0) { ct6_session_t *next_sess, *prev_sess; - u32 thread_index; + clib_thread_index_t thread_index; u32 s0_index; thread_index = s0->thread_index; @@ -128,7 +128,7 @@ static inline void ct6_lru_add (ct6_main_t * cmp, ct6_session_t * s0, f64 now) { ct6_session_t *next_sess; - u32 thread_index; + clib_thread_index_t thread_index; u32 s0_index; s0->hits++; diff --git a/src/plugins/dev_armada/pp2/rx.c b/src/plugins/dev_armada/pp2/rx.c index 5b0e8d35000..8eff72d6157 100644 --- a/src/plugins/dev_armada/pp2/rx.c +++ b/src/plugins/dev_armada/pp2/rx.c @@ -140,6 +140,7 @@ mrvl_pp2_rx_inline (vlib_main_t *vm, vlib_node_runtime_t *node, n_desc); n_sel = vlib_frame_bitmap_count_set_bits (selected_bmp); n_avail -= n_sel; + vlib_frame_bitmap_xor (avail_bmp, selected_bmp); if (uword_bitmap_is_bit_set (mp->valid_dsa_src_bitmap, index)) { @@ -207,7 +208,7 @@ mrvl_pp2_rx_refill (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_dev_port_t *port = rxq->port; vnet_dev_t *dev = port->dev; mvpp2_device_t *md = vnet_dev_get_data (dev); - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; struct pp2_hif *hif = md->hif[thread_index]; struct pp2_bpool *bpool = md->thread[thread_index].bpool; struct buff_release_entry *bre = md->thread[thread_index].bre; diff --git a/src/plugins/dev_ena/ena.c b/src/plugins/dev_ena/ena.c index ed5c47ed505..a81a33d5f22 100644 --- a/src/plugins/dev_ena/ena.c +++ b/src/plugins/dev_ena/ena.c @@ -13,7 +13,6 @@ static ena_aq_host_info_t host_info = { .os_type = 3, /* DPDK */ - .kernel_ver_str = VPP_BUILD_VER, .os_dist_str = VPP_BUILD_VER, .driver_version = { .major = 16, @@ -171,6 +170,8 @@ ena_init (vlib_main_t *vm, vnet_dev_t *dev) *ed->host_info = host_info; ed->host_info->num_cpus = vlib_get_n_threads (); + strncpy ((char *) ed->host_info->kernel_ver_str, VPP_BUILD_VER, + sizeof (ed->host_info->kernel_ver_str) - 1); ena_set_mem_addr (vm, dev, &host_attr.os_info_ba, ed->host_info); if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG, diff --git a/src/plugins/dev_iavf/virtchnl.h b/src/plugins/dev_iavf/virtchnl.h index 2099104c8ad..72158684e9e 100644 --- a/src/plugins/dev_iavf/virtchnl.h +++ b/src/plugins/dev_iavf/virtchnl.h @@ -560,6 +560,7 @@ typedef struct { u16 unicast_promisc : 1; u16 multicast_promisc : 1; + u16 unused : 14; }; u16 flags; }; diff --git a/src/plugins/dev_octeon/crypto.c b/src/plugins/dev_octeon/crypto.c index 800f24a008a..49b6f61375c 100644 --- a/src/plugins/dev_octeon/crypto.c +++ b/src/plugins/dev_octeon/crypto.c @@ -1354,7 +1354,7 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, vnet_crypto_key_t *key = vnet_crypto_get_key (key_index); roc_se_cipher_type enc_type = 0; roc_se_auth_type auth_type = 0; - u32 digest_len = ~0; + u32 digest_len = 16; i32 rv = 0; switch 
(key->alg) @@ -1366,9 +1366,6 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, sess->aes_gcm = 1; sess->iv_offset = 0; sess->iv_length = 16; - sess->cpt_ctx.mac_len = 16; - sess->cpt_op = type; - digest_len = 16; break; case VNET_CRYPTO_ALG_CHACHA20_POLY1305: enc_type = ROC_SE_CHACHA20; @@ -1381,6 +1378,9 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, return -1; } + sess->cpt_ctx.mac_len = digest_len; + sess->cpt_op = type; + rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, key->data, key->length); if (rv) { @@ -1827,7 +1827,7 @@ oct_crypto_enqueue_aead_aad_0_dec (vlib_main_t *vm, vnet_crypto_async_frame_t * oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, - u32 *enqueue_thread_idx) + clib_thread_index_t *enqueue_thread_idx) { oct_crypto_main_t *ocm = &oct_crypto_main; u32 deq_head, status = VNET_CRYPTO_OP_STATUS_COMPLETED; @@ -1940,7 +1940,7 @@ oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev) } int -oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev) +oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd) { oct_crypto_main_t *ocm = &oct_crypto_main; vlib_thread_main_t *tm = vlib_get_thread_main (); @@ -1961,7 +1961,7 @@ oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev) * Each pending queue will get number of cpt desc / number of cores. * And that desc count is shared across inflight entries. */ - n_inflight_req = (OCT_CPT_LF_MAX_NB_DESC / tm->n_vlib_mains); + n_inflight_req = (ocd->n_desc / tm->n_vlib_mains); for (i = 0; i < tm->n_vlib_mains; ++i) { diff --git a/src/plugins/dev_octeon/crypto.h b/src/plugins/dev_octeon/crypto.h index 5bd26f6b9be..a99ee12ddb2 100644 --- a/src/plugins/dev_octeon/crypto.h +++ b/src/plugins/dev_octeon/crypto.h @@ -11,6 +11,9 @@ #define OCT_MAX_N_CPT_DEV 2 +#define OCT_CPT_LF_DEF_NB_DESC 16384 + +#define OCT_CPT_LF_MIN_NB_DESC 1024 #define OCT_CPT_LF_MAX_NB_DESC 128000 /* CRYPTO_ID, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */ @@ -81,6 +84,7 @@ typedef struct struct roc_cpt_lmtline lmtline; struct roc_cpt_lf lf; vnet_dev_t *dev; + u32 n_desc; } oct_crypto_dev_t; typedef struct @@ -207,9 +211,10 @@ int oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame); int oct_crypto_enqueue_aead_aad_0_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame); -vnet_crypto_async_frame_t *oct_crypto_frame_dequeue (vlib_main_t *vm, - u32 *nb_elts_processed, - u32 *enqueue_thread_idx); +vnet_crypto_async_frame_t * +oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, + clib_thread_index_t *enqueue_thread_idx); int oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev); -int oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev); +int oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev, + oct_crypto_dev_t *ocd); #endif /* _CRYPTO_H_ */ diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c index 561cbe94fed..69fb097e91f 100644 --- a/src/plugins/dev_octeon/init.c +++ b/src/plugins/dev_octeon/init.c @@ -61,6 +61,22 @@ static struct #undef _ }; +static vnet_dev_arg_t oct_dev_args[] = { + { + .id = OCT_DEV_ARG_CRYPTO_N_DESC, + .name = "n_desc", + .desc = "number of cpt descriptors, applicable to cpt devices only", + .type = VNET_DEV_ARG_TYPE_UINT32, + .default_val.uint32 = OCT_CPT_LF_DEF_NB_DESC, + }, + { + .id = OCT_DEV_ARG_END, + .name = "end", + .desc = "Argument end", + .type = VNET_DEV_ARG_END, + }, +}; + static u8 * oct_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void 
*dev_info) { @@ -241,7 +257,7 @@ oct_conf_cpt_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd) cpt_lf = &ocd->lf; cpt_lmtline = &ocd->lmtline; - cpt_lf->nb_desc = OCT_CPT_LF_MAX_NB_DESC; + cpt_lf->nb_desc = ocd->n_desc; cpt_lf->lf_id = 0; if ((rrv = roc_cpt_lf_init (roc_cpt, cpt_lf)) < 0) return cnx_return_roc_err (dev, rrv, "roc_cpt_lf_init"); @@ -261,6 +277,7 @@ oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev) extern oct_plt_init_param_t oct_plt_init_param; oct_device_t *cd = vnet_dev_get_data (dev); oct_crypto_dev_t *ocd = NULL; + u32 n_desc; int rrv; if (ocm->n_cpt == OCT_MAX_N_CPT_DEV || ocm->started) @@ -274,6 +291,27 @@ oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev) ocd->roc_cpt->pci_dev = &cd->plt_pci_dev; ocd->dev = dev; + ocd->n_desc = OCT_CPT_LF_DEF_NB_DESC; + + foreach_vnet_dev_args (arg, dev) + { + if (arg->id == OCT_DEV_ARG_CRYPTO_N_DESC && + vnet_dev_arg_get_uint32 (arg)) + { + n_desc = vnet_dev_arg_get_uint32 (arg); + if (n_desc < OCT_CPT_LF_MIN_NB_DESC || + n_desc > OCT_CPT_LF_MAX_NB_DESC) + { + log_err (dev, + "number of cpt descriptors should be within range " + "of %u and %u", + OCT_CPT_LF_MIN_NB_DESC, OCT_CPT_LF_MAX_NB_DESC); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + + ocd->n_desc = vnet_dev_arg_get_uint32 (arg); + } + } if ((rrv = roc_cpt_dev_init (ocd->roc_cpt))) return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_init"); @@ -290,7 +328,7 @@ oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev) * Initialize s/w queues, which are common across multiple * crypto devices */ - oct_conf_sw_queue (vm, dev); + oct_conf_sw_queue (vm, dev, ocd); ocm->crypto_dev[0] = ocd; } @@ -396,6 +434,7 @@ VNET_DEV_REGISTER_DRIVER (octeon) = { .free = oct_free, .probe = oct_probe, }, + .args = oct_dev_args, }; static clib_error_t * diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h index ccf8f62880d..0cf937528f0 100644 --- a/src/plugins/dev_octeon/octeon.h +++ b/src/plugins/dev_octeon/octeon.h @@ -25,6 +25,12 @@ typedef enum { + OCT_DEV_ARG_CRYPTO_N_DESC = 1, + OCT_DEV_ARG_END, +} oct_dev_args_t; + +typedef enum +{ OCT_DEVICE_TYPE_UNKNOWN = 0, OCT_DEVICE_TYPE_RVU_PF, OCT_DEVICE_TYPE_RVU_VF, diff --git a/src/plugins/dma_intel/dsa.c b/src/plugins/dma_intel/dsa.c index 473f2efa93e..20a90e34b0e 100644 --- a/src/plugins/dma_intel/dsa.c +++ b/src/plugins/dma_intel/dsa.c @@ -103,7 +103,6 @@ intel_dsa_batch_fallback (vlib_main_t *vm, intel_dsa_batch_t *b, clib_memcpy_fast (desc->dst, desc->src, desc->size); } b->status = INTEL_DSA_STATUS_CPU_SUCCESS; - ch->submitted++; return; } @@ -407,6 +406,7 @@ intel_dsa_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, /* fallback to software if exception happened */ intel_dsa_batch_fallback (vm, b, ch); glitch = 1 & b->barrier_before_last; + t->pending_batches[n++] = b; } else { diff --git a/src/plugins/dpdk/cryptodev/cryptodev.c b/src/plugins/dpdk/cryptodev/cryptodev.c index c60f9c886ff..af695580363 100644 --- a/src/plugins/dpdk/cryptodev/cryptodev.c +++ b/src/plugins/dpdk/cryptodev/cryptodev.c @@ -128,14 +128,14 @@ prepare_linked_xform (struct rte_crypto_sym_xform *xforms, xform_cipher->cipher.algo = cipher_algo; xform_cipher->cipher.key.data = key_cipher->data; - xform_cipher->cipher.key.length = vec_len (key_cipher->data); + xform_cipher->cipher.key.length = key_cipher->length; xform_cipher->cipher.iv.length = 16; xform_cipher->cipher.iv.offset = CRYPTODEV_IV_OFFSET; xform_auth->auth.algo = auth_algo; xform_auth->auth.digest_length = digest_len; xform_auth->auth.key.data = key_auth->data; - 
xform_auth->auth.key.length = vec_len (key_auth->data); + xform_auth->auth.key.length = key_auth->length; return 0; } @@ -608,7 +608,7 @@ format_cryptodev_inst (u8 * s, va_list * args) cryptodev_main_t *cmt = &cryptodev_main; u32 inst = va_arg (*args, u32); cryptodev_inst_t *cit = cmt->cryptodev_inst + inst; - u32 thread_index = 0; + clib_thread_index_t thread_index = 0; struct rte_cryptodev_info info; rte_cryptodev_info_get (cit->dev_id, &info); @@ -670,7 +670,7 @@ cryptodev_show_cache_rings_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { cryptodev_main_t *cmt = &cryptodev_main; - u32 thread_index = 0; + clib_thread_index_t thread_index = 0; u16 i; vec_foreach_index (thread_index, cmt->per_thread_data) { @@ -756,7 +756,7 @@ cryptodev_set_assignment_fn (vlib_main_t * vm, unformat_input_t * input, cryptodev_main_t *cmt = &cryptodev_main; cryptodev_engine_thread_t *cet; unformat_input_t _line_input, *line_input = &_line_input; - u32 thread_index, inst_index; + clib_thread_index_t thread_index, inst_index; u32 thread_present = 0, inst_present = 0; clib_error_t *error = 0; int ret; diff --git a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c index 8d55e4fbf0f..2282ffac10c 100644 --- a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c +++ b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c @@ -461,7 +461,8 @@ error_exit: } static_always_inline u8 -cryptodev_frame_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx) +cryptodev_frame_dequeue_internal (vlib_main_t *vm, + clib_thread_index_t *enqueue_thread_idx) { cryptodev_main_t *cmt = &cryptodev_main; cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index; @@ -563,7 +564,7 @@ cryptodev_enqueue_frame (vlib_main_t *vm, cryptodev_cache_ring_elt_t *ring_elt) static_always_inline vnet_crypto_async_frame_t * cryptodev_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, - u32 *enqueue_thread_idx) + clib_thread_index_t *enqueue_thread_idx) { cryptodev_main_t *cmt = &cryptodev_main; vnet_crypto_main_t *cm = &crypto_main; @@ -670,7 +671,7 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx) vec_foreach (cet, cmt->per_thread_data) { - u32 thread_index = cet - cmt->per_thread_data; + clib_thread_index_t thread_index = cet - cmt->per_thread_data; u32 numa = vlib_get_main_by_index (thread_index)->numa_node; name = format (0, "vpp_cop_pool_%u_%u", numa, thread_index); cet->cop_pool = rte_mempool_create ( diff --git a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c index 67ab9c89e67..40d0a4299da 100644 --- a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c +++ b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c @@ -463,7 +463,8 @@ cryptodev_post_dequeue (void *frame, u32 index, u8 is_op_success) } static_always_inline u8 -cryptodev_raw_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx) +cryptodev_raw_dequeue_internal (vlib_main_t *vm, + clib_thread_index_t *enqueue_thread_idx) { cryptodev_main_t *cmt = &cryptodev_main; cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index; @@ -537,7 +538,7 @@ cryptodev_enqueue_frame_to_qat (vlib_main_t *vm, static_always_inline vnet_crypto_async_frame_t * cryptodev_raw_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, - u32 *enqueue_thread_idx) + clib_thread_index_t *enqueue_thread_idx) { cryptodev_main_t *cmt = &cryptodev_main; vnet_crypto_main_t *cm = &crypto_main; diff --git a/src/plugins/dpdk/device/common.c 
b/src/plugins/dpdk/device/common.c index d6eed5441b4..7671fc2639c 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -17,7 +17,7 @@ #include <vppinfra/vec.h> #include <vppinfra/format.h> #include <vppinfra/file.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <assert.h> #include <vnet/ip/ip.h> @@ -369,8 +369,7 @@ dpdk_setup_interrupts (dpdk_device_t *xd) if (xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE) { clib_file_main_t *fm = &file_main; - clib_file_t *f = - pool_elt_at_index (fm->file_pool, rxq->clib_file_index); + clib_file_t *f = clib_file_get (fm, rxq->clib_file_index); fm->file_update (f, UNIX_FILE_UPDATE_DELETE); } } diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c index c5abbd5f727..5fd936d1743 100644 --- a/src/plugins/dpdk/device/device.c +++ b/src/plugins/dpdk/device/device.c @@ -22,7 +22,7 @@ #include <dpdk/device/dpdk.h> #include <dpdk/device/dpdk_priv.h> #include <vppinfra/error.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #define foreach_dpdk_tx_func_error \ _(PKT_DROP, "Tx packet drops (dpdk tx failure)") @@ -159,7 +159,7 @@ tx_burst_vector_internal (vlib_main_t *vm, dpdk_device_t *xd, { dpdk_tx_queue_t *txq; u32 n_retry; - int n_sent = 0; + u32 n_sent = 0; n_retry = 16; txq = vec_elt_at_index (xd->tx_queues, queue_id); @@ -279,9 +279,11 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (f); u32 n_packets = f->n_vectors; u32 n_left; - u32 thread_index = vm->thread_index; + u32 n_prep; + clib_thread_index_t thread_index = vm->thread_index; int queue_id = tf->queue_id; u8 is_shared = tf->shared_queue; + u8 offload_enabled = 0; u32 tx_pkts = 0; dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data, thread_index); @@ -333,6 +335,7 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) && (or_flags & VNET_BUFFER_F_OFFLOAD))) { + offload_enabled = 1; dpdk_buffer_tx_offload (xd, b[0], mb[0]); dpdk_buffer_tx_offload (xd, b[1], mb[1]); dpdk_buffer_tx_offload (xd, b[2], mb[2]); @@ -386,6 +389,7 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) && (or_flags & VNET_BUFFER_F_OFFLOAD))) { + offload_enabled = 1; dpdk_buffer_tx_offload (xd, b[0], mb[0]); dpdk_buffer_tx_offload (xd, b[1], mb[1]); } @@ -408,7 +412,13 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, b[0] = vlib_buffer_from_rte_mbuf (mb[0]); dpdk_validate_rte_mbuf (vm, b[0], 1); - dpdk_buffer_tx_offload (xd, b[0], mb[0]); + + if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) && + (b[0]->flags & VNET_BUFFER_F_OFFLOAD))) + { + offload_enabled = 1; + dpdk_buffer_tx_offload (xd, b[0], mb[0]); + } if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) if (b[0]->flags & VLIB_BUFFER_IS_TRACED) @@ -418,32 +428,44 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, n_left--; } - /* transmit as many packets as possible */ + /* prepare and transmit as many packets as possible */ tx_pkts = n_packets = mb - ptd->mbufs; - n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets, queue_id, - is_shared); + n_prep = n_packets; - { - /* If there is no callback then drop any non-transmitted packets */ - if (PREDICT_FALSE (n_left)) - { - tx_pkts -= n_left; - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); + if (PREDICT_FALSE (offload_enabled && + 
(xd->flags & DPDK_DEVICE_FLAG_TX_PREPARE))) + { + n_prep = + rte_eth_tx_prepare (xd->port_id, queue_id, ptd->mbufs, n_packets); - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_TX_ERROR); + /* If mbufs are malformed then drop any non-prepared packets */ + if (PREDICT_FALSE (n_prep != n_packets)) + { + n_left = n_packets - n_prep; + } + } - vlib_increment_simple_counter (cm, thread_index, xd->sw_if_index, - n_left); + n_left += + tx_burst_vector_internal (vm, xd, ptd->mbufs, n_prep, queue_id, is_shared); - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, - n_left); + /* If there is no callback then drop any non-transmitted packets */ + if (PREDICT_FALSE (n_left)) + { + tx_pkts -= n_left; + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); - while (n_left--) - rte_pktmbuf_free (ptd->mbufs[n_packets - n_left - 1]); - } - } + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + + vlib_increment_simple_counter (cm, thread_index, xd->sw_if_index, + n_left); + + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + n_left); + + rte_pktmbuf_free_bulk (&ptd->mbufs[tx_pkts], n_left); + } return tx_pkts; } @@ -707,7 +729,7 @@ dpdk_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid, else if (mode == VNET_HW_IF_RX_MODE_POLLING) { rxq = vec_elt_at_index (xd->rx_queues, qid); - f = pool_elt_at_index (fm->file_pool, rxq->clib_file_index); + f = clib_file_get (fm, rxq->clib_file_index); fm->file_update (f, UNIX_FILE_UPDATE_DELETE); } else if (!(xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE)) @@ -715,7 +737,7 @@ dpdk_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid, else { rxq = vec_elt_at_index (xd->rx_queues, qid); - f = pool_elt_at_index (fm->file_pool, rxq->clib_file_index); + f = clib_file_get (fm, rxq->clib_file_index); fm->file_update (f, UNIX_FILE_UPDATE_ADD); } if (rv) diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index 2440439989f..70d9cc715dc 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -71,7 +71,8 @@ typedef uint16_t dpdk_portid_t; _ (11, RX_FLOW_OFFLOAD, "rx-flow-offload") \ _ (12, RX_IP4_CKSUM, "rx-ip4-cksum") \ _ (13, INT_SUPPORTED, "int-supported") \ - _ (14, INT_UNMASKABLE, "int-unmaskable") + _ (14, INT_UNMASKABLE, "int-unmaskable") \ + _ (15, TX_PREPARE, "tx-prepare") typedef enum { @@ -131,6 +132,7 @@ typedef struct u32 interface_number_from_port_id : 1; u32 use_intel_phdr_cksum : 1; u32 int_unmaskable : 1; + u32 need_tx_prepare : 1; } dpdk_driver_t; dpdk_driver_t *dpdk_driver_find (const char *name, const char **desc); diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h index 794953da55e..2067b118532 100644 --- a/src/plugins/dpdk/device/dpdk_priv.h +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -50,7 +50,7 @@ dpdk_device_flag_set (dpdk_device_t *xd, __typeof__ (xd->flags) flag, int val) void dpdk_counters_xstats_init (dpdk_device_t *xd); static inline void -dpdk_get_xstats (dpdk_device_t *xd, u32 thread_index) +dpdk_get_xstats (dpdk_device_t *xd, clib_thread_index_t thread_index) { int ret; int i; @@ -101,7 +101,7 @@ static inline void dpdk_update_counters (dpdk_device_t * xd, f64 now) { vnet_main_t *vnm = vnet_get_main (); - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); xd->time_last_stats_update = now ? 
now : xd->time_last_stats_update; clib_memcpy_fast (&xd->last_stats, &xd->stats, sizeof (xd->last_stats)); diff --git a/src/plugins/dpdk/device/driver.c b/src/plugins/dpdk/device/driver.c index 2fde041684c..469a4b5de2b 100644 --- a/src/plugins/dpdk/device/driver.c +++ b/src/plugins/dpdk/device/driver.c @@ -113,6 +113,7 @@ static dpdk_driver_t dpdk_drivers[] = { .drivers = DPDK_DRIVERS ({ "net_ena", "AWS ENA VF" }), .interface_name_prefix = "VirtualFunctionEthernet", .enable_rxq_int = 1, + .need_tx_prepare = 1, }, { .drivers = DPDK_DRIVERS ({ "net_vmxnet3", "VMware VMXNET3" }), diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c index fd301da8ea5..f0199c929cc 100644 --- a/src/plugins/dpdk/device/format.c +++ b/src/plugins/dpdk/device/format.c @@ -117,8 +117,8 @@ _ (TX_MACSEC, "TX MACSEC") \ _ (TX_OUTER_IPV4, "TX outer IPV4") \ _ (TX_OUTER_IPV6, "TX outer IPV6") \ - _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \ - _ (TX_OUTER_UDP_CKSUM, "TX outer UDP cksum") \ + _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of TX pkt. computed by NIC") \ + _ (TX_OUTER_UDP_CKSUM, "Outer UDP cksum of TX pkt. computed by NIC") \ _ (TX_QINQ, "TX QINQ") \ _ (TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \ _ (TX_SEC_OFFLOAD, "TX SEC OFFLOAD") \ @@ -133,7 +133,7 @@ _ (TX_TUNNEL_UDP, "TX tunnel UDP") \ _ (TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \ _ (TX_TUNNEL_VXLAN_GPE, "TX tunnel VXLAN GPE") \ - _ (TX_UDP_CKSUM, "TX UDP cksum") \ + _ (TX_UDP_CKSUM, "UDP cksum of TX pkt. computed by NIC") \ _ (TX_UDP_SEG, "TX UDP SEG") \ _ (TX_VLAN, "TX packet is a 802.1q VLAN packet") diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index aaa2c1f4a68..83c2614e97e 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -18,7 +18,7 @@ #include <vppinfra/format.h> #include <vppinfra/bitmap.h> #include <vppinfra/linux/sysfs.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vlib/log.h> #include <vnet/vnet.h> @@ -390,6 +390,8 @@ dpdk_lib_init (dpdk_main_t * dm) dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM, 1); if (dr->int_unmaskable) dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INT_UNMASKABLE, 1); + if (dr->need_tx_prepare) + dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_TX_PREPARE, 1); } else dpdk_log_warn ("[%u] unknown driver '%s'", port_id, di.driver_name); diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index ca1690b708f..2f4c10ebf46 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -340,8 +340,9 @@ dpdk_process_lro_offload (dpdk_device_t *xd, dpdk_per_thread_data_t *ptd, } static_always_inline u32 -dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, - vlib_node_runtime_t * node, u32 thread_index, u16 queue_id) +dpdk_device_input (vlib_main_t *vm, dpdk_main_t *dm, dpdk_device_t *xd, + vlib_node_runtime_t *node, clib_thread_index_t thread_index, + u16 queue_id) { uword n_rx_packets = 0, n_rx_bytes; dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, queue_id); @@ -543,7 +544,7 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, dpdk_device_t *xd; uword n_rx_packets = 0; vnet_hw_if_rxq_poll_vector_t *pv; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; /* * Poll all devices on this cpu for input/interrupts. 
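The dpdk/device hunks above add a DPDK_DEVICE_FLAG_TX_PREPARE flag (set from the driver's need_tx_prepare, currently only net_ena) and, when offload flags were actually seen on the burst, run rte_eth_tx_prepare() ahead of rte_eth_tx_burst(), then bulk-free anything that was neither prepared nor transmitted. Below is a minimal standalone sketch of that prepare-then-burst pattern using only stock DPDK APIs; the helper name and the simplified drop handling are illustrative, not the plugin's exact code, which also updates interface error counters and trace state.

/* prepare-then-burst: validate offloaded mbufs first, transmit what passed,
 * drop the remainder in one bulk free */
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint16_t
tx_with_prepare (uint16_t port_id, uint16_t queue_id, struct rte_mbuf **mbufs,
                 uint16_t n_pkts, int offloads_requested)
{
  uint16_t n_prep = n_pkts;
  uint16_t n_sent;

  /* only pay for tx_prepare when offload flags were actually set */
  if (offloads_requested)
    n_prep = rte_eth_tx_prepare (port_id, queue_id, mbufs, n_pkts);

  /* transmit the packets that passed preparation */
  n_sent = rte_eth_tx_burst (port_id, queue_id, mbufs, n_prep);

  /* packets [n_sent, n_pkts) were not transmitted (some of them may have
   * failed preparation); drop them all in one call */
  if (n_sent < n_pkts)
    rte_pktmbuf_free_bulk (&mbufs[n_sent], n_pkts - n_sent);

  return n_sent;
}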
diff --git a/src/plugins/geneve/decap.c b/src/plugins/geneve/decap.c index c64121e2829..3a1de2af217 100644 --- a/src/plugins/geneve/decap.c +++ b/src/plugins/geneve/decap.c @@ -79,7 +79,7 @@ geneve_input (vlib_main_t * vm, geneve4_tunnel_key_t last_key4; geneve6_tunnel_key_t last_key6; u32 pkts_decapsulated = 0; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; diff --git a/src/plugins/geneve/encap.c b/src/plugins/geneve/encap.c index 609da2218cf..581c47983df 100644 --- a/src/plugins/geneve/encap.c +++ b/src/plugins/geneve/encap.c @@ -60,7 +60,7 @@ geneve_encap_inline (vlib_main_t * vm, vnet_interface_main_t *im = &vnm->interface_main; u32 pkts_encapsulated = 0; u16 old_l0 = 0, old_l1 = 0; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; u32 sw_if_index0 = ~0, sw_if_index1 = ~0; u32 next0 = 0, next1 = 0; diff --git a/src/plugins/gtpu/gtpu_decap.c b/src/plugins/gtpu/gtpu_decap.c index 093d85ef13c..4e0f8bf8e16 100644 --- a/src/plugins/gtpu/gtpu_decap.c +++ b/src/plugins/gtpu/gtpu_decap.c @@ -85,7 +85,7 @@ gtpu_input (vlib_main_t * vm, gtpu4_tunnel_key_t last_key4; gtpu6_tunnel_key_t last_key6; u32 pkts_decapsulated = 0; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; if (is_ip4) @@ -1838,7 +1838,7 @@ gtpu_flow_input (vlib_main_t * vm, vnet_main_t * vnm = gtm->vnet_main; vnet_interface_main_t * im = &vnm->interface_main; u32 pkts_decapsulated = 0; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; u8 ip_err0, ip_err1, udp_err0, udp_err1, csum_err0, csum_err1; diff --git a/src/plugins/gtpu/gtpu_encap.c b/src/plugins/gtpu/gtpu_encap.c index 2c3c46a4be2..1caca1da915 100644 --- a/src/plugins/gtpu/gtpu_encap.c +++ b/src/plugins/gtpu/gtpu_encap.c @@ -67,7 +67,7 @@ gtpu_encap_inline (vlib_main_t * vm, vnet_interface_main_t * im = &vnm->interface_main; u32 pkts_encapsulated = 0; u16 old_l0 = 0, old_l1 = 0, old_l2 = 0, old_l3 = 0; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; u32 sw_if_index0 = 0, sw_if_index1 = 0, sw_if_index2 = 0, sw_if_index3 = 0; u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0; diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt index eae100949d4..3e80a84aae4 100644 --- a/src/plugins/hs_apps/CMakeLists.txt +++ b/src/plugins/hs_apps/CMakeLists.txt @@ -71,7 +71,11 @@ if(VPP_BUILD_VCL_TESTS) "vcl/${test}.c" vcl/vcl_test_protos.c LINK_LIBRARIES vppcom pthread ${EPOLL_LIB} - NO_INSTALL ) endforeach() + + add_vpp_executable(vcl_test_cl_udp SOURCES "vcl/vcl_test_cl_udp.c" + LINK_LIBRARIES vppcom pthread ${EPOLL_LIB} + NO_INSTALL + ) endif(VPP_BUILD_VCL_TESTS) diff --git a/src/plugins/hs_apps/echo_client.c b/src/plugins/hs_apps/echo_client.c index ff5a3bd6b3c..1c0e49c716b 100644 --- a/src/plugins/hs_apps/echo_client.c +++ b/src/plugins/hs_apps/echo_client.c @@ -53,7 +53,7 @@ signal_evt_to_cli (int code) } static inline ec_worker_t * -ec_worker_get (u32 thread_index) +ec_worker_get (clib_thread_index_t thread_index) { return vec_elt_at_index (ec_main.wrk, thread_index); } 
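
The u32 to clib_thread_index_t conversions above (and in the hunks that follow) all preserve the same per-thread worker lookup shape: each app keeps a vector of worker state indexed by the VPP thread index, so the data path touches only its own slot and needs no locking. A rough sketch of that shape, where the example_* names are invented for illustration and a plain unsigned int stands in for the vppinfra typedef:

/* Hedged illustration only; not code from this patch. */
typedef unsigned int clib_thread_index_t;

typedef struct
{
  clib_thread_index_t thread_index; /* owning thread */
  unsigned int n_active_sessions;   /* example per-thread state */
} example_worker_t;

static example_worker_t *example_workers; /* one slot per VPP thread */

static inline example_worker_t *
example_worker_get (clib_thread_index_t thread_index)
{
  /* same shape as ec_worker_get()/es_worker_get(): each thread reads and
   * writes only its own element, indexed by vm->thread_index */
  return &example_workers[thread_index];
}
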
@@ -79,21 +79,29 @@ ec_session_get (ec_worker_t *wrk, u32 ec_index) static void send_data_chunk (ec_main_t *ecm, ec_session_t *es) { + const u64 max_burst = 128000; u8 *test_data = ecm->connect_test_data; int test_buf_len, test_buf_offset, rv; + u64 bytes_to_send; u32 bytes_this_chunk; + svm_fifo_t *f = es->tx_fifo; test_buf_len = vec_len (test_data); ASSERT (test_buf_len > 0); + if (ecm->run_time) + bytes_to_send = clib_min (svm_fifo_max_enqueue_prod (f), max_burst); + else + bytes_to_send = clib_min (es->bytes_to_send, max_burst); + if (ecm->throughput) + bytes_to_send = clib_min (es->bytes_paced_current, bytes_to_send); test_buf_offset = es->bytes_sent % test_buf_len; - bytes_this_chunk = - clib_min (test_buf_len - test_buf_offset, es->bytes_to_send); + + bytes_this_chunk = clib_min (test_buf_len - test_buf_offset, bytes_to_send); if (!es->is_dgram) { if (ecm->no_copy) { - svm_fifo_t *f = es->tx_fifo; rv = clib_min (svm_fifo_max_enqueue_prod (f), bytes_this_chunk); svm_fifo_enqueue_nocopy (f, rv); session_program_tx_io_evt (es->tx_fifo->vpp_sh, SESSION_IO_EVT_TX); @@ -105,7 +113,6 @@ send_data_chunk (ec_main_t *ecm, ec_session_t *es) } else { - svm_fifo_t *f = es->tx_fifo; u32 max_enqueue = svm_fifo_max_enqueue_prod (f); if (max_enqueue < sizeof (session_dgram_hdr_t)) @@ -136,7 +143,8 @@ send_data_chunk (ec_main_t *ecm, ec_session_t *es) else { bytes_this_chunk = clib_min (bytes_this_chunk, max_enqueue); - bytes_this_chunk = clib_min (bytes_this_chunk, 1460); + if (!ecm->throughput) + bytes_this_chunk = clib_min (bytes_this_chunk, 1460); rv = app_send_dgram ((app_session_t *) es, test_data + test_buf_offset, bytes_this_chunk, 0); @@ -147,8 +155,16 @@ send_data_chunk (ec_main_t *ecm, ec_session_t *es) if (rv > 0) { /* Account for it... */ - es->bytes_to_send -= rv; es->bytes_sent += rv; + if (ecm->run_time) + es->bytes_to_receive += rv; + else + es->bytes_to_send -= rv; + if (ecm->throughput) + { + es->bytes_paced_current -= rv; + es->bytes_paced_current += es->bytes_paced_target; + } if (ecm->cfg.verbose) { @@ -228,6 +244,7 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { u32 *conn_indices, *conns_this_batch, nconns_this_batch; int thread_index = vm->thread_index, i, delete_session; + f64 time_now; ec_main_t *ecm = &ec_main; ec_worker_t *wrk; ec_session_t *es; @@ -266,7 +283,7 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { ecm->repeats++; ecm->prev_conns = vec_len (conns_this_batch); - if (ecm->repeats == 500000) + if (ecm->repeats == 500000 && !ecm->run_time) { ec_err ("stuck clients"); } @@ -277,18 +294,23 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) ecm->repeats = 0; } + time_now = vlib_time_now (ecm->vlib_main); /* * Handle connections in this batch */ for (i = 0; i < vec_len (conns_this_batch); i++) { es = ec_session_get (wrk, conns_this_batch[i]); + if (ecm->throughput && time_now < es->time_to_send) + continue; delete_session = 1; if (es->bytes_to_send > 0) { send_data_chunk (ecm, es); + if (ecm->throughput) + es->time_to_send += ecm->pacing_window_len; delete_session = 0; } @@ -297,7 +319,7 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) delete_session = 0; } - if (PREDICT_FALSE (delete_session == 1)) + if (PREDICT_FALSE (delete_session == 1) || ecm->timer_expired) { clib_atomic_fetch_add (&ecm->tx_total, es->bytes_sent); clib_atomic_fetch_add (&ecm->rx_total, es->bytes_received); @@ -326,6 +348,8 @@ ec_node_fn (vlib_main_t *vm, 
vlib_node_runtime_t *node, vlib_frame_t *frame) signal_evt_to_cli (EC_CLI_TEST_DONE); } } + if (ecm->throughput) + time_now = vlib_time_now (vm); } wrk->conn_indices = conn_indices; @@ -356,6 +380,7 @@ ec_reset_runtime_config (ec_main_t *ecm) ecm->tls_engine = CRYPTO_ENGINE_OPENSSL; ecm->no_copy = 0; ecm->run_test = EC_STARTING; + ecm->timer_expired = false; ecm->ready_connections = 0; ecm->connect_conn_index = 0; ecm->rx_total = 0; @@ -368,6 +393,9 @@ ec_reset_runtime_config (ec_main_t *ecm) ecm->attach_flags = 0; ecm->syn_timeout = 20.0; ecm->test_timeout = 20.0; + ecm->run_time = 0; + ecm->throughput = 0; + ecm->pacing_window_len = 1; vec_free (ecm->connect_uri); } @@ -474,7 +502,8 @@ ec_cleanup (ec_main_t *ecm) vec_free (ecm->connect_uri); vec_free (ecm->appns_id); - + if (ecm->throughput) + ecm->pacing_window_len = 1; if (ecm->barrier_acq_needed) vlib_worker_thread_barrier_sync (ecm->vlib_main); } @@ -565,7 +594,7 @@ quic_ec_session_connected_callback (u32 app_index, u32 api_context, ec_main_t *ecm = &ec_main; ec_session_t *es; ec_worker_t *wrk; - u32 thread_index; + clib_thread_index_t thread_index; if (PREDICT_FALSE (api_context == HS_CTRL_HANDLE)) return ec_ctrl_session_connected_callback (s); @@ -616,13 +645,48 @@ quic_ec_session_connected_callback (u32 app_index, u32 api_context, return 0; } +static void +ec_calc_tput (ec_main_t *ecm) +{ + vlib_main_t *vm = vlib_get_main (); + ec_worker_t *wrk; + ec_session_t *sess; + f64 pacing_base; + u64 bytes_paced_target; + /* periodic writes larger than this clog up the fifo */ + const u64 target_size_threshold = 4344; + + /* find a suitable pacing window length & data chunk size */ + bytes_paced_target = + ecm->throughput * ecm->pacing_window_len / ecm->n_clients; + while (bytes_paced_target > target_size_threshold) + { + ecm->pacing_window_len /= 2; + bytes_paced_target /= 2; + } + + /* order sessions to shoot out data sequentially */ + pacing_base = vlib_time_now (vm) - ecm->pacing_window_len; + vec_foreach (wrk, ecm->wrk) + { + vec_foreach (sess, wrk->sessions) + { + sess->time_to_send = + pacing_base + ecm->pacing_window_len / ecm->n_clients; + pacing_base = sess->time_to_send; + sess->bytes_paced_target = bytes_paced_target; + sess->bytes_paced_current = bytes_paced_target; + } + } +} + static int ec_session_connected_callback (u32 app_index, u32 api_context, session_t *s, session_error_t err) { ec_main_t *ecm = &ec_main; ec_session_t *es; - u32 thread_index; + clib_thread_index_t thread_index; ec_worker_t *wrk; if (PREDICT_FALSE (ecm->run_test != EC_STARTING)) @@ -656,12 +720,16 @@ ec_session_connected_callback (u32 app_index, u32 api_context, session_t *s, es->bytes_to_receive = ecm->echo_bytes ? ecm->bytes_to_send : 0ULL; es->vpp_session_handle = session_handle (s); es->vpp_session_index = s->session_index; + es->bytes_paced_target = ~0; + es->bytes_paced_current = ~0; s->opaque = es->session_index; vec_add1 (wrk->conn_indices, es->session_index); clib_atomic_fetch_add (&ecm->ready_connections, 1); if (ecm->ready_connections == ecm->expected_connections) { + if (ecm->throughput) + ec_calc_tput (ecm); ecm->run_test = EC_RUNNING; /* Signal the CLI process that the action is starting... 
*/ signal_evt_to_cli (EC_CLI_CONNECTS_DONE); @@ -1072,8 +1140,8 @@ ec_command_fn (vlib_main_t *vm, unformat_input_t *input, ec_main_t *ecm = &ec_main; uword *event_data = 0, event_type; clib_error_t *error = 0; - int rv, had_config = 1; - u64 tmp, total_bytes; + int rv, timed_run_conflict = 0, had_config = 1; + u64 total_bytes; f64 delta; if (ecm->test_client_attached) @@ -1099,17 +1167,15 @@ ec_command_fn (vlib_main_t *vm, unformat_input_t *input, ; else if (unformat (line_input, "quic-streams %d", &ecm->quic_streams)) ; - else if (unformat (line_input, "mbytes %lld", &tmp)) - ecm->bytes_to_send = tmp << 20; - else if (unformat (line_input, "gbytes %lld", &tmp)) - ecm->bytes_to_send = tmp << 30; else if (unformat (line_input, "bytes %U", unformat_memory_size, &ecm->bytes_to_send)) - ; + timed_run_conflict++; else if (unformat (line_input, "test-timeout %f", &ecm->test_timeout)) ; else if (unformat (line_input, "syn-timeout %f", &ecm->syn_timeout)) ; + else if (unformat (line_input, "run-time %f", &ecm->run_time)) + ; else if (unformat (line_input, "echo-bytes")) ecm->echo_bytes = 1; else if (unformat (line_input, "fifo-size %U", unformat_memory_size, @@ -1121,6 +1187,9 @@ ec_command_fn (vlib_main_t *vm, unformat_input_t *input, else if (unformat (line_input, "private-segment-size %U", unformat_memory_size, &ecm->private_segment_size)) ; + else if (unformat (line_input, "throughput %U", unformat_memory_size, + &ecm->throughput)) + ; else if (unformat (line_input, "preallocate-fifos")) ecm->prealloc_fifos = 1; else if (unformat (line_input, "preallocate-sessions")) @@ -1153,6 +1222,9 @@ ec_command_fn (vlib_main_t *vm, unformat_input_t *input, } } + if (timed_run_conflict && ecm->run_time) + return clib_error_return (0, "failed: invalid arguments for a timed run!"); + parse_config: ecm->cfg.num_test_sessions = ecm->expected_connections = @@ -1237,12 +1309,22 @@ parse_config: clib_error_return (0, "failed: unexpected event(2): %d", event_type); goto stop_test; } + /* Testing officially starts now */ + ecm->test_start_time = vlib_time_now (ecm->vlib_main); + ec_cli ("Test started at %.6f", ecm->test_start_time); + + /* + * If a timed run, wait and expire timer + */ + if (ecm->run_time) + { + vlib_process_suspend (vm, ecm->run_time); + ec_main.timer_expired = true; + } /* * Wait for the sessions to finish or test_timeout seconds pass */ - ecm->test_start_time = vlib_time_now (ecm->vlib_main); - ec_cli ("Test started at %.6f", ecm->test_start_time); vlib_process_wait_for_event_or_clock (vm, ecm->test_timeout); event_type = vlib_process_get_events (vm, &event_data); switch (event_type) @@ -1336,11 +1418,11 @@ cleanup: VLIB_CLI_COMMAND (ec_command, static) = { .path = "test echo clients", .short_help = - "test echo clients [nclients %d][[m|g]bytes <bytes>]" - "[test-timeout <time>][syn-timeout <time>][echo-bytes][fifo-size <size>]" + "test echo clients [nclients %d][bytes <bytes>[m|g]][test-timeout <time>]" + "[run-time <time>][syn-timeout <time>][echo-bytes][fifo-size <size>]" "[private-segment-count <count>][private-segment-size <bytes>[m|g]]" "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]" - "[uri <tcp://ip/port>][test-bytes][verbose]", + "[throughput <bytes>[m|g]][uri <tcp://ip/port>][test-bytes][verbose]", .function = ec_command_fn, .is_mp_safe = 1, }; diff --git a/src/plugins/hs_apps/echo_client.h b/src/plugins/hs_apps/echo_client.h index 5868c3652ce..d928a4e936f 100644 --- a/src/plugins/hs_apps/echo_client.h +++ b/src/plugins/hs_apps/echo_client.h @@ -29,12 +29,15 @@ 
typedef struct ec_session_ foreach_app_session_field #undef _ u32 vpp_session_index; - u32 thread_index; + clib_thread_index_t thread_index; u64 bytes_to_send; u64 bytes_sent; u64 bytes_to_receive; u64 bytes_received; u64 vpp_session_handle; + f64 time_to_send; + u64 bytes_paced_target; + u64 bytes_paced_current; } ec_session_t; typedef struct ec_worker_ @@ -45,7 +48,7 @@ typedef struct ec_worker_ u32 *conn_indices; /**< sessions handled by worker */ u32 *conns_this_batch; /**< sessions handled in batch */ svm_msg_q_t *vpp_event_queue; /**< session layer worker mq */ - u32 thread_index; /**< thread index for worker */ + clib_thread_index_t thread_index; /**< thread index for worker */ } ec_worker_t; typedef struct @@ -57,6 +60,7 @@ typedef struct volatile u64 rx_total; volatile u64 tx_total; volatile int run_test; /**< Signal start of test */ + volatile bool timer_expired; /**< Signal end of timed test */ f64 syn_start_time; f64 test_start_time; @@ -64,6 +68,8 @@ typedef struct u32 prev_conns; u32 repeats; + f64 + pacing_window_len; /**< Time between data chunk sends when limiting tput */ u32 connect_conn_index; /**< Connects attempted progress */ /* @@ -88,6 +94,7 @@ typedef struct u32 connections_per_batch; /**< Connections to rx/tx at once */ u32 private_segment_count; /**< Number of private fifo segs */ u64 private_segment_size; /**< size of private fifo segs */ + u64 throughput; /**< Target bytes per second */ u32 tls_engine; /**< TLS engine mbedtls/openssl */ u32 no_copy; /**< Don't memcpy data to tx fifo */ u32 quic_streams; /**< QUIC streams per connection */ @@ -97,6 +104,7 @@ typedef struct u64 appns_secret; /**< App namespace secret */ f64 syn_timeout; /**< Test syn timeout (s) */ f64 test_timeout; /**< Test timeout (s) */ + f64 run_time; /**< Length of a test (s) */ /* * Flags diff --git a/src/plugins/hs_apps/echo_server.c b/src/plugins/hs_apps/echo_server.c index dc303e2f83a..61b86769768 100644 --- a/src/plugins/hs_apps/echo_server.c +++ b/src/plugins/hs_apps/echo_server.c @@ -40,7 +40,7 @@ typedef struct es_session_t *sessions; u8 *rx_buf; /**< Per-thread RX buffer */ svm_msg_q_t *vpp_event_queue; - u32 thread_index; + clib_thread_index_t thread_index; } es_worker_t; typedef struct @@ -87,7 +87,7 @@ echo_server_main_t echo_server_main; #define es_cli(_fmt, _args...) 
vlib_cli_output (vm, _fmt, ##_args) static inline es_worker_t * -es_worker_get (u32 thread_index) +es_worker_get (clib_thread_index_t thread_index) { return vec_elt_at_index (echo_server_main.wrk, thread_index); } @@ -277,7 +277,7 @@ es_wrk_cleanup_sessions (void *args) { echo_server_main_t *esm = &echo_server_main; vnet_disconnect_args_t _a = {}, *a = &_a; - u32 thread_index = pointer_to_uword (args); + clib_thread_index_t thread_index = pointer_to_uword (args); es_session_t *es; es_worker_t *wrk; @@ -373,7 +373,7 @@ echo_server_rx_callback (session_t * s) int actual_transfer; svm_fifo_t *tx_fifo, *rx_fifo; echo_server_main_t *esm = &echo_server_main; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); es_worker_t *wrk; es_session_t *es; diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c index 531e2750c1e..40acf6a1635 100644 --- a/src/plugins/hs_apps/http_cli.c +++ b/src/plugins/hs_apps/http_cli.c @@ -37,7 +37,7 @@ typedef struct typedef struct { u32 hs_index; - u32 thread_index; + clib_thread_index_t thread_index; u64 node_index; u8 plain_text; u8 *buf; @@ -47,7 +47,7 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 session_index; - u32 thread_index; + clib_thread_index_t thread_index; u8 *tx_buf; u32 tx_offset; u32 vpp_session_index; @@ -85,7 +85,7 @@ typedef struct static hcs_main_t hcs_main; static hcs_session_t * -hcs_session_alloc (u32 thread_index) +hcs_session_alloc (clib_thread_index_t thread_index) { hcs_main_t *hcm = &hcs_main; hcs_session_t *hs; @@ -98,7 +98,7 @@ hcs_session_alloc (u32 thread_index) } static hcs_session_t * -hcs_session_get (u32 thread_index, u32 hs_index) +hcs_session_get (clib_thread_index_t thread_index, u32 hs_index) { hcs_main_t *hcm = &hcs_main; if (pool_is_free_index (hcm->sessions[thread_index], hs_index)) diff --git a/src/plugins/hs_apps/http_client.c b/src/plugins/hs_apps/http_client.c index 20271fc4aea..578d21140f1 100644 --- a/src/plugins/hs_apps/http_client.c +++ b/src/plugins/hs_apps/http_client.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: Apache-2.0 - * Copyright(c) 2024 Cisco Systems, Inc. + * Copyright(c) 2025 Cisco Systems, Inc. 
*/ #include <vnet/session/application.h> @@ -12,29 +12,36 @@ typedef struct { + u64 req_per_wrk; + u64 request_count; + f64 start, end; + f64 elapsed_time; +} hc_stats_t; + +typedef struct +{ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 session_index; - u32 thread_index; - u32 vpp_session_index; + clib_thread_index_t thread_index; u64 to_recv; u8 is_closed; + hc_stats_t stats; + u64 data_offset; + u8 *resp_headers; + u8 *http_response; + u8 *response_status; } hc_session_t; typedef struct { - u64 request_count; - f64 start, end; - f64 elapsed_time; -} hc_stats_t; - -typedef struct -{ hc_session_t *sessions; - u32 thread_index; + clib_thread_index_t thread_index; vlib_main_t *vlib_main; u8 *headers_buf; http_headers_ctx_t req_headers; http_msg_t msg; + u32 session_index; + bool has_common_headers; } hc_worker_t; typedef struct @@ -52,11 +59,7 @@ typedef struct session_endpoint_cfg_t connect_sep; u8 *target; u8 *data; - u64 data_offset; hc_worker_t *wrk; - u8 *resp_headers; - u8 *http_response; - u8 *response_status; hc_http_header_t *custom_header; u8 is_file; u8 use_ptr; @@ -67,6 +70,19 @@ typedef struct u64 repeat_count; f64 duration; bool repeat; + bool multi_session; + u32 done_count; + u32 connected_counter; + u32 worker_index; + u32 max_sessions; + u32 private_segment_size; + u32 prealloc_fifos; + u32 fifo_size; + u8 *appns_id; + u64 appns_secret; + clib_spinlock_t lock; + bool was_transport_closed; + u32 ckpair_index; } hc_main_t; typedef enum @@ -82,26 +98,19 @@ static hc_main_t hc_main; static hc_stats_t hc_stats; static inline hc_worker_t * -hc_worker_get (u32 thread_index) +hc_worker_get (clib_thread_index_t thread_index) { return &hc_main.wrk[thread_index]; } static inline hc_session_t * -hc_session_get (u32 session_index, u32 thread_index) +hc_session_get (u32 session_index, clib_thread_index_t thread_index) { hc_worker_t *wrk = hc_worker_get (thread_index); wrk->vlib_main = vlib_get_main_by_index (thread_index); return pool_elt_at_index (wrk->sessions, session_index); } -static void -hc_ho_session_free (u32 hs_index) -{ - hc_worker_t *wrk = hc_worker_get (0); - pool_put_index (wrk->sessions, hs_index); -} - static hc_session_t * hc_session_alloc (hc_worker_t *wrk) { @@ -115,14 +124,14 @@ hc_session_alloc (hc_worker_t *wrk) } static int -hc_request (session_t *s, session_error_t err) +hc_request (session_t *s, hc_worker_t *wrk, hc_session_t *hc_session, + session_error_t err) { hc_main_t *hcm = &hc_main; u64 to_send; u32 n_enq; u8 n_segs; int rv; - hc_worker_t *wrk = hc_worker_get (s->thread_index); if (hcm->use_ptr) { @@ -166,7 +175,7 @@ hc_request (session_t *s, session_error_t err) rv = svm_fifo_enqueue (s->tx_fifo, n_enq, hcm->data); if (rv < to_send) { - hcm->data_offset = (rv > 0) ? rv : 0; + hc_session->data_offset = (rv > 0) ? 
rv : 0; svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); } } @@ -185,9 +194,8 @@ hc_session_connected_callback (u32 app_index, u32 hc_session_index, { hc_main_t *hcm = &hc_main; hc_worker_t *wrk; - u32 new_hc_index; + hc_session_t *hc_session; hc_http_header_t *header; - HTTP_DBG (1, "ho hc_index: %d", hc_session_index); if (err) { @@ -199,68 +207,89 @@ hc_session_connected_callback (u32 app_index, u32 hc_session_index, } wrk = hc_worker_get (s->thread_index); - hc_session_t *hc_session, *new_hc_session = hc_session_alloc (wrk); - hc_session = hc_session_get (hc_session_index, 0); - new_hc_index = new_hc_session->session_index; - clib_memcpy_fast (new_hc_session, hc_session, sizeof (*hc_session)); - new_hc_session->session_index = new_hc_index; - new_hc_session->thread_index = s->thread_index; - new_hc_session->vpp_session_index = s->session_index; - HTTP_DBG (1, "new hc_index: %d", new_hc_session->session_index); - s->opaque = new_hc_index; + hc_session = hc_session_alloc (wrk); + clib_spinlock_lock_if_init (&hcm->lock); + hcm->connected_counter++; + clib_spinlock_unlock_if_init (&hcm->lock); - if (hcm->req_method == HTTP_REQ_POST) + hc_session->thread_index = s->thread_index; + s->opaque = hc_session->session_index; + wrk->session_index = hc_session->session_index; + + if (hcm->multi_session) { - if (hcm->is_file) - http_add_header ( - &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); - else - http_add_header ( - &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED)); + hc_session->stats.req_per_wrk = hcm->repeat_count / hcm->max_sessions; + clib_spinlock_lock_if_init (&hcm->lock); + /* add remaining requests to the first connected session */ + if (hcm->connected_counter == 1) + { + hc_session->stats.req_per_wrk += + hcm->repeat_count % hcm->max_sessions; + } + clib_spinlock_unlock_if_init (&hcm->lock); } - http_add_header (&wrk->req_headers, HTTP_HEADER_ACCEPT, "*", 1); - - vec_foreach (header, hcm->custom_header) - http_add_custom_header ( - &wrk->req_headers, (const char *) header->name, vec_len (header->name), - (const char *) header->value, vec_len (header->value)); - - clib_warning ("%U", format_http_bytes, wrk->headers_buf, - wrk->req_headers.tail_offset); - wrk->msg.method_type = hcm->req_method; - if (hcm->req_method == HTTP_REQ_POST) - wrk->msg.data.body_len = vec_len (hcm->data); else - wrk->msg.data.body_len = 0; - - wrk->msg.type = HTTP_MSG_REQUEST; - /* request target */ - wrk->msg.data.target_path_len = vec_len (hcm->target); - /* custom headers */ - wrk->msg.data.headers_len = wrk->req_headers.tail_offset; - /* total length */ - wrk->msg.data.len = wrk->msg.data.target_path_len + - wrk->msg.data.headers_len + wrk->msg.data.body_len; - - if (hcm->use_ptr) { - wrk->msg.data.type = HTTP_MSG_DATA_PTR; + hc_session->stats.req_per_wrk = hcm->repeat_count; + hcm->worker_index = s->thread_index; } - else + + if (!wrk->has_common_headers) { - wrk->msg.data.type = HTTP_MSG_DATA_INLINE; - wrk->msg.data.target_path_offset = 0; - wrk->msg.data.headers_offset = wrk->msg.data.target_path_len; - wrk->msg.data.body_offset = - wrk->msg.data.headers_offset + wrk->msg.data.headers_len; + wrk->has_common_headers = true; + if (hcm->req_method == HTTP_REQ_POST) + { + if (hcm->is_file) + http_add_header ( + &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); + else + http_add_header (&wrk->req_headers, 
HTTP_HEADER_CONTENT_TYPE, + http_content_type_token ( + HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED)); + } + http_add_header (&wrk->req_headers, HTTP_HEADER_ACCEPT, "*", 1); + + vec_foreach (header, hcm->custom_header) + http_add_custom_header (&wrk->req_headers, (const char *) header->name, + vec_len (header->name), + (const char *) header->value, + vec_len (header->value)); + + wrk->msg.method_type = hcm->req_method; + if (hcm->req_method == HTTP_REQ_POST) + wrk->msg.data.body_len = vec_len (hcm->data); + else + wrk->msg.data.body_len = 0; + + wrk->msg.type = HTTP_MSG_REQUEST; + /* request target */ + wrk->msg.data.target_path_len = vec_len (hcm->target); + /* custom headers */ + wrk->msg.data.headers_len = wrk->req_headers.tail_offset; + /* total length */ + wrk->msg.data.len = wrk->msg.data.target_path_len + + wrk->msg.data.headers_len + wrk->msg.data.body_len; + + if (hcm->use_ptr) + { + wrk->msg.data.type = HTTP_MSG_DATA_PTR; + } + else + { + wrk->msg.data.type = HTTP_MSG_DATA_INLINE; + wrk->msg.data.target_path_offset = 0; + wrk->msg.data.headers_offset = wrk->msg.data.target_path_len; + wrk->msg.data.body_offset = + wrk->msg.data.headers_offset + wrk->msg.data.headers_len; + } } if (hcm->repeat) - hc_stats.start = vlib_time_now (vlib_get_main_by_index (s->thread_index)); + hc_session->stats.start = + vlib_time_now (vlib_get_main_by_index (s->thread_index)); - return hc_request (s, err); + return hc_request (s, wrk, hc_session, err); } static void @@ -275,21 +304,38 @@ hc_session_disconnect_callback (session_t *s) if ((rv = vnet_disconnect_session (a))) clib_warning ("warning: disconnect returned: %U", format_session_error, rv); + clib_spinlock_lock_if_init (&hcm->lock); + hcm->done_count++; + clib_spinlock_unlock_if_init (&hcm->lock); } static void hc_session_transport_closed_callback (session_t *s) { hc_main_t *hcm = &hc_main; - vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index, - HC_TRANSPORT_CLOSED, 0); -} + hc_worker_t *wrk = hc_worker_get (s->thread_index); -static void -hc_ho_cleanup_callback (session_t *s) -{ - HTTP_DBG (1, "ho hc_index: %d:", s->opaque); - hc_ho_session_free (s->opaque); + clib_spinlock_lock_if_init (&hcm->lock); + if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED) + { + hcm->was_transport_closed = true; + } + + /* send an event when all sessions are closed */ + if (hcm->done_count >= hcm->max_sessions) + { + if (hcm->was_transport_closed) + { + vlib_process_signal_event_mt (wrk->vlib_main, hcm->cli_node_index, + HC_TRANSPORT_CLOSED, 0); + } + else + { + vlib_process_signal_event_mt (wrk->vlib_main, hcm->cli_node_index, + HC_REPEAT_DONE, 0); + } + } + clib_spinlock_unlock_if_init (&hcm->lock); } static void @@ -315,20 +361,23 @@ hc_rx_callback (session_t *s) { hc_main_t *hcm = &hc_main; hc_worker_t *wrk = hc_worker_get (s->thread_index); - hc_session_t *hc_session; + hc_session_t *hc_session = hc_session_get (s->opaque, s->thread_index); http_msg_t msg; int rv; + u32 max_deq; session_error_t session_err = 0; int send_err = 0; - hc_session = hc_session_get (s->opaque, s->thread_index); - if (hc_session->is_closed) { clib_warning ("hc_session_index[%d] is closed", s->opaque); return -1; } + max_deq = svm_fifo_max_dequeue_cons (s->rx_fifo); + if (PREDICT_FALSE (max_deq == 0)) + goto done; + if (hc_session->to_recv == 0) { rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); @@ -344,17 +393,20 @@ hc_rx_callback (session_t *s) if (msg.data.headers_len) { - hcm->response_status = - format (0, "%U", format_http_status_code, 
msg.code); + + if (!hcm->repeat) + hc_session->response_status = + format (0, "%U", format_http_status_code, msg.code); + svm_fifo_dequeue_drop (s->rx_fifo, msg.data.headers_offset); - vec_validate (hcm->resp_headers, msg.data.headers_len - 1); - vec_set_len (hcm->resp_headers, msg.data.headers_len); + vec_validate (hc_session->resp_headers, msg.data.headers_len - 1); + vec_set_len (hc_session->resp_headers, msg.data.headers_len); rv = svm_fifo_dequeue (s->rx_fifo, msg.data.headers_len, - hcm->resp_headers); + hc_session->resp_headers); ASSERT (rv == msg.data.headers_len); - HTTP_DBG (1, (char *) format (0, "%v", hcm->resp_headers)); + HTTP_DBG (1, (char *) format (0, "%v", hc_session->resp_headers)); msg.data.body_offset -= msg.data.headers_len + msg.data.headers_offset; } @@ -372,18 +424,18 @@ hc_rx_callback (session_t *s) { goto done; } - vec_validate (hcm->http_response, msg.data.body_len - 1); - vec_reset_length (hcm->http_response); + vec_validate (hc_session->http_response, msg.data.body_len - 1); + vec_reset_length (hc_session->http_response); } - u32 max_deq = svm_fifo_max_dequeue (s->rx_fifo); + max_deq = svm_fifo_max_dequeue (s->rx_fifo); if (!max_deq) { goto done; } u32 n_deq = clib_min (hc_session->to_recv, max_deq); - u32 curr = vec_len (hcm->http_response); - rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hcm->http_response + curr); + u32 curr = vec_len (hc_session->http_response); + rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hc_session->http_response + curr); if (rv < 0) { clib_warning ("app dequeue(n=%d) failed; rv = %d", n_deq, rv); @@ -393,7 +445,7 @@ hc_rx_callback (session_t *s) } ASSERT (rv == n_deq); - vec_set_len (hcm->http_response, curr + n_deq); + vec_set_len (hc_session->http_response, curr + n_deq); ASSERT (hc_session->to_recv >= rv); hc_session->to_recv -= rv; @@ -402,20 +454,19 @@ done: { if (hcm->repeat) { - hc_stats.request_count++; - hc_stats.end = vlib_time_now (wrk->vlib_main); - hc_stats.elapsed_time = hc_stats.end - hc_stats.start; + hc_session->stats.request_count++; + hc_session->stats.end = vlib_time_now (wrk->vlib_main); + hc_session->stats.elapsed_time = + hc_session->stats.end - hc_session->stats.start; - if (hc_stats.elapsed_time >= hcm->duration && - hc_stats.request_count >= hcm->repeat_count) + if (hc_session->stats.elapsed_time >= hcm->duration && + hc_session->stats.request_count >= hc_session->stats.req_per_wrk) { - vlib_process_signal_event_mt ( - wrk->vlib_main, hcm->cli_node_index, HC_REPEAT_DONE, 0); hc_session_disconnect_callback (s); } else { - send_err = hc_request (s, session_err); + send_err = hc_request (s, wrk, hc_session, session_err); if (send_err) clib_warning ("failed to send request, error %d", send_err); } @@ -434,11 +485,13 @@ static int hc_tx_callback (session_t *s) { hc_main_t *hcm = &hc_main; + hc_session_t *hc_session = hc_session_get (s->opaque, s->thread_index); u64 to_send; int rv; - to_send = vec_len (hcm->data) - hcm->data_offset; - rv = svm_fifo_enqueue (s->tx_fifo, to_send, hcm->data + hcm->data_offset); + to_send = vec_len (hcm->data) - hc_session->data_offset; + rv = svm_fifo_enqueue (s->tx_fifo, to_send, + hcm->data + hc_session->data_offset); if (rv <= 0) { @@ -448,7 +501,7 @@ hc_tx_callback (session_t *s) if (rv < to_send) { - hcm->data_offset += rv; + hc_session->data_offset += rv; svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); } @@ -465,7 +518,6 @@ static session_cb_vft_t hc_session_cb_vft = { .session_reset_callback = hc_session_reset_callback, .builtin_app_rx_callback = hc_rx_callback, 
.builtin_app_tx_callback = hc_tx_callback, - .half_open_cleanup_callback = hc_ho_cleanup_callback, }; static clib_error_t * @@ -474,8 +526,13 @@ hc_attach () hc_main_t *hcm = &hc_main; vnet_app_attach_args_t _a, *a = &_a; u64 options[18]; + u32 segment_size = 128 << 20; + vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair; int rv; + if (hcm->private_segment_size) + segment_size = hcm->private_segment_size; + clib_memset (a, 0, sizeof (*a)); clib_memset (options, 0, sizeof (options)); @@ -483,7 +540,20 @@ hc_attach () a->name = format (0, "http_client"); a->session_cb_vft = &hc_session_cb_vft; a->options = options; + a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size; + a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size; + a->options[APP_OPTIONS_RX_FIFO_SIZE] = + hcm->fifo_size ? hcm->fifo_size : 8 << 10; + a->options[APP_OPTIONS_TX_FIFO_SIZE] = + hcm->fifo_size ? hcm->fifo_size : 32 << 10; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; + a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos; + a->options[APP_OPTIONS_TLS_ENGINE] = CRYPTO_ENGINE_OPENSSL; + if (hcm->appns_id) + { + a->namespace_id = hcm->appns_id; + a->options[APP_OPTIONS_NAMESPACE_SECRET] = hcm->appns_secret; + } if ((rv = vnet_application_attach (a))) return clib_error_return (0, "attach returned: %U", format_session_error, @@ -493,6 +563,14 @@ hc_attach () vec_free (a->name); hcm->attached = 1; + clib_memset (ck_pair, 0, sizeof (*ck_pair)); + ck_pair->cert = (u8 *) test_srv_crt_rsa; + ck_pair->key = (u8 *) test_srv_key_rsa; + ck_pair->cert_len = test_srv_crt_rsa_len; + ck_pair->key_len = test_srv_key_rsa_len; + vnet_app_add_cert_key_pair (ck_pair); + hcm->ckpair_index = ck_pair->index; + return 0; } @@ -500,14 +578,19 @@ static int hc_connect_rpc (void *rpc_args) { vnet_connect_args_t *a = rpc_args; - int rv; + int rv = ~0; + hc_main_t *hcm = &hc_main; - rv = vnet_connect (a); - if (rv > 0) - clib_warning (0, "connect returned: %U", format_session_error, rv); + for (u32 i = 0; i < hcm->max_sessions; i++) + { + rv = vnet_connect (a); + if (rv > 0) + clib_warning (0, "connect returned: %U", format_session_error, rv); + } session_endpoint_free_ext_cfgs (&a->sep_ext); vec_free (a); + return rv; } @@ -516,14 +599,10 @@ hc_connect () { hc_main_t *hcm = &hc_main; vnet_connect_args_t *a = 0; - hc_worker_t *wrk; - hc_session_t *hc_session; transport_endpt_ext_cfg_t *ext_cfg; transport_endpt_cfg_http_t http_cfg = { (u32) hcm->timeout, 0 }; - vec_validate (a, 0); clib_memset (a, 0, sizeof (a[0])); - clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep)); a->app_index = hcm->app_index; @@ -531,15 +610,49 @@ hc_connect () &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); - /* allocate http session on main thread */ - wrk = hc_worker_get (0); - hc_session = hc_session_alloc (wrk); - a->api_context = hc_session->session_index; + if (hcm->connect_sep.flags & SESSION_ENDPT_CFG_F_SECURE) + { + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = hcm->ckpair_index; + } session_send_rpc_evt_to_thread_force (transport_cl_thread (), hc_connect_rpc, a); } +static void +hc_get_repeat_stats (vlib_main_t *vm) +{ + hc_main_t *hcm = &hc_main; + hc_worker_t *wrk; + hc_session_t *hc_session; + + if (hcm->repeat) + { + vec_foreach (wrk, hcm->wrk) + { + vec_foreach (hc_session, wrk->sessions) + { + 
hc_stats.request_count += hc_session->stats.request_count; + hc_session->stats.request_count = 0; + if (hc_stats.elapsed_time < hc_session->stats.elapsed_time) + { + hc_stats.elapsed_time = hc_session->stats.elapsed_time; + hc_session->stats.elapsed_time = 0; + } + } + } + vlib_cli_output (vm, + "< %d request(s) in %.6fs\n< avg latency " + "%.4fms\n< %.2f req/sec", + hc_stats.request_count, hc_stats.elapsed_time, + (hc_stats.elapsed_time / hc_stats.request_count) * 1000, + hc_stats.request_count / hc_stats.elapsed_time); + } +} + static clib_error_t * hc_get_event (vlib_main_t *vm) { @@ -548,6 +661,8 @@ hc_get_event (vlib_main_t *vm) clib_error_t *err = NULL; FILE *file_ptr; u64 event_timeout; + hc_worker_t *wrk; + hc_session_t *hc_session; event_timeout = hcm->timeout ? hcm->timeout : 10; if (event_timeout == hcm->duration) @@ -558,20 +673,26 @@ hc_get_event (vlib_main_t *vm) switch (event_type) { case ~0: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: timeout"); break; case HC_CONNECT_FAILED: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: failed to connect"); break; case HC_TRANSPORT_CLOSED: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: transport closed"); break; case HC_GENERIC_ERR: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: unknown"); break; case HC_REPLY_RECEIVED: if (hcm->filename) { + wrk = hc_worker_get (hcm->worker_index); + hc_session = hc_session_get (wrk->session_index, wrk->thread_index); file_ptr = fopen ((char *) format (0, "/tmp/%v", hcm->filename), "a"); if (file_ptr == NULL) @@ -580,26 +701,27 @@ hc_get_event (vlib_main_t *vm) } else { - fprintf (file_ptr, "< %s\n< %s\n< %s", hcm->response_status, - hcm->resp_headers, hcm->http_response); + fprintf (file_ptr, "< %s\n< %s\n< %s", + hc_session->response_status, hc_session->resp_headers, + hc_session->http_response); fclose (file_ptr); vlib_cli_output (vm, "file saved (/tmp/%v)", hcm->filename); } } if (hcm->verbose) - vlib_cli_output (vm, "< %v< %v", hcm->response_status, - hcm->resp_headers); - vlib_cli_output (vm, "\n%v\n", hcm->http_response); + { + wrk = hc_worker_get (hcm->worker_index); + hc_session = hc_session_get (wrk->session_index, wrk->thread_index); + vlib_cli_output (vm, "< %v\n< %v\n%v", hc_session->response_status, + hc_session->resp_headers, + hc_session->http_response); + } break; case HC_REPEAT_DONE: - vlib_cli_output (vm, - "< %d request(s) in %.6fs\n< avg latency " - "%.4fms\n< %.2f req/sec", - hc_stats.request_count, hc_stats.elapsed_time, - (hc_stats.elapsed_time / hc_stats.request_count) * 1000, - hc_stats.request_count / hc_stats.elapsed_time); + hc_get_repeat_stats (vm); break; default: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: unexpected event %d", event_type); break; } @@ -612,15 +734,17 @@ static clib_error_t * hc_run (vlib_main_t *vm) { hc_main_t *hcm = &hc_main; - vlib_thread_main_t *vtm = vlib_get_thread_main (); u32 num_threads; hc_worker_t *wrk; clib_error_t *err; - num_threads = 1 /* main thread */ + vtm->n_threads; + num_threads = 1 /* main thread */ + vlib_num_workers (); + if (vlib_num_workers ()) + clib_spinlock_init (&hcm->lock); vec_validate (hcm->wrk, num_threads - 1); vec_foreach (wrk, hcm->wrk) { + wrk->has_common_headers = false; wrk->thread_index = wrk - hcm->wrk; /* 4k for headers should be enough */ vec_validate (wrk->headers_buf, 4095); @@ -657,10 +781,18 @@ hc_detach () } static void -hcc_worker_cleanup (hc_worker_t *wrk) +hc_worker_cleanup (hc_worker_t *wrk) { - HTTP_DBG (1, 
"worker cleanup"); + hc_session_t *hc_session; + HTTP_DBG (1, "worker and worker sessions cleanup"); + vec_free (wrk->headers_buf); + vec_foreach (hc_session, wrk->sessions) + { + vec_free (hc_session->resp_headers); + vec_free (hc_session->http_response); + vec_free (hc_session->response_status); + } pool_free (wrk->sessions); } @@ -673,16 +805,14 @@ hc_cleanup () hc_http_header_t *header; vec_foreach (wrk, hcm->wrk) - hcc_worker_cleanup (wrk); + hc_worker_cleanup (wrk); vec_free (hcm->uri); vec_free (hcm->target); vec_free (hcm->data); - vec_free (hcm->resp_headers); - vec_free (hcm->http_response); - vec_free (hcm->response_status); vec_free (hcm->wrk); vec_free (hcm->filename); + vec_free (hcm->appns_id); vec_foreach (header, hcm->custom_header) { vec_free (header->name); @@ -698,6 +828,8 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, hc_main_t *hcm = &hc_main; clib_error_t *err = 0; unformat_input_t _line_input, *line_input = &_line_input; + u64 mem_size; + u8 *appns_id = 0; u8 *path = 0; u8 *file_data; hc_http_header_t new_header; @@ -708,7 +840,16 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, hcm->repeat_count = 0; hcm->duration = 0; hcm->repeat = false; + hcm->multi_session = false; + hcm->done_count = 0; + hcm->connected_counter = 0; + hcm->max_sessions = 1; + hcm->prealloc_fifos = 0; + hcm->private_segment_size = 0; + hcm->fifo_size = 0; + hcm->was_transport_closed = false; hc_stats.request_count = 0; + hc_stats.elapsed_time = 0; if (hcm->attached) return clib_error_return (0, "failed: already running!"); @@ -729,8 +870,6 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, ; else if (unformat (line_input, "data %v", &hcm->data)) hcm->is_file = 0; - else if (unformat (line_input, "target %s", &hcm->target)) - ; else if (unformat (line_input, "file %s", &path)) hcm->is_file = 1; else if (unformat (line_input, "use-ptr")) @@ -761,6 +900,29 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, } else if (unformat (line_input, "duration %f", &hcm->duration)) hcm->repeat = true; + else if (unformat (line_input, "sessions %d", &hcm->max_sessions)) + { + hcm->multi_session = true; + if (hcm->max_sessions <= 1) + { + err = clib_error_return (0, "sessions must be > 1"); + goto done; + } + } + else if (unformat (line_input, "prealloc-fifos %d", + &hcm->prealloc_fifos)) + ; + else if (unformat (line_input, "private-segment-size %U", + unformat_memory_size, &mem_size)) + hcm->private_segment_size = mem_size; + else if (unformat (line_input, "fifo-size %U", unformat_memory_size, + &mem_size)) + hcm->fifo_size = mem_size; + else if (unformat (line_input, "appns %_%v%_", &appns_id)) + ; + else if (unformat (line_input, "secret %lu", &hcm->appns_secret)) + ; + else { err = clib_error_return (0, "unknown input `%U'", @@ -774,11 +936,7 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, err = clib_error_return (0, "URI not defined"); goto done; } - if (!hcm->target) - { - err = clib_error_return (0, "target not defined"); - goto done; - } + if (!hcm->data && hcm->req_method == HTTP_REQ_POST) { if (path) @@ -794,6 +952,7 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } } + if (hcm->duration && hcm->repeat_count) { err = clib_error_return ( @@ -801,6 +960,20 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } + if (hcm->multi_session && !hcm->repeat) + { + err = clib_error_return ( + 0, "multiple sessions are only supported with request repeating"); + goto done; + } + + if ((rv = parse_target ((char **) 
&hcm->uri, (char **) &hcm->target))) + { + err = clib_error_return (0, "target parse error: %U", + format_session_error, rv); + goto done; + } + if ((rv = parse_uri ((char *) hcm->uri, &hcm->connect_sep))) { err = @@ -808,6 +981,12 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } + if (hcm->duration >= hcm->timeout) + { + hcm->timeout = hcm->duration + 10; + } + hcm->appns_id = appns_id; + if (hcm->repeat) vlib_cli_output (vm, "Running, please wait..."); @@ -842,10 +1021,12 @@ done: VLIB_CLI_COMMAND (hc_command, static) = { .path = "http client", .short_help = - "[post] uri http://<ip-addr> target <origin-form> " + "[post] uri http://<ip-addr>/<origin-form> " "[data <form-urlencoded> | file <file-path>] [use-ptr] " "[save-to <filename>] [header <Key:Value>] [verbose] " - "[timeout <seconds> (default = 10)] [repeat <count> | duration <seconds>]", + "[timeout <seconds> (default = 10)] [repeat <count> | duration <seconds>] " + "[sessions <# of sessions>] [appns <app-ns> secret <appns-secret>] " + "[fifo-size <nM|G>] [private-segment-size <nM|G>] [prealloc-fifos <n>]", .function = hc_command_fn, .is_mp_safe = 1, }; diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c index 4ee3b49444c..b72d4dfae54 100644 --- a/src/plugins/hs_apps/http_client_cli.c +++ b/src/plugins/hs_apps/http_client_cli.c @@ -31,7 +31,7 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 session_index; - u32 thread_index; + clib_thread_index_t thread_index; u32 rx_offset; u32 vpp_session_index; u64 to_recv; @@ -41,7 +41,7 @@ typedef struct typedef struct { hcc_session_t *sessions; - u32 thread_index; + clib_thread_index_t thread_index; } hcc_worker_t; typedef struct @@ -62,6 +62,8 @@ typedef struct u8 *http_response; u8 *appns_id; u64 appns_secret; + u32 ckpair_index; + u8 need_crypto; } hcc_main_t; typedef enum @@ -74,7 +76,7 @@ typedef enum static hcc_main_t hcc_main; static hcc_worker_t * -hcc_worker_get (u32 thread_index) +hcc_worker_get (clib_thread_index_t thread_index) { return vec_elt_at_index (hcc_main.wrk, thread_index); } @@ -90,7 +92,7 @@ hcc_session_alloc (hcc_worker_t *wrk) } static hcc_session_t * -hcc_session_get (u32 hs_index, u32 thread_index) +hcc_session_get (u32 hs_index, clib_thread_index_t thread_index) { hcc_worker_t *wrk = hcc_worker_get (thread_index); return pool_elt_at_index (wrk->sessions, hs_index); @@ -333,6 +335,7 @@ hcc_attach () vnet_app_attach_args_t _a, *a = &_a; u64 options[18]; u32 segment_size = 128 << 20; + vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair; int rv; if (hcm->private_segment_size) @@ -353,6 +356,7 @@ hcc_attach () hcm->fifo_size ? 
hcm->fifo_size : 32 << 10; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos; + a->options[APP_OPTIONS_TLS_ENGINE] = CRYPTO_ENGINE_OPENSSL; if (hcm->appns_id) { a->namespace_id = hcm->appns_id; @@ -365,6 +369,15 @@ hcc_attach () hcm->app_index = a->app_index; vec_free (a->name); hcm->test_client_attached = 1; + + clib_memset (ck_pair, 0, sizeof (*ck_pair)); + ck_pair->cert = (u8 *) test_srv_crt_rsa; + ck_pair->key = (u8 *) test_srv_key_rsa; + ck_pair->cert_len = test_srv_crt_rsa_len; + ck_pair->key_len = test_srv_key_rsa_len; + vnet_app_add_cert_key_pair (ck_pair); + hcm->ckpair_index = ck_pair->index; + return 0; } @@ -411,6 +424,14 @@ hcc_connect () &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); + if (hcm->need_crypto) + { + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = hcm->ckpair_index; + } + /* allocate http session on main thread */ wrk = hcc_worker_get (0); hs = hcc_session_alloc (wrk); @@ -581,6 +602,8 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, err = clib_error_return (0, "Uri parse error: %d", rv); goto done; } + hcm->need_crypto = hcm->connect_sep.transport_proto == TRANSPORT_PROTO_TLS; + hcm->connect_sep.transport_proto = TRANSPORT_PROTO_HTTP; session_enable_disable_args_t args = { .is_en = 1, .rt_engine_type = diff --git a/src/plugins/hs_apps/http_tps.c b/src/plugins/hs_apps/http_tps.c index 59a0309e363..486d4a525e3 100644 --- a/src/plugins/hs_apps/http_tps.c +++ b/src/plugins/hs_apps/http_tps.c @@ -25,7 +25,7 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 session_index; - u32 thread_index; + clib_thread_index_t thread_index; u64 data_len; u64 data_offset; u32 vpp_session_index; @@ -78,7 +78,7 @@ typedef struct hs_main_ static hts_main_t hts_main; static hts_session_t * -hts_session_alloc (u32 thread_index) +hts_session_alloc (clib_thread_index_t thread_index) { hts_main_t *htm = &hts_main; hts_session_t *hs; @@ -92,7 +92,7 @@ hts_session_alloc (u32 thread_index) } static hts_session_t * -hts_session_get (u32 thread_index, u32 hts_index) +hts_session_get (clib_thread_index_t thread_index, u32 hts_index) { hts_main_t *htm = &hts_main; @@ -345,6 +345,11 @@ hts_session_rx_body (hts_session_t *hs, session_t *ts) ASSERT (rv == n_deq); } hs->left_recv -= n_deq; + if (svm_fifo_needs_deq_ntf (ts->rx_fifo, n_deq)) + { + svm_fifo_clear_deq_ntf (ts->rx_fifo); + session_program_transport_io_evt (ts->handle, SESSION_IO_EVT_RX); + } if (hs->close_threshold > 0) { @@ -620,7 +625,7 @@ hts_start_listen (hts_main_t *htm, session_endpoint_cfg_t *sep, u8 *uri, u8 need_crypto; hts_session_t *hls; session_t *ls; - u32 thread_index = 0; + clib_thread_index_t thread_index = 0; int rv; clib_memset (a, 0, sizeof (*a)); diff --git a/src/plugins/hs_apps/proxy.c b/src/plugins/hs_apps/proxy.c index 1bcc1e85a17..140183d5f59 100644 --- a/src/plugins/hs_apps/proxy.c +++ b/src/plugins/hs_apps/proxy.c @@ -112,7 +112,8 @@ proxy_do_connect (vnet_connect_args_t *a) static void proxy_handle_connects_rpc (void *args) { - u32 thread_index = pointer_to_uword (args), n_connects = 0, n_pending; + clib_thread_index_t thread_index = pointer_to_uword (args), n_connects = 0, + n_pending; proxy_worker_t *wrk; u32 max_connects; @@ -1137,7 +1138,8 @@ active_open_tx_callback (session_t * ao_s) if (sc->pair.is_http) { /* notify 
HTTP transport */ - session_program_rx_io_evt (sc->pair.session_handle); + session_program_transport_io_evt (sc->pair.session_handle, + SESSION_IO_EVT_RX); } else { diff --git a/src/plugins/hs_apps/proxy.h b/src/plugins/hs_apps/proxy.h index f26f4bf0ea2..88b7cdf41ee 100644 --- a/src/plugins/hs_apps/proxy.h +++ b/src/plugins/hs_apps/proxy.h @@ -117,7 +117,7 @@ typedef struct extern proxy_main_t proxy_main; static inline proxy_worker_t * -proxy_worker_get (u32 thread_index) +proxy_worker_get (clib_thread_index_t thread_index) { proxy_main_t *pm = &proxy_main; return vec_elt_at_index (pm->workers, thread_index); diff --git a/src/plugins/hs_apps/test_builtins.c b/src/plugins/hs_apps/test_builtins.c index c314e71b5df..4c324d5b953 100644 --- a/src/plugins/hs_apps/test_builtins.c +++ b/src/plugins/hs_apps/test_builtins.c @@ -161,6 +161,7 @@ test_builtins_init (vlib_main_t *vm) 0, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); (*fp) (handle_get_test1, "test1", HTTP_REQ_GET); + (*fp) (handle_get_test1, "test1", HTTP_REQ_POST); (*fp) (handle_get_test2, "test2", HTTP_REQ_GET); (*fp) (handle_get_test_delayed, "test_delayed", HTTP_REQ_GET); (*fp) (handle_post_test3, "test3", HTTP_REQ_POST); diff --git a/src/plugins/hs_apps/vcl/vcl_test_cl_udp.c b/src/plugins/hs_apps/vcl/vcl_test_cl_udp.c new file mode 100644 index 00000000000..066635e3d9b --- /dev/null +++ b/src/plugins/hs_apps/vcl/vcl_test_cl_udp.c @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <string.h> +#include <vcl/vppcom.h> +#include <hs_apps/vcl/vcl_test.h> + +typedef enum vt_clu_type_ +{ + VT_CLU_TYPE_NONE = 0, + VT_CLU_TYPE_SERVER, + VT_CLU_TYPE_CLIENT, +} vt_clu_type_t; + +typedef struct vtclu_main_ +{ + vt_clu_type_t app_type; + vppcom_endpt_t endpt; + union + { + struct sockaddr_storage srvr_addr; + struct sockaddr_storage clnt_addr; + }; + uint16_t port; +} vt_clu_main_t; + +static vt_clu_main_t vt_clu_main; + +static void +vt_clu_parse_args (vt_clu_main_t *vclum, int argc, char **argv) +{ + int c; + + memset (vclum, 0, sizeof (*vclum)); + vclum->port = VCL_TEST_SERVER_PORT; + + opterr = 0; + while ((c = getopt (argc, argv, "s:c:")) != -1) + switch (c) + { + case 's': + vclum->app_type = VT_CLU_TYPE_SERVER; + if (inet_pton ( + AF_INET, optarg, + &((struct sockaddr_in *) &vclum->srvr_addr)->sin_addr) != 1) + vtwrn ("couldn't parse ipv4 addr %s", optarg); + break; + case 'c': + vclum->app_type = VT_CLU_TYPE_CLIENT; + if (inet_pton ( + AF_INET, optarg, + &((struct sockaddr_in *) &vclum->clnt_addr)->sin_addr) != 1) + break; + } + + if (vclum->app_type == VT_CLU_TYPE_NONE) + { + vtwrn ("client or server must be configured"); + exit (1); + } + + vclum->endpt.is_ip4 = 1; + vclum->endpt.ip = + (uint8_t *) &((struct sockaddr_in *) &vclum->srvr_addr)->sin_addr; + vclum->endpt.port = htons (vclum->endpt.port); +} + +int +main (int argc, char **argv) +{ + vt_clu_main_t *vclum = &vt_clu_main; + int rv, vcl_sh; + const int buflen = 64; + char buf[buflen]; + + struct sockaddr_in _addr; + vppcom_endpt_t rmt_ep = { .ip = (void *) &_addr }; + + vt_clu_parse_args (vclum, argc, argv); + + rv = vppcom_app_create ("vcl_test_cl_udp"); + if (rv) + vtfail ("vppcom_app_create()", rv); + + vcl_sh = vppcom_session_create (VPPCOM_PROTO_UDP, 0 /* is_nonblocking */); + if (vcl_sh < 0) + { + vterr ("vppcom_session_create()", vcl_sh); + return vcl_sh; + } + + if (vclum->app_type == VT_CLU_TYPE_SERVER) + { + /* Listen is implicit */ 
+ rv = vppcom_session_bind (vcl_sh, &vclum->endpt); + if (rv < 0) + { + vterr ("vppcom_session_bind()", rv); + return rv; + } + + rv = vppcom_session_recvfrom (vcl_sh, buf, buflen, 0, &rmt_ep); + if (rv < 0) + { + vterr ("vppcom_session_recvfrom()", rv); + return rv; + } + buf[rv] = 0; + vtinf ("Received message from client: %s", buf); + + char *msg = "hello cl udp client"; + int msg_len = strnlen (msg, buflen); + memcpy (buf, msg, msg_len); + /* send 2 times to be sure */ + for (int i = 0; i < 2; i++) + { + rv = vppcom_session_sendto (vcl_sh, buf, msg_len, 0, &rmt_ep); + if (rv < 0) + { + vterr ("vppcom_session_sendto()", rv); + return rv; + } + usleep (500); + } + } + else if (vclum->app_type == VT_CLU_TYPE_CLIENT) + { + char *msg = "hello cl udp server"; + int msg_len = strnlen (msg, buflen); + memcpy (buf, msg, msg_len); + + /* send 3 times to be sure */ + for (int i = 0; i < 3; i++) + { + rv = vppcom_session_sendto (vcl_sh, buf, msg_len, 0, &vclum->endpt); + if (rv < 0) + { + vterr ("vppcom_session_sendto()", rv); + return rv; + } + usleep (500); + } + + rv = vppcom_session_recvfrom (vcl_sh, buf, buflen, 0, &rmt_ep); + if (rv < 0) + { + vterr ("vppcom_session_recvfrom()", rv); + return rv; + } + buf[rv] = 0; + vtinf ("Received message from server: %s", buf); + } +}
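
The new vcl_test_cl_udp test exercises connectionless UDP through VCL: the server learns the peer from the first vppcom_session_recvfrom() and answers it with vppcom_session_sendto(). For comparison, a sketch of the equivalent exchange with plain BSD sockets (addresses, port and buffer size here are illustrative only, not taken from the test):

/* Hedged comparison, not part of the patch. */
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int
cl_udp_echo_server (uint16_t port)
{
  char buf[64];
  struct sockaddr_in me = { .sin_family = AF_INET,
                            .sin_addr.s_addr = htonl (INADDR_ANY),
                            .sin_port = htons (port) };
  struct sockaddr_in peer;
  socklen_t peer_len = sizeof (peer);
  int fd = socket (AF_INET, SOCK_DGRAM, 0);

  if (fd < 0 || bind (fd, (struct sockaddr *) &me, sizeof (me)) < 0)
    return -1;

  /* learn the peer from the first datagram, as vppcom_session_recvfrom()
   * does above via the rmt_ep argument */
  ssize_t n = recvfrom (fd, buf, sizeof (buf) - 1, 0,
                        (struct sockaddr *) &peer, &peer_len);
  if (n < 0)
    return -1;
  buf[n] = 0;
  printf ("Received message from client: %s\n", buf);

  /* reply to that peer, as vppcom_session_sendto() does above */
  const char *msg = "hello cl udp client";
  sendto (fd, msg, strlen (msg), 0, (struct sockaddr *) &peer, peer_len);
  close (fd);
  return 0;
}
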
\ No newline at end of file diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt index 075b8d6817b..ca2c0a9dc05 100644 --- a/src/plugins/http/CMakeLists.txt +++ b/src/plugins/http/CMakeLists.txt @@ -11,11 +11,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +option(VPP_ENABLE_HTTP_2 "Build http plugin with HTTP/2 enabled" OFF) +if(VPP_ENABLE_HTTP_2) + add_compile_definitions(HTTP_2_ENABLE=1) +endif() + add_vpp_plugin(http SOURCES + http2/hpack.c + http2/http2.c + http2/frame.c http.c http_buffer.c http_timer.c + http1.c ) add_vpp_plugin(http_unittest diff --git a/src/plugins/http/extras/mk_huffman_table.py b/src/plugins/http/extras/mk_huffman_table.py new file mode 100644 index 00000000000..378544b0dce --- /dev/null +++ b/src/plugins/http/extras/mk_huffman_table.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +from io import StringIO + + +# SPDX-License-Identifier: Apache-2.0 +# Copyright(c) 2025 Cisco Systems, Inc. + + +# e.g. 5 bit code symbol has 8 slots (2^8-5), last 3 bits are irrelevant +def generate_slots(fh, s, cl): + for i in range(1 << 8 - cl): + fh.write(" { 0x%02X, %d },\n" % (s, cl)) + + +# list of code and code length tuples +huff_code_table = [] + +# Huffman code from RFC7541 Appendix B, EOS removed +rfc7541_huffman_code = """\ + ( 0) |11111111|11000 1ff8 [13] + ( 1) |11111111|11111111|1011000 7fffd8 [23] + ( 2) |11111111|11111111|11111110|0010 fffffe2 [28] + ( 3) |11111111|11111111|11111110|0011 fffffe3 [28] + ( 4) |11111111|11111111|11111110|0100 fffffe4 [28] + ( 5) |11111111|11111111|11111110|0101 fffffe5 [28] + ( 6) |11111111|11111111|11111110|0110 fffffe6 [28] + ( 7) |11111111|11111111|11111110|0111 fffffe7 [28] + ( 8) |11111111|11111111|11111110|1000 fffffe8 [28] + ( 9) |11111111|11111111|11101010 ffffea [24] + ( 10) |11111111|11111111|11111111|111100 3ffffffc [30] + ( 11) |11111111|11111111|11111110|1001 fffffe9 [28] + ( 12) |11111111|11111111|11111110|1010 fffffea [28] + ( 13) |11111111|11111111|11111111|111101 3ffffffd [30] + ( 14) |11111111|11111111|11111110|1011 fffffeb [28] + ( 15) |11111111|11111111|11111110|1100 fffffec [28] + ( 16) |11111111|11111111|11111110|1101 fffffed [28] + ( 17) |11111111|11111111|11111110|1110 fffffee [28] + ( 18) |11111111|11111111|11111110|1111 fffffef [28] + ( 19) |11111111|11111111|11111111|0000 ffffff0 [28] + ( 20) |11111111|11111111|11111111|0001 ffffff1 [28] + ( 21) |11111111|11111111|11111111|0010 ffffff2 [28] + ( 22) |11111111|11111111|11111111|111110 3ffffffe [30] + ( 23) |11111111|11111111|11111111|0011 ffffff3 [28] + ( 24) |11111111|11111111|11111111|0100 ffffff4 [28] + ( 25) |11111111|11111111|11111111|0101 ffffff5 [28] + ( 26) |11111111|11111111|11111111|0110 ffffff6 [28] + ( 27) |11111111|11111111|11111111|0111 ffffff7 [28] + ( 28) |11111111|11111111|11111111|1000 ffffff8 [28] + ( 29) |11111111|11111111|11111111|1001 ffffff9 [28] + ( 30) |11111111|11111111|11111111|1010 ffffffa [28] + ( 31) |11111111|11111111|11111111|1011 ffffffb [28] +' ' ( 32) |010100 14 [ 6] +'!' ( 33) |11111110|00 3f8 [10] +'"' ( 34) |11111110|01 3f9 [10] +'#' ( 35) |11111111|1010 ffa [12] +'$' ( 36) |11111111|11001 1ff9 [13] +'%' ( 37) |010101 15 [ 6] +'&' ( 38) |11111000 f8 [ 8] +''' ( 39) |11111111|010 7fa [11] +'(' ( 40) |11111110|10 3fa [10] +')' ( 41) |11111110|11 3fb [10] +'*' ( 42) |11111001 f9 [ 8] +'+' ( 43) |11111111|011 7fb [11] +',' ( 44) |11111010 fa [ 8] +'-' ( 45) |010110 16 [ 6] +'.' 
( 46) |010111 17 [ 6] +'/' ( 47) |011000 18 [ 6] +'0' ( 48) |00000 0 [ 5] +'1' ( 49) |00001 1 [ 5] +'2' ( 50) |00010 2 [ 5] +'3' ( 51) |011001 19 [ 6] +'4' ( 52) |011010 1a [ 6] +'5' ( 53) |011011 1b [ 6] +'6' ( 54) |011100 1c [ 6] +'7' ( 55) |011101 1d [ 6] +'8' ( 56) |011110 1e [ 6] +'9' ( 57) |011111 1f [ 6] +':' ( 58) |1011100 5c [ 7] +';' ( 59) |11111011 fb [ 8] +'<' ( 60) |11111111|1111100 7ffc [15] +'=' ( 61) |100000 20 [ 6] +'>' ( 62) |11111111|1011 ffb [12] +'?' ( 63) |11111111|00 3fc [10] +'@' ( 64) |11111111|11010 1ffa [13] +'A' ( 65) |100001 21 [ 6] +'B' ( 66) |1011101 5d [ 7] +'C' ( 67) |1011110 5e [ 7] +'D' ( 68) |1011111 5f [ 7] +'E' ( 69) |1100000 60 [ 7] +'F' ( 70) |1100001 61 [ 7] +'G' ( 71) |1100010 62 [ 7] +'H' ( 72) |1100011 63 [ 7] +'I' ( 73) |1100100 64 [ 7] +'J' ( 74) |1100101 65 [ 7] +'K' ( 75) |1100110 66 [ 7] +'L' ( 76) |1100111 67 [ 7] +'M' ( 77) |1101000 68 [ 7] +'N' ( 78) |1101001 69 [ 7] +'O' ( 79) |1101010 6a [ 7] +'P' ( 80) |1101011 6b [ 7] +'Q' ( 81) |1101100 6c [ 7] +'R' ( 82) |1101101 6d [ 7] +'S' ( 83) |1101110 6e [ 7] +'T' ( 84) |1101111 6f [ 7] +'U' ( 85) |1110000 70 [ 7] +'V' ( 86) |1110001 71 [ 7] +'W' ( 87) |1110010 72 [ 7] +'X' ( 88) |11111100 fc [ 8] +'Y' ( 89) |1110011 73 [ 7] +'Z' ( 90) |11111101 fd [ 8] +'[' ( 91) |11111111|11011 1ffb [13] +'\' ( 92) |11111111|11111110|000 7fff0 [19] +']' ( 93) |11111111|11100 1ffc [13] +'^' ( 94) |11111111|111100 3ffc [14] +'_' ( 95) |100010 22 [ 6] +'`' ( 96) |11111111|1111101 7ffd [15] +'a' ( 97) |00011 3 [ 5] +'b' ( 98) |100011 23 [ 6] +'c' ( 99) |00100 4 [ 5] +'d' (100) |100100 24 [ 6] +'e' (101) |00101 5 [ 5] +'f' (102) |100101 25 [ 6] +'g' (103) |100110 26 [ 6] +'h' (104) |100111 27 [ 6] +'i' (105) |00110 6 [ 5] +'j' (106) |1110100 74 [ 7] +'k' (107) |1110101 75 [ 7] +'l' (108) |101000 28 [ 6] +'m' (109) |101001 29 [ 6] +'n' (110) |101010 2a [ 6] +'o' (111) |00111 7 [ 5] +'p' (112) |101011 2b [ 6] +'q' (113) |1110110 76 [ 7] +'r' (114) |101100 2c [ 6] +'s' (115) |01000 8 [ 5] +'t' (116) |01001 9 [ 5] +'u' (117) |101101 2d [ 6] +'v' (118) |1110111 77 [ 7] +'w' (119) |1111000 78 [ 7] +'x' (120) |1111001 79 [ 7] +'y' (121) |1111010 7a [ 7] +'z' (122) |1111011 7b [ 7] +'{' (123) |11111111|1111110 7ffe [15] +'|' (124) |11111111|100 7fc [11] +'}' (125) |11111111|111101 3ffd [14] +'~' (126) |11111111|11101 1ffd [13] + (127) |11111111|11111111|11111111|1100 ffffffc [28] + (128) |11111111|11111110|0110 fffe6 [20] + (129) |11111111|11111111|010010 3fffd2 [22] + (130) |11111111|11111110|0111 fffe7 [20] + (131) |11111111|11111110|1000 fffe8 [20] + (132) |11111111|11111111|010011 3fffd3 [22] + (133) |11111111|11111111|010100 3fffd4 [22] + (134) |11111111|11111111|010101 3fffd5 [22] + (135) |11111111|11111111|1011001 7fffd9 [23] + (136) |11111111|11111111|010110 3fffd6 [22] + (137) |11111111|11111111|1011010 7fffda [23] + (138) |11111111|11111111|1011011 7fffdb [23] + (139) |11111111|11111111|1011100 7fffdc [23] + (140) |11111111|11111111|1011101 7fffdd [23] + (141) |11111111|11111111|1011110 7fffde [23] + (142) |11111111|11111111|11101011 ffffeb [24] + (143) |11111111|11111111|1011111 7fffdf [23] + (144) |11111111|11111111|11101100 ffffec [24] + (145) |11111111|11111111|11101101 ffffed [24] + (146) |11111111|11111111|010111 3fffd7 [22] + (147) |11111111|11111111|1100000 7fffe0 [23] + (148) |11111111|11111111|11101110 ffffee [24] + (149) |11111111|11111111|1100001 7fffe1 [23] + (150) |11111111|11111111|1100010 7fffe2 [23] + (151) |11111111|11111111|1100011 7fffe3 [23] + (152) |11111111|11111111|1100100 7fffe4 [23] + 
(153) |11111111|11111110|11100 1fffdc [21] + (154) |11111111|11111111|011000 3fffd8 [22] + (155) |11111111|11111111|1100101 7fffe5 [23] + (156) |11111111|11111111|011001 3fffd9 [22] + (157) |11111111|11111111|1100110 7fffe6 [23] + (158) |11111111|11111111|1100111 7fffe7 [23] + (159) |11111111|11111111|11101111 ffffef [24] + (160) |11111111|11111111|011010 3fffda [22] + (161) |11111111|11111110|11101 1fffdd [21] + (162) |11111111|11111110|1001 fffe9 [20] + (163) |11111111|11111111|011011 3fffdb [22] + (164) |11111111|11111111|011100 3fffdc [22] + (165) |11111111|11111111|1101000 7fffe8 [23] + (166) |11111111|11111111|1101001 7fffe9 [23] + (167) |11111111|11111110|11110 1fffde [21] + (168) |11111111|11111111|1101010 7fffea [23] + (169) |11111111|11111111|011101 3fffdd [22] + (170) |11111111|11111111|011110 3fffde [22] + (171) |11111111|11111111|11110000 fffff0 [24] + (172) |11111111|11111110|11111 1fffdf [21] + (173) |11111111|11111111|011111 3fffdf [22] + (174) |11111111|11111111|1101011 7fffeb [23] + (175) |11111111|11111111|1101100 7fffec [23] + (176) |11111111|11111111|00000 1fffe0 [21] + (177) |11111111|11111111|00001 1fffe1 [21] + (178) |11111111|11111111|100000 3fffe0 [22] + (179) |11111111|11111111|00010 1fffe2 [21] + (180) |11111111|11111111|1101101 7fffed [23] + (181) |11111111|11111111|100001 3fffe1 [22] + (182) |11111111|11111111|1101110 7fffee [23] + (183) |11111111|11111111|1101111 7fffef [23] + (184) |11111111|11111110|1010 fffea [20] + (185) |11111111|11111111|100010 3fffe2 [22] + (186) |11111111|11111111|100011 3fffe3 [22] + (187) |11111111|11111111|100100 3fffe4 [22] + (188) |11111111|11111111|1110000 7ffff0 [23] + (189) |11111111|11111111|100101 3fffe5 [22] + (190) |11111111|11111111|100110 3fffe6 [22] + (191) |11111111|11111111|1110001 7ffff1 [23] + (192) |11111111|11111111|11111000|00 3ffffe0 [26] + (193) |11111111|11111111|11111000|01 3ffffe1 [26] + (194) |11111111|11111110|1011 fffeb [20] + (195) |11111111|11111110|001 7fff1 [19] + (196) |11111111|11111111|100111 3fffe7 [22] + (197) |11111111|11111111|1110010 7ffff2 [23] + (198) |11111111|11111111|101000 3fffe8 [22] + (199) |11111111|11111111|11110110|0 1ffffec [25] + (200) |11111111|11111111|11111000|10 3ffffe2 [26] + (201) |11111111|11111111|11111000|11 3ffffe3 [26] + (202) |11111111|11111111|11111001|00 3ffffe4 [26] + (203) |11111111|11111111|11111011|110 7ffffde [27] + (204) |11111111|11111111|11111011|111 7ffffdf [27] + (205) |11111111|11111111|11111001|01 3ffffe5 [26] + (206) |11111111|11111111|11110001 fffff1 [24] + (207) |11111111|11111111|11110110|1 1ffffed [25] + (208) |11111111|11111110|010 7fff2 [19] + (209) |11111111|11111111|00011 1fffe3 [21] + (210) |11111111|11111111|11111001|10 3ffffe6 [26] + (211) |11111111|11111111|11111100|000 7ffffe0 [27] + (212) |11111111|11111111|11111100|001 7ffffe1 [27] + (213) |11111111|11111111|11111001|11 3ffffe7 [26] + (214) |11111111|11111111|11111100|010 7ffffe2 [27] + (215) |11111111|11111111|11110010 fffff2 [24] + (216) |11111111|11111111|00100 1fffe4 [21] + (217) |11111111|11111111|00101 1fffe5 [21] + (218) |11111111|11111111|11111010|00 3ffffe8 [26] + (219) |11111111|11111111|11111010|01 3ffffe9 [26] + (220) |11111111|11111111|11111111|1101 ffffffd [28] + (221) |11111111|11111111|11111100|011 7ffffe3 [27] + (222) |11111111|11111111|11111100|100 7ffffe4 [27] + (223) |11111111|11111111|11111100|101 7ffffe5 [27] + (224) |11111111|11111110|1100 fffec [20] + (225) |11111111|11111111|11110011 fffff3 [24] + (226) |11111111|11111110|1101 fffed [20] + (227) 
|11111111|11111111|00110 1fffe6 [21] + (228) |11111111|11111111|101001 3fffe9 [22] + (229) |11111111|11111111|00111 1fffe7 [21] + (230) |11111111|11111111|01000 1fffe8 [21] + (231) |11111111|11111111|1110011 7ffff3 [23] + (232) |11111111|11111111|101010 3fffea [22] + (233) |11111111|11111111|101011 3fffeb [22] + (234) |11111111|11111111|11110111|0 1ffffee [25] + (235) |11111111|11111111|11110111|1 1ffffef [25] + (236) |11111111|11111111|11110100 fffff4 [24] + (237) |11111111|11111111|11110101 fffff5 [24] + (238) |11111111|11111111|11111010|10 3ffffea [26] + (239) |11111111|11111111|1110100 7ffff4 [23] + (240) |11111111|11111111|11111010|11 3ffffeb [26] + (241) |11111111|11111111|11111100|110 7ffffe6 [27] + (242) |11111111|11111111|11111011|00 3ffffec [26] + (243) |11111111|11111111|11111011|01 3ffffed [26] + (244) |11111111|11111111|11111100|111 7ffffe7 [27] + (245) |11111111|11111111|11111101|000 7ffffe8 [27] + (246) |11111111|11111111|11111101|001 7ffffe9 [27] + (247) |11111111|11111111|11111101|010 7ffffea [27] + (248) |11111111|11111111|11111101|011 7ffffeb [27] + (249) |11111111|11111111|11111111|1110 ffffffe [28] + (250) |11111111|11111111|11111101|100 7ffffec [27] + (251) |11111111|11111111|11111101|101 7ffffed [27] + (252) |11111111|11111111|11111101|110 7ffffee [27] + (253) |11111111|11111111|11111101|111 7ffffef [27] + (254) |11111111|11111111|11111110|000 7fffff0 [27] + (255) |11111111|11111111|11111011|10 3ffffee [26]""" + +# parse Huffman code +for line in StringIO(rfc7541_huffman_code): + # we need just last two columns + l = line.rstrip().split(" ") + # len in bits + code_len = l[-1][1:-1].strip() + # code as hex aligned to LSB + code = l[-2].strip() + huff_code_table.append((code_len, code)) + +f = open("../http2/huffman_table.h", "w") +f.write( + """/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +/* generated by mk_huffman_table.py */ + +#ifndef SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ +#define SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ + +#include <vppinfra/types.h> + +typedef struct +{ + u8 code_len; + u32 code; +} hpack_huffman_symbol_t; + +static hpack_huffman_symbol_t huff_sym_table[] = { +""" +) + +# encoding table +[f.write(" {" + code[0] + ", 0x" + code[1] + "},\n") for code in huff_code_table] + +f.write( + """}; + +typedef struct +{ + u8 symbol; + u8 code_len; +} hpack_huffman_code_t; + +static hpack_huffman_code_t huff_code_table_fast[] = { +""" +) + +# fast decoding table, symbols with code length from 5 to 8 bits (most of printable ASCII characters) +[generate_slots(f, i, 5) for i, code in enumerate(huff_code_table) if code[0] == "5"] +[generate_slots(f, i, 6) for i, code in enumerate(huff_code_table) if code[0] == "6"] +[generate_slots(f, i, 7) for i, code in enumerate(huff_code_table) if code[0] == "7"] +[generate_slots(f, i, 8) for i, code in enumerate(huff_code_table) if code[0] == "8"] + +# last 2 entries are longer codes prefixes, code_len set to 0 +f.write(" { 0x00, 0 },\n") +f.write(" { 0x00, 0 },\n") + +f.write( + """}; + +typedef struct +{ + u32 first_code; + u8 code_len; + u8 symbols[29]; +} hpack_huffman_group_t; + +/* clang-format off */ + +static hpack_huffman_group_t huff_code_table_slow[] = { +""" +) +for i in range(10, 31): + symbols = [ + (symbol, code[1]) + for symbol, code in enumerate(huff_code_table) + if code[0] == str(i) + ] + if symbols: + _, first_code = symbols[0] + f.write(" {\n 0x" + first_code + ", /* first_code */\n") + f.write(" " + str(i) + ", /* code_len */\n") + f.write(" {\n ") + [f.write(" 0x%02X," % s) for s, c in symbols[:10]] + if len(symbols) > 10: + f.write("\n ") + [f.write(" 0x%02X," % s) for s, c in symbols[10:20]] + if len(symbols) > 20: + f.write("\n ") + [f.write(" 0x%02X," % s) for s, c in symbols[20:30]] + f.write("\n } /* symbols */\n },\n") + +f.write( + """}; + +/* clang format-on */ + +always_inline hpack_huffman_group_t * +hpack_huffman_get_group (u32 value) +{ +""" +) + +index = 0 + +symbols = [ + (symbol, code[1]) for symbol, code in enumerate(huff_code_table) if code[0] == "10" +] +_, last_code = symbols[-1] +boundary = (int(last_code, 16) + 1) << 22 +f.write(" if (value < 0x%X)\n" % boundary) +f.write(" return &huff_code_table_slow[%d];\n" % index) +index += 1 + +for i in range(11, 30): + symbols = [ + (symbol, code[1]) + for symbol, code in enumerate(huff_code_table) + if code[0] == str(i) + ] + if symbols: + _, last_code = symbols[-1] + boundary = (int(last_code, 16) + 1) << (32 - i) + f.write(" else if (value < 0x%X)\n" % boundary) + f.write(" return &huff_code_table_slow[%d];\n" % index) + index += 1 + +f.write(" else\n") +f.write(" return &huff_code_table_slow[%d];\n" % index) + +f.write( + """} + +#endif /* SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ */ +""" +) + +f.close() diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c index 69b661d0611..c7eefcdae48 100644 --- a/src/plugins/http/http.c +++ b/src/plugins/http/http.c @@ -13,43 +13,59 @@ * limitations under the License. 
*/ +#include <vpp/app/version.h> +#include <vnet/session/application_interface.h> +#include <vnet/session/application.h> + #include <http/http.h> -#include <vnet/session/session.h> +#include <http/http_private.h> #include <http/http_timer.h> -#include <http/http_status_codes.h> -#include <http/http_header_names.h> static http_main_t http_main; - -#define HTTP_FIFO_THRESH (16 << 10) - -/* HTTP state machine result */ -typedef enum http_sm_result_t_ -{ - HTTP_SM_STOP = 0, - HTTP_SM_CONTINUE = 1, - HTTP_SM_ERROR = -1, -} http_sm_result_t; +static http_engine_vft_t *http_vfts; const http_buffer_type_t msg_to_buf_type[] = { [HTTP_MSG_DATA_INLINE] = HTTP_BUFFER_FIFO, [HTTP_MSG_DATA_PTR] = HTTP_BUFFER_PTR, }; -const char *http_upgrade_proto_str[] = { "", -#define _(sym, str) str, - foreach_http_upgrade_proto -#undef _ -}; +void +http_register_engine (const http_engine_vft_t *vft, http_version_t version) +{ + vec_validate (http_vfts, version); + http_vfts[version] = *vft; +} + +int +http_v_find_index (u8 *vec, u32 offset, u32 num, char *str) +{ + int start_index = offset; + u32 slen = (u32) strnlen_s_inline (str, 16); + u32 vlen = vec_len (vec); + + ASSERT (slen > 0); -#define expect_char(c) \ - if (*p++ != c) \ - { \ - clib_warning ("unexpected character"); \ - return -1; \ + if (vlen <= slen) + return -1; + + int end_index = vlen - slen; + if (num) + { + if (num < slen) + return -1; + end_index = clib_min (end_index, offset + num - slen); } -static u8 * + for (; start_index <= end_index; start_index++) + { + if (!memcmp (vec + start_index, str, slen)) + return start_index; + } + + return -1; +} + +u8 * format_http_req_state (u8 *s, va_list *va) { http_req_state_t state = va_arg (*va, http_req_state_t); @@ -68,18 +84,7 @@ format_http_req_state (u8 *s, va_list *va) return format (s, "%s", t); } -#define http_req_state_change(_hc, _state) \ - do \ - { \ - HTTP_DBG (1, "changing http req state: %U -> %U", \ - format_http_req_state, (_hc)->req.state, \ - format_http_req_state, _state); \ - ASSERT ((_hc)->req.state != HTTP_REQ_STATE_TUNNEL); \ - (_hc)->req.state = _state; \ - } \ - while (0) - -static u8 * +u8 * format_http_conn_state (u8 *s, va_list *args) { http_conn_t *hc = va_arg (*args, http_conn_t *); @@ -98,36 +103,41 @@ format_http_conn_state (u8 *s, va_list *args) return format (s, "%s", t); } +u8 * +format_http_time_now (u8 *s, va_list *args) +{ + http_conn_t __clib_unused *hc = va_arg (*args, http_conn_t *); + http_main_t *hm = &http_main; + f64 now = clib_timebase_now (&hm->timebase); + return format (s, "%U", format_clib_timebase_time, now); +} + static inline http_worker_t * -http_worker_get (u32 thread_index) +http_worker_get (clib_thread_index_t thread_index) { return &http_main.wrk[thread_index]; } static inline u32 -http_conn_alloc_w_thread (u32 thread_index) +http_conn_alloc_w_thread (clib_thread_index_t thread_index) { http_worker_t *wrk = http_worker_get (thread_index); http_conn_t *hc; pool_get_aligned_safe (wrk->conn_pool, hc, CLIB_CACHE_LINE_BYTES); - clib_memset (hc, 0, sizeof (*hc)); - hc->c_thread_index = thread_index; - hc->h_hc_index = hc - wrk->conn_pool; - hc->h_pa_session_handle = SESSION_INVALID_HANDLE; - hc->h_tc_session_handle = SESSION_INVALID_HANDLE; - return hc->h_hc_index; + return (hc - wrk->conn_pool); } static inline http_conn_t * -http_conn_get_w_thread (u32 hc_index, u32 thread_index) +http_conn_get_w_thread (u32 hc_index, clib_thread_index_t thread_index) { http_worker_t *wrk = http_worker_get (thread_index); return pool_elt_at_index (wrk->conn_pool, 
hc_index); } static inline http_conn_t * -http_conn_get_w_thread_if_valid (u32 hc_index, u32 thread_index) +http_conn_get_w_thread_if_valid (u32 hc_index, + clib_thread_index_t thread_index) { http_worker_t *wrk = http_worker_get (thread_index); if (pool_is_free_index (wrk->conn_pool, hc_index)) @@ -135,13 +145,22 @@ http_conn_get_w_thread_if_valid (u32 hc_index, u32 thread_index) return pool_elt_at_index (wrk->conn_pool, hc_index); } -void +static void http_conn_free (http_conn_t *hc) { http_worker_t *wrk = http_worker_get (hc->c_thread_index); + if (CLIB_DEBUG) + memset (hc, 0xba, sizeof (*hc)); pool_put (wrk->conn_pool, hc); } +static void +http_add_postponed_ho_cleanups (u32 ho_hc_index) +{ + http_main_t *hm = &http_main; + vec_add1 (hm->postponed_ho_free, ho_hc_index); +} + static inline http_conn_t * http_ho_conn_get (u32 ho_hc_index) { @@ -149,26 +168,66 @@ http_ho_conn_get (u32 ho_hc_index) return pool_elt_at_index (hm->ho_conn_pool, ho_hc_index); } -void +static void http_ho_conn_free (http_conn_t *ho_hc) { http_main_t *hm = &http_main; + if (CLIB_DEBUG) + memset (ho_hc, 0xba, sizeof (*ho_hc)); pool_put (hm->ho_conn_pool, ho_hc); } +static void +http_ho_try_free (u32 ho_hc_index) +{ + http_conn_t *ho_hc; + HTTP_DBG (1, "half open: %x", ho_hc_index); + ho_hc = http_ho_conn_get (ho_hc_index); + if (!(ho_hc->flags & HTTP_CONN_F_HO_DONE)) + { + HTTP_DBG (1, "postponed cleanup"); + ho_hc->hc_tc_session_handle = SESSION_INVALID_HANDLE; + http_add_postponed_ho_cleanups (ho_hc_index); + return; + } + if (!(ho_hc->flags & HTTP_CONN_F_NO_APP_SESSION)) + session_half_open_delete_notify (&ho_hc->connection); + http_ho_conn_free (ho_hc); +} + +static void +http_flush_postponed_ho_cleanups () +{ + http_main_t *hm = &http_main; + u32 *ho_indexp, *tmp; + + tmp = hm->postponed_ho_free; + hm->postponed_ho_free = hm->ho_free_list; + hm->ho_free_list = tmp; + + vec_foreach (ho_indexp, hm->ho_free_list) + http_ho_try_free (*ho_indexp); + + vec_reset_length (hm->ho_free_list); +} + static inline u32 http_ho_conn_alloc (void) { http_main_t *hm = &http_main; http_conn_t *hc; + if (vec_len (hm->postponed_ho_free)) + http_flush_postponed_ho_cleanups (); + pool_get_aligned_safe (hm->ho_conn_pool, hc, CLIB_CACHE_LINE_BYTES); clib_memset (hc, 0, sizeof (*hc)); - hc->h_hc_index = hc - hm->ho_conn_pool; - hc->h_pa_session_handle = SESSION_INVALID_HANDLE; - hc->h_tc_session_handle = SESSION_INVALID_HANDLE; + hc->hc_hc_index = hc - hm->ho_conn_pool; + hc->hc_pa_session_handle = SESSION_INVALID_HANDLE; + hc->hc_tc_session_handle = SESSION_INVALID_HANDLE; hc->timeout = HTTP_CONN_TIMEOUT; - return hc->h_hc_index; + hc->version = HTTP_VERSION_NA; + return hc->hc_hc_index; } static u32 @@ -178,18 +237,19 @@ http_listener_alloc (void) http_conn_t *lhc; pool_get_zero (hm->listener_pool, lhc); - lhc->c_c_index = lhc - hm->listener_pool; + lhc->hc_hc_index = lhc - hm->listener_pool; lhc->timeout = HTTP_CONN_TIMEOUT; - return lhc->c_c_index; + lhc->version = HTTP_VERSION_NA; + return lhc->hc_hc_index; } -http_conn_t * +static http_conn_t * http_listener_get (u32 lhc_index) { return pool_elt_at_index (http_main.listener_pool, lhc_index); } -void +static void http_listener_free (http_conn_t *lhc) { http_main_t *hm = &http_main; @@ -204,7 +264,7 @@ void http_disconnect_transport (http_conn_t *hc) { vnet_disconnect_args_t a = { - .handle = hc->h_tc_session_handle, + .handle = hc->hc_tc_session_handle, .app_index = http_main.app_index, }; @@ -214,6 +274,110 @@ http_disconnect_transport (http_conn_t *hc) clib_warning 
("disconnect returned"); } +void +http_shutdown_transport (http_conn_t *hc) +{ + vnet_shutdown_args_t a = { + .handle = hc->hc_tc_session_handle, + .app_index = http_main.app_index, + }; + + hc->state = HTTP_CONN_STATE_CLOSED; + + if (vnet_shutdown_session (&a)) + clib_warning ("shutdown returned"); +} + +http_status_code_t +http_sc_by_u16 (u16 status_code) +{ + http_main_t *hm = &http_main; + return hm->sc_by_u16[status_code]; +} + +u8 * +http_get_app_header_list (http_req_t *req, http_msg_t *msg) +{ + http_main_t *hm = &http_main; + session_t *as; + u8 *app_headers; + int rv; + + as = session_get_from_handle (req->hr_pa_session_handle); + + if (msg->data.type == HTTP_MSG_DATA_PTR) + { + uword app_headers_ptr; + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), + (u8 *) &app_headers_ptr); + ASSERT (rv == sizeof (app_headers_ptr)); + app_headers = uword_to_pointer (app_headers_ptr, u8 *); + } + else + { + app_headers = hm->app_header_lists[as->thread_index]; + rv = svm_fifo_dequeue (as->tx_fifo, msg->data.headers_len, app_headers); + ASSERT (rv == msg->data.headers_len); + } + + return app_headers; +} + +u8 * +http_get_app_target (http_req_t *req, http_msg_t *msg) +{ + session_t *as; + u8 *target; + int rv; + + as = session_get_from_handle (req->hr_pa_session_handle); + + if (msg->data.type == HTTP_MSG_DATA_PTR) + { + uword target_ptr; + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (target_ptr), + (u8 *) &target_ptr); + ASSERT (rv == sizeof (target_ptr)); + target = uword_to_pointer (target_ptr, u8 *); + } + else + { + vec_reset_length (req->target); + vec_validate (req->target, msg->data.target_path_len - 1); + rv = + svm_fifo_dequeue (as->tx_fifo, msg->data.target_path_len, req->target); + ASSERT (rv == msg->data.target_path_len); + target = req->target; + } + return target; +} + +u8 * +http_get_tx_buf (http_conn_t *hc) +{ + http_main_t *hm = &http_main; + u8 *buf = hm->tx_bufs[hc->c_thread_index]; + vec_reset_length (buf); + return buf; +} + +u8 * +http_get_rx_buf (http_conn_t *hc) +{ + http_main_t *hm = &http_main; + u8 *buf = hm->rx_bufs[hc->c_thread_index]; + vec_reset_length (buf); + return buf; +} + +void +http_req_tx_buffer_init (http_req_t *req, http_msg_t *msg) +{ + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + http_buffer_init (&req->tx_buf, msg_to_buf_type[msg->data.type], as->tx_fifo, + msg->data.body_len); +} + static void http_conn_invalidate_timer_cb (u32 hs_handle) { @@ -230,7 +394,7 @@ http_conn_invalidate_timer_cb (u32 hs_handle) } hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; - hc->pending_timer = 1; + hc->flags |= HTTP_CONN_F_PENDING_TIMER; } static void @@ -250,24 +414,30 @@ http_conn_timeout_cb (void *hc_handlep) return; } - if (!hc->pending_timer) + if (!(hc->flags & HTTP_CONN_F_PENDING_TIMER)) { HTTP_DBG (1, "timer not pending"); return; } - session_transport_closing_notify (&hc->connection); + /* in case nothing received on cleartext connection before timeout */ + if (PREDICT_FALSE (hc->version != HTTP_VERSION_NA)) + http_vfts[hc->version].transport_close_callback (hc); http_disconnect_transport (hc); } +/*************************/ +/* session VFT callbacks */ +/*************************/ + int http_ts_accept_callback (session_t *ts) { - session_t *ts_listener, *as, *asl; - app_worker_t *app_wrk; + session_t *ts_listener; http_conn_t *lhc, *hc; u32 hc_index, thresh; - int rv; + http_conn_handle_t hc_handle; + transport_proto_t tp; ts_listener = listen_session_get_from_handle (ts->listener_handle); lhc = http_listener_get 
(ts_listener->opaque); @@ -277,61 +447,35 @@ http_ts_accept_callback (session_t *ts) clib_memcpy_fast (hc, lhc, sizeof (*lhc)); hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; hc->c_thread_index = ts->thread_index; - hc->h_hc_index = hc_index; - - hc->h_tc_session_handle = session_handle (ts); + hc->hc_hc_index = hc_index; + hc->flags |= HTTP_CONN_F_NO_APP_SESSION; + hc->hc_tc_session_handle = session_handle (ts); hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; - hc->state = HTTP_CONN_STATE_ESTABLISHED; - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); ts->session_state = SESSION_STATE_READY; - ts->opaque = hc_index; - - /* - * Alloc session and initialize - */ - as = session_alloc (hc->c_thread_index); - hc->c_s_index = as->session_index; - - as->app_wrk_index = hc->h_pa_wrk_index; - as->connection_index = hc->c_c_index; - as->session_state = SESSION_STATE_ACCEPTING; - - asl = listen_session_get_from_handle (lhc->h_pa_session_handle); - as->session_type = asl->session_type; - as->listener_handle = lhc->h_pa_session_handle; - - /* - * Init session fifos and notify app - */ - if ((rv = app_worker_init_accepted (as))) + tp = session_get_transport_proto (ts); + if (tp == TRANSPORT_PROTO_TLS) { - HTTP_DBG (1, "failed to allocate fifos"); - hc->h_pa_session_handle = SESSION_INVALID_HANDLE; - session_free (as); - return rv; + /* TODO: set by ALPN result */ + hc->version = HTTP_VERSION_1; } - - hc->h_pa_session_handle = session_handle (as); - hc->h_pa_wrk_index = as->app_wrk_index; - app_wrk = app_worker_get (as->app_wrk_index); + else + { + /* going to decide in http_ts_rx_callback */ + hc->version = HTTP_VERSION_NA; + } + hc_handle.version = hc->version; + hc_handle.conn_index = hc_index; + ts->opaque = hc_handle.as_u32; HTTP_DBG (1, "Accepted on listener %u new connection [%u]%x", ts_listener->opaque, vlib_get_thread_index (), hc_index); - if ((rv = app_worker_accept_notify (app_wrk, as))) - { - HTTP_DBG (0, "app accept returned"); - session_free (as); - return rv; - } - /* Avoid enqueuing small chunks of data on transport tx notifications. If * the fifo is small (under 16K) we set the threshold to it's size, meaning * a notification will be given when the fifo empties. 
*/ - ts = session_get_from_handle (hc->h_tc_session_handle); thresh = clib_min (svm_fifo_size (ts->tx_fifo), HTTP_FIFO_THRESH); svm_fifo_set_deq_thresh (ts->tx_fifo, thresh); @@ -345,9 +489,9 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, session_error_t err) { u32 new_hc_index; - session_t *as; http_conn_t *hc, *ho_hc; app_worker_t *app_wrk; + http_conn_handle_t hc_handle; int rv; ho_hc = http_ho_conn_get (ho_hc_index); @@ -357,9 +501,10 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, { clib_warning ("half-open hc index %d, error: %U", ho_hc_index, format_session_error, err); - app_wrk = app_worker_get_if_valid (ho_hc->h_pa_wrk_index); + ho_hc->flags |= HTTP_CONN_F_HO_DONE; + app_wrk = app_worker_get_if_valid (ho_hc->hc_pa_wrk_index); if (app_wrk) - app_worker_connect_notify (app_wrk, 0, err, ho_hc->h_pa_app_api_ctx); + app_worker_connect_notify (app_wrk, 0, err, ho_hc->hc_pa_app_api_ctx); return 0; } @@ -368,46 +513,31 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, clib_memcpy_fast (hc, ho_hc, sizeof (*hc)); + /* in chain with TLS there is race on half-open cleanup */ + __atomic_fetch_or (&ho_hc->flags, HTTP_CONN_F_HO_DONE, __ATOMIC_RELEASE); + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; hc->c_thread_index = ts->thread_index; - hc->h_tc_session_handle = session_handle (ts); - hc->c_c_index = new_hc_index; + hc->hc_tc_session_handle = session_handle (ts); + hc->hc_hc_index = new_hc_index; hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; hc->state = HTTP_CONN_STATE_ESTABLISHED; - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); - ts->session_state = SESSION_STATE_READY; - ts->opaque = new_hc_index; - - /* allocate app session and initialize */ - - as = session_alloc (hc->c_thread_index); - hc->c_s_index = as->session_index; - as->connection_index = hc->c_c_index; - as->app_wrk_index = hc->h_pa_wrk_index; - as->session_state = SESSION_STATE_READY; - as->opaque = hc->h_pa_app_api_ctx; - as->session_type = session_type_from_proto_and_ip ( - TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type)); + hc->flags |= HTTP_CONN_F_NO_APP_SESSION; + /* TODO: TLS set by ALPN result, TCP: prior knowledge (set in ho) */ + hc_handle.version = hc->version; + hc_handle.conn_index = new_hc_index; + ts->opaque = hc_handle.as_u32; HTTP_DBG (1, "half-open hc index %x, hc [%u]%x", ho_hc_index, ts->thread_index, new_hc_index); - app_wrk = app_worker_get (hc->h_pa_wrk_index); - if (!app_wrk) + if ((rv = http_vfts[hc->version].transport_connected_callback (hc))) { - clib_warning ("no app worker"); - return -1; - } - - if ((rv = app_worker_init_connected (app_wrk, as))) - { - HTTP_DBG (1, "failed to allocate fifos"); - session_free (as); + clib_warning ("transport_connected_callback failed, rv=%d", rv); return rv; } - app_worker_connect_notify (app_wrk, as, err, hc->h_pa_app_api_ctx); - hc->h_pa_session_handle = session_handle (as); + http_conn_timer_start (hc); return 0; @@ -417,1832 +547,154 @@ static void http_ts_disconnect_callback (session_t *ts) { http_conn_t *hc; + http_conn_handle_t hc_handle; + + hc_handle.as_u32 = ts->opaque; - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); + HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_handle.conn_index); + + hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index); if (hc->state < HTTP_CONN_STATE_TRANSPORT_CLOSED) hc->state = HTTP_CONN_STATE_TRANSPORT_CLOSED; - /* Nothing more to rx, propagate to app */ - if 
(!svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_transport_closing_notify (&hc->connection); + /* in case peer close cleartext connection before send something */ + if (PREDICT_FALSE (hc->version == HTTP_VERSION_NA)) + return; + + http_vfts[hc->version].transport_close_callback (hc); } static void http_ts_reset_callback (session_t *ts) { http_conn_t *hc; + http_conn_handle_t hc_handle; - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - - hc->state = HTTP_CONN_STATE_CLOSED; - http_buffer_free (&hc->req.tx_buf); - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); - session_transport_reset_notify (&hc->connection); - - http_disconnect_transport (hc); -} - -/** - * http error boilerplate - */ -static const char *http_error_template = "HTTP/1.1 %s\r\n" - "Date: %U GMT\r\n" - "Connection: close\r\n" - "Content-Length: 0\r\n\r\n"; - -/** - * http response boilerplate - */ -static const char *http_response_template = "HTTP/1.1 %s\r\n" - "Date: %U GMT\r\n" - "Server: %v\r\n"; - -static const char *content_len_template = "Content-Length: %llu\r\n"; - -static const char *connection_upgrade_template = "Connection: upgrade\r\n" - "Upgrade: %s\r\n"; - -/** - * http request boilerplate - */ -static const char *http_get_request_template = "GET %s HTTP/1.1\r\n" - "Host: %v\r\n" - "User-Agent: %v\r\n"; - -static const char *http_post_request_template = "POST %s HTTP/1.1\r\n" - "Host: %v\r\n" - "User-Agent: %v\r\n" - "Content-Length: %llu\r\n"; - -static u32 -http_send_data (http_conn_t *hc, u8 *data, u32 length) -{ - const u32 max_burst = 64 << 10; - session_t *ts; - u32 to_send; - int rv; - - ts = session_get_from_handle (hc->h_tc_session_handle); + hc_handle.as_u32 = ts->opaque; - to_send = clib_min (length, max_burst); - rv = svm_fifo_enqueue (ts->tx_fifo, to_send, data); - if (rv <= 0) - { - clib_warning ("svm_fifo_enqueue failed, rv %d", rv); - return 0; - } + HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_handle.conn_index); - if (svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); + hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index); - return rv; -} + hc->state = HTTP_CONN_STATE_CLOSED; + /* in case peer reset cleartext connection before send something */ + if (PREDICT_FALSE (hc->version != HTTP_VERSION_NA)) + http_vfts[hc->version].transport_reset_callback (hc); -static void -http_send_error (http_conn_t *hc, http_status_code_t ec) -{ - http_main_t *hm = &http_main; - u8 *data; - f64 now; - - if (ec >= HTTP_N_STATUS) - ec = HTTP_STATUS_INTERNAL_ERROR; - - now = clib_timebase_now (&hm->timebase); - data = format (0, http_error_template, http_status_code_str[ec], - format_clib_timebase_time, now); - HTTP_DBG (3, "%v", data); - http_send_data (hc, data, vec_len (data)); - vec_free (data); + http_disconnect_transport (hc); } static int -http_read_message (http_conn_t *hc) +http_ts_rx_callback (session_t *ts) { + http_conn_t *hc; + http_conn_handle_t hc_handle; u32 max_deq; - session_t *ts; - int n_read; - - ts = session_get_from_handle (hc->h_tc_session_handle); - - max_deq = svm_fifo_max_dequeue (ts->rx_fifo); - if (PREDICT_FALSE (max_deq == 0)) - return -1; - - vec_validate (hc->req.rx_buf, max_deq - 1); - n_read = svm_fifo_peek (ts->rx_fifo, 0, max_deq, hc->req.rx_buf); - ASSERT (n_read == max_deq); - HTTP_DBG (1, "read %u bytes from rx_fifo", n_read); - - return 0; -} - -static void -http_read_message_drop (http_conn_t *hc, u32 len) -{ - session_t *ts; - - ts = session_get_from_handle 
(hc->h_tc_session_handle); - svm_fifo_dequeue_drop (ts->rx_fifo, len); - vec_reset_length (hc->req.rx_buf); + u8 *rx_buf; - if (svm_fifo_is_empty (ts->rx_fifo)) - svm_fifo_unset_event (ts->rx_fifo); -} - -static void -http_read_message_drop_all (http_conn_t *hc) -{ - session_t *ts; - - ts = session_get_from_handle (hc->h_tc_session_handle); - svm_fifo_dequeue_drop_all (ts->rx_fifo); - vec_reset_length (hc->req.rx_buf); - - if (svm_fifo_is_empty (ts->rx_fifo)) - svm_fifo_unset_event (ts->rx_fifo); -} - -/** - * @brief Find the first occurrence of the string in the vector. - * - * @param vec The vector to be scanned. - * @param offset Search offset in the vector. - * @param num Maximum number of characters to be searched if non-zero. - * @param str The string to be searched. - * - * @return @c -1 if the string is not found within the vector; index otherwise. - */ -static inline int -v_find_index (u8 *vec, u32 offset, u32 num, char *str) -{ - int start_index = offset; - u32 slen = (u32) strnlen_s_inline (str, 16); - u32 vlen = vec_len (vec); - - ASSERT (slen > 0); - - if (vlen <= slen) - return -1; - - int end_index = vlen - slen; - if (num) - { - if (num < slen) - return -1; - end_index = clib_min (end_index, offset + num - slen); - } - - for (; start_index <= end_index; start_index++) - { - if (!memcmp (vec + start_index, str, slen)) - return start_index; - } - - return -1; -} - -static void -http_identify_optional_query (http_req_t *req) -{ - int i; - for (i = req->target_path_offset; - i < (req->target_path_offset + req->target_path_len); i++) - { - if (req->rx_buf[i] == '?') - { - req->target_query_offset = i + 1; - req->target_query_len = req->target_path_offset + - req->target_path_len - - req->target_query_offset; - req->target_path_len = - req->target_path_len - req->target_query_len - 1; - break; - } - } -} - -static int -http_parse_target (http_req_t *req) -{ - int i; - u8 *p, *end; - - /* asterisk-form = "*" */ - if ((req->rx_buf[req->target_path_offset] == '*') && - (req->target_path_len == 1)) - { - req->target_form = HTTP_TARGET_ASTERISK_FORM; - /* we do not support OPTIONS request */ - return -1; - } - - /* origin-form = 1*( "/" segment ) [ "?" query ] */ - if (req->rx_buf[req->target_path_offset] == '/') - { - /* drop leading slash */ - req->target_path_len--; - req->target_path_offset++; - req->target_form = HTTP_TARGET_ORIGIN_FORM; - http_identify_optional_query (req); - /* can't be CONNECT method */ - return req->method == HTTP_REQ_CONNECT ? -1 : 0; - } - - /* absolute-form = - * scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */ - if (req->target_path_len > 8 && - !memcmp (req->rx_buf + req->target_path_offset, "http", 4)) - { - req->scheme = HTTP_URL_SCHEME_HTTP; - p = req->rx_buf + req->target_path_offset + 4; - if (*p == 's') - { - p++; - req->scheme = HTTP_URL_SCHEME_HTTPS; - } - if (*p++ == ':') - { - expect_char ('/'); - expect_char ('/'); - req->target_form = HTTP_TARGET_ABSOLUTE_FORM; - req->target_authority_offset = p - req->rx_buf; - req->target_authority_len = 0; - end = req->rx_buf + req->target_path_offset + req->target_path_len; - while (p < end) - { - if (*p == '/') - { - p++; /* drop leading slash */ - req->target_path_offset = p - req->rx_buf; - req->target_path_len = end - p; - break; - } - req->target_authority_len++; - p++; - } - if (!req->target_path_len) - { - clib_warning ("zero length host"); - return -1; - } - http_identify_optional_query (req); - /* can't be CONNECT method */ - return req->method == HTTP_REQ_CONNECT ? 
-1 : 0; - } - } - - /* authority-form = host ":" port */ - for (i = req->target_path_offset; - i < (req->target_path_offset + req->target_path_len); i++) - { - if ((req->rx_buf[i] == ':') && (isdigit (req->rx_buf[i + 1]))) - { - req->target_authority_len = req->target_path_len; - req->target_path_len = 0; - req->target_authority_offset = req->target_path_offset; - req->target_path_offset = 0; - req->target_form = HTTP_TARGET_AUTHORITY_FORM; - /* "authority-form" is only used for CONNECT requests */ - return req->method == HTTP_REQ_CONNECT ? 0 : -1; - } - } - - return -1; -} + hc_handle.as_u32 = ts->opaque; -static int -http_parse_request_line (http_req_t *req, http_status_code_t *ec) -{ - int i, target_len; - u32 next_line_offset, method_offset; + HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_handle.conn_index); - /* request-line = method SP request-target SP HTTP-version CRLF */ - i = v_find_index (req->rx_buf, 8, 0, "\r\n"); - if (i < 0) - { - clib_warning ("request line incomplete"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - HTTP_DBG (2, "request line length: %d", i); - req->control_data_len = i + 2; - next_line_offset = req->control_data_len; - - /* there should be at least one more CRLF */ - if (vec_len (req->rx_buf) < (next_line_offset + 2)) - { - clib_warning ("malformed message, too short"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } + hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index); - /* - * RFC9112 2.2: - * In the interest of robustness, a server that is expecting to receive and - * parse a request-line SHOULD ignore at least one empty line (CRLF) - * received prior to the request-line. - */ - method_offset = req->rx_buf[0] == '\r' && req->rx_buf[1] == '\n' ? 2 : 0; - /* parse method */ - if (!memcmp (req->rx_buf + method_offset, "GET ", 4)) - { - HTTP_DBG (0, "GET method"); - req->method = HTTP_REQ_GET; - req->target_path_offset = method_offset + 4; - } - else if (!memcmp (req->rx_buf + method_offset, "POST ", 5)) - { - HTTP_DBG (0, "POST method"); - req->method = HTTP_REQ_POST; - req->target_path_offset = method_offset + 5; - } - else if (!memcmp (req->rx_buf + method_offset, "CONNECT ", 8)) - { - HTTP_DBG (0, "CONNECT method"); - req->method = HTTP_REQ_CONNECT; - req->upgrade_proto = HTTP_UPGRADE_PROTO_NA; - req->target_path_offset = method_offset + 8; - req->is_tunnel = 1; - } - else - { - if (req->rx_buf[method_offset] - 'A' <= 'Z' - 'A') - { - clib_warning ("method not implemented: %8v", req->rx_buf); - *ec = HTTP_STATUS_NOT_IMPLEMENTED; - return -1; - } - else - { - clib_warning ("not method name: %8v", req->rx_buf); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - } - - /* find version */ - i = v_find_index (req->rx_buf, next_line_offset - 11, 11, " HTTP/"); - if (i < 0) - { - clib_warning ("HTTP version not present"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - /* verify major version */ - if (isdigit (req->rx_buf[i + 6])) - { - if (req->rx_buf[i + 6] != '1') - { - clib_warning ("HTTP major version '%c' not supported", - req->rx_buf[i + 6]); - *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED; - return -1; - } - } - else - { - clib_warning ("HTTP major version '%c' is not digit", - req->rx_buf[i + 6]); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - - /* parse request-target */ - HTTP_DBG (2, "http at %d", i); - target_len = i - req->target_path_offset; - HTTP_DBG (2, "target_len %d", target_len); - if (target_len < 1) - { - clib_warning ("request-target not present"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - 
req->target_path_len = target_len; - req->target_query_offset = 0; - req->target_query_len = 0; - req->target_authority_len = 0; - req->target_authority_offset = 0; - if (http_parse_target (req)) - { - clib_warning ("invalid target"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - HTTP_DBG (2, "request-target path length: %u", req->target_path_len); - HTTP_DBG (2, "request-target path offset: %u", req->target_path_offset); - HTTP_DBG (2, "request-target query length: %u", req->target_query_len); - HTTP_DBG (2, "request-target query offset: %u", req->target_query_offset); - - /* set buffer offset to nex line start */ - req->rx_buf_offset = next_line_offset; - - return 0; -} - -#define parse_int(val, mul) \ - do \ - { \ - if (!isdigit (*p)) \ - { \ - clib_warning ("expected digit"); \ - return -1; \ - } \ - val += mul * (*p++ - '0'); \ - } \ - while (0) - -static int -http_parse_status_line (http_req_t *req) -{ - int i; - u32 next_line_offset; - u8 *p, *end; - u16 status_code = 0; - http_main_t *hm = &http_main; - - i = v_find_index (req->rx_buf, 0, 0, "\r\n"); - /* status-line = HTTP-version SP status-code SP [ reason-phrase ] CRLF */ - if (i < 0) - { - clib_warning ("status line incomplete"); - return -1; - } - HTTP_DBG (2, "status line length: %d", i); - if (i < 12) - { - clib_warning ("status line too short (%d)", i); - return -1; - } - req->control_data_len = i + 2; - next_line_offset = req->control_data_len; - p = req->rx_buf; - end = req->rx_buf + i; - - /* there should be at least one more CRLF */ - if (vec_len (req->rx_buf) < (next_line_offset + 2)) - { - clib_warning ("malformed message, too short"); - return -1; - } - - /* parse version */ - expect_char ('H'); - expect_char ('T'); - expect_char ('T'); - expect_char ('P'); - expect_char ('/'); - expect_char ('1'); - expect_char ('.'); - if (!isdigit (*p++)) - { - clib_warning ("invalid HTTP minor version"); - return -1; - } - - /* skip space(s) */ - if (*p != ' ') - { - clib_warning ("no space after HTTP version"); - return -1; - } - do - { - p++; - if (p == end) - { - clib_warning ("no status code"); - return -1; - } - } - while (*p == ' '); - - /* parse status code */ - if ((end - p) < 3) - { - clib_warning ("not enough characters for status code"); - return -1; - } - parse_int (status_code, 100); - parse_int (status_code, 10); - parse_int (status_code, 1); - if (status_code < 100 || status_code > 599) - { - clib_warning ("invalid status code %d", status_code); - return -1; - } - req->status_code = hm->sc_by_u16[status_code]; - HTTP_DBG (0, "status code: %d", status_code); - - /* set buffer offset to nex line start */ - req->rx_buf_offset = next_line_offset; - - return 0; -} - -static int -http_identify_headers (http_req_t *req, http_status_code_t *ec) -{ - int rv; - u8 *p, *end, *name_start, *value_start; - u32 name_len, value_len; - http_field_line_t *field_line; - uword header_index; - - vec_reset_length (req->headers); - req->content_len_header_index = ~0; - req->connection_header_index = ~0; - req->upgrade_header_index = ~0; - req->host_header_index = ~0; - req->headers_offset = req->rx_buf_offset; - - /* check if we have any header */ - if ((req->rx_buf[req->rx_buf_offset] == '\r') && - (req->rx_buf[req->rx_buf_offset + 1] == '\n')) - { - /* just another CRLF -> no headers */ - HTTP_DBG (2, "no headers"); - req->headers_len = 0; - req->control_data_len += 2; - return 0; - } - - end = req->rx_buf + vec_len (req->rx_buf); - p = req->rx_buf + req->rx_buf_offset; - - while (1) - { - rv = _parse_field_name (&p, end, 
&name_start, &name_len); - if (rv != 0) - { - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - rv = _parse_field_value (&p, end, &value_start, &value_len); - if (rv != 0 || (end - p) < 2) - { - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - - vec_add2 (req->headers, field_line, 1); - field_line->name_offset = - (name_start - req->rx_buf) - req->headers_offset; - field_line->name_len = name_len; - field_line->value_offset = - (value_start - req->rx_buf) - req->headers_offset; - field_line->value_len = value_len; - header_index = field_line - req->headers; - - /* find headers that will be used later in preprocessing */ - /* names are case-insensitive (RFC9110 section 5.1) */ - if (req->content_len_header_index == ~0 && - http_token_is_case ( - (const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_CONTENT_LENGTH))) - req->content_len_header_index = header_index; - else if (req->connection_header_index == ~0 && - http_token_is_case ( - (const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_CONNECTION))) - req->connection_header_index = header_index; - else if (req->upgrade_header_index == ~0 && - http_token_is_case ( - (const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_UPGRADE))) - req->upgrade_header_index = header_index; - else if (req->host_header_index == ~0 && - http_token_is_case ((const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_HOST))) - req->host_header_index = header_index; - - /* are we done? */ - if (*p == '\r' && *(p + 1) == '\n') - break; - } - - req->headers_len = p - (req->rx_buf + req->headers_offset); - req->control_data_len += (req->headers_len + 2); - HTTP_DBG (2, "headers length: %u", req->headers_len); - HTTP_DBG (2, "headers offset: %u", req->headers_offset); - - return 0; -} - -static int -http_identify_message_body (http_req_t *req, http_status_code_t *ec) -{ - int i; - u8 *p; - u64 body_len = 0, digit; - http_field_line_t *field_line; - - req->body_len = 0; - - if (req->headers_len == 0) - { - HTTP_DBG (2, "no header, no message-body"); - return 0; - } - if (req->is_tunnel) - { - HTTP_DBG (2, "tunnel, no message-body"); - return 0; - } - - /* TODO check for chunked transfer coding */ - - if (req->content_len_header_index == ~0) + if (hc->state == HTTP_CONN_STATE_CLOSED) { - HTTP_DBG (2, "Content-Length header not present, no message-body"); + HTTP_DBG (1, "conn closed"); + svm_fifo_dequeue_drop_all (ts->rx_fifo); return 0; } - field_line = vec_elt_at_index (req->headers, req->content_len_header_index); - p = req->rx_buf + req->headers_offset + field_line->value_offset; - for (i = 0; i < field_line->value_len; i++) + if (hc_handle.version == HTTP_VERSION_NA) { - /* check for digit */ - if (!isdigit (*p)) - { - clib_warning ("expected digit"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - digit = *p - '0'; - u64 new_body_len = body_len * 10 + digit; - /* check for overflow */ - if (new_body_len < body_len) + HTTP_DBG (1, "unknown http version"); + max_deq = svm_fifo_max_dequeue_cons (ts->rx_fifo); + if (max_deq >= http2_conn_preface.len) { - clib_warning ("too big number, overflow"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - body_len = new_body_len; - p++; - } - - req->body_len = body_len; - - req->body_offset = req->headers_offset + req->headers_len + 2; - HTTP_DBG (2, "body length: %llu", req->body_len); - HTTP_DBG (2, "body offset: %u", req->body_offset); - - return 0; -} - -static http_sm_result_t -http_req_state_wait_transport_reply (http_conn_t *hc, - 
transport_send_params_t *sp) -{ - int rv; - http_msg_t msg = {}; - app_worker_t *app_wrk; - session_t *as; - u32 len, max_enq, body_sent; - http_status_code_t ec; - - rv = http_read_message (hc); - - /* Nothing yet, wait for data or timer expire */ - if (rv) - { - HTTP_DBG (1, "no data to deq"); - return HTTP_SM_STOP; - } - - HTTP_DBG (3, "%v", hc->req.rx_buf); - - if (vec_len (hc->req.rx_buf) < 8) - { - clib_warning ("response buffer too short"); - goto error; - } - - rv = http_parse_status_line (&hc->req); - if (rv) - goto error; - - rv = http_identify_headers (&hc->req, &ec); - if (rv) - goto error; - - rv = http_identify_message_body (&hc->req, &ec); - if (rv) - goto error; - - /* send at least "control data" which is necessary minimum, - * if there is some space send also portion of body */ - as = session_get_from_handle (hc->h_pa_session_handle); - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - max_enq -= sizeof (msg); - if (max_enq < hc->req.control_data_len) - { - clib_warning ("not enough room for control data in app's rx fifo"); - goto error; - } - len = clib_min (max_enq, vec_len (hc->req.rx_buf)); - - msg.type = HTTP_MSG_REPLY; - msg.code = hc->req.status_code; - msg.data.headers_offset = hc->req.headers_offset; - msg.data.headers_len = hc->req.headers_len; - msg.data.body_offset = hc->req.body_offset; - msg.data.body_len = hc->req.body_len; - msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.len = len; - msg.data.headers_ctx = pointer_to_uword (hc->req.headers); - - svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, - { hc->req.rx_buf, len } }; - - rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */); - ASSERT (rv == (sizeof (msg) + len)); - - http_read_message_drop (hc, len); - - body_sent = len - hc->req.control_data_len; - hc->req.to_recv = hc->req.body_len - body_sent; - if (hc->req.to_recv == 0) - { - /* all sent, we are done */ - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); - } - else - { - /* stream rest of the response body */ - http_req_state_change (hc, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); - } - - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - return HTTP_SM_STOP; - -error: - http_read_message_drop_all (hc); - session_transport_closing_notify (&hc->connection); - session_transport_closed_notify (&hc->connection); - http_disconnect_transport (hc); - return HTTP_SM_ERROR; -} - -#define http_field_line_value_token(_fl, _req) \ - (const char *) ((_req)->rx_buf + (_req)->headers_offset + \ - (_fl)->value_offset), \ - (_fl)->value_len - -static void -http_check_connection_upgrade (http_req_t *req) -{ - http_field_line_t *connection, *upgrade; - u8 skip; - - skip = (req->method != HTTP_REQ_GET) + (req->connection_header_index == ~0) + - (req->upgrade_header_index == ~0); - if (skip) - return; - - connection = vec_elt_at_index (req->headers, req->connection_header_index); - /* connection options are case-insensitive (RFC9110 7.6.1) */ - if (http_token_is_case (http_field_line_value_token (connection, req), - http_token_lit ("upgrade"))) - { - upgrade = vec_elt_at_index (req->headers, req->upgrade_header_index); - - /* check upgrade protocol, we want to ignore something like upgrade to - * newer HTTP version, only tunnels are supported */ - if (0) - ; -#define _(sym, str) \ - else if (http_token_is_case (http_field_line_value_token (upgrade, req), \ - http_token_lit (str))) req->upgrade_proto = \ - HTTP_UPGRADE_PROTO_##sym; - foreach_http_upgrade_proto -#undef _ - 
else return; - - HTTP_DBG (1, "connection upgrade: %U", format_http_bytes, - req->rx_buf + req->headers_offset + upgrade->value_offset, - upgrade->value_len); - req->is_tunnel = 1; - req->method = HTTP_REQ_CONNECT; - } -} - -static void -http_target_fixup (http_conn_t *hc) -{ - http_field_line_t *host; - - if (hc->req.target_form == HTTP_TARGET_ABSOLUTE_FORM) - return; - - /* scheme fixup */ - hc->req.scheme = session_get_transport_proto (session_get_from_handle ( - hc->h_tc_session_handle)) == TRANSPORT_PROTO_TLS ? - HTTP_URL_SCHEME_HTTPS : - HTTP_URL_SCHEME_HTTP; - - if (hc->req.target_form == HTTP_TARGET_AUTHORITY_FORM || - hc->req.connection_header_index == ~0) - return; - - /* authority fixup */ - host = vec_elt_at_index (hc->req.headers, hc->req.connection_header_index); - hc->req.target_authority_offset = host->value_offset; - hc->req.target_authority_len = host->value_len; -} - -static http_sm_result_t -http_req_state_wait_transport_method (http_conn_t *hc, - transport_send_params_t *sp) -{ - http_status_code_t ec; - app_worker_t *app_wrk; - http_msg_t msg; - session_t *as; - int rv; - u32 len, max_enq, body_sent; - u64 max_deq; - - rv = http_read_message (hc); - - /* Nothing yet, wait for data or timer expire */ - if (rv) - return HTTP_SM_STOP; - - HTTP_DBG (3, "%v", hc->req.rx_buf); - - if (vec_len (hc->req.rx_buf) < 8) - { - ec = HTTP_STATUS_BAD_REQUEST; - goto error; - } - - rv = http_parse_request_line (&hc->req, &ec); - if (rv) - goto error; - - rv = http_identify_headers (&hc->req, &ec); - if (rv) - goto error; - - http_target_fixup (hc); - http_check_connection_upgrade (&hc->req); - - rv = http_identify_message_body (&hc->req, &ec); - if (rv) - goto error; - - /* send at least "control data" which is necessary minimum, - * if there is some space send also portion of body */ - as = session_get_from_handle (hc->h_pa_session_handle); - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - if (max_enq < hc->req.control_data_len) - { - clib_warning ("not enough room for control data in app's rx fifo"); - ec = HTTP_STATUS_INTERNAL_ERROR; - goto error; - } - /* do not dequeue more than one HTTP request, we do not support pipelining */ - max_deq = clib_min (hc->req.control_data_len + hc->req.body_len, - vec_len (hc->req.rx_buf)); - len = clib_min (max_enq, max_deq); - - msg.type = HTTP_MSG_REQUEST; - msg.method_type = hc->req.method; - msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.len = len; - msg.data.scheme = hc->req.scheme; - msg.data.target_authority_offset = hc->req.target_authority_offset; - msg.data.target_authority_len = hc->req.target_authority_len; - msg.data.target_path_offset = hc->req.target_path_offset; - msg.data.target_path_len = hc->req.target_path_len; - msg.data.target_query_offset = hc->req.target_query_offset; - msg.data.target_query_len = hc->req.target_query_len; - msg.data.headers_offset = hc->req.headers_offset; - msg.data.headers_len = hc->req.headers_len; - msg.data.body_offset = hc->req.body_offset; - msg.data.body_len = hc->req.body_len; - msg.data.headers_ctx = pointer_to_uword (hc->req.headers); - msg.data.upgrade_proto = hc->req.upgrade_proto; - - svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, - { hc->req.rx_buf, len } }; - - rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */); - ASSERT (rv == (sizeof (msg) + len)); - - body_sent = len - hc->req.control_data_len; - hc->req.to_recv = hc->req.body_len - body_sent; - if (hc->req.to_recv == 0) - { - /* drop everything, we do not support pipelining */ - 
http_read_message_drop_all (hc); - /* all sent, we are done */ - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_REPLY); - } - else - { - http_read_message_drop (hc, len); - /* stream rest of the response body */ - http_req_state_change (hc, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); - } - - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - - return HTTP_SM_STOP; - -error: - http_read_message_drop_all (hc); - http_send_error (hc, ec); - session_transport_closing_notify (&hc->connection); - http_disconnect_transport (hc); - - return HTTP_SM_ERROR; -} - -static void -http_write_app_headers (http_conn_t *hc, http_msg_t *msg, u8 **tx_buf) -{ - http_main_t *hm = &http_main; - session_t *as; - u8 *app_headers, *p, *end; - u32 *tmp; - int rv; - - as = session_get_from_handle (hc->h_pa_session_handle); - - /* read app header list */ - if (msg->data.type == HTTP_MSG_DATA_PTR) - { - uword app_headers_ptr; - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), - (u8 *) &app_headers_ptr); - ASSERT (rv == sizeof (app_headers_ptr)); - app_headers = uword_to_pointer (app_headers_ptr, u8 *); - } - else - { - app_headers = hm->app_header_lists[hc->c_thread_index]; - rv = svm_fifo_dequeue (as->tx_fifo, msg->data.headers_len, app_headers); - ASSERT (rv == msg->data.headers_len); - } - - /* serialize app headers to tx_buf */ - end = app_headers + msg->data.headers_len; - while (app_headers < end) - { - /* custom header name? */ - tmp = (u32 *) app_headers; - if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT)) - { - http_custom_token_t *name, *value; - name = (http_custom_token_t *) app_headers; - u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT; - app_headers += sizeof (http_custom_token_t) + name_len; - value = (http_custom_token_t *) app_headers; - app_headers += sizeof (http_custom_token_t) + value->len; - vec_add2 (*tx_buf, p, name_len + value->len + 4); - clib_memcpy (p, name->token, name_len); - p += name_len; - *p++ = ':'; - *p++ = ' '; - clib_memcpy (p, value->token, value->len); - p += value->len; - *p++ = '\r'; - *p++ = '\n'; - } - else - { - http_app_header_t *header; - header = (http_app_header_t *) app_headers; - app_headers += sizeof (http_app_header_t) + header->value.len; - http_token_t name = { http_header_name_token (header->name) }; - vec_add2 (*tx_buf, p, name.len + header->value.len + 4); - clib_memcpy (p, name.base, name.len); - p += name.len; - *p++ = ':'; - *p++ = ' '; - clib_memcpy (p, header->value.token, header->value.len); - p += header->value.len; - *p++ = '\r'; - *p++ = '\n'; - } - } -} - -static http_sm_result_t -http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) -{ - http_main_t *hm = &http_main; - u8 *response; - u32 sent; - f64 now; - session_t *as; - http_status_code_t sc; - http_msg_t msg; - int rv; - http_sm_result_t sm_result = HTTP_SM_ERROR; - http_req_state_t next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD; - - as = session_get_from_handle (hc->h_pa_session_handle); - - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - - if (msg.data.type > HTTP_MSG_DATA_PTR) - { - clib_warning ("no data"); - sc = HTTP_STATUS_INTERNAL_ERROR; - goto error; - } - - if (msg.type != HTTP_MSG_REPLY) - { - clib_warning ("unexpected message type %d", msg.type); - sc = HTTP_STATUS_INTERNAL_ERROR; - goto error; - } - - if (msg.code >= HTTP_N_STATUS) - { - clib_warning ("unsupported status code: %d", msg.code); - return HTTP_SM_ERROR; - } - - 
response = hm->tx_bufs[hc->c_thread_index]; - vec_reset_length (response); - /* - * Add "protocol layer" headers: - * - current time - * - server name - * - data length - */ - now = clib_timebase_now (&hm->timebase); - response = - format (response, http_response_template, http_status_code_str[msg.code], - /* Date */ - format_clib_timebase_time, now, - /* Server */ - hc->app_name); - - /* RFC9110 8.6: A server MUST NOT send Content-Length header field in a - * 2xx (Successful) response to CONNECT or with a status code of 101 - * (Switching Protocols). */ - if (hc->req.is_tunnel && (http_status_code_str[msg.code][0] == '2' || - msg.code == HTTP_STATUS_SWITCHING_PROTOCOLS)) - { - ASSERT (msg.data.body_len == 0); - next_state = HTTP_REQ_STATE_TUNNEL; - if (hc->req.upgrade_proto > HTTP_UPGRADE_PROTO_NA) - { - response = format (response, connection_upgrade_template, - http_upgrade_proto_str[hc->req.upgrade_proto]); - if (hc->req.upgrade_proto == HTTP_UPGRADE_PROTO_CONNECT_UDP && - hc->udp_tunnel_mode == HTTP_UDP_TUNNEL_DGRAM) - next_state = HTTP_REQ_STATE_UDP_TUNNEL; - } - /* cleanup some stuff we don't need anymore in tunnel mode */ - vec_free (hc->req.rx_buf); - vec_free (hc->req.headers); - http_buffer_free (&hc->req.tx_buf); - hc->req.to_skip = 0; - } - else - response = format (response, content_len_template, msg.data.body_len); - - /* Add headers from app (if any) */ - if (msg.data.headers_len) - { - HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); - http_write_app_headers (hc, &msg, &response); - } - /* Add empty line after headers */ - response = format (response, "\r\n"); - HTTP_DBG (3, "%v", response); - - sent = http_send_data (hc, response, vec_len (response)); - if (sent != vec_len (response)) - { - clib_warning ("sending status-line and headers failed!"); - sc = HTTP_STATUS_INTERNAL_ERROR; - goto error; - } - - if (msg.data.body_len) - { - /* Start sending the actual data */ - http_buffer_init (&hc->req.tx_buf, msg_to_buf_type[msg.data.type], - as->tx_fifo, msg.data.body_len); - next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; - sm_result = HTTP_SM_CONTINUE; - } - else - { - /* No response body, we are done */ - sm_result = HTTP_SM_STOP; - } - - http_req_state_change (hc, next_state); - - ASSERT (sp->max_burst_size >= sent); - sp->max_burst_size -= sent; - return sm_result; - -error: - http_send_error (hc, sc); - session_transport_closing_notify (&hc->connection); - http_disconnect_transport (hc); - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) -{ - http_main_t *hm = &http_main; - http_msg_t msg; - session_t *as; - u8 *target_buff = 0, *request = 0, *target; - u32 sent; - int rv; - http_sm_result_t sm_result = HTTP_SM_ERROR; - http_req_state_t next_state; - - as = session_get_from_handle (hc->h_pa_session_handle); - - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - - if (msg.data.type > HTTP_MSG_DATA_PTR) - { - clib_warning ("no data"); - goto error; - } - - if (msg.type != HTTP_MSG_REQUEST) - { - clib_warning ("unexpected message type %d", msg.type); - goto error; - } - - /* read request target */ - if (msg.data.type == HTTP_MSG_DATA_PTR) - { - uword target_ptr; - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (target_ptr), - (u8 *) &target_ptr); - ASSERT (rv == sizeof (target_ptr)); - target = uword_to_pointer (target_ptr, u8 *); - } - else - { - vec_validate (target_buff, msg.data.target_path_len - 1); - rv = - svm_fifo_dequeue 
(as->tx_fifo, msg.data.target_path_len, target_buff); - ASSERT (rv == msg.data.target_path_len); - target = target_buff; - } - - request = hm->tx_bufs[hc->c_thread_index]; - vec_reset_length (request); - /* currently we support only GET and POST method */ - if (msg.method_type == HTTP_REQ_GET) - { - if (msg.data.body_len) - { - clib_warning ("GET request shouldn't include data"); - goto error; - } - /* - * Add "protocol layer" headers: - * - host - * - user agent - */ - request = format (request, http_get_request_template, - /* target */ - target, - /* Host */ - hc->host, - /* User-Agent */ - hc->app_name); - - next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY; - sm_result = HTTP_SM_STOP; - } - else if (msg.method_type == HTTP_REQ_POST) - { - if (!msg.data.body_len) - { - clib_warning ("POST request should include data"); - goto error; - } - /* - * Add "protocol layer" headers: - * - host - * - user agent - * - content length - */ - request = format (request, http_post_request_template, - /* target */ - target, - /* Host */ - hc->host, - /* User-Agent */ - hc->app_name, - /* Content-Length */ - msg.data.body_len); - - http_buffer_init (&hc->req.tx_buf, msg_to_buf_type[msg.data.type], - as->tx_fifo, msg.data.body_len); - - next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; - sm_result = HTTP_SM_CONTINUE; - } - else - { - clib_warning ("unsupported method %d", msg.method_type); - goto error; - } - - /* Add headers from app (if any) */ - if (msg.data.headers_len) - { - HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); - http_write_app_headers (hc, &msg, &request); - } - /* Add empty line after headers */ - request = format (request, "\r\n"); - HTTP_DBG (3, "%v", request); - - sent = http_send_data (hc, request, vec_len (request)); - if (sent != vec_len (request)) - { - clib_warning ("sending request-line and headers failed!"); - sm_result = HTTP_SM_ERROR; - goto error; - } - - http_req_state_change (hc, next_state); - goto done; - -error: - svm_fifo_dequeue_drop_all (as->tx_fifo); - session_transport_closing_notify (&hc->connection); - session_transport_closed_notify (&hc->connection); - http_disconnect_transport (hc); - -done: - vec_free (target_buff); - return sm_result; -} - -static http_sm_result_t -http_req_state_transport_io_more_data (http_conn_t *hc, - transport_send_params_t *sp) -{ - session_t *as, *ts; - app_worker_t *app_wrk; - svm_fifo_seg_t _seg, *seg = &_seg; - u32 max_len, max_deq, max_enq, n_segs = 1; - int rv, len; - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - - max_deq = svm_fifo_max_dequeue (ts->rx_fifo); - if (max_deq == 0) - { - HTTP_DBG (1, "no data to deq"); - return HTTP_SM_STOP; - } - - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - if (max_enq == 0) - { - HTTP_DBG (1, "app's rx fifo full"); - svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - return HTTP_SM_STOP; - } - - max_len = clib_min (max_enq, max_deq); - len = svm_fifo_segments (ts->rx_fifo, 0, seg, &n_segs, max_len); - if (len < 0) - { - HTTP_DBG (1, "svm_fifo_segments() len %d", len); - return HTTP_SM_STOP; - } - - rv = svm_fifo_enqueue_segments (as->rx_fifo, seg, 1, 0 /* allow partial */); - if (rv < 0) - { - clib_warning ("data enqueue failed, rv: %d", rv); - return HTTP_SM_ERROR; - } - - svm_fifo_dequeue_drop (ts->rx_fifo, rv); - if (rv > hc->req.to_recv) - { - clib_warning ("http protocol error: received more data than expected"); - session_transport_closing_notify (&hc->connection); - 
http_disconnect_transport (hc); - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); - return HTTP_SM_ERROR; - } - hc->req.to_recv -= rv; - HTTP_DBG (1, "drained %d from ts; remains %lu", rv, hc->req.to_recv); - - /* Finished transaction: - * server back to HTTP_REQ_STATE_WAIT_APP_REPLY - * client to HTTP_REQ_STATE_WAIT_APP_METHOD */ - if (hc->req.to_recv == 0) - http_req_state_change (hc, hc->is_server ? HTTP_REQ_STATE_WAIT_APP_REPLY : - HTTP_REQ_STATE_WAIT_APP_METHOD); - - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - - if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_enqueue_notify (ts); - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp) -{ - u32 max_send = 64 << 10, n_segs; - http_buffer_t *hb = &hc->req.tx_buf; - svm_fifo_seg_t *seg; - session_t *ts; - int sent = 0; - - max_send = clib_min (max_send, sp->max_burst_size); - ts = session_get_from_handle (hc->h_tc_session_handle); - if ((seg = http_buffer_get_segs (hb, max_send, &n_segs))) - sent = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs, - 1 /* allow partial */); - - if (sent > 0) - { - /* Ask scheduler to notify app of deq event if needed */ - sp->bytes_dequeued += http_buffer_drain (hb, sent); - sp->max_burst_size -= sent; - } - - /* Not finished sending all data */ - if (!http_buffer_is_drained (hb)) - { - if (sent && svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); - - if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) - { - /* Deschedule http session and wait for deq notification if - * underlying ts tx fifo almost full */ - svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - transport_connection_deschedule (&hc->connection); - sp->flags |= TRANSPORT_SND_F_DESCHED; - } - } - else - { - if (sent && svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX_FLUSH); - - /* Finished transaction: - * server back to HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD - * client to HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY */ - http_req_state_change (hc, hc->is_server ? 
- HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD : - HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY); - http_buffer_free (hb); - } - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_tunnel_rx (http_conn_t *hc, transport_send_params_t *sp) -{ - u32 max_deq, max_enq, max_read, n_segs = 2; - svm_fifo_seg_t segs[n_segs]; - int n_written = 0; - session_t *as, *ts; - app_worker_t *app_wrk; - - HTTP_DBG (1, "tunnel received data from client"); - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - - max_deq = svm_fifo_max_dequeue (ts->rx_fifo); - if (PREDICT_FALSE (max_deq == 0)) - { - HTTP_DBG (1, "max_deq == 0"); - return HTTP_SM_STOP; - } - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - if (max_enq == 0) - { - HTTP_DBG (1, "app's rx fifo full"); - svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - return HTTP_SM_STOP; - } - max_read = clib_min (max_enq, max_deq); - svm_fifo_segments (ts->rx_fifo, 0, segs, &n_segs, max_read); - n_written = svm_fifo_enqueue_segments (as->rx_fifo, segs, n_segs, 0); - ASSERT (n_written > 0); - HTTP_DBG (1, "transfered %u bytes", n_written); - svm_fifo_dequeue_drop (ts->rx_fifo, n_written); - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_program_rx_io_evt (session_handle (ts)); - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_tunnel_tx (http_conn_t *hc, transport_send_params_t *sp) -{ - u32 max_deq, max_enq, max_read, n_segs = 2; - svm_fifo_seg_t segs[n_segs]; - session_t *as, *ts; - int n_written = 0; - - HTTP_DBG (1, "tunnel received data from target"); - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - - max_deq = svm_fifo_max_dequeue_cons (as->tx_fifo); - if (PREDICT_FALSE (max_deq == 0)) - { - HTTP_DBG (1, "max_deq == 0"); - goto check_fifo; - } - max_enq = svm_fifo_max_enqueue_prod (ts->tx_fifo); - if (max_enq == 0) - { - HTTP_DBG (1, "ts tx fifo full"); - goto check_fifo; - } - max_read = clib_min (max_enq, max_deq); - max_read = clib_min (max_read, sp->max_burst_size); - svm_fifo_segments (as->tx_fifo, 0, segs, &n_segs, max_read); - n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0); - ASSERT (n_written > 0); - HTTP_DBG (1, "transfered %u bytes", n_written); - sp->bytes_dequeued += n_written; - sp->max_burst_size -= n_written; - svm_fifo_dequeue_drop (as->tx_fifo, n_written); - if (svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); - -check_fifo: - /* Deschedule and wait for deq notification if ts fifo is almost full */ - if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) - { - svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - transport_connection_deschedule (&hc->connection); - sp->flags |= TRANSPORT_SND_F_DESCHED; - } - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_udp_tunnel_rx (http_conn_t *hc, transport_send_params_t *sp) -{ - http_main_t *hm = &http_main; - u32 to_deq, capsule_size, dgram_size, n_written = 0; - int rv, n_read; - session_t *as, *ts; - app_worker_t *app_wrk; - u8 payload_offset; - u64 payload_len; - session_dgram_hdr_t hdr; - u8 *buf = 0; - - HTTP_DBG (1, "udp tunnel received data from client"); - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - buf = 
hm->rx_bufs[hc->c_thread_index]; - to_deq = svm_fifo_max_dequeue_cons (ts->rx_fifo); - - while (to_deq > 0) - { - /* some bytes remaining to skip? */ - if (PREDICT_FALSE (hc->req.to_skip)) - { - if (hc->req.to_skip >= to_deq) + rx_buf = http_get_rx_buf (hc); + svm_fifo_peek (ts->rx_fifo, 0, http2_conn_preface.len, rx_buf); + if (memcmp (rx_buf, http2_conn_preface.base, + http2_conn_preface.len) == 0) { - svm_fifo_dequeue_drop (ts->rx_fifo, to_deq); - hc->req.to_skip -= to_deq; - goto done; - } - else - { - svm_fifo_dequeue_drop (ts->rx_fifo, hc->req.to_skip); - hc->req.to_skip = 0; - } - } - n_read = - svm_fifo_peek (ts->rx_fifo, 0, HTTP_CAPSULE_HEADER_MAX_SIZE, buf); - ASSERT (n_read > 0); - rv = http_decap_udp_payload_datagram (buf, n_read, &payload_offset, - &payload_len); - HTTP_DBG (1, "rv=%d, payload_offset=%u, payload_len=%llu", rv, - payload_offset, payload_len); - if (PREDICT_FALSE (rv != 0)) - { - if (rv < 0) - { - /* capsule datagram is invalid (session need to be aborted) */ +#if HTTP_2_ENABLE > 0 + hc->version = HTTP_VERSION_2; + http_vfts[hc->version].conn_accept_callback (hc); +#else svm_fifo_dequeue_drop_all (ts->rx_fifo); - session_transport_closing_notify (&hc->connection); - session_transport_closed_notify (&hc->connection); http_disconnect_transport (hc); - return HTTP_SM_STOP; + return 0; +#endif } else - { - /* unknown capsule should be skipped */ - if (payload_len <= to_deq) - { - svm_fifo_dequeue_drop (ts->rx_fifo, payload_len); - to_deq -= payload_len; - continue; - } - else - { - svm_fifo_dequeue_drop (ts->rx_fifo, to_deq); - hc->req.to_skip = payload_len - to_deq; - goto done; - } - } - } - capsule_size = payload_offset + payload_len; - /* check if we have the full capsule */ - if (PREDICT_FALSE (to_deq < capsule_size)) - { - HTTP_DBG (1, "capsule not complete"); - goto done; + hc->version = HTTP_VERSION_1; } - - dgram_size = sizeof (hdr) + payload_len; - if (svm_fifo_max_enqueue_prod (as->rx_fifo) < dgram_size) - { - HTTP_DBG (1, "app's rx fifo full"); - svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - goto done; - } - - /* read capsule payload */ - rv = svm_fifo_peek (ts->rx_fifo, payload_offset, payload_len, buf); - ASSERT (rv == payload_len); - svm_fifo_dequeue_drop (ts->rx_fifo, capsule_size); - - hdr.data_length = payload_len; - hdr.data_offset = 0; - - /* send datagram header and payload */ - svm_fifo_seg_t segs[2] = { { (u8 *) &hdr, sizeof (hdr) }, - { buf, payload_len } }; - rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0); - ASSERT (rv > 0); - - n_written += dgram_size; - to_deq -= capsule_size; - } - -done: - HTTP_DBG (1, "written %lu bytes", n_written); - - if (n_written) - { - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - } - if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_program_rx_io_evt (session_handle (ts)); - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_udp_tunnel_tx (http_conn_t *hc, transport_send_params_t *sp) -{ - http_main_t *hm = &http_main; - u32 to_deq, capsule_size, dgram_size, n_written = 0; - session_t *as, *ts; - int rv; - session_dgram_pre_hdr_t hdr; - u8 *buf; - u8 *payload; - - HTTP_DBG (1, "udp tunnel received data from target"); - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - buf = hm->tx_bufs[hc->c_thread_index]; - to_deq = svm_fifo_max_dequeue_cons (as->tx_fifo); - - while (to_deq > 0) - { - /* read datagram header */ - rv = 
svm_fifo_peek (as->tx_fifo, 0, sizeof (hdr), (u8 *) &hdr); - ASSERT (rv == sizeof (hdr) && - hdr.data_length <= HTTP_UDP_PAYLOAD_MAX_LEN); - ASSERT (to_deq >= hdr.data_length + SESSION_CONN_HDR_LEN); - dgram_size = hdr.data_length + SESSION_CONN_HDR_LEN; - - if (svm_fifo_max_enqueue_prod (ts->tx_fifo) < - (hdr.data_length + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD)) - { - HTTP_DBG (1, "ts tx fifo full"); - goto done; - } - - /* create capsule header */ - payload = http_encap_udp_payload_datagram (buf, hdr.data_length); - capsule_size = (payload - buf) + hdr.data_length; - /* read payload */ - rv = svm_fifo_peek (as->tx_fifo, SESSION_CONN_HDR_LEN, hdr.data_length, - payload); - ASSERT (rv == hdr.data_length); - svm_fifo_dequeue_drop (as->tx_fifo, dgram_size); - /* send capsule */ - rv = svm_fifo_enqueue (ts->tx_fifo, capsule_size, buf); - ASSERT (rv == capsule_size); - - n_written += capsule_size; - to_deq -= dgram_size; - } - -done: - HTTP_DBG (1, "written %lu bytes", n_written); - if (n_written) - { - if (svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); - } - - /* Deschedule and wait for deq notification if ts fifo is almost full */ - if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) - { - svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - transport_connection_deschedule (&hc->connection); - sp->flags |= TRANSPORT_SND_F_DESCHED; - } - - return HTTP_SM_STOP; -} - -typedef http_sm_result_t (*http_sm_handler) (http_conn_t *, - transport_send_params_t *sp); - -static http_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = { - 0, /* idle */ - http_req_state_wait_app_method, - 0, /* wait transport reply */ - 0, /* transport io more data */ - 0, /* wait transport method */ - http_req_state_wait_app_reply, - http_req_state_app_io_more_data, - http_req_state_tunnel_tx, - http_req_state_udp_tunnel_tx, -}; - -static_always_inline int -http_req_state_is_tx_valid (http_conn_t *hc) -{ - return tx_state_funcs[hc->req.state] ? 1 : 0; -} - -static http_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = { - 0, /* idle */ - 0, /* wait app method */ - http_req_state_wait_transport_reply, - http_req_state_transport_io_more_data, - http_req_state_wait_transport_method, - 0, /* wait app reply */ - 0, /* app io more data */ - http_req_state_tunnel_rx, - http_req_state_udp_tunnel_rx, -}; - -static_always_inline int -http_req_state_is_rx_valid (http_conn_t *hc) -{ - return rx_state_funcs[hc->req.state] ? 1 : 0; -} - -static_always_inline void -http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp, - u8 is_tx) -{ - http_sm_result_t res; - - do - { - if (is_tx) - res = tx_state_funcs[hc->req.state](hc, sp); else - res = rx_state_funcs[hc->req.state](hc, sp); - if (res == HTTP_SM_ERROR) - { - HTTP_DBG (1, "error in state machine %d", res); - return; - } + hc->version = HTTP_VERSION_1; + + HTTP_DBG (1, "identified HTTP/%u", + hc->version == HTTP_VERSION_1 ? 
1 : 2); + hc_handle.version = hc->version; + ts->opaque = hc_handle.as_u32; } - while (res == HTTP_SM_CONTINUE); + http_vfts[hc_handle.version].transport_rx_callback (hc); - /* Reset the session expiration timer */ - http_conn_timer_update (hc); + if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED) + http_vfts[hc->version].transport_close_callback (hc); + return 0; } -static int -http_ts_rx_callback (session_t *ts) +int +http_ts_builtin_tx_callback (session_t *ts) { http_conn_t *hc; + http_conn_handle_t hc_handle; - HTTP_DBG (1, "hc [%u]%x", ts->thread_index, ts->opaque); - - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - - if (hc->state == HTTP_CONN_STATE_CLOSED) - { - HTTP_DBG (1, "conn closed"); - svm_fifo_dequeue_drop_all (ts->rx_fifo); - return 0; - } - - if (!http_req_state_is_rx_valid (hc)) - { - clib_warning ("hc [%u]%x invalid rx state: http req state " - "'%U', session state '%U'", - ts->thread_index, ts->opaque, format_http_req_state, - hc->req.state, format_http_conn_state, hc); - svm_fifo_dequeue_drop_all (ts->rx_fifo); - return 0; - } + hc_handle.as_u32 = ts->opaque; - HTTP_DBG (1, "run state machine"); - http_req_run_state_machine (hc, 0, 0); + hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index); + HTTP_DBG (1, "transport connection reschedule"); + http_vfts[hc->version].transport_conn_reschedule_callback (hc); - if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED) - { - if (!svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_transport_closing_notify (&hc->connection); - } return 0; } -int -http_ts_builtin_tx_callback (session_t *ts) +static void +http_ts_closed_callback (session_t *ts) { + http_conn_handle_t hc_handle; http_conn_t *hc; - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - HTTP_DBG (1, "transport connection reschedule"); - transport_connection_reschedule (&hc->connection); + hc_handle.as_u32 = ts->opaque; + hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index); - return 0; + http_disconnect_transport (hc); + hc->state = HTTP_CONN_STATE_CLOSED; } static void http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf) { http_conn_t *hc; + http_conn_handle_t hc_handle; if (ntf == SESSION_CLEANUP_TRANSPORT) return; - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - - HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index, ts->opaque); + hc_handle.as_u32 = ts->opaque; + hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index); - vec_free (hc->req.rx_buf); - vec_free (hc->req.headers); + HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index, + hc_handle.conn_index); - http_buffer_free (&hc->req.tx_buf); - - if (hc->pending_timer == 0) + if (!(hc->flags & HTTP_CONN_F_PENDING_TIMER)) http_conn_timer_stop (hc); - session_transport_delete_notify (&hc->connection); + /* in case nothing received on cleartext connection */ + if (PREDICT_FALSE (hc->version != HTTP_VERSION_NA)) + http_vfts[hc->version].conn_cleanup_callback (hc); - if (!hc->is_server) + if (!(hc->flags & HTTP_CONN_F_IS_SERVER)) { vec_free (hc->app_name); vec_free (hc->host); @@ -2253,11 +705,8 @@ http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf) static void http_ts_ho_cleanup_callback (session_t *ts) { - http_conn_t *ho_hc; HTTP_DBG (1, "half open: %x", ts->opaque); - ho_hc = http_ho_conn_get (ts->opaque); - session_half_open_delete_notify (&ho_hc->connection); - http_ho_conn_free (ho_hc); + http_ho_try_free (ts->opaque); } int @@ -2278,6 +727,7 @@ static session_cb_vft_t http_app_cb_vft = { 
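/* For reference: the cleartext version split above keys off the fixed
 * HTTP/2 client connection preface (RFC 9113, section 3.4). A minimal
 * sketch of that check in plain C follows; the helper name is
 * illustrative only and is not part of the plugin or of this change. */
#include <string.h>

static const char h2_client_preface[] = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n";

static int
is_http2_client_preface (const unsigned char *buf, size_t len)
{
  /* too short to classify yet */
  if (len < sizeof (h2_client_preface) - 1)
    return 0;
  return memcmp (buf, h2_client_preface, sizeof (h2_client_preface) - 1) == 0;
}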
.session_disconnect_callback = http_ts_disconnect_callback, .session_connected_callback = http_ts_connected_callback, .session_reset_callback = http_ts_reset_callback, + .session_transport_closed_callback = http_ts_closed_callback, .session_cleanup_callback = http_ts_cleanup_callback, .half_open_cleanup_callback = http_ts_ho_cleanup_callback, .add_segment_callback = http_add_segment_callback, @@ -2286,6 +736,10 @@ static session_cb_vft_t http_app_cb_vft = { .builtin_app_tx_callback = http_ts_builtin_tx_callback, }; +/*********************************/ +/* transport proto VFT callbacks */ +/*********************************/ + static clib_error_t * http_transport_enable (vlib_main_t *vm, u8 is_en) { @@ -2295,6 +749,7 @@ http_transport_enable (vlib_main_t *vm, u8 is_en) u64 options[APP_OPTIONS_N_OPTIONS]; http_main_t *hm = &http_main; u32 num_threads, i; + http_engine_vft_t *http_version; if (!is_en) { @@ -2351,6 +806,12 @@ http_transport_enable (vlib_main_t *vm, u8 is_en) http_timers_init (vm, http_conn_timeout_cb, http_conn_invalidate_timer_cb); hm->is_init = 1; + vec_foreach (http_version, http_vfts) + { + if (http_version->enable_callback) + http_version->enable_callback (); + } + return 0; } @@ -2377,9 +838,11 @@ http_transport_connect (transport_endpoint_cfg_t *tep) hc_index = http_ho_conn_alloc (); hc = http_ho_conn_get (hc_index); - hc->h_pa_wrk_index = sep->app_wrk_index; - hc->h_pa_app_api_ctx = sep->opaque; + hc->hc_pa_wrk_index = sep->app_wrk_index; + hc->hc_pa_app_api_ctx = sep->opaque; hc->state = HTTP_CONN_STATE_CONNECTING; + /* TODO: set to HTTP_VERSION_NA in case of TLS */ + hc->version = HTTP_VERSION_1; cargs->api_context = hc_index; ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); @@ -2391,7 +854,12 @@ http_transport_connect (transport_endpoint_cfg_t *tep) hc->timeout = http_cfg->timeout; } - hc->is_server = 0; + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (ext_cfg) + { + HTTP_DBG (1, "app set tls"); + cargs->sep.transport_proto = TRANSPORT_PROTO_TLS; + } if (vec_len (app->name)) hc->app_name = vec_dup (app->name); @@ -2416,7 +884,7 @@ http_transport_connect (transport_endpoint_cfg_t *tep) ho->opaque = sep->opaque; ho->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_HTTP, sep->is_ip4); - hc->h_tc_session_handle = cargs->sh; + hc->hc_tc_session_handle = cargs->sh; hc->c_s_index = ho->session_index; return 0; @@ -2471,19 +939,19 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) } /* Grab transport connection listener and link to http listener */ - lhc->h_tc_session_handle = args->handle; - al = app_listener_get_w_handle (lhc->h_tc_session_handle); + lhc->hc_tc_session_handle = args->handle; + al = app_listener_get_w_handle (lhc->hc_tc_session_handle); ts_listener = app_listener_get_session (al); ts_listener->opaque = lhc_index; /* Grab application listener and link to http listener */ app_listener = listen_session_get (app_listener_index); - lhc->h_pa_wrk_index = sep->app_wrk_index; - lhc->h_pa_session_handle = listen_session_get_handle (app_listener); + lhc->hc_pa_wrk_index = sep->app_wrk_index; + lhc->hc_pa_session_handle = listen_session_get_handle (app_listener); lhc->c_s_index = app_listener_index; lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; - lhc->is_server = 1; + lhc->flags |= HTTP_CONN_F_IS_SERVER; if (vec_len (app->name)) lhc->app_name = vec_dup (app->name); @@ -2502,7 +970,7 @@ http_stop_listen (u32 listener_index) lhc = http_listener_get 
(listener_index); vnet_unlisten_args_t a = { - .handle = lhc->h_tc_session_handle, + .handle = lhc->hc_tc_session_handle, .app_index = http_main.app_index, .wrk_map_index = 0 /* default wrk */ }; @@ -2516,16 +984,22 @@ http_stop_listen (u32 listener_index) } static void -http_transport_close (u32 hc_index, u32 thread_index) +http_transport_close (u32 rh, clib_thread_index_t thread_index) { - session_t *as; http_conn_t *hc; + u32 hc_index; + http_req_handle_t hr_handle; + hr_handle.as_u32 = rh; + + hc_index = http_vfts[hr_handle.version].hc_index_get_by_req_index ( + hr_handle.req_index, thread_index); HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index); hc = http_conn_get_w_thread (hc_index, thread_index); if (hc->state == HTTP_CONN_STATE_CONNECTING) { + HTTP_DBG (1, "in connecting state, close now"); hc->state = HTTP_CONN_STATE_APP_CLOSED; http_disconnect_transport (hc); return; @@ -2535,26 +1009,42 @@ http_transport_close (u32 hc_index, u32 thread_index) HTTP_DBG (1, "nothing to do, already closed"); return; } - as = session_get_from_handle (hc->h_pa_session_handle); - /* Nothing more to send, confirm close */ - if (!svm_fifo_max_dequeue_cons (as->tx_fifo)) - { - session_transport_closed_notify (&hc->connection); - http_disconnect_transport (hc); - } - else + http_vfts[hc->version].app_close_callback (hc, hr_handle.req_index, + thread_index); +} + +static void +http_transport_reset (u32 rh, clib_thread_index_t thread_index) +{ + http_conn_t *hc; + u32 hc_index; + http_req_handle_t hr_handle; + + hr_handle.as_u32 = rh; + hc_index = http_vfts[hr_handle.version].hc_index_get_by_req_index ( + hr_handle.req_index, thread_index); + HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index); + + hc = http_conn_get_w_thread (hc_index, thread_index); + if (hc->state == HTTP_CONN_STATE_CLOSED) { - /* Wait for all data to be written to ts */ - hc->state = HTTP_CONN_STATE_APP_CLOSED; + HTTP_DBG (1, "nothing to do, already closed"); + return; } + + http_vfts[hc->version].app_reset_callback (hc, hr_handle.req_index, + thread_index); } static transport_connection_t * -http_transport_get_connection (u32 hc_index, u32 thread_index) +http_transport_get_connection (u32 rh, clib_thread_index_t thread_index) { - http_conn_t *hc = http_conn_get_w_thread (hc_index, thread_index); - return &hc->connection; + http_req_handle_t hr_handle; + + hr_handle.as_u32 = rh; + return http_vfts[hr_handle.version].req_get_connection (hr_handle.req_index, + thread_index); } static transport_connection_t * @@ -2568,46 +1058,32 @@ static int http_app_tx_callback (void *session, transport_send_params_t *sp) { session_t *as = (session_t *) session; - u32 max_burst_sz, sent; + u32 max_burst_sz, sent, hc_index; http_conn_t *hc; + http_req_handle_t hr_handle; + hr_handle.as_u32 = as->connection_index; - HTTP_DBG (1, "hc [%u]%x", as->thread_index, as->connection_index); + hc_index = http_vfts[hr_handle.version].hc_index_get_by_req_index ( + hr_handle.req_index, as->thread_index); + HTTP_DBG (1, "hc [%u]%x", hc_index, as->connection_index); - hc = http_conn_get_w_thread (as->connection_index, as->thread_index); + hc = http_conn_get_w_thread (hc_index, as->thread_index); - max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS; - sp->max_burst_size = max_burst_sz; - - if (!http_req_state_is_tx_valid (hc)) + if (hc->state == HTTP_CONN_STATE_CLOSED) { - /* Sometimes the server apps can send the response earlier - * than expected (e.g when rejecting a bad request)*/ - if (hc->req.state == 
HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA && - hc->is_server) - { - svm_fifo_dequeue_drop_all (as->rx_fifo); - hc->req.state = HTTP_REQ_STATE_WAIT_APP_REPLY; - } - else - { - clib_warning ("hc [%u]%x invalid tx state: http req state " - "'%U', session state '%U'", - as->thread_index, as->connection_index, - format_http_req_state, hc->req.state, - format_http_conn_state, hc); - svm_fifo_dequeue_drop_all (as->tx_fifo); - return 0; - } + HTTP_DBG (1, "conn closed"); + svm_fifo_dequeue_drop_all (as->tx_fifo); + return 0; } - HTTP_DBG (1, "run state machine"); - http_req_run_state_machine (hc, sp, 1); + max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS; + sp->max_burst_size = max_burst_sz; + + http_vfts[hc->version].app_tx_callback (hc, hr_handle.req_index, sp); if (hc->state == HTTP_CONN_STATE_APP_CLOSED) - { - if (!svm_fifo_max_dequeue_cons (as->tx_fifo)) - http_disconnect_transport (hc); - } + http_vfts[hc->version].app_close_callback (hc, hr_handle.req_index, + as->thread_index); sent = max_burst_sz - sp->max_burst_size; @@ -2617,38 +1093,36 @@ http_app_tx_callback (void *session, transport_send_params_t *sp) static int http_app_rx_evt_cb (transport_connection_t *tc) { - http_conn_t *hc = (http_conn_t *) tc; - HTTP_DBG (1, "hc [%u]%x", vlib_get_thread_index (), hc->h_hc_index); + http_req_t *req = (http_req_t *) tc; + http_conn_t *hc; + http_req_handle_t hr_handle; + + HTTP_DBG (1, "hc [%u]%x", vlib_get_thread_index (), req->hr_hc_index); - if (hc->req.state == HTTP_REQ_STATE_TUNNEL) - http_req_state_tunnel_rx (hc, 0); + hr_handle.as_u32 = req->hr_req_handle; + hc = http_conn_get_w_thread (req->hr_hc_index, req->c_thread_index); + http_vfts[hr_handle.version].app_rx_evt_callback (hc, hr_handle.req_index, + req->c_thread_index); return 0; } static void -http_transport_get_endpoint (u32 hc_index, u32 thread_index, +http_transport_get_endpoint (u32 rh, clib_thread_index_t thread_index, transport_endpoint_t *tep, u8 is_lcl) { - http_conn_t *hc = http_conn_get_w_thread (hc_index, thread_index); - session_t *ts; - - ts = session_get_from_handle (hc->h_tc_session_handle); - session_get_endpoint (ts, tep, is_lcl); -} - -static u8 * -format_http_connection (u8 *s, va_list *args) -{ - http_conn_t *hc = va_arg (*args, http_conn_t *); + http_conn_t *hc; session_t *ts; + u32 hc_index; + http_req_handle_t hr_handle; - ts = session_get_from_handle (hc->h_tc_session_handle); - s = format (s, "[%d:%d][H] app_wrk %u ts %d:%d", hc->c_thread_index, - hc->c_s_index, hc->h_pa_wrk_index, ts->thread_index, - ts->session_index); + hr_handle.as_u32 = rh; + hc_index = http_vfts[hr_handle.version].hc_index_get_by_req_index ( + hr_handle.req_index, thread_index); + hc = http_conn_get_w_thread (hc_index, thread_index); - return s; + ts = session_get_from_handle (hc->hc_tc_session_handle); + session_get_endpoint (ts, tep, is_lcl); } static u8 * @@ -2658,10 +1132,10 @@ format_http_listener (u8 *s, va_list *args) app_listener_t *al; session_t *lts; - al = app_listener_get_w_handle (lhc->h_tc_session_handle); + al = app_listener_get_w_handle (lhc->hc_tc_session_handle); lts = app_listener_get_session (al); s = format (s, "[%d:%d][H] app_wrk %u ts %d:%d", lhc->c_thread_index, - lhc->c_s_index, lhc->h_pa_wrk_index, lts->thread_index, + lhc->c_s_index, lhc->hc_pa_wrk_index, lts->thread_index, lts->session_index); return s; @@ -2670,22 +1144,18 @@ format_http_listener (u8 *s, va_list *args) static u8 * format_http_transport_connection (u8 *s, va_list *args) { - u32 tc_index = va_arg (*args, u32); - u32 thread_index = 
va_arg (*args, u32); + http_req_handle_t rh = va_arg (*args, http_req_handle_t); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); + u32 hc_index; http_conn_t *hc; - hc = http_conn_get_w_thread (tc_index, thread_index); - - s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http_connection, hc); - if (verbose) - { - s = - format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc); - if (verbose > 1) - s = format (s, "\n"); - } + hc_index = http_vfts[rh.version].hc_index_get_by_req_index (rh.req_index, + thread_index); + hc = http_conn_get_w_thread (hc_index, thread_index); + s = format (s, "%U", http_vfts[rh.version].format_req, rh.req_index, + thread_index, hc, verbose); return s; } @@ -2714,10 +1184,10 @@ format_http_transport_half_open (u8 *s, va_list *args) session_t *tcp_ho; ho_hc = http_ho_conn_get (ho_index); - tcp_ho = session_get_from_handle (ho_hc->h_tc_session_handle); + tcp_ho = session_get_from_handle (ho_hc->hc_tc_session_handle); s = format (s, "[%d:%d][H] half-open app_wrk %u ts %d:%d", - ho_hc->c_thread_index, ho_hc->c_s_index, ho_hc->h_pa_wrk_index, + ho_hc->c_thread_index, ho_hc->c_s_index, ho_hc->hc_pa_wrk_index, tcp_ho->thread_index, tcp_ho->session_index); return s; } @@ -2739,7 +1209,13 @@ http_transport_cleanup_ho (u32 ho_hc_index) HTTP_DBG (1, "half open: %x", ho_hc_index); ho_hc = http_ho_conn_get (ho_hc_index); - session_cleanup_half_open (ho_hc->h_tc_session_handle); + if (ho_hc->hc_tc_session_handle == SESSION_INVALID_HANDLE) + { + HTTP_DBG (1, "already pending cleanup"); + ho_hc->flags |= HTTP_CONN_F_NO_APP_SESSION; + return; + } + session_cleanup_half_open (ho_hc->hc_tc_session_handle); http_ho_conn_free (ho_hc); } @@ -2749,6 +1225,7 @@ static const transport_proto_vft_t http_proto = { .start_listen = http_start_listen, .stop_listen = http_stop_listen, .close = http_transport_close, + .reset = http_transport_reset, .cleanup_ho = http_transport_cleanup_ho, .custom_tx = http_app_tx_callback, .app_rx_evt = http_app_rx_evt_cb, @@ -2807,6 +1284,28 @@ http_transport_init (vlib_main_t *vm) VLIB_INIT_FUNCTION (http_transport_init); +static uword +unformat_http_version_cfg (unformat_input_t *input, va_list *va) +{ + http_engine_vft_t *http_version; + unformat_input_t sub_input; + int found = 0; + + vec_foreach (http_version, http_vfts) + { + if (!unformat (input, http_version->name)) + continue; + + if (http_version->unformat_cfg_callback && + unformat (input, "%U", unformat_vlib_cli_sub_input, &sub_input)) + { + if (http_version->unformat_cfg_callback (&sub_input)) + found = 1; + } + } + return found; +} + static clib_error_t * http_config_fn (vlib_main_t *vm, unformat_input_t *input) { @@ -2835,6 +1334,8 @@ http_config_fn (vlib_main_t *vm, unformat_input_t *input) if (hm->fifo_size != mem_sz) clib_warning ("invalid fifo size %lu", mem_sz); } + else if (unformat (input, "%U", unformat_http_version_cfg)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h index d61ac0b08c7..434ff965b6a 100644 --- a/src/plugins/http/http.h +++ b/src/plugins/http/http.h @@ -17,15 +17,9 @@ #define SRC_PLUGINS_HTTP_HTTP_H_ #include <ctype.h> - #include <vnet/plugin/plugin.h> -#include <vpp/app/version.h> - -#include <vppinfra/time_range.h> - -#include <vnet/session/application_interface.h> -#include <vnet/session/application.h> -#include <http/http_buffer.h> +#include <vnet/ip/format.h> +#include <vnet/ip/ip46_address.h> #define 
HTTP_DEBUG 0 @@ -49,20 +43,6 @@ typedef struct transport_endpt_cfg_http http_udp_tunnel_mode_t udp_tunnel_mode; /**< connect-udp mode */ } transport_endpt_cfg_http_t; -typedef struct http_conn_id_ -{ - union - { - session_handle_t app_session_handle; - u32 parent_app_api_ctx; - }; - session_handle_t tc_session_handle; - u32 parent_app_wrk_index; -} http_conn_id_t; - -STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN, - "ctx id must be less than TRANSPORT_CONN_ID_LEN"); - typedef struct { char *base; @@ -71,45 +51,12 @@ typedef struct #define http_token_lit(s) (s), sizeof (s) - 1 -#define foreach_http_conn_state \ - _ (LISTEN, "listen") \ - _ (CONNECTING, "connecting") \ - _ (ESTABLISHED, "established") \ - _ (TRANSPORT_CLOSED, "transport-closed") \ - _ (APP_CLOSED, "app-closed") \ - _ (CLOSED, "closed") - -typedef enum http_conn_state_ -{ -#define _(s, str) HTTP_CONN_STATE_##s, - foreach_http_conn_state -#undef _ -} http_conn_state_t; - -#define foreach_http_req_state \ - _ (0, IDLE, "idle") \ - _ (1, WAIT_APP_METHOD, "wait app method") \ - _ (2, WAIT_TRANSPORT_REPLY, "wait transport reply") \ - _ (3, TRANSPORT_IO_MORE_DATA, "transport io more data") \ - _ (4, WAIT_TRANSPORT_METHOD, "wait transport method") \ - _ (5, WAIT_APP_REPLY, "wait app reply") \ - _ (6, APP_IO_MORE_DATA, "app io more data") \ - _ (7, TUNNEL, "tunnel") \ - _ (8, UDP_TUNNEL, "udp tunnel") - -typedef enum http_req_state_ -{ -#define _(n, s, str) HTTP_REQ_STATE_##s = n, - foreach_http_req_state -#undef _ - HTTP_REQ_N_STATES -} http_req_state_t; - typedef enum http_req_method_ { HTTP_REQ_GET = 0, HTTP_REQ_POST, HTTP_REQ_CONNECT, + HTTP_REQ_UNKNOWN, /* for internal use */ } http_req_method_t; typedef enum http_msg_type_ @@ -118,14 +65,6 @@ typedef enum http_msg_type_ HTTP_MSG_REPLY } http_msg_type_t; -typedef enum http_target_form_ -{ - HTTP_TARGET_ORIGIN_FORM, - HTTP_TARGET_ABSOLUTE_FORM, - HTTP_TARGET_AUTHORITY_FORM, - HTTP_TARGET_ASTERISK_FORM -} http_target_form_t; - #define foreach_http_content_type \ _ (APP_7Z, ".7z", "application/x-7z-compressed") \ _ (APP_DOC, ".doc", "application/msword") \ @@ -271,96 +210,108 @@ typedef enum http_status_code_ } http_status_code_t; #define foreach_http_header_name \ - _ (ACCEPT, "Accept") \ - _ (ACCEPT_CHARSET, "Accept-Charset") \ - _ (ACCEPT_ENCODING, "Accept-Encoding") \ - _ (ACCEPT_LANGUAGE, "Accept-Language") \ - _ (ACCEPT_RANGES, "Accept-Ranges") \ - _ (ACCESS_CONTROL_ALLOW_CREDENTIALS, "Access-Control-Allow-Credentials") \ - _ (ACCESS_CONTROL_ALLOW_HEADERS, "Access-Control-Allow-Headers") \ - _ (ACCESS_CONTROL_ALLOW_METHODS, "Access-Control-Allow-Methods") \ - _ (ACCESS_CONTROL_ALLOW_ORIGIN, "Access-Control-Allow-Origin") \ - _ (ACCESS_CONTROL_EXPOSE_HEADERS, "Access-Control-Expose-Headers") \ - _ (ACCESS_CONTROL_MAX_AGE, "Access-Control-Max-Age") \ - _ (ACCESS_CONTROL_REQUEST_HEADERS, "Access-Control-Request-Headers") \ - _ (ACCESS_CONTROL_REQUEST_METHOD, "Access-Control-Request-Method") \ - _ (AGE, "Age") \ - _ (ALLOW, "Allow") \ - _ (ALPN, "ALPN") \ - _ (ALT_SVC, "Alt-Svc") \ - _ (ALT_USED, "Alt-Used") \ - _ (ALTERNATES, "Alternates") \ - _ (AUTHENTICATION_CONTROL, "Authentication-Control") \ - _ (AUTHENTICATION_INFO, "Authentication-Info") \ - _ (AUTHORIZATION, "Authorization") \ - _ (CACHE_CONTROL, "Cache-Control") \ - _ (CACHE_STATUS, "Cache-Status") \ - _ (CAPSULE_PROTOCOL, "Capsule-Protocol") \ - _ (CDN_CACHE_CONTROL, "CDN-Cache-Control") \ - _ (CDN_LOOP, "CDN-Loop") \ - _ (CLIENT_CERT, "Client-Cert") \ - _ (CLIENT_CERT_CHAIN, "Client-Cert-Chain") \ - 
_ (CLOSE, "Close") \ - _ (CONNECTION, "Connection") \ - _ (CONTENT_DIGEST, "Content-Digest") \ - _ (CONTENT_DISPOSITION, "Content-Disposition") \ - _ (CONTENT_ENCODING, "Content-Encoding") \ - _ (CONTENT_LANGUAGE, "Content-Language") \ - _ (CONTENT_LENGTH, "Content-Length") \ - _ (CONTENT_LOCATION, "Content-Location") \ - _ (CONTENT_RANGE, "Content-Range") \ - _ (CONTENT_TYPE, "Content-Type") \ - _ (COOKIE, "Cookie") \ - _ (DATE, "Date") \ - _ (DIGEST, "Digest") \ - _ (DPOP, "DPoP") \ - _ (DPOP_NONCE, "DPoP-Nonce") \ - _ (EARLY_DATA, "Early-Data") \ - _ (ETAG, "ETag") \ - _ (EXPECT, "Expect") \ - _ (EXPIRES, "Expires") \ - _ (FORWARDED, "Forwarded") \ - _ (FROM, "From") \ - _ (HOST, "Host") \ - _ (IF_MATCH, "If-Match") \ - _ (IF_MODIFIED_SINCE, "If-Modified-Since") \ - _ (IF_NONE_MATCH, "If-None-Match") \ - _ (IF_RANGE, "If-Range") \ - _ (IF_UNMODIFIED_SINCE, "If-Unmodified-Since") \ - _ (KEEP_ALIVE, "Keep-Alive") \ - _ (LAST_MODIFIED, "Last-Modified") \ - _ (LINK, "Link") \ - _ (LOCATION, "Location") \ - _ (MAX_FORWARDS, "Max-Forwards") \ - _ (ORIGIN, "Origin") \ - _ (PRIORITY, "Priority") \ - _ (PROXY_AUTHENTICATE, "Proxy-Authenticate") \ - _ (PROXY_AUTHENTICATION_INFO, "Proxy-Authentication-Info") \ - _ (PROXY_AUTHORIZATION, "Proxy-Authorization") \ - _ (PROXY_STATUS, "Proxy-Status") \ - _ (RANGE, "Range") \ - _ (REFERER, "Referer") \ - _ (REPR_DIGEST, "Repr-Digest") \ - _ (SET_COOKIE, "Set-Cookie") \ - _ (SIGNATURE, "Signature") \ - _ (SIGNATURE_INPUT, "Signature-Input") \ - _ (STRICT_TRANSPORT_SECURITY, "Strict-Transport-Security") \ - _ (RETRY_AFTER, "Retry-After") \ - _ (SERVER, "Server") \ - _ (TE, "TE") \ - _ (TRAILER, "Trailer") \ - _ (TRANSFER_ENCODING, "Transfer-Encoding") \ - _ (UPGRADE, "Upgrade") \ - _ (USER_AGENT, "User-Agent") \ - _ (VARY, "Vary") \ - _ (VIA, "Via") \ - _ (WANT_CONTENT_DIGEST, "Want-Content-Digest") \ - _ (WANT_REPR_DIGEST, "Want-Repr-Digest") \ - _ (WWW_AUTHENTICATE, "WWW-Authenticate") + _ (ACCEPT_CHARSET, "Accept-Charset", "accept-charset", 15) \ + _ (ACCEPT_ENCODING, "Accept-Encoding", "accept-encoding", 16) \ + _ (ACCEPT_LANGUAGE, "Accept-Language", "accept-language", 17) \ + _ (ACCEPT_RANGES, "Accept-Ranges", "accept-ranges", 18) \ + _ (ACCEPT, "Accept", "accept", 19) \ + _ (ACCESS_CONTROL_ALLOW_CREDENTIALS, "Access-Control-Allow-Credentials", \ + "access-control-allow-credentials", 0) \ + _ (ACCESS_CONTROL_ALLOW_HEADERS, "Access-Control-Allow-Headers", \ + "access-control-allow-headers", 0) \ + _ (ACCESS_CONTROL_ALLOW_METHODS, "Access-Control-Allow-Methods", \ + "access-control-allow-methods", 0) \ + _ (ACCESS_CONTROL_ALLOW_ORIGIN, "Access-Control-Allow-Origin", \ + "access-control-allow-origin", 20) \ + _ (ACCESS_CONTROL_EXPOSE_HEADERS, "Access-Control-Expose-Headers", \ + "access-control-expose-headers", 0) \ + _ (ACCESS_CONTROL_MAX_AGE, "Access-Control-Max-Age", \ + "access-control-max-age", 0) \ + _ (ACCESS_CONTROL_REQUEST_HEADERS, "Access-Control-Request-Headers", \ + "access-control-request-headers", 0) \ + _ (ACCESS_CONTROL_REQUEST_METHOD, "Access-Control-Request-Method", \ + "access-control-request-method", 0) \ + _ (AGE, "Age", "age", 21) \ + _ (ALLOW, "Allow", "allow", 22) \ + _ (ALPN, "ALPN", "alpn", 0) \ + _ (ALT_SVC, "Alt-Svc", "alt-svc", 0) \ + _ (ALT_USED, "Alt-Used", "alt-used", 0) \ + _ (ALTERNATES, "Alternates", "alternates", 0) \ + _ (AUTHENTICATION_CONTROL, "Authentication-Control", \ + "authentication-control", 0) \ + _ (AUTHENTICATION_INFO, "Authentication-Info", "authentication-info", 0) \ + _ (AUTHORIZATION, "Authorization", 
"authorization", 23) \ + _ (CACHE_CONTROL, "Cache-Control", "cache-control", 24) \ + _ (CACHE_STATUS, "Cache-Status", "cache-status", 0) \ + _ (CAPSULE_PROTOCOL, "Capsule-Protocol", "capsule-protocol", 0) \ + _ (CDN_CACHE_CONTROL, "CDN-Cache-Control", "cdn-cache-control", 0) \ + _ (CDN_LOOP, "CDN-Loop", "cdn-loop", 0) \ + _ (CLIENT_CERT, "Client-Cert", "client-cert", 0) \ + _ (CLIENT_CERT_CHAIN, "Client-Cert-Chain", "client-cert-chain", 0) \ + _ (CLOSE, "Close", "close", 0) \ + _ (CONNECTION, "Connection", "connection", 0) \ + _ (CONTENT_DIGEST, "Content-Digest", "content-digest", 0) \ + _ (CONTENT_DISPOSITION, "Content-Disposition", "content-disposition", 25) \ + _ (CONTENT_ENCODING, "Content-Encoding", "content-encoding", 26) \ + _ (CONTENT_LANGUAGE, "Content-Language", "content-language", 27) \ + _ (CONTENT_LENGTH, "Content-Length", "content-length", 28) \ + _ (CONTENT_LOCATION, "Content-Location", "content-location", 29) \ + _ (CONTENT_RANGE, "Content-Range", "content-range", 30) \ + _ (CONTENT_TYPE, "Content-Type", "content-type", 31) \ + _ (COOKIE, "Cookie", "cookie", 32) \ + _ (DATE, "Date", "date", 33) \ + _ (DIGEST, "Digest", "digest", 0) \ + _ (DPOP, "DPoP", "dpop", 0) \ + _ (DPOP_NONCE, "DPoP-Nonce", "dpop-nonce", 0) \ + _ (EARLY_DATA, "Early-Data", "early-data", 0) \ + _ (ETAG, "ETag", "etag", 34) \ + _ (EXPECT, "Expect", "expect", 35) \ + _ (EXPIRES, "Expires", "expires", 36) \ + _ (FORWARDED, "Forwarded", "forwarded", 0) \ + _ (FROM, "From", "from", 37) \ + _ (HOST, "Host", "host", 38) \ + _ (IF_MATCH, "If-Match", "if-match", 39) \ + _ (IF_MODIFIED_SINCE, "If-Modified-Since", "if-modified-since", 40) \ + _ (IF_NONE_MATCH, "If-None-Match", "if-none-match", 41) \ + _ (IF_RANGE, "If-Range", "if-range", 42) \ + _ (IF_UNMODIFIED_SINCE, "If-Unmodified-Since", "if-unmodified-since", 43) \ + _ (KEEP_ALIVE, "Keep-Alive", "keep-alive", 0) \ + _ (LAST_MODIFIED, "Last-Modified", "last-modified", 44) \ + _ (LINK, "Link", "link", 45) \ + _ (LOCATION, "Location", "location", 46) \ + _ (MAX_FORWARDS, "Max-Forwards", "max-forwards", 47) \ + _ (ORIGIN, "Origin", "origin", 0) \ + _ (PRIORITY, "Priority", "priority", 0) \ + _ (PROXY_AUTHENTICATE, "Proxy-Authenticate", "proxy-authenticate", 48) \ + _ (PROXY_AUTHENTICATION_INFO, "Proxy-Authentication-Info", \ + "proxy-authentication-info", 0) \ + _ (PROXY_AUTHORIZATION, "Proxy-Authorization", "proxy-authorization", 49) \ + _ (PROXY_STATUS, "Proxy-Status", "proxy-status", 0) \ + _ (RANGE, "Range", "range", 50) \ + _ (REFERER, "Referer", "referer", 51) \ + _ (REFRESH, "Refresh", "refresh", 52) \ + _ (REPR_DIGEST, "Repr-Digest", "repr-digest", 0) \ + _ (RETRY_AFTER, "Retry-After", "retry-after", 53) \ + _ (SERVER, "Server", "server", 54) \ + _ (SET_COOKIE, "Set-Cookie", "set-cookie", 55) \ + _ (SIGNATURE, "Signature", "signature", 0) \ + _ (SIGNATURE_INPUT, "Signature-Input", "signature-input", 0) \ + _ (STRICT_TRANSPORT_SECURITY, "Strict-Transport-Security", \ + "strict-transport-security", 56) \ + _ (TE, "TE", "te", 0) \ + _ (TRAILER, "Trailer", "trailer", 0) \ + _ (TRANSFER_ENCODING, "Transfer-Encoding", "transfer-encoding", 57) \ + _ (UPGRADE, "Upgrade", "upgrade", 0) \ + _ (USER_AGENT, "User-Agent", "user-agent", 58) \ + _ (VARY, "Vary", "vary", 59) \ + _ (VIA, "Via", "via", 60) \ + _ (WANT_CONTENT_DIGEST, "Want-Content-Digest", "want-content-digest", 0) \ + _ (WANT_REPR_DIGEST, "Want-Repr-Digest", "want-repr-digest", 0) \ + _ (WWW_AUTHENTICATE, "WWW-Authenticate", "www-authenticate", 61) typedef enum http_header_name_ { -#define _(sym, str) 
HTTP_HEADER_##sym, +#define _(sym, str_canonical, str_lower, hpack_index) HTTP_HEADER_##sym, foreach_http_header_name #undef _ } http_header_name_t; @@ -399,6 +350,7 @@ typedef enum http_url_scheme_ { HTTP_URL_SCHEME_HTTP, HTTP_URL_SCHEME_HTTPS, + HTTP_URL_SCHEME_UNKNOWN, /* for internal use */ } http_url_scheme_t; typedef struct http_msg_data_ @@ -432,118 +384,6 @@ typedef struct http_msg_ http_msg_data_t data; } http_msg_t; -typedef struct http_req_ -{ - http_req_state_t state; /* state-machine state */ - - http_buffer_t tx_buf; /* message body from app to be sent */ - - /* - * for parsing of incoming message from transport - */ - u8 *rx_buf; /* this should hold at least control data */ - u32 rx_buf_offset; /* current offset during parsing */ - u32 control_data_len; /* start line + headers + empty line */ - - union - { - u64 to_recv; /* remaining bytes of body to receive from transport */ - u64 to_skip; /* remaining bytes of capsule to skip */ - }; - - u8 is_tunnel; - - /* - * parsed metadata for app - */ - union - { - http_status_code_t status_code; - http_req_method_t method; - }; - - http_target_form_t target_form; - http_url_scheme_t scheme; - u32 target_authority_offset; - u32 target_authority_len; - u32 target_path_offset; - u32 target_path_len; - u32 target_query_offset; - u32 target_query_len; - - u32 headers_offset; - u32 headers_len; - - u32 body_offset; - u64 body_len; - - http_field_line_t *headers; - uword content_len_header_index; - uword connection_header_index; - uword upgrade_header_index; - uword host_header_index; - - http_upgrade_proto_t upgrade_proto; -} http_req_t; - -typedef struct http_tc_ -{ - union - { - transport_connection_t connection; - http_conn_id_t c_http_conn_id; - }; -#define h_tc_session_handle c_http_conn_id.tc_session_handle -#define h_pa_wrk_index c_http_conn_id.parent_app_wrk_index -#define h_pa_session_handle c_http_conn_id.app_session_handle -#define h_pa_app_api_ctx c_http_conn_id.parent_app_api_ctx -#define h_hc_index connection.c_index - - http_conn_state_t state; - u32 timer_handle; - u32 timeout; - u8 pending_timer; - u8 *app_name; - u8 *host; - u8 is_server; - http_udp_tunnel_mode_t udp_tunnel_mode; - - http_req_t req; -} http_conn_t; - -typedef struct http_worker_ -{ - http_conn_t *conn_pool; -} http_worker_t; - -typedef struct http_main_ -{ - http_worker_t *wrk; - http_conn_t *listener_pool; - http_conn_t *ho_conn_pool; - u32 app_index; - - u8 **rx_bufs; - u8 **tx_bufs; - u8 **app_header_lists; - - clib_timebase_t timebase; - - u16 *sc_by_u16; - /* - * Runtime config - */ - u8 debug_level; - u8 is_init; - - /* - * Config - */ - u64 first_seg_size; - u64 add_seg_size; - u32 fifo_size; -} http_main_t; - always_inline u8 * format_http_bytes (u8 *s, va_list *va) { @@ -669,7 +509,8 @@ http_percent_decode (u8 *src, u32 len) } /** - * Remove dot segments from path (RFC3986 section 5.2.4) + * Sanitize HTTP path by squashing repeating slashes and removing + * dot segments from path (RFC3986 section 5.2.4) * * @param path Path to sanitize. * @@ -678,18 +519,18 @@ http_percent_decode (u8 *src, u32 len) * The caller is always responsible to free the returned vector. */ always_inline u8 * -http_path_remove_dot_segments (u8 *path) +http_path_sanitize (u8 *path) { u32 *segments = 0, *segments_len = 0, segment_len; u8 *new_path = 0; int i, ii; - if (!path) + if (!path || vec_len (path) == 0) return vec_new (u8, 0); segments = vec_new (u32, 1); /* first segment */ - segments[0] = 0; + segments[0] = (path[0] == '/' ? 
1 : 0); /* find all segments */ for (i = 1; i < (vec_len (path) - 1); i++) { @@ -704,7 +545,8 @@ http_path_remove_dot_segments (u8 *path) for (i = 0; i < vec_len (segments_len); i++) { segment_len = segments[i + 1] - segments[i]; - if (segment_len == 2 && path[segments[i]] == '.') + /* aside from dots, skip empty segments (double slashes) */ + if ((segment_len == 2 && path[segments[i]] == '.') || segment_len == 1) segment_len = 0; else if (segment_len == 3 && path[segments[i]] == '.' && path[segments[i] + 1] == '.') @@ -736,124 +578,6 @@ http_path_remove_dot_segments (u8 *path) return new_path; } -always_inline int -_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start, - u32 *field_name_len) -{ - u32 name_len = 0; - u8 *p; - - static uword tchar[4] = { - /* !#$%'*+-.0123456789 */ - 0x03ff6cba00000000, - /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */ - 0x57ffffffc7fffffe, - 0x0000000000000000, - 0x0000000000000000, - }; - - p = *pos; - - *field_name_start = p; - while (p != end) - { - if (clib_bitmap_get_no_check (tchar, *p)) - { - name_len++; - p++; - } - else if (*p == ':') - { - if (name_len == 0) - { - clib_warning ("empty field name"); - return -1; - } - *field_name_len = name_len; - p++; - *pos = p; - return 0; - } - else - { - clib_warning ("invalid character %d", *p); - return -1; - } - } - clib_warning ("field name end not found"); - return -1; -} - -always_inline int -_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start, - u32 *field_value_len) -{ - u32 value_len = 0; - u8 *p; - - p = *pos; - - /* skip leading whitespace */ - while (1) - { - if (p == end) - { - clib_warning ("field value not found"); - return -1; - } - else if (*p != ' ' && *p != '\t') - { - break; - } - p++; - } - - *field_value_start = p; - while (p != end) - { - if (*p == '\r') - { - if ((end - p) < 1) - { - clib_warning ("incorrect field line end"); - return -1; - } - p++; - if (*p == '\n') - { - if (value_len == 0) - { - clib_warning ("empty field value"); - return -1; - } - p++; - *pos = p; - /* skip trailing whitespace */ - p = *field_value_start + value_len - 1; - while (*p == ' ' || *p == '\t') - { - p--; - value_len--; - } - *field_value_len = value_len; - return 0; - } - clib_warning ("CR without LF"); - return -1; - } - if (*p < ' ' && *p != '\t') - { - clib_warning ("invalid character %d", *p); - return -1; - } - p++; - value_len++; - } - - clib_warning ("field value end not found"); - return -1; -} - typedef struct { http_token_t name; @@ -873,6 +597,16 @@ typedef struct .values = 0, .value_by_name = 0, .buf = 0, .concatenated_values = 0, \ } +/** + * Case-sensitive comparison of two tokens. + * + * @param actual Pointer to the first token. + * @param actual_len Length of the first token. + * @param expected Pointer to the second token. + * @param expected_len Length of the second token. + * + * @return @c 1 if tokens are same, @c 0 otherwise. + */ always_inline u8 http_token_is (const char *actual, uword actual_len, const char *expected, uword expected_len) @@ -903,6 +637,16 @@ http_tolower_word (uword x) return (x | y); } +/** + * Case-insensitive comparison of two tokens. + * + * @param actual Pointer to the first token. + * @param actual_len Length of the first token. + * @param expected Pointer to the second token. + * @param expected_len Length of the second token. + * + * @return @c 1 if tokens are same, @c 0 otherwise. 
+ */ always_inline u8 http_token_is_case (const char *actual, uword actual_len, const char *expected, uword expected_len) @@ -934,6 +678,16 @@ http_token_is_case (const char *actual, uword actual_len, const char *expected, return 1; } +/** + * Check if there is occurrence of token in another token. + * + * @param haystack Pointer to the token being searched. + * @param haystack_len Length of the token being searched. + * @param needle The token to search for. + * @param needle_len Length of the token to search for. + * + * @return @c 1 if in case of success, @c 0 otherwise. + */ always_inline u8 http_token_contains (const char *haystack, uword haystack_len, const char *needle, uword needle_len) @@ -1158,6 +912,13 @@ typedef struct /* Use high bit of header name length as custom header name bit. */ #define HTTP_CUSTOM_HEADER_NAME_BIT (1 << 31) +/** + * Initialize headers list context. + * + * @param ctx Headers list context. + * @param buf Buffer, which store headers list, provided by app. + * @param len Length of headers list buffer. + */ always_inline void http_init_headers_ctx (http_headers_ctx_t *ctx, u8 *buf, u32 len) { @@ -1166,30 +927,53 @@ http_init_headers_ctx (http_headers_ctx_t *ctx, u8 *buf, u32 len) ctx->buf = buf; } -always_inline void +/** + * Add header with predefined name to the headers list. + * + * @param ctx Headers list context. + * @param name Header name ID (see @ref http_header_name_t). + * @param value Header value pointer. + * @param value_len Header value length. + * + * @return @c 0 if in case of success, @c -1 otherwise. + */ +always_inline int http_add_header (http_headers_ctx_t *ctx, http_header_name_t name, const char *value, uword value_len) { http_app_header_t *header; - ASSERT ((ctx->tail_offset + sizeof (http_app_header_t) + value_len) < - ctx->len); + if ((ctx->tail_offset + sizeof (http_app_header_t) + value_len) > ctx->len) + return -1; header = (http_app_header_t *) (ctx->buf + ctx->tail_offset); header->name = (u32) name; header->value.len = (u32) value_len; clib_memcpy (header->value.token, (u8 *) value, value_len); ctx->tail_offset += sizeof (http_app_header_t) + value_len; + return 0; } -always_inline void +/** + * Add header with custom name to the headers list. + * + * @param ctx Headers list context. + * @param name Header name pointer. + * @param name_len Header name length. + * @param value Header value pointer. + * @param value_len Header value length. + * + * @return @c 0 if in case of success, @c -1 otherwise. + */ +always_inline int http_add_custom_header (http_headers_ctx_t *ctx, const char *name, uword name_len, const char *value, uword value_len) { http_custom_token_t *token; - ASSERT ((ctx->tail_offset + 2 * sizeof (http_custom_token_t) + name_len + - value_len) < ctx->len); + if ((ctx->tail_offset + 2 * sizeof (http_custom_token_t) + name_len + + value_len) > ctx->len) + return -1; /* name */ token = (http_custom_token_t *) (ctx->buf + ctx->tail_offset); @@ -1202,6 +986,18 @@ http_add_custom_header (http_headers_ctx_t *ctx, const char *name, token->len = (u32) value_len; clib_memcpy (token->token, (u8 *) value, token->len); ctx->tail_offset += sizeof (http_custom_token_t) + value_len; + return 0; +} + +/** + * Truncate the header list + * + * @param ctx Headers list context. + */ +always_inline void +http_truncate_headers_list (http_headers_ctx_t *ctx) +{ + ctx->tail_offset = 0; } typedef enum http_uri_host_type_ @@ -1491,6 +1287,15 @@ http_parse_authority (u8 *authority, u32 authority_len, return token_start == end ? 
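/* For reference: a minimal usage sketch of the headers-list API documented
 * above, assuming <http/http.h> is included. With this change
 * http_add_header()/http_add_custom_header() return -1 instead of
 * asserting when the app-supplied buffer is too small, so the caller can
 * truncate the list and recover. The function name, buffer size and
 * header values below are illustrative. */
static void
example_build_headers (void)
{
  u8 buf[128];
  http_headers_ctx_t ctx;

  http_init_headers_ctx (&ctx, buf, sizeof (buf));
  if (http_add_header (&ctx, HTTP_HEADER_CONTENT_TYPE,
		       http_token_lit ("text/plain")) ||
      http_add_custom_header (&ctx, http_token_lit ("x-example"),
			      http_token_lit ("1")))
    http_truncate_headers_list (&ctx); /* out of space, start over */
}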
0 : -1; } +/** + * Format given authority (RFC3986 section 3.2) + * + * @param authority Authority to format. + * + * @return New vector with formated authority. + * + * The caller is always responsible to free the returned vector. + */ always_inline u8 * http_serialize_authority (http_uri_authority_t *authority) { diff --git a/src/plugins/http/http1.c b/src/plugins/http/http1.c new file mode 100644 index 00000000000..5ecc1f52300 --- /dev/null +++ b/src/plugins/http/http1.c @@ -0,0 +1,1936 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#include <vnet/session/application.h> + +#include <http/http.h> +#include <http/http_header_names.h> +#include <http/http_private.h> +#include <http/http_status_codes.h> +#include <http/http_timer.h> + +typedef struct http1_main_ +{ + http_req_t **req_pool; +} http1_main_t; + +static http1_main_t http1_main; + +const char *http1_upgrade_proto_str[] = { "", +#define _(sym, str) str, + foreach_http_upgrade_proto +#undef _ +}; + +/** + * http error boilerplate + */ +static const char *error_template = "HTTP/1.1 %s\r\n" + "Date: %U GMT\r\n" + "Connection: close\r\n" + "Content-Length: 0\r\n\r\n"; + +/** + * http response boilerplate + */ +static const char *response_template = "HTTP/1.1 %s\r\n" + "Date: %U GMT\r\n" + "Server: %v\r\n"; + +static const char *content_len_template = "Content-Length: %llu\r\n"; + +static const char *connection_upgrade_template = "Connection: upgrade\r\n" + "Upgrade: %s\r\n"; + +/** + * http request boilerplate + */ +static const char *get_request_template = "GET %s HTTP/1.1\r\n" + "Host: %v\r\n" + "User-Agent: %v\r\n"; + +static const char *post_request_template = "POST %s HTTP/1.1\r\n" + "Host: %v\r\n" + "User-Agent: %v\r\n" + "Content-Length: %llu\r\n"; + +always_inline http_req_t * +http1_conn_alloc_req (http_conn_t *hc) +{ + http1_main_t *h1m = &http1_main; + http_req_t *req; + u32 req_index; + http_req_handle_t hr_handle; + + pool_get_aligned_safe (h1m->req_pool[hc->c_thread_index], req, + CLIB_CACHE_LINE_BYTES); + clib_memset (req, 0, sizeof (*req)); + req->hr_pa_session_handle = SESSION_INVALID_HANDLE; + req_index = req - h1m->req_pool[hc->c_thread_index]; + hr_handle.version = HTTP_VERSION_1; + hr_handle.req_index = req_index; + req->hr_req_handle = hr_handle.as_u32; + req->hr_hc_index = hc->hc_hc_index; + req->c_thread_index = hc->c_thread_index; + req->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; + hc->opaque = uword_to_pointer (req_index, void *); + hc->flags |= HTTP_CONN_F_HAS_REQUEST; + return req; +} + +always_inline http_req_t * +http1_req_get (u32 req_index, clib_thread_index_t thread_index) +{ + http1_main_t *h1m = &http1_main; + + return pool_elt_at_index (h1m->req_pool[thread_index], req_index); +} + +always_inline http_req_t * +http1_req_get_if_valid (u32 req_index, clib_thread_index_t thread_index) +{ + http1_main_t *h1m = &http1_main; + + if (pool_is_free_index (h1m->req_pool[thread_index], req_index)) + return 0; + return pool_elt_at_index (h1m->req_pool[thread_index], req_index); +} + +always_inline http_req_t * +http1_conn_get_req (http_conn_t *hc) +{ + http1_main_t *h1m = &http1_main; + u32 req_index; + + req_index = pointer_to_uword (hc->opaque); + return pool_elt_at_index (h1m->req_pool[hc->c_thread_index], req_index); +} + +always_inline void +http1_conn_free_req (http_conn_t *hc) +{ + http1_main_t *h1m = &http1_main; + http_req_t *req; + u32 req_index; + + req_index = pointer_to_uword (hc->opaque); + req = pool_elt_at_index (h1m->req_pool[hc->c_thread_index], 
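/* For reference: hr_req_handle above packs the HTTP version together with
 * the per-thread request pool index into a single u32 handed to the
 * session layer (and later recovered from as->connection_index and
 * ts->opaque). The real layout lives in http_private.h; the union below
 * is only an assumed illustration of the idea, with made-up field
 * widths. */
typedef union
{
  struct
  {
    u32 version : 3;	/* assumed width */
    u32 req_index : 29; /* assumed width */
  };
  u32 as_u32;
} example_req_handle_t;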
req_index); + vec_free (req->headers); + vec_free (req->target); + http_buffer_free (&req->tx_buf); + if (CLIB_DEBUG) + memset (req, 0xba, sizeof (*req)); + pool_put (h1m->req_pool[hc->c_thread_index], req); + hc->flags &= ~HTTP_CONN_F_HAS_REQUEST; +} + +/* Deschedule http session and wait for deq notification if underlying ts tx + * fifo almost full */ +static_always_inline void +http1_check_and_deschedule (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + if (http_io_ts_check_write_thresh (hc)) + { + http_req_deschedule (req, sp); + http_io_ts_add_want_deq_ntf (hc); + } +} + +static void +http1_send_error (http_conn_t *hc, http_status_code_t ec, + transport_send_params_t *sp) +{ + u8 *data; + + if (ec >= HTTP_N_STATUS) + ec = HTTP_STATUS_INTERNAL_ERROR; + + data = format (0, error_template, http_status_code_str[ec], + format_http_time_now, hc); + HTTP_DBG (3, "%v", data); + http_io_ts_write (hc, data, vec_len (data), sp); + vec_free (data); + http_io_ts_after_write (hc, 0); +} + +static int +http1_read_message (http_conn_t *hc, u8 *rx_buf) +{ + u32 max_deq; + + max_deq = http_io_ts_max_read (hc); + if (PREDICT_FALSE (max_deq == 0)) + return -1; + + vec_validate (rx_buf, max_deq - 1); + http_io_ts_read (hc, rx_buf, max_deq, 1); + + return 0; +} + +static int +http1_parse_target (http_req_t *req, u8 *rx_buf) +{ + int i; + u8 *p, *end; + + /* asterisk-form = "*" */ + if ((rx_buf[req->target_path_offset] == '*') && (req->target_path_len == 1)) + { + req->target_form = HTTP_TARGET_ASTERISK_FORM; + /* we do not support OPTIONS request */ + return -1; + } + + /* origin-form = 1*( "/" segment ) [ "?" query ] */ + if (rx_buf[req->target_path_offset] == '/') + { + /* drop leading slash */ + req->target_path_len--; + req->target_path_offset++; + req->target_form = HTTP_TARGET_ORIGIN_FORM; + http_identify_optional_query (req, rx_buf); + /* can't be CONNECT method */ + return req->method == HTTP_REQ_CONNECT ? -1 : 0; + } + + /* absolute-form = + * scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */ + if (req->target_path_len > 8 && + !memcmp (rx_buf + req->target_path_offset, "http", 4)) + { + req->scheme = HTTP_URL_SCHEME_HTTP; + p = rx_buf + req->target_path_offset + 4; + if (*p == 's') + { + p++; + req->scheme = HTTP_URL_SCHEME_HTTPS; + } + if (*p++ == ':') + { + expect_char ('/'); + expect_char ('/'); + req->target_form = HTTP_TARGET_ABSOLUTE_FORM; + req->target_authority_offset = p - rx_buf; + req->target_authority_len = 0; + end = rx_buf + req->target_path_offset + req->target_path_len; + while (p < end) + { + if (*p == '/') + { + p++; /* drop leading slash */ + req->target_path_offset = p - rx_buf; + req->target_path_len = end - p; + break; + } + req->target_authority_len++; + p++; + } + if (!req->target_path_len) + { + clib_warning ("zero length host"); + return -1; + } + http_identify_optional_query (req, rx_buf); + /* can't be CONNECT method */ + return req->method == HTTP_REQ_CONNECT ? -1 : 0; + } + } + + /* authority-form = host ":" port */ + for (i = req->target_path_offset; + i < (req->target_path_offset + req->target_path_len); i++) + { + if ((rx_buf[i] == ':') && (isdigit (rx_buf[i + 1]))) + { + req->target_authority_len = req->target_path_len; + req->target_path_len = 0; + req->target_authority_offset = req->target_path_offset; + req->target_path_offset = 0; + req->target_form = HTTP_TARGET_AUTHORITY_FORM; + /* "authority-form" is only used for CONNECT requests */ + return req->method == HTTP_REQ_CONNECT ? 
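http1_parse_target distinguishes the four request-target forms of RFC 9112 section 3.2. A simplified, standalone classifier sketch (hypothetical classify_target helper; the in-tree parser additionally records offsets/lengths and ties each form to the allowed methods):

#include <string.h>

typedef enum
{
  TARGET_ORIGIN_FORM,	 /* "/path?query" - ordinary requests */
  TARGET_ABSOLUTE_FORM,	 /* "http(s)://host/path" - mostly proxies */
  TARGET_AUTHORITY_FORM, /* "host:port" - CONNECT only */
  TARGET_ASTERISK_FORM,	 /* "*" - OPTIONS only */
  TARGET_INVALID,
} target_form_t;

/* Rough classifier mirroring the checks in http1_parse_target */
static target_form_t
classify_target (const char *t, size_t len)
{
  if (len == 1 && t[0] == '*')
    return TARGET_ASTERISK_FORM;
  if (len >= 1 && t[0] == '/')
    return TARGET_ORIGIN_FORM;
  if (len > 8 && (!strncmp (t, "http://", 7) || !strncmp (t, "https://", 8)))
    return TARGET_ABSOLUTE_FORM;
  if (memchr (t, ':', len)) /* very rough "host:port" check */
    return TARGET_AUTHORITY_FORM;
  return TARGET_INVALID;
}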
0 : -1; + } + } + + return -1; +} + +static int +http1_parse_request_line (http_req_t *req, u8 *rx_buf, http_status_code_t *ec) +{ + int i, target_len; + u32 next_line_offset, method_offset; + + /* request-line = method SP request-target SP HTTP-version CRLF */ + i = http_v_find_index (rx_buf, 8, 0, "\r\n"); + if (i < 0) + { + clib_warning ("request line incomplete"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + HTTP_DBG (2, "request line length: %d", i); + req->control_data_len = i + 2; + next_line_offset = req->control_data_len; + + /* there should be at least one more CRLF */ + if (vec_len (rx_buf) < (next_line_offset + 2)) + { + clib_warning ("malformed message, too short"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + + /* + * RFC9112 2.2: + * In the interest of robustness, a server that is expecting to receive and + * parse a request-line SHOULD ignore at least one empty line (CRLF) + * received prior to the request-line. + */ + method_offset = rx_buf[0] == '\r' && rx_buf[1] == '\n' ? 2 : 0; + /* parse method */ + if (!memcmp (rx_buf + method_offset, "GET ", 4)) + { + HTTP_DBG (0, "GET method"); + req->method = HTTP_REQ_GET; + req->target_path_offset = method_offset + 4; + } + else if (!memcmp (rx_buf + method_offset, "POST ", 5)) + { + HTTP_DBG (0, "POST method"); + req->method = HTTP_REQ_POST; + req->target_path_offset = method_offset + 5; + } + else if (!memcmp (rx_buf + method_offset, "CONNECT ", 8)) + { + HTTP_DBG (0, "CONNECT method"); + req->method = HTTP_REQ_CONNECT; + req->upgrade_proto = HTTP_UPGRADE_PROTO_NA; + req->target_path_offset = method_offset + 8; + req->is_tunnel = 1; + } + else + { + if (rx_buf[method_offset] - 'A' <= 'Z' - 'A') + { + *ec = HTTP_STATUS_NOT_IMPLEMENTED; + return -1; + } + else + { + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + } + + /* find version */ + i = http_v_find_index (rx_buf, next_line_offset - 11, 11, " HTTP/"); + if (i < 0) + { + clib_warning ("HTTP version not present"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + /* verify major version */ + if (isdigit (rx_buf[i + 6])) + { + if (rx_buf[i + 6] != '1') + { + clib_warning ("HTTP major version '%c' not supported", + rx_buf[i + 6]); + *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED; + return -1; + } + } + else + { + clib_warning ("HTTP major version '%c' is not digit", rx_buf[i + 6]); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + + /* parse request-target */ + HTTP_DBG (2, "http at %d", i); + target_len = i - req->target_path_offset; + HTTP_DBG (2, "target_len %d", target_len); + if (target_len < 1) + { + clib_warning ("request-target not present"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + req->target_path_len = target_len; + req->target_query_offset = 0; + req->target_query_len = 0; + req->target_authority_len = 0; + req->target_authority_offset = 0; + if (http1_parse_target (req, rx_buf)) + { + clib_warning ("invalid target"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + HTTP_DBG (2, "request-target path length: %u", req->target_path_len); + HTTP_DBG (2, "request-target path offset: %u", req->target_path_offset); + HTTP_DBG (2, "request-target query length: %u", req->target_query_len); + HTTP_DBG (2, "request-target query offset: %u", req->target_query_offset); + + /* set buffer offset to nex line start */ + req->rx_buf_offset = next_line_offset; + + return 0; +} + +static int +http1_parse_status_line (http_req_t *req, u8 *rx_buf) +{ + int i; + u32 next_line_offset; + u8 *p, *end; + u16 status_code = 0; + + i = http_v_find_index (rx_buf, 0, 
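The status-line parser that begins here reads the three status-code digits with parse_int and rejects values outside 100-599. Assuming parse_int (defined elsewhere in the plugin) accumulates one decimal digit at the given weight, a standalone equivalent looks roughly like this (hypothetical parse_status_code helper):

#include <ctype.h>

/* Parse exactly three decimal digits and validate the RFC 9110 range */
static int
parse_status_code (const unsigned char *p, unsigned short *status_code)
{
  if (!isdigit (p[0]) || !isdigit (p[1]) || !isdigit (p[2]))
    return -1;
  *status_code = (p[0] - '0') * 100 + (p[1] - '0') * 10 + (p[2] - '0');
  if (*status_code < 100 || *status_code > 599)
    return -1;
  return 0;
}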
0, "\r\n"); + /* status-line = HTTP-version SP status-code SP [ reason-phrase ] CRLF */ + if (i < 0) + { + clib_warning ("status line incomplete"); + return -1; + } + HTTP_DBG (2, "status line length: %d", i); + if (i < 12) + { + clib_warning ("status line too short (%d)", i); + return -1; + } + req->control_data_len = i + 2; + next_line_offset = req->control_data_len; + p = rx_buf; + end = rx_buf + i; + + /* there should be at least one more CRLF */ + if (vec_len (rx_buf) < (next_line_offset + 2)) + { + clib_warning ("malformed message, too short"); + return -1; + } + + /* parse version */ + expect_char ('H'); + expect_char ('T'); + expect_char ('T'); + expect_char ('P'); + expect_char ('/'); + expect_char ('1'); + expect_char ('.'); + if (!isdigit (*p++)) + { + clib_warning ("invalid HTTP minor version"); + return -1; + } + + /* skip space(s) */ + if (*p != ' ') + { + clib_warning ("no space after HTTP version"); + return -1; + } + do + { + p++; + if (p == end) + { + clib_warning ("no status code"); + return -1; + } + } + while (*p == ' '); + + /* parse status code */ + if ((end - p) < 3) + { + clib_warning ("not enough characters for status code"); + return -1; + } + parse_int (status_code, 100); + parse_int (status_code, 10); + parse_int (status_code, 1); + if (status_code < 100 || status_code > 599) + { + clib_warning ("invalid status code %d", status_code); + return -1; + } + req->status_code = http_sc_by_u16 (status_code); + HTTP_DBG (0, "status code: %d", status_code); + + /* set buffer offset to nex line start */ + req->rx_buf_offset = next_line_offset; + + return 0; +} + +always_inline int +http1_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start, + u32 *field_name_len) +{ + u32 name_len = 0; + u8 *p; + + static uword tchar[4] = { + /* !#$%'*+-.0123456789 */ + 0x03ff6cba00000000, + /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */ + 0x57ffffffc7fffffe, + 0x0000000000000000, + 0x0000000000000000, + }; + + p = *pos; + + *field_name_start = p; + while (p != end) + { + if (clib_bitmap_get_no_check (tchar, *p)) + { + name_len++; + p++; + } + else if (*p == ':') + { + if (name_len == 0) + { + clib_warning ("empty field name"); + return -1; + } + *field_name_len = name_len; + p++; + *pos = p; + return 0; + } + else + { + clib_warning ("invalid character %d", *p); + return -1; + } + } + clib_warning ("field name end not found"); + return -1; +} + +always_inline int +http1_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start, + u32 *field_value_len) +{ + u32 value_len = 0; + u8 *p; + + p = *pos; + + /* skip leading whitespace */ + while (1) + { + if (p == end) + { + clib_warning ("field value not found"); + return -1; + } + else if (*p != ' ' && *p != '\t') + { + break; + } + p++; + } + + *field_value_start = p; + while (p != end) + { + if (*p == '\r') + { + if ((end - p) < 1) + { + clib_warning ("incorrect field line end"); + return -1; + } + p++; + if (*p == '\n') + { + if (value_len == 0) + { + clib_warning ("empty field value"); + return -1; + } + p++; + *pos = p; + /* skip trailing whitespace */ + p = *field_value_start + value_len - 1; + while (*p == ' ' || *p == '\t') + { + p--; + value_len--; + } + *field_value_len = value_len; + return 0; + } + clib_warning ("CR without LF"); + return -1; + } + if (*p < ' ' && *p != '\t') + { + clib_warning ("invalid character %d", *p); + return -1; + } + p++; + value_len++; + } + + clib_warning ("field value end not found"); + return -1; +} + +static int +http1_identify_headers (http_req_t *req, u8 *rx_buf, 
http_status_code_t *ec) +{ + int rv; + u8 *p, *end, *name_start, *value_start; + u32 name_len, value_len; + http_field_line_t *field_line; + uword header_index; + + vec_reset_length (req->headers); + req->content_len_header_index = ~0; + req->connection_header_index = ~0; + req->upgrade_header_index = ~0; + req->host_header_index = ~0; + req->headers_offset = req->rx_buf_offset; + + /* check if we have any header */ + if ((rx_buf[req->rx_buf_offset] == '\r') && + (rx_buf[req->rx_buf_offset + 1] == '\n')) + { + /* just another CRLF -> no headers */ + HTTP_DBG (2, "no headers"); + req->headers_len = 0; + req->control_data_len += 2; + return 0; + } + + end = vec_end (rx_buf); + p = rx_buf + req->rx_buf_offset; + + while (1) + { + rv = http1_parse_field_name (&p, end, &name_start, &name_len); + if (rv != 0) + { + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + rv = http1_parse_field_value (&p, end, &value_start, &value_len); + if (rv != 0 || (end - p) < 2) + { + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + + vec_add2 (req->headers, field_line, 1); + field_line->name_offset = (name_start - rx_buf) - req->headers_offset; + field_line->name_len = name_len; + field_line->value_offset = (value_start - rx_buf) - req->headers_offset; + field_line->value_len = value_len; + header_index = field_line - req->headers; + + /* find headers that will be used later in preprocessing */ + /* names are case-insensitive (RFC9110 section 5.1) */ + if (req->content_len_header_index == ~0 && + http_token_is_case ( + (const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_CONTENT_LENGTH))) + req->content_len_header_index = header_index; + else if (req->connection_header_index == ~0 && + http_token_is_case ( + (const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_CONNECTION))) + req->connection_header_index = header_index; + else if (req->upgrade_header_index == ~0 && + http_token_is_case ( + (const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_UPGRADE))) + req->upgrade_header_index = header_index; + else if (req->host_header_index == ~0 && + http_token_is_case ((const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_HOST))) + req->host_header_index = header_index; + + /* are we done? 
*/ + if (*p == '\r' && *(p + 1) == '\n') + break; + } + + req->headers_len = p - (rx_buf + req->headers_offset); + req->control_data_len += (req->headers_len + 2); + HTTP_DBG (2, "headers length: %u", req->headers_len); + HTTP_DBG (2, "headers offset: %u", req->headers_offset); + + return 0; +} + +static int +http1_identify_message_body (http_req_t *req, u8 *rx_buf, + http_status_code_t *ec) +{ + int rv; + + req->body_len = 0; + + if (req->headers_len == 0) + { + HTTP_DBG (2, "no header, no message-body"); + return 0; + } + if (req->is_tunnel) + { + HTTP_DBG (2, "tunnel, no message-body"); + return 0; + } + + /* TODO check for chunked transfer coding */ + + if (req->content_len_header_index == ~0) + { + HTTP_DBG (2, "Content-Length header not present, no message-body"); + return 0; + } + + rv = http_parse_content_length (req, rx_buf); + if (rv) + { + *ec = HTTP_STATUS_BAD_REQUEST; + return rv; + } + + req->body_offset = req->headers_offset + req->headers_len + 2; + HTTP_DBG (2, "body length: %llu", req->body_len); + HTTP_DBG (2, "body offset: %u", req->body_offset); + + return 0; +} + +static void +http1_check_connection_upgrade (http_req_t *req, u8 *rx_buf) +{ + http_field_line_t *connection, *upgrade; + u8 skip; + + skip = (req->method != HTTP_REQ_GET) + (req->connection_header_index == ~0) + + (req->upgrade_header_index == ~0); + if (skip) + return; + + connection = vec_elt_at_index (req->headers, req->connection_header_index); + /* connection options are case-insensitive (RFC9110 7.6.1) */ + if (http_token_is_case ( + http_field_line_value_token (connection, req, rx_buf), + http_token_lit ("upgrade"))) + { + upgrade = vec_elt_at_index (req->headers, req->upgrade_header_index); + + /* check upgrade protocol, we want to ignore something like upgrade to + * newer HTTP version, only tunnels are supported */ + if (0) + ; +#define _(sym, str) \ + else if (http_token_is_case ( \ + http_field_line_value_token (upgrade, req, rx_buf), \ + http_token_lit (str))) req->upgrade_proto = \ + HTTP_UPGRADE_PROTO_##sym; + foreach_http_upgrade_proto +#undef _ + else return; + + req->is_tunnel = 1; + req->method = HTTP_REQ_CONNECT; + } +} + +static void +http1_target_fixup (http_conn_t *hc, http_req_t *req) +{ + http_field_line_t *host; + + if (req->target_form == HTTP_TARGET_ABSOLUTE_FORM) + return; + + /* scheme fixup */ + req->scheme = http_get_transport_proto (hc) == TRANSPORT_PROTO_TLS ? + HTTP_URL_SCHEME_HTTPS : + HTTP_URL_SCHEME_HTTP; + + if (req->target_form == HTTP_TARGET_AUTHORITY_FORM || + req->connection_header_index == ~0) + return; + + /* authority fixup */ + host = vec_elt_at_index (req->headers, req->connection_header_index); + req->target_authority_offset = host->value_offset; + req->target_authority_len = host->value_len; +} + +static void +http1_write_app_headers (http_req_t *req, http_msg_t *msg, u8 **tx_buf) +{ + u8 *app_headers, *p, *end; + u32 *tmp; + + /* read app header list */ + app_headers = http_get_app_header_list (req, msg); + + /* serialize app headers to tx_buf */ + end = app_headers + msg->data.headers_len; + while (app_headers < end) + { + /* custom header name? 
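http1_write_app_headers turns each app-supplied header, whether a well-known name index or a custom token, into a plain "name: value" CRLF line appended to the outgoing buffer. A simplified standalone sketch of that serialization (hypothetical hdr_t and serialize_headers, libc only):

#include <stdio.h>

typedef struct
{
  const char *name;
  const char *value;
} hdr_t;

/* Emit "name: value\r\n" for each header, truncating at the buffer end */
static size_t
serialize_headers (char *dst, size_t n, const hdr_t *hdrs, size_t n_hdrs)
{
  size_t off = 0;
  for (size_t i = 0; i < n_hdrs && off < n; i++)
    off += snprintf (dst + off, n - off, "%s: %s\r\n", hdrs[i].name,
		     hdrs[i].value);
  return off;
}

/* e.g. hdr_t h[] = { { "content-type", "text/html" } };
 *      serialize_headers (buf, sizeof (buf), h, 1); */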
*/ + tmp = (u32 *) app_headers; + if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT)) + { + http_custom_token_t *name, *value; + name = (http_custom_token_t *) app_headers; + u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT; + app_headers += sizeof (http_custom_token_t) + name_len; + value = (http_custom_token_t *) app_headers; + app_headers += sizeof (http_custom_token_t) + value->len; + vec_add2 (*tx_buf, p, name_len + value->len + 4); + clib_memcpy (p, name->token, name_len); + p += name_len; + *p++ = ':'; + *p++ = ' '; + clib_memcpy (p, value->token, value->len); + p += value->len; + *p++ = '\r'; + *p++ = '\n'; + } + else + { + http_app_header_t *header; + header = (http_app_header_t *) app_headers; + app_headers += sizeof (http_app_header_t) + header->value.len; + http_token_t name = { http_header_name_token (header->name) }; + vec_add2 (*tx_buf, p, name.len + header->value.len + 4); + clib_memcpy (p, name.base, name.len); + p += name.len; + *p++ = ':'; + *p++ = ' '; + clib_memcpy (p, header->value.token, header->value.len); + p += header->value.len; + *p++ = '\r'; + *p++ = '\n'; + } + } +} + +/*************************************/ +/* request state machine handlers RX */ +/*************************************/ + +static http_sm_result_t +http1_req_state_wait_transport_reply (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + int rv; + http_msg_t msg = {}; + u32 len, max_enq, body_sent; + http_status_code_t ec; + u8 *rx_buf; + + rx_buf = http_get_rx_buf (hc); + rv = http1_read_message (hc, rx_buf); + + /* Nothing yet, wait for data or timer expire */ + if (rv) + { + HTTP_DBG (1, "no data to deq"); + return HTTP_SM_STOP; + } + + HTTP_DBG (3, "%v", rx_buf); + + if (vec_len (rx_buf) < 8) + { + clib_warning ("response buffer too short"); + goto error; + } + + rv = http1_parse_status_line (req, rx_buf); + if (rv) + goto error; + + rv = http1_identify_headers (req, rx_buf, &ec); + if (rv) + goto error; + + rv = http1_identify_message_body (req, rx_buf, &ec); + if (rv) + goto error; + + /* send at least "control data" which is necessary minimum, + * if there is some space send also portion of body */ + max_enq = http_io_as_max_write (req); + max_enq -= sizeof (msg); + if (max_enq < req->control_data_len) + { + clib_warning ("not enough room for control data in app's rx fifo"); + goto error; + } + len = clib_min (max_enq, vec_len (rx_buf)); + + msg.type = HTTP_MSG_REPLY; + msg.code = req->status_code; + msg.data.headers_offset = req->headers_offset; + msg.data.headers_len = req->headers_len; + msg.data.body_offset = req->body_offset; + msg.data.body_len = req->body_len; + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.len = len; + msg.data.headers_ctx = pointer_to_uword (req->headers); + + svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { rx_buf, len } }; + + http_io_as_write_segs (req, segs, 2); + + body_sent = len - req->control_data_len; + req->to_recv = req->body_len - body_sent; + if (req->to_recv == 0) + { + /* all sent, we are done */ + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_METHOD); + } + else + { + /* stream rest of the response body */ + http_req_state_change (req, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); + } + + http_io_ts_drain (hc, len); + http_io_ts_after_read (hc, 1); + http_app_worker_rx_notify (req); + return HTTP_SM_STOP; + +error: + http_io_ts_drain_all (hc); + http_io_ts_after_read (hc, 1); + session_transport_closing_notify (&req->connection); + session_transport_closed_notify (&req->connection); + 
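When the reply is handed to the app in http1_req_state_wait_transport_reply, the first write carries the control data (status line plus headers) and as much body as fits, and to_recv keeps the remainder for the streaming state. A tiny worked sketch of that accounting (hypothetical remaining_body helper):

/* body still expected after the first write to the app's rx fifo */
static unsigned long long
remaining_body (unsigned long long control_data_len,
		unsigned long long body_len, unsigned long long first_write_len)
{
  unsigned long long body_sent = first_write_len - control_data_len;
  return body_len - body_sent;
}

/* e.g. 200 B of control data, 10240 B body, 4096 B written first:
 * remaining_body (200, 10240, 4096) == 10240 - (4096 - 200) == 6344 */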
http_disconnect_transport (hc); + return HTTP_SM_ERROR; +} + +static http_sm_result_t +http1_req_state_wait_transport_method (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + http_status_code_t ec; + http_msg_t msg; + int rv; + u32 len, max_enq, body_sent; + u64 max_deq; + u8 *rx_buf; + + rx_buf = http_get_rx_buf (hc); + rv = http1_read_message (hc, rx_buf); + + /* Nothing yet, wait for data or timer expire */ + if (rv) + return HTTP_SM_STOP; + + HTTP_DBG (3, "%v", rx_buf); + + if (vec_len (rx_buf) < 8) + { + ec = HTTP_STATUS_BAD_REQUEST; + goto error; + } + + rv = http1_parse_request_line (req, rx_buf, &ec); + if (rv) + goto error; + + rv = http1_identify_headers (req, rx_buf, &ec); + if (rv) + goto error; + + http1_target_fixup (hc, req); + http1_check_connection_upgrade (req, rx_buf); + + rv = http1_identify_message_body (req, rx_buf, &ec); + if (rv) + goto error; + + /* send at least "control data" which is necessary minimum, + * if there is some space send also portion of body */ + max_enq = http_io_as_max_write (req); + max_enq -= sizeof (msg); + if (max_enq < req->control_data_len) + { + clib_warning ("not enough room for control data in app's rx fifo"); + ec = HTTP_STATUS_INTERNAL_ERROR; + goto error; + } + /* do not dequeue more than one HTTP request, we do not support pipelining */ + max_deq = clib_min (req->control_data_len + req->body_len, vec_len (rx_buf)); + len = clib_min (max_enq, max_deq); + + msg.type = HTTP_MSG_REQUEST; + msg.method_type = req->method; + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.len = len; + msg.data.scheme = req->scheme; + msg.data.target_authority_offset = req->target_authority_offset; + msg.data.target_authority_len = req->target_authority_len; + msg.data.target_path_offset = req->target_path_offset; + msg.data.target_path_len = req->target_path_len; + msg.data.target_query_offset = req->target_query_offset; + msg.data.target_query_len = req->target_query_len; + msg.data.headers_offset = req->headers_offset; + msg.data.headers_len = req->headers_len; + msg.data.body_offset = req->body_offset; + msg.data.body_len = req->body_len; + msg.data.headers_ctx = pointer_to_uword (req->headers); + msg.data.upgrade_proto = req->upgrade_proto; + + svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { rx_buf, len } }; + + http_io_as_write_segs (req, segs, 2); + + body_sent = len - req->control_data_len; + req->to_recv = req->body_len - body_sent; + if (req->to_recv == 0) + { + /* drop everything, we do not support pipelining */ + http_io_ts_drain_all (hc); + /* all sent, we are done */ + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_REPLY); + } + else + { + http_io_ts_drain (hc, len); + /* stream rest of the response body */ + http_req_state_change (req, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); + } + + http_app_worker_rx_notify (req); + http_io_ts_after_read (hc, 1); + + return HTTP_SM_STOP; + +error: + http_io_ts_drain_all (hc); + http_io_ts_after_read (hc, 1); + http1_send_error (hc, ec, 0); + session_transport_closing_notify (&req->connection); + http_disconnect_transport (hc); + + return HTTP_SM_ERROR; +} + +static http_sm_result_t +http1_req_state_transport_io_more_data (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 max_len, max_deq, max_enq, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + int n_written; + + max_deq = http_io_ts_max_read (hc); + if (max_deq == 0) + { + HTTP_DBG (1, "no data to deq"); + return HTTP_SM_STOP; + } + + max_enq = http_io_as_max_write (req); + if (max_enq == 0) + { + 
HTTP_DBG (1, "app's rx fifo full"); + http_io_as_add_want_deq_ntf (req); + return HTTP_SM_STOP; + } + + max_len = clib_min (max_enq, max_deq); + http_io_ts_read_segs (hc, segs, &n_segs, max_len); + + n_written = http_io_as_write_segs (req, segs, n_segs); + + if (n_written > req->to_recv) + { + clib_warning ("http protocol error: received more data than expected"); + session_transport_closing_notify (&req->connection); + http_disconnect_transport (hc); + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_METHOD); + return HTTP_SM_ERROR; + } + req->to_recv -= n_written; + http_io_ts_drain (hc, n_written); + HTTP_DBG (1, "drained %d from ts; remains %lu", n_written, req->to_recv); + + /* Finished transaction: + * server back to HTTP_REQ_STATE_WAIT_APP_REPLY + * client to HTTP_REQ_STATE_WAIT_APP_METHOD */ + if (req->to_recv == 0) + http_req_state_change (req, (hc->flags & HTTP_CONN_F_IS_SERVER) ? + HTTP_REQ_STATE_WAIT_APP_REPLY : + HTTP_REQ_STATE_WAIT_APP_METHOD); + + http_app_worker_rx_notify (req); + + http_io_ts_after_read (hc, 0); + + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_tunnel_rx (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 max_deq, max_enq, max_read, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + int n_written = 0; + + HTTP_DBG (1, "tunnel received data from client"); + + max_deq = http_io_ts_max_read (hc); + if (PREDICT_FALSE (max_deq == 0)) + { + HTTP_DBG (1, "max_deq == 0"); + return HTTP_SM_STOP; + } + max_enq = http_io_as_max_write (req); + if (max_enq == 0) + { + HTTP_DBG (1, "app's rx fifo full"); + http_io_as_add_want_deq_ntf (req); + return HTTP_SM_STOP; + } + max_read = clib_min (max_enq, max_deq); + http_io_ts_read_segs (hc, segs, &n_segs, max_read); + n_written = http_io_as_write_segs (req, segs, n_segs); + http_io_ts_drain (hc, n_written); + HTTP_DBG (1, "transfered %u bytes", n_written); + http_app_worker_rx_notify (req); + http_io_ts_after_read (hc, 0); + + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_udp_tunnel_rx (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 to_deq, capsule_size, dgram_size, n_read, n_written = 0; + int rv; + u8 payload_offset = 0; + u64 payload_len = 0; + session_dgram_hdr_t hdr; + u8 *buf = 0; + + HTTP_DBG (1, "udp tunnel received data from client"); + + buf = http_get_rx_buf (hc); + to_deq = http_io_ts_max_read (hc); + + while (to_deq > 0) + { + /* some bytes remaining to skip? 
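The UDP tunnel RX path around here relies on http_decap_udp_payload_datagram to parse RFC 9297 capsules, whose Type and Length fields are RFC 9000 variable-length integers. For illustration only, a standalone decoder for that varint format (hypothetical quic_varint_decode; the in-tree helper's internals are not shown in this hunk):

#include <stddef.h>
#include <stdint.h>

/* RFC 9000 section 16 varint: the two most significant bits of the first
 * octet select a 1, 2, 4 or 8 octet encoding. Returns octets consumed,
 * 0 if the buffer is too short. */
static size_t
quic_varint_decode (const uint8_t *buf, size_t len, uint64_t *value)
{
  size_t n;

  if (len == 0)
    return 0;
  n = (size_t) 1 << (buf[0] >> 6);
  if (len < n)
    return 0;
  *value = buf[0] & 0x3f;
  for (size_t i = 1; i < n; i++)
    *value = (*value << 8) | buf[i];
  return n;
}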
*/ + if (PREDICT_FALSE (req->to_skip)) + { + if (req->to_skip >= to_deq) + { + http_io_ts_drain (hc, to_deq); + req->to_skip -= to_deq; + goto done; + } + else + { + http_io_ts_drain (hc, req->to_skip); + req->to_skip = 0; + } + } + n_read = http_io_ts_read (hc, buf, HTTP_CAPSULE_HEADER_MAX_SIZE, 1); + rv = http_decap_udp_payload_datagram (buf, n_read, &payload_offset, + &payload_len); + HTTP_DBG (1, "rv=%d, payload_offset=%u, payload_len=%llu", rv, + payload_offset, payload_len); + if (PREDICT_FALSE (rv != 0)) + { + if (rv < 0) + { + /* capsule datagram is invalid (session need to be aborted) */ + http_io_ts_drain_all (hc); + session_transport_closing_notify (&req->connection); + session_transport_closed_notify (&req->connection); + http_disconnect_transport (hc); + return HTTP_SM_STOP; + } + else + { + /* unknown capsule should be skipped */ + if (payload_len <= to_deq) + { + http_io_ts_drain (hc, payload_len); + to_deq -= payload_len; + continue; + } + else + { + http_io_ts_drain (hc, to_deq); + req->to_skip = payload_len - to_deq; + goto done; + } + } + } + capsule_size = payload_offset + payload_len; + /* check if we have the full capsule */ + if (PREDICT_FALSE (to_deq < capsule_size)) + { + HTTP_DBG (1, "capsule not complete"); + goto done; + } + + dgram_size = sizeof (hdr) + payload_len; + if (http_io_as_max_write (req) < dgram_size) + { + HTTP_DBG (1, "app's rx fifo full"); + http_io_as_add_want_deq_ntf (req); + goto done; + } + + http_io_ts_drain (hc, payload_offset); + + /* read capsule payload */ + http_io_ts_read (hc, buf, payload_len, 0); + + hdr.data_length = payload_len; + hdr.data_offset = 0; + + /* send datagram header and payload */ + svm_fifo_seg_t segs[2] = { { (u8 *) &hdr, sizeof (hdr) }, + { buf, payload_len } }; + http_io_as_write_segs (req, segs, 2); + + n_written += dgram_size; + to_deq -= capsule_size; + } + +done: + HTTP_DBG (1, "written %lu bytes", n_written); + + if (n_written) + http_app_worker_rx_notify (req); + + http_io_ts_after_read (hc, 0); + + return HTTP_SM_STOP; +} + +/*************************************/ +/* request state machine handlers TX */ +/*************************************/ + +static http_sm_result_t +http1_req_state_wait_app_reply (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u8 *response; + u32 max_enq; + http_status_code_t sc; + http_msg_t msg; + http_sm_result_t sm_result = HTTP_SM_ERROR; + http_req_state_t next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD; + + http_get_app_msg (req, &msg); + + if (msg.data.type > HTTP_MSG_DATA_PTR) + { + clib_warning ("no data"); + sc = HTTP_STATUS_INTERNAL_ERROR; + goto error; + } + + if (msg.type != HTTP_MSG_REPLY) + { + clib_warning ("unexpected message type %d", msg.type); + sc = HTTP_STATUS_INTERNAL_ERROR; + goto error; + } + + if (msg.code >= HTTP_N_STATUS) + { + clib_warning ("unsupported status code: %d", msg.code); + return HTTP_SM_ERROR; + } + + response = http_get_tx_buf (hc); + /* + * Add "protocol layer" headers: + * - current time + * - server name + * - data length + */ + response = + format (response, response_template, http_status_code_str[msg.code], + /* Date */ + format_http_time_now, hc, + /* Server */ + hc->app_name); + + /* RFC9110 8.6: A server MUST NOT send Content-Length header field in a + * 2xx (Successful) response to CONNECT or with a status code of 101 + * (Switching Protocols). 
*/ + if (req->is_tunnel && (http_status_code_str[msg.code][0] == '2' || + msg.code == HTTP_STATUS_SWITCHING_PROTOCOLS)) + { + ASSERT (msg.data.body_len == 0); + next_state = HTTP_REQ_STATE_TUNNEL; + if (req->upgrade_proto > HTTP_UPGRADE_PROTO_NA) + { + response = format (response, connection_upgrade_template, + http1_upgrade_proto_str[req->upgrade_proto]); + if (req->upgrade_proto == HTTP_UPGRADE_PROTO_CONNECT_UDP && + hc->udp_tunnel_mode == HTTP_UDP_TUNNEL_DGRAM) + next_state = HTTP_REQ_STATE_UDP_TUNNEL; + } + /* cleanup some stuff we don't need anymore in tunnel mode */ + vec_free (req->headers); + http_buffer_free (&req->tx_buf); + req->to_skip = 0; + } + else + response = format (response, content_len_template, msg.data.body_len); + + /* Add headers from app (if any) */ + if (msg.data.headers_len) + { + HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); + http1_write_app_headers (req, &msg, &response); + } + /* Add empty line after headers */ + response = format (response, "\r\n"); + HTTP_DBG (3, "%v", response); + + max_enq = http_io_ts_max_write (hc, sp); + if (max_enq < vec_len (response)) + { + clib_warning ("sending status-line and headers failed!"); + sc = HTTP_STATUS_INTERNAL_ERROR; + goto error; + } + http_io_ts_write (hc, response, vec_len (response), sp); + + if (msg.data.body_len) + { + /* Start sending the actual data */ + http_req_tx_buffer_init (req, &msg); + next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; + sm_result = HTTP_SM_CONTINUE; + } + else + { + /* No response body, we are done */ + sm_result = HTTP_SM_STOP; + } + + http_req_state_change (req, next_state); + + http_io_ts_after_write (hc, 0); + return sm_result; + +error: + http1_send_error (hc, sc, sp); + session_transport_closing_notify (&req->connection); + http_disconnect_transport (hc); + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_wait_app_method (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + http_msg_t msg; + u8 *request = 0, *target; + u32 max_enq; + http_sm_result_t sm_result = HTTP_SM_ERROR; + http_req_state_t next_state; + + http_get_app_msg (req, &msg); + + if (msg.data.type > HTTP_MSG_DATA_PTR) + { + clib_warning ("no data"); + goto error; + } + + if (msg.type != HTTP_MSG_REQUEST) + { + clib_warning ("unexpected message type %d", msg.type); + goto error; + } + + /* read request target */ + target = http_get_app_target (req, &msg); + + request = http_get_tx_buf (hc); + /* currently we support only GET and POST method */ + if (msg.method_type == HTTP_REQ_GET) + { + if (msg.data.body_len) + { + clib_warning ("GET request shouldn't include data"); + goto error; + } + /* + * Add "protocol layer" headers: + * - host + * - user agent + */ + request = format (request, get_request_template, + /* target */ + target, + /* Host */ + hc->host, + /* User-Agent */ + hc->app_name); + + next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY; + sm_result = HTTP_SM_STOP; + } + else if (msg.method_type == HTTP_REQ_POST) + { + if (!msg.data.body_len) + { + clib_warning ("POST request should include data"); + goto error; + } + /* + * Add "protocol layer" headers: + * - host + * - user agent + * - content length + */ + request = format (request, post_request_template, + /* target */ + target, + /* Host */ + hc->host, + /* User-Agent */ + hc->app_name, + /* Content-Length */ + msg.data.body_len); + + http_req_tx_buffer_init (req, &msg); + + next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; + sm_result = HTTP_SM_CONTINUE; + } + else + { + clib_warning ("unsupported method 
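The GET branch of http1_req_state_wait_app_method assembles the request line plus Host and User-Agent from get_request_template before app headers and the final blank line are appended. A rough standalone rendering of those bytes (hypothetical build_get_request, plain snprintf instead of VPP's format ()):

#include <stdio.h>

static int
build_get_request (char *dst, size_t n, const char *target, const char *host,
		   const char *user_agent)
{
  return snprintf (dst, n,
		   "GET %s HTTP/1.1\r\n"
		   "Host: %s\r\n"
		   "User-Agent: %s\r\n"
		   "\r\n",
		   target, host, user_agent);
}

/* e.g. build_get_request (buf, sizeof (buf), "/index.html", "example.com",
 *			   "vpp-http"); */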
%d", msg.method_type); + goto error; + } + + /* Add headers from app (if any) */ + if (msg.data.headers_len) + { + HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); + http1_write_app_headers (req, &msg, &request); + } + /* Add empty line after headers */ + request = format (request, "\r\n"); + HTTP_DBG (3, "%v", request); + + max_enq = http_io_ts_max_write (hc, sp); + if (max_enq < vec_len (request)) + { + clib_warning ("sending request-line and headers failed!"); + sm_result = HTTP_SM_ERROR; + goto error; + } + http_io_ts_write (hc, request, vec_len (request), sp); + + http_req_state_change (req, next_state); + + http_io_ts_after_write (hc, 0); + goto done; + +error: + http_io_as_drain_all (req); + session_transport_closing_notify (&req->connection); + session_transport_closed_notify (&req->connection); + http_disconnect_transport (hc); + +done: + return sm_result; +} + +static http_sm_result_t +http1_req_state_app_io_more_data (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 max_write, n_read, n_segs, n_written = 0; + http_buffer_t *hb = &req->tx_buf; + svm_fifo_seg_t *seg; + u8 finished = 0; + + ASSERT (http_buffer_bytes_left (hb) > 0); + max_write = http_io_ts_max_write (hc, sp); + if (max_write == 0) + { + HTTP_DBG (1, "ts tx fifo full"); + goto check_fifo; + } + + n_read = http_buffer_get_segs (hb, max_write, &seg, &n_segs); + if (n_read == 0) + { + HTTP_DBG (1, "no data to deq"); + goto check_fifo; + } + + n_written = http_io_ts_write_segs (hc, seg, n_segs, sp); + + http_buffer_drain (hb, n_written); + finished = http_buffer_bytes_left (hb) == 0; + + if (finished) + { + /* Finished transaction: + * server back to HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD + * client to HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY */ + http_req_state_change (req, (hc->flags & HTTP_CONN_F_IS_SERVER) ? 
+ HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD : + HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY); + http_buffer_free (hb); + } + http_io_ts_after_write (hc, finished); + +check_fifo: + http1_check_and_deschedule (hc, req, sp); + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_tunnel_tx (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 max_deq, max_enq, max_read, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + int n_written = 0; + + HTTP_DBG (1, "tunnel received data from target"); + + max_deq = http_io_as_max_read (req); + if (PREDICT_FALSE (max_deq == 0)) + { + HTTP_DBG (1, "max_deq == 0"); + goto check_fifo; + } + max_enq = http_io_ts_max_write (hc, sp); + if (max_enq == 0) + { + HTTP_DBG (1, "ts tx fifo full"); + goto check_fifo; + } + max_read = clib_min (max_enq, max_deq); + http_io_as_read_segs (req, segs, &n_segs, max_read); + n_written = http_io_ts_write_segs (hc, segs, n_segs, sp); + http_io_as_drain (req, n_written); + http_io_ts_after_write (hc, 0); + +check_fifo: + http1_check_and_deschedule (hc, req, sp); + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_udp_tunnel_tx (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 to_deq, capsule_size, dgram_size; + u8 written = 0; + session_dgram_hdr_t hdr; + u8 *buf; + u8 *payload; + + HTTP_DBG (1, "udp tunnel received data from target"); + + buf = http_get_tx_buf (hc); + to_deq = http_io_as_max_read (req); + + while (to_deq > 0) + { + /* read datagram header */ + http_io_as_read (req, (u8 *) &hdr, sizeof (hdr), 1); + ASSERT (hdr.data_length <= HTTP_UDP_PAYLOAD_MAX_LEN); + dgram_size = hdr.data_length + SESSION_CONN_HDR_LEN; + ASSERT (to_deq >= dgram_size); + + if (http_io_ts_max_write (hc, sp) < + (hdr.data_length + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD)) + { + HTTP_DBG (1, "ts tx fifo full"); + goto done; + } + + /* create capsule header */ + payload = http_encap_udp_payload_datagram (buf, hdr.data_length); + capsule_size = (payload - buf) + hdr.data_length; + /* read payload */ + http_io_as_read (req, payload, hdr.data_length, 1); + http_io_as_drain (req, dgram_size); + /* send capsule */ + http_io_ts_write (hc, buf, capsule_size, sp); + + written = 1; + to_deq -= dgram_size; + } + +done: + if (written) + http_io_ts_after_write (hc, 0); + http1_check_and_deschedule (hc, req, sp); + return HTTP_SM_STOP; +} + +/*************************/ +/* request state machine */ +/*************************/ + +static http_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = { + 0, /* idle */ + http1_req_state_wait_app_method, + 0, /* wait transport reply */ + 0, /* transport io more data */ + 0, /* wait transport method */ + http1_req_state_wait_app_reply, + http1_req_state_app_io_more_data, + http1_req_state_tunnel_tx, + http1_req_state_udp_tunnel_tx, +}; + +static http_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = { + 0, /* idle */ + 0, /* wait app method */ + http1_req_state_wait_transport_reply, + http1_req_state_transport_io_more_data, + http1_req_state_wait_transport_method, + 0, /* wait app reply */ + 0, /* app io more data */ + http1_req_state_tunnel_rx, + http1_req_state_udp_tunnel_rx, +}; + +static_always_inline int +http1_req_state_is_tx_valid (http_req_t *req) +{ + return tx_state_funcs[req->state] ? 1 : 0; +} + +static_always_inline int +http1_req_state_is_rx_valid (http_req_t *req) +{ + return rx_state_funcs[req->state] ? 
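The two handler tables above map each request state to at most one TX and one RX handler, and http1_req_run_state_machine below keeps dispatching while a handler returns HTTP_SM_CONTINUE (for example after headers are written but body data is still pending). A minimal standalone sketch of that table-driven loop (hypothetical sm_* names):

typedef enum { SM_STOP, SM_CONTINUE, SM_ERROR } sm_result_t;

typedef struct sm_ctx
{
  int state; /* handlers advance this, like http_req_state_change () */
} sm_ctx_t;

typedef sm_result_t (*sm_handler_t) (sm_ctx_t *ctx);

static void
run_state_machine (sm_handler_t *handlers, sm_ctx_t *ctx)
{
  sm_result_t res;

  do
    {
      if (!handlers[ctx->state]) /* state has no handler in this direction */
	return;
      res = handlers[ctx->state](ctx);
    }
  while (res == SM_CONTINUE);
}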
1 : 0; +} + +static_always_inline void +http1_req_run_state_machine (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp, u8 is_tx) +{ + http_sm_result_t res; + + do + { + if (is_tx) + res = tx_state_funcs[req->state](hc, req, sp); + else + res = rx_state_funcs[req->state](hc, req, 0); + if (res == HTTP_SM_ERROR) + { + HTTP_DBG (1, "error in state machine %d", res); + return; + } + } + while (res == HTTP_SM_CONTINUE); + + /* Reset the session expiration timer */ + http_conn_timer_update (hc); +} + +/*****************/ +/* http core VFT */ +/*****************/ + +static u32 +http1_hc_index_get_by_req_index (u32 req_index, + clib_thread_index_t thread_index) +{ + http_req_t *req; + + req = http1_req_get (req_index, thread_index); + return req->hr_hc_index; +} + +static transport_connection_t * +http1_req_get_connection (u32 req_index, clib_thread_index_t thread_index) +{ + http_req_t *req; + req = http1_req_get (req_index, thread_index); + return &req->connection; +} + +static u8 * +format_http1_req (u8 *s, va_list *args) +{ + http_req_t *req = va_arg (*args, http_req_t *); + http_conn_t *hc = va_arg (*args, http_conn_t *); + session_t *ts; + + ts = session_get_from_handle (hc->hc_tc_session_handle); + s = format (s, "[%d:%d][H1] app_wrk %u hc_index %u ts %d:%d", + req->c_thread_index, req->c_s_index, req->hr_pa_wrk_index, + req->hr_hc_index, ts->thread_index, ts->session_index); + + return s; +} + +static u8 * +http1_format_req (u8 *s, va_list *args) +{ + u32 req_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); + http_conn_t *hc = va_arg (*args, http_conn_t *); + u32 verbose = va_arg (*args, u32); + http_req_t *req; + + req = http1_req_get (req_index, thread_index); + + s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http1_req, req, hc); + if (verbose) + { + s = + format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc); + if (verbose > 1) + s = format (s, "\n"); + } + + return s; +} + +static void +http1_app_tx_callback (http_conn_t *hc, u32 req_index, + transport_send_params_t *sp) +{ + http_req_t *req; + + req = http1_req_get (req_index, hc->c_thread_index); + + if (!http1_req_state_is_tx_valid (req)) + { + /* Sometimes the server apps can send the response earlier + * than expected (e.g when rejecting a bad request)*/ + if (req->state == HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA && + (hc->flags & HTTP_CONN_F_IS_SERVER)) + { + http_io_ts_drain_all (hc); + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_REPLY); + } + else + { + clib_warning ("hc [%u]%x invalid tx state: http req state " + "'%U', session state '%U'", + hc->c_thread_index, hc->hc_hc_index, + format_http_req_state, req->state, + format_http_conn_state, hc); + http_io_as_drain_all (req); + return; + } + } + + HTTP_DBG (1, "run state machine"); + http1_req_run_state_machine (hc, req, sp, 1); +} + +static void +http1_app_rx_evt_callback (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index) +{ + http_req_t *req; + + req = http1_req_get (req_index, thread_index); + + if (req->state == HTTP_REQ_STATE_TUNNEL) + http1_req_state_tunnel_rx (hc, req, 0); +} + +static void +http1_app_close_callback (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index) +{ + http_req_t *req; + + req = http1_req_get_if_valid (req_index, thread_index); + if (!req) + { + HTTP_DBG (1, "req already deleted"); + return; + } + /* Nothing more to send, confirm close */ + if (!http_io_as_max_read (req) || hc->state == HTTP_CONN_STATE_CLOSED) + { + HTTP_DBG (1, "nothing 
more to send, confirm close"); + session_transport_closed_notify (&req->connection); + http_disconnect_transport (hc); + } + else + { + /* Wait for all data to be written to ts */ + hc->state = HTTP_CONN_STATE_APP_CLOSED; + } +} + +static void +http1_app_reset_callback (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index) +{ + http_req_t *req; + req = http1_req_get (req_index, thread_index); + session_transport_closed_notify (&req->connection); + http_disconnect_transport (hc); +} + +static int +http1_transport_connected_callback (http_conn_t *hc) +{ + http_req_t *req; + + ASSERT (hc->flags & HTTP_CONN_F_NO_APP_SESSION); + + req = http1_conn_alloc_req (hc); + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_METHOD); + return http_conn_established (hc, req); +} + +static void +http1_transport_rx_callback (http_conn_t *hc) +{ + http_req_t *req; + + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + { + ASSERT (hc->flags & HTTP_CONN_F_IS_SERVER); + /* first request - create request ctx and notify app about new conn */ + req = http1_conn_alloc_req (hc); + http_conn_accept_request (hc, req); + http_req_state_change (req, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); + hc->flags &= ~HTTP_CONN_F_NO_APP_SESSION; + } + else + req = http1_conn_get_req (hc); + + if (!http1_req_state_is_rx_valid (req)) + { + if (http_io_ts_max_read (hc)) + clib_warning ("hc [%u]%x invalid rx state: http req state " + "'%U', session state '%U'", + hc->c_thread_index, hc->hc_hc_index, + format_http_req_state, req->state, + format_http_conn_state, hc); + http_io_ts_drain_all (hc); + return; + } + + HTTP_DBG (1, "run state machine"); + http1_req_run_state_machine (hc, req, 0, 0); +} + +static void +http1_transport_close_callback (http_conn_t *hc) +{ + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + return; + /* Nothing more to rx, propagate to app */ + if (!http_io_ts_max_read (hc)) + { + http_req_t *req = http1_conn_get_req (hc); + session_transport_closing_notify (&req->connection); + } +} + +static void +http1_transport_reset_callback (http_conn_t *hc) +{ + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + return; + http_req_t *req = http1_conn_get_req (hc); + session_transport_reset_notify (&req->connection); +} + +static void +http1_transport_conn_reschedule_callback (http_conn_t *hc) +{ + ASSERT (hc->flags & HTTP_CONN_F_HAS_REQUEST); + http_req_t *req = http1_conn_get_req (hc); + transport_connection_reschedule (&req->connection); +} + +static void +http1_conn_cleanup_callback (http_conn_t *hc) +{ + http_req_t *req; + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + return; + + req = http1_conn_get_req (hc); + session_transport_delete_notify (&req->connection); + http1_conn_free_req (hc); +} + +static void +http1_enable_callback (void) +{ + http1_main_t *h1m = &http1_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + + num_threads = 1 /* main thread */ + vtm->n_threads; + + vec_validate (h1m->req_pool, num_threads - 1); +} + +const static http_engine_vft_t http1_engine = { + .name = "http1", + .hc_index_get_by_req_index = http1_hc_index_get_by_req_index, + .req_get_connection = http1_req_get_connection, + .format_req = http1_format_req, + .app_tx_callback = http1_app_tx_callback, + .app_rx_evt_callback = http1_app_rx_evt_callback, + .app_close_callback = http1_app_close_callback, + .app_reset_callback = http1_app_reset_callback, + .transport_connected_callback = http1_transport_connected_callback, + .transport_rx_callback = http1_transport_rx_callback, + .transport_close_callback = 
http1_transport_close_callback, + .transport_conn_reschedule_callback = + http1_transport_conn_reschedule_callback, + .transport_reset_callback = http1_transport_reset_callback, + .conn_cleanup_callback = http1_conn_cleanup_callback, + .enable_callback = http1_enable_callback, +}; + +static clib_error_t * +http1_init (vlib_main_t *vm) +{ + http_register_engine (&http1_engine, HTTP_VERSION_1); + return 0; +} + +VLIB_INIT_FUNCTION (http1_init) = { + .runs_after = VLIB_INITS ("http_transport_init"), +}; diff --git a/src/plugins/http/http2/frame.c b/src/plugins/http/http2/frame.c new file mode 100644 index 00000000000..c9c1931f02a --- /dev/null +++ b/src/plugins/http/http2/frame.c @@ -0,0 +1,339 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#include <vppinfra/string.h> +#include <http/http2/frame.h> + +#define MAX_U24 0xFFFFFF + +static_always_inline u8 * +http2_decode_u24 (u8 *src, u32 *value) +{ + *value = 0; + *value = (u32) (src[0] << 16) | (u32) (src[1] << 8) | (u32) src[2]; + return src + 3; +} + +static_always_inline u8 * +http2_encode_u24 (u8 *dst, u32 value) +{ + ASSERT (value <= MAX_U24); + *dst++ = (value >> 16) & 0xFF; + *dst++ = (value >> 8) & 0xFF; + *dst++ = value & 0xFF; + return dst; +} + +/* + * RFC9113 section 4.1 + * + * HTTP Frame { + * Length (24), + * Type (8), + * Flags (8), + * Reserved (1), + * Stream Identifier (31), + * Frame Payload (..), + * } + */ + +__clib_export void +http2_frame_header_read (u8 *src, http2_frame_header_t *fh) +{ + u32 *stream_id; + src = http2_decode_u24 (src, &fh->length); + fh->type = *src++; + fh->flags = *src++; + stream_id = (u32 *) src; + fh->stream_id = clib_net_to_host_u32 (*stream_id) & 0x7FFFFFFF; +} + +static void +http2_frame_header_write (http2_frame_header_t *fh, u8 *dst) +{ + u32 stream_id; + + dst = http2_encode_u24 (dst, fh->length); + *dst++ = fh->type; + *dst++ = fh->flags; + stream_id = clib_host_to_net_u32 (fh->stream_id); + clib_memcpy_fast (dst, &stream_id, sizeof (stream_id)); +} + +__clib_export http2_error_t +http2_frame_read_settings (http2_conn_settings_t *settings, u8 *payload, + u32 payload_len) +{ + http2_settings_entry_t *entry; + u32 value; + + while (payload_len >= sizeof (*entry)) + { + entry = (http2_settings_entry_t *) payload; + switch (clib_net_to_host_u16 (entry->identifier)) + { +#define _(v, label, member, min, max, default_value, err_code) \ + case HTTP2_SETTINGS_##label: \ + value = clib_net_to_host_u32 (entry->value); \ + if (!(value >= min && value <= max)) \ + return err_code; \ + settings->member = value; \ + break; + foreach_http2_settings +#undef _ + /* ignore unknown or unsupported identifier */ + default : break; + } + payload_len -= sizeof (*entry); + payload += sizeof (*entry); + } + + if (payload_len != 0) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_settings_ack (u8 **dst) +{ + http2_frame_header_t fh = { .flags = HTTP2_FRAME_FLAG_ACK, + .type = HTTP2_FRAME_TYPE_SETTINGS }; + u8 *p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); +} + +__clib_export void +http2_frame_write_settings (http2_settings_entry_t *settings, u8 **dst) +{ + u8 *p; + u32 length; + http2_settings_entry_t *entry, e; + + ASSERT (settings); + ASSERT (vec_len (settings) > 0); + + length = vec_len (settings) * sizeof (*entry); + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_SETTINGS, + .length = length }; + p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, 
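The u24 helpers and http2_frame_header_read/_write around this point implement the 9-octet frame header of RFC 9113 section 4.1: a 24-bit length, one type octet, one flags octet, and a reserved bit followed by a 31-bit stream identifier, all in network byte order. The same layout in a self-contained pack/unpack pair (hypothetical names, stdint only):

#include <stdint.h>

static void
frame_header_pack (uint8_t dst[9], uint32_t length, uint8_t type,
		   uint8_t flags, uint32_t stream_id)
{
  dst[0] = (length >> 16) & 0xff;
  dst[1] = (length >> 8) & 0xff;
  dst[2] = length & 0xff;
  dst[3] = type;
  dst[4] = flags;
  dst[5] = (stream_id >> 24) & 0x7f; /* reserved bit always 0 */
  dst[6] = (stream_id >> 16) & 0xff;
  dst[7] = (stream_id >> 8) & 0xff;
  dst[8] = stream_id & 0xff;
}

static void
frame_header_unpack (const uint8_t src[9], uint32_t *length, uint8_t *type,
		     uint8_t *flags, uint32_t *stream_id)
{
  *length = ((uint32_t) src[0] << 16) | ((uint32_t) src[1] << 8) | src[2];
  *type = src[3];
  *flags = src[4];
  *stream_id = (((uint32_t) src[5] & 0x7f) << 24) | ((uint32_t) src[6] << 16) |
	       ((uint32_t) src[7] << 8) | src[8];
}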
p); + + vec_add2 (*dst, p, length); + vec_foreach (entry, settings) + { + e.identifier = clib_host_to_net_u16 (entry->identifier); + e.value = clib_host_to_net_u32 (entry->value); + clib_memcpy_fast (p, &e, sizeof (e)); + p += sizeof (e); + } +} + +#define WINDOW_UPDATE_LENGTH 4 + +__clib_export http2_error_t +http2_frame_read_window_update (u32 *increment, u8 *payload, u32 payload_len) +{ + u32 *value; + + if (payload_len != WINDOW_UPDATE_LENGTH) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + value = (u32 *) payload; + + if (value == 0) + return HTTP2_ERROR_PROTOCOL_ERROR; + + *increment = clib_net_to_host_u32 (*value) & 0x7FFFFFFF; + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_window_update (u32 increment, u32 stream_id, u8 **dst) +{ + u8 *p; + u32 value; + + ASSERT (increment > 0 && increment <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_WINDOW_UPDATE, + .length = WINDOW_UPDATE_LENGTH, + .stream_id = stream_id }; + p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); + + vec_add2 (*dst, p, WINDOW_UPDATE_LENGTH); + value = clib_host_to_net_u32 (increment); + clib_memcpy_fast (p, &value, WINDOW_UPDATE_LENGTH); +} + +#define RST_STREAM_LENGTH 4 + +__clib_export http2_error_t +http2_frame_read_rst_stream (u32 *error_code, u8 *payload, u32 payload_len) +{ + u32 *value; + + if (payload_len != RST_STREAM_LENGTH) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + value = (u32 *) payload; + + *error_code = clib_net_to_host_u32 (*value); + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_rst_stream (http2_error_t error_code, u32 stream_id, + u8 **dst) +{ + u8 *p; + u32 value; + + ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_RST_STREAM, + .length = RST_STREAM_LENGTH, + .stream_id = stream_id }; + p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); + + vec_add2 (*dst, p, RST_STREAM_LENGTH); + value = clib_host_to_net_u32 ((u32) error_code); + clib_memcpy_fast (p, &value, RST_STREAM_LENGTH); +} + +#define GOAWAY_MIN_SIZE 8 + +__clib_export http2_error_t +http2_frame_read_goaway (u32 *error_code, u32 *last_stream_id, u8 *payload, + u32 payload_len) +{ + u32 *value; + + if (payload_len < GOAWAY_MIN_SIZE) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + value = (u32 *) payload; + *last_stream_id = clib_net_to_host_u32 (*value) & 0x7FFFFFFF; + payload += 4; + + value = (u32 *) payload; + *error_code = clib_net_to_host_u32 (*value); + + /* TODO: Additional Debug Data */ + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_goaway (http2_error_t error_code, u32 last_stream_id, + u8 **dst) +{ + u8 *p; + u32 value; + + ASSERT (last_stream_id <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_GOAWAY, + .length = GOAWAY_MIN_SIZE }; + p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); + + vec_add2 (*dst, p, GOAWAY_MIN_SIZE); + value = clib_host_to_net_u32 (last_stream_id); + clib_memcpy_fast (p, &value, 4); + p += 4; + value = clib_host_to_net_u32 ((u32) error_code); + clib_memcpy_fast (p, &value, 4); + /* TODO: Additional Debug Data */ +} + +void +http2_frame_write_ping (u8 is_resp, u8 *payload, u8 **dst) +{ + u8 *p; + http2_frame_header_t fh = { + .type = HTTP2_FRAME_TYPE_PING, + .length = HTTP2_PING_PAYLOAD_LEN, + .flags = is_resp ? 
HTTP2_FRAME_FLAG_ACK : 0, + }; + + p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); + vec_add2 (*dst, p, HTTP2_PING_PAYLOAD_LEN); + clib_memcpy_fast (p, payload, HTTP2_PING_PAYLOAD_LEN); +} + +#define PRIORITY_DATA_LEN 5 + +__clib_export http2_error_t +http2_frame_read_headers (u8 **headers, u32 *headers_len, u8 *payload, + u32 payload_len, u8 flags) +{ + *headers_len = payload_len; + + if (flags & HTTP2_FRAME_FLAG_PADED) + { + u8 pad_len = *payload++; + if ((u32) pad_len >= payload_len) + return HTTP2_ERROR_PROTOCOL_ERROR; + *headers_len -= (pad_len + 1); + } + + if (flags & HTTP2_FRAME_FLAG_PRIORITY) + { + if (*headers_len <= PRIORITY_DATA_LEN) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + /* just skip, priority scheme defined in RFC7540 is deprecated */ + *headers_len -= PRIORITY_DATA_LEN; + payload += PRIORITY_DATA_LEN; + } + + *headers = payload; + + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_headers_header (u32 headers_len, u32 stream_id, u8 flags, + u8 *dst) +{ + ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_HEADERS, + .length = headers_len, + .flags = flags, + .stream_id = stream_id }; + http2_frame_header_write (&fh, dst); +} + +__clib_export http2_error_t +http2_frame_read_data (u8 **data, u32 *data_len, u8 *payload, u32 payload_len, + u8 flags) +{ + *data_len = payload_len; + + if (flags & HTTP2_FRAME_FLAG_PADED) + { + u8 pad_len = *payload++; + if ((u32) pad_len >= payload_len) + return HTTP2_ERROR_PROTOCOL_ERROR; + *data_len -= (pad_len + 1); + } + + *data = payload; + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_data_header (u32 data_len, u32 stream_id, u8 flags, u8 *dst) +{ + ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_DATA, + .length = data_len, + .flags = flags, + .stream_id = stream_id }; + http2_frame_header_write (&fh, dst); +} diff --git a/src/plugins/http/http2/frame.h b/src/plugins/http/http2/frame.h new file mode 100644 index 00000000000..53a37c1aa0a --- /dev/null +++ b/src/plugins/http/http2/frame.h @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
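http2_frame_read_headers and http2_frame_read_data strip optional padding: when the PADDED flag is set, the first payload octet gives the pad length, that many octets at the end of the payload are discarded, and a pad length that reaches or exceeds the payload is a protocol error. A standalone sketch of that check (hypothetical strip_padding helper):

#include <stdint.h>

static int
strip_padding (const uint8_t *payload, uint32_t payload_len, uint8_t padded,
	       const uint8_t **data, uint32_t *data_len)
{
  if (!padded)
    {
      *data = payload;
      *data_len = payload_len;
      return 0;
    }
  if (payload_len == 0 || payload[0] >= payload_len)
    return -1; /* padding does not fit in the frame payload */
  *data = payload + 1;
  *data_len = payload_len - 1 - payload[0];
  return 0;
}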
+ */ + +#ifndef SRC_PLUGINS_HTTP_HTTP2_FRAME_H_ +#define SRC_PLUGINS_HTTP_HTTP2_FRAME_H_ + +#include <vppinfra/error.h> +#include <vppinfra/types.h> +#include <http/http2/http2.h> + +#define HTTP2_FRAME_HEADER_SIZE 9 +#define HTTP2_PING_PAYLOAD_LEN 8 + +#define foreach_http2_frame_type \ + _ (0x00, DATA, "DATA") \ + _ (0x01, HEADERS, "HEADERS") \ + _ (0x02, PRIORITY, "PRIORITY") \ + _ (0x03, RST_STREAM, "RST_STREAM") \ + _ (0x04, SETTINGS, "SETTINGS") \ + _ (0x05, PUSH_PROMISE, "PUSH_PROMISE") \ + _ (0x06, PING, "PING") \ + _ (0x07, GOAWAY, "GOAWAY") \ + _ (0x08, WINDOW_UPDATE, "WINDOW_UPDATE") \ + _ (0x09, CONTINUATION, "CONTINUATION") + +typedef enum +{ +#define _(v, n, s) HTTP2_FRAME_TYPE_##n = v, + foreach_http2_frame_type +#undef _ +} __clib_packed http2_frame_type_t; + +STATIC_ASSERT_SIZEOF (http2_frame_type_t, 1); + +#define foreach_http2_frame_flag \ + _ (0, NONE) \ + _ (1, END_STREAM) \ + _ (1, ACK) \ + _ (1 << 2, END_HEADERS) \ + _ (1 << 3, PADED) \ + _ (1 << 5, PRIORITY) + +typedef enum +{ +#define _(v, n) HTTP2_FRAME_FLAG_##n = v, + foreach_http2_frame_flag +#undef _ +} __clib_packed http2_frame_flag_t; + +STATIC_ASSERT_SIZEOF (http2_frame_flag_t, 1); + +typedef struct +{ + u32 length; + http2_frame_type_t type; + u8 flags; + u32 stream_id; +} http2_frame_header_t; + +typedef struct +{ + u16 identifier; + u32 value; +} __clib_packed http2_settings_entry_t; + +/** + * Parse frame header + * + * @param src Pointer to the beginning of the frame + * @param fh Parsed frame header + */ +void http2_frame_header_read (u8 *src, http2_frame_header_t *fh); + +/** + * Add 9 bytes (frame header size) to the end of given vector + * + * @param dst Pointer to vector + * + * @return Pointer to the frame header beginning + */ +static_always_inline u8 * +http2_frame_header_alloc (u8 **dst) +{ + u8 *p; + + vec_add2 (*dst, p, HTTP2_FRAME_HEADER_SIZE); + return p; +} + +/** + * Parse SETTINGS frame payload + * + * @param settings Vector of HTTP/2 settings + * @param payload Payload to parse + * @param payload_len Payload length + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_settings (http2_conn_settings_t *settings, + u8 *payload, u32 payload_len); + +/** + * Write SETTINGS ACK frame to the end of given vector + * + * @param dst Vector where SETTINGS ACK frame will be written + */ +void http2_frame_write_settings_ack (u8 **dst); + +/** + * Write SETTINGS frame to the end of given vector + * + * @param settings Vector of HTTP/2 settings + * @param dst Vector where SETTINGS frame will be written + */ +void http2_frame_write_settings (http2_settings_entry_t *settings, u8 **dst); + +/** + * Parse WINDOW_UPDATE frame payload + * + * @param increment Parsed window increment value + * @param payload Payload to parse + * @param payload_len Payload length + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_window_update (u32 *increment, u8 *payload, + u32 payload_len); + +/** + * Write WINDOW_UPDATE frame to the end of given vector + * + * @param increment Window increment value + * @param stream_id Stream ID + * @param dst Vector where WINDOW_UPDATE frame will be written + */ +void http2_frame_write_window_update (u32 increment, u32 stream_id, u8 **dst); + +/** + * Parse RST_STREAM frame payload + * + * @param error_code Parsed error code + * @param payload Payload to parse + * @param payload_len Payload length + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t 
http2_frame_read_rst_stream (u32 *error_code, u8 *payload, + u32 payload_len); + +/** + * Write RST_STREAM frame to the end of given vector + * + * @param error_code Error code + * @param stream_id Stream ID, except 0 + * @param dst Vector where RST_STREAM frame will be written + */ +void http2_frame_write_rst_stream (http2_error_t error_code, u32 stream_id, + u8 **dst); + +/** + * Parse GOAWAY frame payload + * + * @param last_stream_id Parsed last stream ID + * @param error_code Parsed error code + * @param payload Payload to parse + * @param payload_len Payload length + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_goaway (u32 *last_stream_id, u32 *error_code, + u8 *payload, u32 payload_len); + +/** + * Write GOAWAY frame to the end of given vector + * + * @param error_code Error code + * @param last_stream_id Last stream ID + * @param dst Vector where GOAWAY frame will be written + */ +void http2_frame_write_goaway (http2_error_t error_code, u32 last_stream_id, + u8 **dst); + +/** + * Write PING frame to the end of given vector + * + * @param is_resp Indicate that this is PING response + * @param payload Payload to parse + * @param dst Vector where GOAWAY frame will be written + */ +void http2_frame_write_ping (u8 is_resp, u8 *payload, u8 **dst); + +/** + * Parse HEADERS frame payload + * + * @param headers Pointer to header block fragment + * @param headers_len Header block fragment length + * @param payload Payload to parse + * @param payload_len Payload length + * @param flags Flag field of frame header + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_headers (u8 **headers, u32 *headers_len, + u8 *payload, u32 payload_len, + u8 flags); + +/** + * Write HEADERS frame header + * + * @param headers_len Header block fragment length + * @param stream_id Stream ID, except 0 + * @param flags Frame header flags + * @param dst Pointer where frame header will be written + * + * @note Use @c http2_frame_header_alloc before + */ +void http2_frame_write_headers_header (u32 headers_len, u32 stream_id, + u8 flags, u8 *dst); + +/** + * Parse DATA frame payload + * + * @param headers Pointer to data + * @param headers_len Data length + * @param payload Payload to parse + * @param payload_len Payload length + * @param flags Flag field of frame header + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_data (u8 **data, u32 *data_len, u8 *payload, + u32 payload_len, u8 flags); + +/** + * Write DATA frame header + * + * @param data_len Data length + * @param stream_id Stream ID, except 0 + * @param flags Frame header flags + * @param dst Pointer where frame header will be written + */ +void http2_frame_write_data_header (u32 data_len, u32 stream_id, u8 flags, + u8 *dst); + +#endif /* SRC_PLUGINS_HTTP_HTTP2_FRAME_H_ */ diff --git a/src/plugins/http/http2/hpack.c b/src/plugins/http/http2/hpack.c new file mode 100644 index 00000000000..76021ae14a6 --- /dev/null +++ b/src/plugins/http/http2/hpack.c @@ -0,0 +1,1173 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
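Editor's note: http2_frame_write_goaway() declared above emits, after the frame header, an 8-octet fixed payload: a reserved bit plus the 31-bit last processed stream id, followed by a 32-bit error code (RFC9113 section 6.8), with optional debug data after it. A standalone sketch of just that payload serialization (big-endian, like all HTTP/2 fields); the values in main() are example data only.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: builds the fixed 8-octet part of a GOAWAY payload. */
static void
build_goaway_payload (uint8_t *buf, uint32_t last_stream_id,
		      uint32_t error_code)
{
  buf[0] = (last_stream_id >> 24) & 0x7F; /* clear the reserved bit */
  buf[1] = (last_stream_id >> 16) & 0xFF;
  buf[2] = (last_stream_id >> 8) & 0xFF;
  buf[3] = last_stream_id & 0xFF;
  buf[4] = (error_code >> 24) & 0xFF;
  buf[5] = (error_code >> 16) & 0xFF;
  buf[6] = (error_code >> 8) & 0xFF;
  buf[7] = error_code & 0xFF;
}

int
main (void)
{
  uint8_t payload[8];

  build_goaway_payload (payload, 5, 1); /* last stream 5, PROTOCOL_ERROR */
  for (int i = 0; i < 8; i++)
    printf ("%02x ", payload[i]);
  printf ("\n");
  return 0;
}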
+ */ + +#include <vppinfra/error.h> +#include <vppinfra/ring.h> +#include <http/http2/hpack.h> +#include <http/http2/huffman_table.h> +#include <http/http_status_codes.h> + +#define HPACK_STATIC_TABLE_SIZE 61 + +typedef struct +{ + char *name; + uword name_len; + char *value; + uword value_len; +} hpack_static_table_entry_t; + +#define name_val_token_lit(name, value) \ + (name), sizeof (name) - 1, (value), sizeof (value) - 1 + +static hpack_static_table_entry_t + hpack_static_table[HPACK_STATIC_TABLE_SIZE] = { + { name_val_token_lit (":authority", "") }, + { name_val_token_lit (":method", "GET") }, + { name_val_token_lit (":method", "POST") }, + { name_val_token_lit (":path", "/") }, + { name_val_token_lit (":path", "/index.html") }, + { name_val_token_lit (":scheme", "http") }, + { name_val_token_lit (":scheme", "https") }, + { name_val_token_lit (":status", "200") }, + { name_val_token_lit (":status", "204") }, + { name_val_token_lit (":status", "206") }, + { name_val_token_lit (":status", "304") }, + { name_val_token_lit (":status", "400") }, + { name_val_token_lit (":status", "404") }, + { name_val_token_lit (":status", "500") }, + { name_val_token_lit ("accept-charset", "") }, + { name_val_token_lit ("accept-encoding", "gzip, deflate") }, + { name_val_token_lit ("accept-language", "") }, + { name_val_token_lit ("accept-ranges", "") }, + { name_val_token_lit ("accept", "") }, + { name_val_token_lit ("access-control-allow-origin", "") }, + { name_val_token_lit ("age", "") }, + { name_val_token_lit ("allow", "") }, + { name_val_token_lit ("authorization", "") }, + { name_val_token_lit ("cache-control", "") }, + { name_val_token_lit ("content-disposition", "") }, + { name_val_token_lit ("content-encoding", "") }, + { name_val_token_lit ("content-language", "") }, + { name_val_token_lit ("content-length", "") }, + { name_val_token_lit ("content-location", "") }, + { name_val_token_lit ("content-range", "") }, + { name_val_token_lit ("content-type", "") }, + { name_val_token_lit ("cookie", "") }, + { name_val_token_lit ("date", "") }, + { name_val_token_lit ("etag", "") }, + { name_val_token_lit ("etag", "") }, + { name_val_token_lit ("expires", "") }, + { name_val_token_lit ("from", "") }, + { name_val_token_lit ("host", "") }, + { name_val_token_lit ("if-match", "") }, + { name_val_token_lit ("if-modified-since", "") }, + { name_val_token_lit ("if-none-match", "") }, + { name_val_token_lit ("if-range", "") }, + { name_val_token_lit ("if-unmodified-since", "") }, + { name_val_token_lit ("last-modified", "") }, + { name_val_token_lit ("link", "") }, + { name_val_token_lit ("location", "") }, + { name_val_token_lit ("max-forwards", "") }, + { name_val_token_lit ("proxy-authenticate", "") }, + { name_val_token_lit ("proxy-authorization", "") }, + { name_val_token_lit ("range", "") }, + { name_val_token_lit ("referer", "") }, + { name_val_token_lit ("refresh", "") }, + { name_val_token_lit ("retry-after", "") }, + { name_val_token_lit ("server", "") }, + { name_val_token_lit ("set-cookie", "") }, + { name_val_token_lit ("strict-transport-security", "") }, + { name_val_token_lit ("transfer-encoding", "") }, + { name_val_token_lit ("user-agent", "") }, + { name_val_token_lit ("vary", "") }, + { name_val_token_lit ("via", "") }, + { name_val_token_lit ("www-authenticate", "") }, + }; + +typedef struct +{ + char *base; + uword len; + u8 static_table_index; +} hpack_token_t; + +static hpack_token_t hpack_headers[] = { +#define _(sym, str_canonical, str_lower, hpack_index) \ + { http_token_lit 
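Editor's note: hpack_decode_int() above implements the prefixed integer coding of RFC7541 section 5.1: a value below 2^prefix_len - 1 lives entirely in the prefix, otherwise the prefix is all ones and the remainder follows in 7-bit groups with a continuation bit. A standalone worked example (re-implemented with plain stdint types, not the function above) decoding the RFC's C.1.2 test vector 0x1F 0x9A 0x0A with a 5-bit prefix, which yields 1337:

#include <stdint.h>
#include <stdio.h>

/* Minimal decoder for the RFC7541 section 5.1 integer representation;
 * no overflow handling, illustration only. */
static uint64_t
decode_prefixed_int (const uint8_t *p, const uint8_t *end, int prefix_len)
{
  uint64_t value = p[0] & ((1 << prefix_len) - 1);
  int shift = 0;

  if (value < (uint64_t) ((1 << prefix_len) - 1))
    return value; /* fits in the prefix */

  for (p++; p < end; p++)
    {
      value += (uint64_t) (*p & 0x7F) << shift;
      shift += 7;
      if ((*p & 0x80) == 0) /* last byte has the continuation bit cleared */
	break;
    }
  return value;
}

int
main (void)
{
  /* RFC7541 C.1.2: 1337 encoded with a 5-bit prefix */
  uint8_t wire[] = { 0x1F, 0x9A, 0x0A };

  printf ("%llu\n",
	  (unsigned long long) decode_prefixed_int (wire, wire + 3, 5));
  return 0;
}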
(str_lower), hpack_index }, + foreach_http_header_name +#undef _ +}; + +__clib_export uword +hpack_decode_int (u8 **src, u8 *end, u8 prefix_len) +{ + uword value, new_value; + u8 *p, shift = 0, byte; + u16 prefix_max; + + ASSERT (*src < end); + ASSERT (prefix_len >= 1 && prefix_len <= 8); + + p = *src; + prefix_max = (1 << prefix_len) - 1; + value = *p & (u8) prefix_max; + p++; + /* if integer value is less than 2^prefix_len-1 it's encoded within prefix */ + if (value != prefix_max) + { + *src = p; + return value; + } + + while (p != end) + { + byte = *p; + p++; + new_value = value + ((uword) (byte & 0x7F) << shift); + shift += 7; + /* check for overflow */ + if (new_value < value) + return HPACK_INVALID_INT; + value = new_value; + /* MSB of the last byte is zero */ + if ((byte & 0x80) == 0) + { + *src = p; + return value; + } + } + + return HPACK_INVALID_INT; +} + +http2_error_t +hpack_decode_huffman (u8 **src, u8 *end, u8 **buf, uword *buf_len) +{ + u64 accumulator = 0; + u8 accumulator_len = 0; + u8 *p; + hpack_huffman_code_t *code; + + p = *src; + while (1) + { + /* out of space? */ + if (*buf_len == 0) + return HTTP2_ERROR_INTERNAL_ERROR; + /* refill */ + while (p < end && accumulator_len <= 56) + { + accumulator <<= 8; + accumulator_len += 8; + accumulator |= (u64) *p++; + } + /* first try short codes (5 - 8 bits) */ + code = + &huff_code_table_fast[(u8) (accumulator >> (accumulator_len - 8))]; + /* zero code length mean no luck */ + if (PREDICT_TRUE (code->code_len)) + { + **buf = code->symbol; + (*buf)++; + (*buf_len)--; + accumulator_len -= code->code_len; + } + else + { + /* slow path / long codes (10 - 30 bits) */ + u32 tmp; + /* group boundaries are aligned to 32 bits */ + if (accumulator_len < 32) + tmp = accumulator << (32 - accumulator_len); + else + tmp = accumulator >> (accumulator_len - 32); + /* figure out which interval code falls into, this is possible + * because HPACK use canonical Huffman codes + * see Schwartz, E. and B. Kallick, “Generating a canonical prefix + * encoding” + */ + hpack_huffman_group_t *hg = hpack_huffman_get_group (tmp); + /* trim code to correct length */ + u32 code = (accumulator >> (accumulator_len - hg->code_len)) & + ((1 << hg->code_len) - 1); + if (!code) + return HTTP2_ERROR_COMPRESSION_ERROR; + /* find symbol in the list */ + **buf = hg->symbols[code - hg->first_code]; + (*buf)++; + (*buf_len)--; + accumulator_len -= hg->code_len; + } + /* all done */ + if (p == end && accumulator_len < 8) + { + /* there might be one more symbol encoded with short code */ + if (accumulator_len >= 5) + { + /* first check EOF case */ + if (((1 << accumulator_len) - 1) == + (accumulator & ((1 << accumulator_len) - 1))) + break; + + /* out of space? */ + if (*buf_len == 0) + return HTTP2_ERROR_INTERNAL_ERROR; + + /* if bogus EOF check bellow will fail */ + code = &huff_code_table_fast[(u8) (accumulator + << (8 - accumulator_len))]; + **buf = code->symbol; + (*buf)++; + (*buf_len)--; + accumulator_len -= code->code_len; + /* end at byte boundary? 
*/ + if (accumulator_len == 0) + break; + } + /* we must end with EOF here */ + if (((1 << accumulator_len) - 1) != + (accumulator & ((1 << accumulator_len) - 1))) + return HTTP2_ERROR_COMPRESSION_ERROR; + break; + } + } + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export http2_error_t +hpack_decode_string (u8 **src, u8 *end, u8 **buf, uword *buf_len) +{ + u8 *p, is_huffman; + uword len; + + if (*src == end) + return HTTP2_ERROR_COMPRESSION_ERROR; + + p = *src; + /* H flag in first bit */ + is_huffman = *p & 0x80; + + /* length is integer with 7 bit prefix */ + len = hpack_decode_int (&p, end, 7); + if (PREDICT_FALSE (len == HPACK_INVALID_INT)) + return HTTP2_ERROR_COMPRESSION_ERROR; + + /* do we have everything? */ + if (len > (end - p)) + return HTTP2_ERROR_COMPRESSION_ERROR; + + if (is_huffman) + { + *src = (p + len); + return hpack_decode_huffman (&p, p + len, buf, buf_len); + } + else + { + /* enough space? */ + if (len > *buf_len) + return HTTP2_ERROR_INTERNAL_ERROR; + + clib_memcpy (*buf, p, len); + *buf_len -= len; + *buf += len; + *src = (p + len); + return HTTP2_ERROR_NO_ERROR; + } +} + +__clib_export u8 * +hpack_encode_int (u8 *dst, uword value, u8 prefix_len) +{ + u16 prefix_max; + + ASSERT (prefix_len >= 1 && prefix_len <= 8); + + prefix_max = (1 << prefix_len) - 1; + + /* if integer value is less than 2^prefix_len-1 it's encoded within prefix */ + if (value < prefix_max) + { + *dst++ |= (u8) value; + return dst; + } + + /* otherwise all bits of the prefix are set to 1 */ + *dst++ |= (u8) prefix_max; + /* and the value is decreased by 2^prefix_len-1 */ + value -= prefix_max; + /* MSB of each byte is used as continuation flag */ + for (; value >= 0x80; value >>= 7) + *dst++ = 0x80 | (value & 0x7F); + /* except for the last byte */ + *dst++ = (u8) value; + + return dst; +} + +uword +hpack_huffman_encoded_len (const u8 *value, uword value_len) +{ + uword len = 0; + u8 *end; + hpack_huffman_symbol_t *sym; + + end = (u8 *) value + value_len; + while (value != end) + { + sym = &huff_sym_table[*value++]; + len += sym->code_len; + } + /* round up to byte boundary */ + return (len + 7) / 8; +} + +u8 * +hpack_encode_huffman (u8 *dst, const u8 *value, uword value_len) +{ + u8 *end; + hpack_huffman_symbol_t *sym; + u8 accumulator_len = 40; /* leftover (1 byte) + max code_len (4 bytes) */ + u64 accumulator = 0; /* to fit leftover and current code */ + + end = (u8 *) value + value_len; + + while (value != end) + { + sym = &huff_sym_table[*value++]; + /* add current code to leftover of previous one */ + accumulator |= (u64) sym->code << (accumulator_len - sym->code_len); + accumulator_len -= sym->code_len; + /* write only fully occupied bytes (max 4) */ + switch (accumulator_len) + { + case 1 ... 8: +#define WRITE_BYTE() \ + *dst = (u8) (accumulator >> 32); \ + accumulator_len += 8; \ + accumulator <<= 8; \ + dst++; + WRITE_BYTE (); + case 9 ... 16: + WRITE_BYTE (); + case 17 ... 24: + WRITE_BYTE (); + case 25 ... 
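Editor's note: hpack_decode_string() and hpack_encode_string() above follow the string literal layout of RFC7541 section 5.2: the top bit of the first octet flags Huffman coding, the length is a 7-bit-prefix integer, and the octets follow. For the raw (non-Huffman) form the wire image is simply length-then-bytes; "custom-key" from RFC7541 C.2.1, for instance, is 0x0A followed by ten ASCII octets. A standalone sketch of that raw path, assuming the length fits in the 7-bit prefix:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative raw (H bit clear) string literal encoder for lengths < 127;
 * the real encoder above also computes the Huffman length and picks the
 * shorter of the two forms. */
static uint8_t *
encode_raw_string (uint8_t *dst, const char *s, uint8_t len)
{
  *dst++ = len & 0x7F; /* H bit (0x80) left clear */
  memcpy (dst, s, len);
  return dst + len;
}

int
main (void)
{
  uint8_t buf[32], *end;

  end = encode_raw_string (buf, "custom-key", 10); /* RFC7541 C.2.1 */
  for (uint8_t *p = buf; p < end; p++)
    printf ("%02x ", *p);
  printf ("\n"); /* 0a 63 75 73 74 6f 6d 2d 6b 65 79 */
  return 0;
}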
32: + WRITE_BYTE (); + default: + break; + } + } + + /* padding (0-7 bits)*/ + ASSERT (accumulator_len > 32 && accumulator_len <= 40); + if (accumulator_len != 40) + { + accumulator |= (u64) 0x7F << (accumulator_len - 7); + *dst = (u8) (accumulator >> 32); + dst++; + } + return dst; +} + +__clib_export u8 * +hpack_encode_string (u8 *dst, const u8 *value, uword value_len) +{ + uword huff_len; + + huff_len = hpack_huffman_encoded_len (value, value_len); + /* raw bytes might take fewer bytes */ + if (huff_len >= value_len) + { + *dst = 0; /* clear H flag */ + dst = hpack_encode_int (dst, value_len, 7); + clib_memcpy (dst, value, value_len); + return dst + value_len; + } + + *dst = 0x80; /* set H flag */ + dst = hpack_encode_int (dst, huff_len, 7); + dst = hpack_encode_huffman (dst, value, value_len); + + return dst; +} + +__clib_export void +hpack_dynamic_table_init (hpack_dynamic_table_t *table, u32 max_size) +{ + table->max_size = max_size; + table->size = max_size; + table->used = 0; + clib_ring_new (table->entries, + max_size / HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD); +} + +__clib_export void +hpack_dynamic_table_free (hpack_dynamic_table_t *table) +{ + hpack_dynamic_table_entry_t *e; + + while ((e = clib_ring_deq (table->entries)) != 0) + vec_free (e->buf); + + clib_ring_free (table->entries); +} + +#define hpack_dynamic_table_entry_value_base(e) \ + ((char *) ((e)->buf + (e)->name_len)) +#define hpack_dynamic_table_entry_value_len(e) \ + (vec_len ((e)->buf) - (e)->name_len) + +always_inline hpack_dynamic_table_entry_t * +hpack_dynamic_table_get (hpack_dynamic_table_t *table, uword index) +{ + if (index > clib_ring_n_enq (table->entries)) + return 0; + + hpack_dynamic_table_entry_t *first = clib_ring_get_first (table->entries); + u32 first_index = first - table->entries; + u32 entry_index = + (first_index + (clib_ring_n_enq (table->entries) - 1 - (u32) index)) % + vec_len (table->entries); + return table->entries + entry_index; +} + +__clib_export u8 * +format_hpack_dynamic_table (u8 *s, va_list *args) +{ + hpack_dynamic_table_t *table = va_arg (*args, hpack_dynamic_table_t *); + u32 i; + hpack_dynamic_table_entry_t *e; + + s = format (s, "HPACK dynamic table:\n"); + for (i = 0; i < clib_ring_n_enq (table->entries); i++) + { + e = hpack_dynamic_table_get (table, i); + s = format (s, "\t[%u] %U: %U\n", i, format_http_bytes, e->buf, + e->name_len, format_http_bytes, + hpack_dynamic_table_entry_value_base (e), + hpack_dynamic_table_entry_value_len (e)); + } + return s; +} + +static inline void +hpack_dynamic_table_evict_one (hpack_dynamic_table_t *table) +{ + u32 entry_size; + hpack_dynamic_table_entry_t *e; + + e = clib_ring_deq (table->entries); + ASSERT (e); + HTTP_DBG (2, "%U: %U", format_http_bytes, e->buf, e->name_len, + format_http_bytes, hpack_dynamic_table_entry_value_base (e), + hpack_dynamic_table_entry_value_len (e)); + entry_size = vec_len (e->buf) + HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD; + table->used -= entry_size; + vec_reset_length (e->buf); +} + +static void +hpack_dynamic_table_add (hpack_dynamic_table_t *table, http_token_t *name, + http_token_t *value) +{ + u32 entry_size; + hpack_dynamic_table_entry_t *e; + + entry_size = name->len + value->len + HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD; + + /* make space or evict all */ + while (clib_ring_n_enq (table->entries) && + (table->used + entry_size > table->size)) + hpack_dynamic_table_evict_one (table); + + /* attempt to add entry larger than the maximum size is not error */ + if (entry_size > table->size) + return; + + e = 
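Editor's note: the dynamic table bookkeeping above follows RFC7541 section 4: each entry is charged its name length plus value length plus a fixed 32-octet overhead (HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD), the oldest entries are evicted until a new one fits, and an entry larger than the whole table is not an error, it just leaves the table empty. A minimal standalone sketch of that accounting, independent of the clib ring used by the plugin:

#include <stdint.h>
#include <stdio.h>

#define ENTRY_OVERHEAD 32 /* RFC7541 section 4.1 */
#define MAX_ENTRIES 16

/* Toy FIFO of entry sizes; the plugin keeps full name/value buffers in a
 * clib ring, this only tracks the size accounting. */
typedef struct
{
  uint32_t max_size;
  uint32_t used;
  uint32_t sizes[MAX_ENTRIES];
  uint32_t count;
} table_acct_t;

static void
table_add (table_acct_t *t, uint32_t name_len, uint32_t value_len)
{
  uint32_t entry_size = name_len + value_len + ENTRY_OVERHEAD;

  /* evict oldest entries until the new one fits (or the table is empty) */
  while (t->count && t->used + entry_size > t->max_size)
    {
      t->used -= t->sizes[0];
      for (uint32_t i = 1; i < t->count; i++)
	t->sizes[i - 1] = t->sizes[i];
      t->count--;
    }
  /* too big for the table: silently keep the table empty */
  if (entry_size > t->max_size || t->count == MAX_ENTRIES)
    return;
  t->sizes[t->count++] = entry_size;
  t->used += entry_size;
}

int
main (void)
{
  table_acct_t t = { .max_size = 100 };

  table_add (&t, 10, 20); /* 62 octets */
  table_add (&t, 5, 10);  /* 47 octets, evicts the 62-octet entry */
  printf ("used=%u count=%u\n", t.used, t.count); /* used=47 count=1 */
  return 0;
}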
clib_ring_enq (table->entries); + ASSERT (e); + vec_validate (e->buf, name->len + value->len - 1); + clib_memcpy (e->buf, name->base, name->len); + clib_memcpy (e->buf + name->len, value->base, value->len); + e->name_len = name->len; + table->used += entry_size; + + HTTP_DBG (2, "%U: %U", format_http_bytes, e->buf, e->name_len, + format_http_bytes, hpack_dynamic_table_entry_value_base (e), + hpack_dynamic_table_entry_value_len (e)); +} + +static http2_error_t +hpack_get_table_entry (uword index, http_token_t *name, http_token_t *value, + u8 value_is_indexed, hpack_dynamic_table_t *dt) +{ + if (index <= HPACK_STATIC_TABLE_SIZE) + { + hpack_static_table_entry_t *e = &hpack_static_table[index - 1]; + name->base = e->name; + name->len = e->name_len; + if (value_is_indexed) + { + value->base = e->value; + value->len = e->value_len; + } + HTTP_DBG (2, "[%llu] %U: %U", index, format_http_bytes, e->name, + e->name_len, format_http_bytes, e->value, e->value_len); + return HTTP2_ERROR_NO_ERROR; + } + else + { + hpack_dynamic_table_entry_t *e = + hpack_dynamic_table_get (dt, index - HPACK_STATIC_TABLE_SIZE - 1); + if (PREDICT_FALSE (!e)) + { + HTTP_DBG (1, "index %llu not in dynamic table", index); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + name->base = (char *) e->buf; + name->len = e->name_len; + value->base = hpack_dynamic_table_entry_value_base (e); + value->len = hpack_dynamic_table_entry_value_len (e); + HTTP_DBG (2, "[%llu] %U: %U", index, format_http_bytes, name->base, + name->len, format_http_bytes, value->base, value->len); + return HTTP2_ERROR_NO_ERROR; + } +} + +__clib_export http2_error_t +hpack_decode_header (u8 **src, u8 *end, u8 **buf, uword *buf_len, + u32 *name_len, u32 *value_len, hpack_dynamic_table_t *dt) +{ + u8 *p; + u8 value_is_indexed = 0, add_new_entry = 0; + uword old_len, new_max, index = 0; + http_token_t name, value; + http2_error_t rv; + + ASSERT (*src < end); + p = *src; + + /* dynamic table size update */ + while ((*p & 0xE0) == 0x20) + { + new_max = hpack_decode_int (&p, end, 5); + if (p == end || new_max > (uword) dt->max_size) + { + HTTP_DBG (1, "invalid dynamic table size update"); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + while (clib_ring_n_enq (dt->entries) && new_max > dt->used) + hpack_dynamic_table_evict_one (dt); + dt->size = (u32) new_max; + } + + if (*p & 0x80) /* indexed header field */ + { + index = hpack_decode_int (&p, end, 7); + /* index value of 0 is not used */ + if (index == 0 || index == HPACK_INVALID_INT) + { + HTTP_DBG (1, "invalid index"); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + value_is_indexed = 1; + } + else if (*p > 0x40) /* incremental indexing - indexed name */ + { + index = hpack_decode_int (&p, end, 6); + /* index value of 0 is not used */ + if (index == 0 || index == HPACK_INVALID_INT) + { + HTTP_DBG (1, "invalid index"); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + add_new_entry = 1; + } + else if (*p == 0x40) /* incremental indexing - new name */ + { + add_new_entry = 1; + p++; + } + else /* without indexing / never indexed */ + { + if ((*p & 0x0F) == 0) /* new name */ + p++; + else /* indexed name */ + { + index = hpack_decode_int (&p, end, 4); + /* index value of 0 is not used */ + if (index == 0 || index == HPACK_INVALID_INT) + { + HTTP_DBG (1, "invalid index"); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + } + } + + if (index) + { + rv = hpack_get_table_entry (index, &name, &value, value_is_indexed, dt); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "entry index %llu error", index); + return rv; + } + if 
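Editor's note: the dispatch at the top of hpack_decode_header() keys off the leading bits of the first octet, per RFC7541 section 6: 1xxxxxxx is an indexed field, 01xxxxxx a literal with incremental indexing, 001xxxxx a dynamic table size update, 0001xxxx a never-indexed literal and 0000xxxx a literal without indexing. A standalone classifier sketch of the same bit tests:

#include <stdint.h>
#include <stdio.h>

/* Illustrative mapping of the first octet of an HPACK field to its
 * representation type (RFC7541 section 6); mirrors the tests used in
 * hpack_decode_header() above. */
static const char *
hpack_field_kind (uint8_t first_octet)
{
  if (first_octet & 0x80)
    return "indexed header field (7-bit index)";
  if ((first_octet & 0xE0) == 0x20)
    return "dynamic table size update (5-bit prefix)";
  if (first_octet & 0x40)
    return "literal with incremental indexing (6-bit index)";
  if (first_octet & 0x10)
    return "literal never indexed (4-bit index)";
  return "literal without indexing (4-bit index)";
}

int
main (void)
{
  uint8_t samples[] = { 0x88, 0x40, 0x7A, 0x20, 0x10, 0x04 };

  for (unsigned i = 0; i < sizeof (samples); i++)
    printf ("0x%02x -> %s\n", samples[i], hpack_field_kind (samples[i]));
  return 0;
}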
(name.len > *buf_len) + { + HTTP_DBG (1, "not enough space"); + return HTTP2_ERROR_INTERNAL_ERROR; + } + clib_memcpy (*buf, name.base, name.len); + *buf_len -= name.len; + *buf += name.len; + *name_len = name.len; + if (value_is_indexed) + { + if (value.len > *buf_len) + { + HTTP_DBG (1, "not enough space"); + return HTTP2_ERROR_INTERNAL_ERROR; + } + clib_memcpy (*buf, value.base, value.len); + *buf_len -= value.len; + *buf += value.len; + *value_len = value.len; + } + } + else + { + old_len = *buf_len; + name.base = (char *) *buf; + rv = hpack_decode_string (&p, end, buf, buf_len); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "invalid header name"); + return rv; + } + *name_len = old_len - *buf_len; + name.len = *name_len; + } + + if (!value_is_indexed) + { + old_len = *buf_len; + value.base = (char *) *buf; + rv = hpack_decode_string (&p, end, buf, buf_len); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "invalid header value"); + return rv; + } + *value_len = old_len - *buf_len; + value.len = *value_len; + } + + if (add_new_entry) + hpack_dynamic_table_add (dt, &name, &value); + + *src = p; + return HTTP2_ERROR_NO_ERROR; +} + +static inline u8 +hpack_header_name_is_valid (u8 *name, u32 name_len) +{ + u32 i; + static uword tchar[4] = { + /* !#$%'*+-.0123456789 */ + 0x03ff6cba00000000, + /* ^_`abcdefghijklmnopqrstuvwxyz|~ */ + 0x57ffffffc0000000, + 0x0000000000000000, + 0x0000000000000000, + }; + for (i = 0; i < name_len; i++) + { + if (!clib_bitmap_get_no_check (tchar, name[i])) + return 0; + } + return 1; +} + +static inline u8 +hpack_header_value_is_valid (u8 *value, u32 value_len) +{ + u32 i; + /* VCHAR / SP / HTAB / %x80-FF */ + static uword tchar[4] = { + 0xffffffff00000200, + 0x7fffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }; + + if (value_len == 0) + return 1; + + /* must not start or end with SP or HTAB */ + if ((value[0] == 0x20 || value[0] == 0x09 || value[value_len - 1] == 0x20 || + value[value_len - 1] == 0x09)) + return 0; + + for (i = 0; i < value_len; i++) + { + if (!clib_bitmap_get_no_check (tchar, value[i])) + return 0; + } + return 1; +} + +static inline http_req_method_t +hpack_parse_method (u8 *value, u32 value_len) +{ + switch (value_len) + { + case 3: + if (!memcmp (value, "GET", 3)) + return HTTP_REQ_GET; + break; + case 4: + if (!memcmp (value, "POST", 4)) + return HTTP_REQ_POST; + break; + case 7: + if (!memcmp (value, "CONNECT", 7)) + return HTTP_REQ_CONNECT; + break; + default: + break; + } + /* HPACK should return only connection errors, this one is stream error */ + return HTTP_REQ_UNKNOWN; +} + +static inline http_url_scheme_t +hpack_parse_scheme (u8 *value, u32 value_len) +{ + switch (value_len) + { + case 4: + if (!memcmp (value, "http", 4)) + return HTTP_URL_SCHEME_HTTP; + break; + case 5: + if (!memcmp (value, "https", 5)) + return HTTP_URL_SCHEME_HTTPS; + break; + default: + break; + } + /* HPACK should return only connection errors, this one is stream error */ + return HTTP_URL_SCHEME_UNKNOWN; +} + +static http2_error_t +hpack_parse_req_pseudo_header (u8 *name, u32 name_len, u8 *value, + u32 value_len, + hpack_request_control_data_t *control_data) +{ + HTTP_DBG (2, "%U: %U", format_http_bytes, name, name_len, format_http_bytes, + value, value_len); + switch (name_len) + { + case 5: + if (!memcmp (name + 1, "path", 4)) + { + if (control_data->parsed_bitmap & HPACK_PSEUDO_HEADER_PATH_PARSED || + value_len == 0) + return HTTP2_ERROR_PROTOCOL_ERROR; + control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_PATH_PARSED; + 
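Editor's note: hpack_header_name_is_valid() above checks every octet against a 256-bit allow list packed into four 64-bit words, so the per-character test is one shift and mask instead of a chain of range comparisons. A standalone sketch of the same technique; the word values are copied from the name bitmap in the source (lowercase token characters only, since HTTP/2 forbids uppercase field names), so treat them as quoted data rather than independently derived:

#include <stdint.h>
#include <stdio.h>

static const uint64_t name_char_map[4] = {
  0x03ff6cba00000000ULL, /* !#$%'*+-.0123456789 */
  0x57ffffffc0000000ULL, /* ^_`a-z|~ */
  0x0000000000000000ULL,
  0x0000000000000000ULL,
};

static int
name_is_valid (const uint8_t *name, uint32_t len)
{
  for (uint32_t i = 0; i < len; i++)
    if (!((name_char_map[name[i] >> 6] >> (name[i] & 63)) & 1))
      return 0;
  return 1;
}

int
main (void)
{
  printf ("%d\n", name_is_valid ((const uint8_t *) "content-length", 14)); /* 1 */
  printf ("%d\n", name_is_valid ((const uint8_t *) "Content-Length", 14)); /* 0 */
  return 0;
}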
control_data->path = value; + control_data->path_len = value_len; + break; + } + return HTTP2_ERROR_PROTOCOL_ERROR; + case 7: + switch (name[1]) + { + case 'm': + if (!memcmp (name + 2, "ethod", 5)) + { + if (control_data->parsed_bitmap & + HPACK_PSEUDO_HEADER_METHOD_PARSED) + return HTTP2_ERROR_PROTOCOL_ERROR; + control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_METHOD_PARSED; + control_data->method = hpack_parse_method (value, value_len); + break; + } + return HTTP2_ERROR_PROTOCOL_ERROR; + case 's': + if (!memcmp (name + 2, "cheme", 5)) + { + if (control_data->parsed_bitmap & + HPACK_PSEUDO_HEADER_SCHEME_PARSED) + return HTTP2_ERROR_PROTOCOL_ERROR; + control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_SCHEME_PARSED; + control_data->scheme = hpack_parse_scheme (value, value_len); + break; + } + return HTTP2_ERROR_PROTOCOL_ERROR; + default: + return HTTP2_ERROR_PROTOCOL_ERROR; + } + break; + case 10: + if (!memcmp (name + 1, "authority", 9)) + { + if (control_data->parsed_bitmap & + HPACK_PSEUDO_HEADER_AUTHORITY_PARSED) + return HTTP2_ERROR_PROTOCOL_ERROR; + control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_AUTHORITY_PARSED; + control_data->authority = value; + control_data->authority_len = value_len; + break; + } + return HTTP2_ERROR_PROTOCOL_ERROR; + default: + return HTTP2_ERROR_PROTOCOL_ERROR; + } + + return HTTP2_ERROR_NO_ERROR; +} + +/* Special treatment for headers like: + * + * RFC9113 8.2.2: any message containing connection-specific header + * fields MUST be treated as malformed (connection, upgrade, keep-alive, + * proxy-connection, transfer-encoding), TE header MUST NOT contain any value + * other than "trailers" + * + * find headers that will be used later in preprocessing (content-length) + */ +always_inline http2_error_t +hpack_preprocess_header (u8 *name, u32 name_len, u8 *value, u32 value_len, + uword index, + hpack_request_control_data_t *control_data) +{ + switch (name_len) + { + case 2: + if (name[0] == 't' && name[1] == 'e' && + !http_token_is_case ((const char *) value, value_len, + http_token_lit ("trailers"))) + return HTTP2_ERROR_PROTOCOL_ERROR; + break; + case 7: + if (!memcmp (name, "upgrade", 7)) + return HTTP2_ERROR_PROTOCOL_ERROR; + break; + case 10: + switch (name[0]) + { + case 'c': + if (!memcmp (name + 1, "onnection", 9)) + return HTTP2_ERROR_PROTOCOL_ERROR; + break; + case 'k': + if (!memcmp (name + 1, "eep-alive", 9)) + return HTTP2_ERROR_PROTOCOL_ERROR; + break; + default: + break; + } + break; + case 14: + if (!memcmp (name, "content-length", 7) && + control_data->content_len_header_index == ~0) + control_data->content_len_header_index = index; + break; + case 16: + if (!memcmp (name, "proxy-connection", 16)) + return HTTP2_ERROR_PROTOCOL_ERROR; + break; + case 17: + if (!memcmp (name, "transfer-encoding", 17)) + return HTTP2_ERROR_PROTOCOL_ERROR; + break; + default: + break; + } + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export http2_error_t +hpack_parse_request (u8 *src, u32 src_len, u8 *dst, u32 dst_len, + hpack_request_control_data_t *control_data, + http_field_line_t **headers, + hpack_dynamic_table_t *dynamic_table) +{ + u8 *p, *end, *b, *name, *value; + u8 regular_header_parsed = 0; + u32 name_len, value_len; + uword b_left; + http_field_line_t *header; + http2_error_t rv; + + p = src; + end = src + src_len; + b = dst; + b_left = dst_len; + control_data->parsed_bitmap = 0; + control_data->headers_len = 0; + control_data->content_len_header_index = ~0; + + while (p != end) + { + name = b; + rv = hpack_decode_header (&p, end, &b, &b_left, 
&name_len, &value_len, + dynamic_table); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "hpack_decode_header: %U", format_http2_error, rv); + return rv; + } + value = name + name_len; + + /* pseudo header */ + if (name[0] == ':') + { + /* all pseudo-headers must be before regular headers */ + if (regular_header_parsed) + { + HTTP_DBG (1, "pseudo-headers after regular header"); + return HTTP2_ERROR_PROTOCOL_ERROR; + } + rv = hpack_parse_req_pseudo_header (name, name_len, value, value_len, + control_data); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "hpack_parse_req_pseudo_header: %U", + format_http2_error, rv); + return rv; + } + continue; + } + else + { + if (!hpack_header_name_is_valid (name, name_len)) + return HTTP2_ERROR_PROTOCOL_ERROR; + if (!regular_header_parsed) + { + regular_header_parsed = 1; + control_data->headers = name; + } + } + if (!hpack_header_value_is_valid (value, value_len)) + return HTTP2_ERROR_PROTOCOL_ERROR; + vec_add2 (*headers, header, 1); + HTTP_DBG (2, "%U: %U", format_http_bytes, name, name_len, + format_http_bytes, value, value_len); + header->name_offset = name - control_data->headers; + header->name_len = name_len; + header->value_offset = value - control_data->headers; + header->value_len = value_len; + control_data->headers_len += name_len; + control_data->headers_len += value_len; + if (regular_header_parsed) + { + rv = hpack_preprocess_header (name, name_len, value, value_len, + header - *headers, control_data); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "connection-specific header present"); + return rv; + } + } + } + control_data->control_data_len = dst_len - b_left; + HTTP_DBG (2, "%U", format_hpack_dynamic_table, dynamic_table); + return HTTP2_ERROR_NO_ERROR; +} + +static inline u8 * +hpack_encode_header (u8 *dst, http_header_name_t name, const u8 *value, + u32 value_len) +{ + hpack_token_t *name_token; + u8 *a, *b; + u32 orig_len, actual_size; + + orig_len = vec_len (dst); + name_token = &hpack_headers[name]; + if (name_token->static_table_index) + { + /* static table index with 4 bit prefix is max 2 bytes */ + vec_add2 (dst, a, 2 + value_len + HPACK_ENCODED_INT_MAX_LEN); + /* Literal Header Field without Indexing — Indexed Name */ + *a = 0x00; /* zero first 4 bits */ + b = hpack_encode_int (a, name_token->static_table_index, 4); + } + else + { + /* one extra byte for 4 bit prefix */ + vec_add2 (dst, a, + name_token->len + value_len + HPACK_ENCODED_INT_MAX_LEN * 2 + + 1); + b = a; + /* Literal Header Field without Indexing — New Name */ + *b++ = 0x00; + b = hpack_encode_string (b, (const u8 *) name_token->base, + name_token->len); + } + b = hpack_encode_string (b, value, value_len); + + actual_size = b - a; + vec_set_len (dst, orig_len + actual_size); + return dst; +} + +static inline u8 * +hpack_encode_custom_header (u8 *dst, const u8 *name, u32 name_len, + const u8 *value, u32 value_len) +{ + u32 orig_len, actual_size; + u8 *a, *b; + + orig_len = vec_len (dst); + /* one extra byte for 4 bit prefix */ + vec_add2 (dst, a, name_len + value_len + HPACK_ENCODED_INT_MAX_LEN * 2 + 1); + b = a; + /* Literal Header Field without Indexing — New Name */ + *b++ = 0x00; + b = hpack_encode_string (b, name, name_len); + b = hpack_encode_string (b, value, value_len); + actual_size = b - a; + vec_set_len (dst, orig_len + actual_size); + return dst; +} + +static inline u8 * +hpack_encode_status_code (u8 *dst, http_status_code_t sc) +{ + u32 orig_len, actual_size; + u8 *a, *b; + +#define encode_common_sc(_index) \ + vec_add2 (dst, a, 1); \ + 
*a++ = 0x80 | _index; + + switch (sc) + { + case HTTP_STATUS_OK: + encode_common_sc (8); + break; + case HTTP_STATUS_NO_CONTENT: + encode_common_sc (9); + break; + case HTTP_STATUS_PARTIAL_CONTENT: + encode_common_sc (10); + break; + case HTTP_STATUS_NOT_MODIFIED: + encode_common_sc (11); + break; + case HTTP_STATUS_BAD_REQUEST: + encode_common_sc (12); + break; + case HTTP_STATUS_NOT_FOUND: + encode_common_sc (13); + break; + case HTTP_STATUS_INTERNAL_ERROR: + encode_common_sc (14); + break; + default: + orig_len = vec_len (dst); + vec_add2 (dst, a, 5); + b = a; + /* Literal Header Field without Indexing — Indexed Name */ + *b++ = 8; + b = hpack_encode_string (b, (const u8 *) http_status_code_str[sc], 3); + actual_size = b - a; + vec_set_len (dst, orig_len + actual_size); + break; + } + return dst; +} + +static inline u8 * +hpack_encode_content_len (u8 *dst, u64 content_len) +{ + u8 digit_buffer[20]; + u8 *d = digit_buffer + sizeof (digit_buffer); + u32 orig_len, actual_size; + u8 *a, *b; + + orig_len = vec_len (dst); + vec_add2 (dst, a, 3 + sizeof (digit_buffer)); + b = a; + + /* static table index 28 */ + *b++ = 0x0F; + *b++ = 0x0D; + do + { + *--d = '0' + content_len % 10; + content_len /= 10; + } + while (content_len); + + b = hpack_encode_string (b, d, digit_buffer + sizeof (digit_buffer) - d); + actual_size = b - a; + vec_set_len (dst, orig_len + actual_size); + return dst; +} + +__clib_export void +hpack_serialize_response (u8 *app_headers, u32 app_headers_len, + hpack_response_control_data_t *control_data, + u8 **dst) +{ + u8 *p, *end; + + p = *dst; + + /* status code must be first since it is pseudo-header */ + p = hpack_encode_status_code (p, control_data->sc); + + /* server name */ + p = hpack_encode_header (p, HTTP_HEADER_SERVER, control_data->server_name, + control_data->server_name_len); + + /* date */ + p = hpack_encode_header (p, HTTP_HEADER_DATE, control_data->date, + control_data->date_len); + + /* content length if any */ + if (control_data->content_len != HPACK_ENCODER_SKIP_CONTENT_LEN) + p = hpack_encode_content_len (p, control_data->content_len); + + if (!app_headers_len) + { + *dst = p; + return; + } + + end = app_headers + app_headers_len; + while (app_headers < end) + { + /* custom header name? */ + u32 *tmp = (u32 *) app_headers; + if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT)) + { + http_custom_token_t *name, *value; + name = (http_custom_token_t *) app_headers; + u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT; + app_headers += sizeof (http_custom_token_t) + name_len; + value = (http_custom_token_t *) app_headers; + app_headers += sizeof (http_custom_token_t) + value->len; + p = hpack_encode_custom_header (p, name->token, name_len, + value->token, value->len); + } + else + { + http_app_header_t *header; + header = (http_app_header_t *) app_headers; + app_headers += sizeof (http_app_header_t) + header->value.len; + p = hpack_encode_header (p, header->name, header->value.token, + header->value.len); + } + } + + *dst = p; +} diff --git a/src/plugins/http/http2/hpack.h b/src/plugins/http/http2/hpack.h new file mode 100644 index 00000000000..69144de133a --- /dev/null +++ b/src/plugins/http/http2/hpack.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
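Editor's note: hpack_encode_status_code() above exploits the fact that the seven most common status codes sit in the HPACK static table at indices 8 through 14 (RFC7541 Appendix A), so ":status: 200" serializes as the single octet 0x88 (indexed field, index 8); any other code falls back to a literal with the indexed name :status and a 3-byte value. A standalone sketch of the common-case path:

#include <stdint.h>
#include <stdio.h>

/* Single-octet encoding of common ':status' values via the HPACK static
 * table; returns 0 when the status is not one of the pre-indexed ones and
 * a literal with indexed name must be used instead. */
static uint8_t
status_to_indexed_octet (int status)
{
  switch (status)
    {
    case 200: return 0x80 | 8;
    case 204: return 0x80 | 9;
    case 206: return 0x80 | 10;
    case 304: return 0x80 | 11;
    case 400: return 0x80 | 12;
    case 404: return 0x80 | 13;
    case 500: return 0x80 | 14;
    default: return 0;
    }
}

int
main (void)
{
  printf ("200 -> 0x%02x\n", status_to_indexed_octet (200)); /* 0x88 */
  printf ("404 -> 0x%02x\n", status_to_indexed_octet (404)); /* 0x8d */
  printf ("418 -> 0x%02x\n", status_to_indexed_octet (418)); /* 0x00 */
  return 0;
}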
+ */ + +#ifndef SRC_PLUGINS_HTTP_HPACK_H_ +#define SRC_PLUGINS_HTTP_HPACK_H_ + +#include <vppinfra/types.h> +#include <http/http2/http2.h> +#include <http/http.h> + +#define HPACK_INVALID_INT CLIB_UWORD_MAX +#if uword_bits == 64 +#define HPACK_ENCODED_INT_MAX_LEN 10 +#else +#define HPACK_ENCODED_INT_MAX_LEN 6 +#endif + +#define HPACK_DEFAULT_HEADER_TABLE_SIZE 4096 +#define HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD 32 +#define HPACK_ENCODER_SKIP_CONTENT_LEN ((u64) ~0) + +typedef struct +{ + u8 *buf; + uword name_len; +} hpack_dynamic_table_entry_t; + +typedef struct +{ + /* SETTINGS_HEADER_TABLE_SIZE */ + u32 max_size; + /* dynamic table size update */ + u32 size; + /* current usage (each entry = 32 + name len + value len) */ + u32 used; + /* ring buffer */ + hpack_dynamic_table_entry_t *entries; +} hpack_dynamic_table_t; + +enum +{ +#define _(bit, name, str) HPACK_PSEUDO_HEADER_##name##_PARSED = (1 << bit), + foreach_http2_pseudo_header +#undef _ +}; + +typedef struct +{ + http_req_method_t method; + http_url_scheme_t scheme; + u8 *authority; + u32 authority_len; + u8 *path; + u32 path_len; + u8 *headers; + uword content_len_header_index; + u32 headers_len; + u32 control_data_len; + u16 parsed_bitmap; +} hpack_request_control_data_t; + +typedef struct +{ + http_status_code_t sc; + u64 content_len; + u8 *server_name; + u32 server_name_len; + u8 *date; + u32 date_len; +} hpack_response_control_data_t; + +/** + * Decode unsigned variable-length integer (RFC7541 section 5.1) + * + * @param src Pointer to source buffer which will be advanced + * @param end End of the source buffer + * @param prefix_len Number of bits of the prefix (between 1 and 8) + * + * @return Decoded integer or @c HPACK_INVALID_INT in case of error + */ +uword hpack_decode_int (u8 **src, u8 *end, u8 prefix_len); + +/** + * Encode given value as unsigned variable-length integer (RFC7541 section 5.1) + * + * @param dst Pointer to destination buffer, should have enough space + * @param value Integer value to encode (up to @c CLIB_WORD_MAX) + * @param prefix_len Number of bits of the prefix (between 1 and 8) + * + * @return Advanced pointer to the destination buffer + * + * @note Encoded integer will take maximum @c HPACK_ENCODED_INT_MAX_LEN bytes + */ +u8 *hpack_encode_int (u8 *dst, uword value, u8 prefix_len); + +/** + * Decode + * + * @param src Pointer to source buffer which will be advanced + * @param end End of the source buffer + * @param buf Pointer to the buffer where string is decoded which will be + * advanced by number of written bytes + * @param buf_len Length the buffer, will be decreased + * + * @return @c HTTP2_ERROR_NO_ERROR on success + * + * @note Caller is responsible to check if there is somthing left in source + * buffer first + */ +http2_error_t hpack_decode_huffman (u8 **src, u8 *end, u8 **buf, + uword *buf_len); + +/** + * Encode given string in Huffman codes. 
+ * + * @param dst Pointer to destination buffer, should have enough space + * @param value String to encode + * @param value_len Length of the string + * + * @return Advanced pointer to the destination buffer + */ +u8 *hpack_encode_huffman (u8 *dst, const u8 *value, uword value_len); + +/** + * Number of bytes required to encode given string in Huffman codes + * + * @param value Pointer to buffer with string to encode + * @param value_len Length of the string + * + * @return number of bytes required to encode string in Huffman codes, round up + * to byte boundary + */ +uword hpack_huffman_encoded_len (const u8 *value, uword value_len); + +/** + * Initialize HPACK dynamic table + * + * @param table Dynamic table to initialize + * @param max_size Maximum table size (SETTINGS_HEADER_TABLE_SIZE) + */ +void hpack_dynamic_table_init (hpack_dynamic_table_t *table, u32 max_size); + +/** + * Free HPACK dynamic table + * + * @param table Dynamic table to free + */ +void hpack_dynamic_table_free (hpack_dynamic_table_t *table); + +u8 *format_hpack_dynamic_table (u8 *s, va_list *args); + +/** + * Request parser + * + * @param src Header block to parse + * @param src_len Length of header block + * @param dst Buffer where headers will be decoded + * @param dst_len Length of buffer for decoded headers + * @param control_data Preparsed pseudo-headers + * @param headers List of regular headers + * @param dynamic_table Decoder dynamic table + * + * @return @c HTTP2_ERROR_NO_ERROR on success, connection error otherwise + */ +http2_error_t hpack_parse_request (u8 *src, u32 src_len, u8 *dst, u32 dst_len, + hpack_request_control_data_t *control_data, + http_field_line_t **headers, + hpack_dynamic_table_t *dynamic_table); + +/** + * Serialize response + * + * @param app_headers App header list + * @param app_headers_len App header list length + * @param control_data Header values set by protocol layer + * @param dst Vector where serialized headers will be added + */ +void hpack_serialize_response (u8 *app_headers, u32 app_headers_len, + hpack_response_control_data_t *control_data, + u8 **dst); + +#endif /* SRC_PLUGINS_HTTP_HPACK_H_ */ diff --git a/src/plugins/http/http2/http2.c b/src/plugins/http/http2/http2.c new file mode 100644 index 00000000000..67db185823c --- /dev/null +++ b/src/plugins/http/http2/http2.c @@ -0,0 +1,1492 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +#include <http/http2/hpack.h> +#include <http/http2/frame.h> +#include <http/http_private.h> +#include <http/http_timer.h> + +#ifndef HTTP_2_ENABLE +#define HTTP_2_ENABLE 0 +#endif + +#define foreach_http2_stream_state \ + _ (IDLE, "IDLE") \ + _ (OPEN, "OPEN") \ + _ (HALF_CLOSED, "HALF-CLOSED") \ + _ (CLOSED, "CLOSED") + +typedef enum http2_stream_state_ +{ +#define _(s, str) HTTP2_STREAM_STATE_##s, + foreach_http2_stream_state +#undef _ +} http2_stream_state_t; + +#define foreach_http2_req_flags _ (APP_CLOSED, "app-closed") + +typedef enum http2_req_flags_bit_ +{ +#define _(sym, str) HTTP2_REQ_F_BIT_##sym, + foreach_http2_req_flags +#undef _ +} http2_req_flags_bit_t; + +typedef enum http2_req_flags_ +{ +#define _(sym, str) HTTP2_REQ_F_##sym = 1 << HTTP2_REQ_F_BIT_##sym, + foreach_http2_req_flags +#undef _ +} __clib_packed http2_req_flags_t; + +typedef struct http2_req_ +{ + http_req_t base; + http2_stream_state_t stream_state; + u8 flags; + u32 stream_id; + u64 peer_window; + u8 *payload; + u32 payload_len; +} http2_req_t; + +#define foreach_http2_conn_flags \ + _ (EXPECT_PREFACE, "expect-preface") \ + _ (PREFACE_VERIFIED, "preface-verified") + +typedef enum http2_conn_flags_bit_ +{ +#define _(sym, str) HTTP2_CONN_F_BIT_##sym, + foreach_http2_conn_flags +#undef _ +} http2_conn_flags_bit_t; + +typedef enum http2_conn_flags_ +{ +#define _(sym, str) HTTP2_CONN_F_##sym = 1 << HTTP2_CONN_F_BIT_##sym, + foreach_http2_conn_flags +#undef _ +} __clib_packed http2_conn_flags_t; + +typedef struct http2_conn_ctx_ +{ + http2_conn_settings_t peer_settings; + hpack_dynamic_table_t decoder_dynamic_table; + u8 flags; + u32 last_opened_stream_id; + u32 last_processed_stream_id; + u64 peer_window; + uword *req_by_stream_id; +} http2_conn_ctx_t; + +typedef struct http2_main_ +{ + http2_conn_ctx_t **conn_pool; + http2_req_t **req_pool; + http2_conn_settings_t settings; +} http2_main_t; + +static http2_main_t http2_main; + +http2_conn_ctx_t * +http2_conn_ctx_alloc_w_thread (http_conn_t *hc) +{ + http2_main_t *h2m = &http2_main; + http2_conn_ctx_t *h2c; + + pool_get_aligned_safe (h2m->conn_pool[hc->c_thread_index], h2c, + CLIB_CACHE_LINE_BYTES); + clib_memset (h2c, 0, sizeof (*h2c)); + h2c->peer_settings = http2_default_conn_settings; + h2c->peer_window = h2c->peer_settings.initial_window_size; + h2c->req_by_stream_id = hash_create (0, sizeof (uword)); + hc->opaque = + uword_to_pointer (h2c - h2m->conn_pool[hc->c_thread_index], void *); + HTTP_DBG (1, "h2c [%u]%x", hc->c_thread_index, + h2c - h2m->conn_pool[hc->c_thread_index]); + return h2c; +} + +static inline http2_conn_ctx_t * +http2_conn_ctx_get_w_thread (http_conn_t *hc) +{ + http2_main_t *h2m = &http2_main; + u32 h2c_index = pointer_to_uword (hc->opaque); + return pool_elt_at_index (h2m->conn_pool[hc->c_thread_index], h2c_index); +} + +static inline void +http2_conn_ctx_free (http_conn_t *hc) +{ + http2_main_t *h2m = &http2_main; + http2_conn_ctx_t *h2c; + + h2c = http2_conn_ctx_get_w_thread (hc); + HTTP_DBG (1, "h2c [%u]%x", hc->c_thread_index, + h2c - h2m->conn_pool[hc->c_thread_index]); + hash_free (h2c->req_by_stream_id); + if (hc->flags & HTTP_CONN_F_HAS_REQUEST) + hpack_dynamic_table_free (&h2c->decoder_dynamic_table); + if (CLIB_DEBUG) + memset (h2c, 0xba, sizeof (*h2c)); + pool_put (h2m->conn_pool[hc->c_thread_index], h2c); +} + +static inline http2_req_t * +http2_conn_alloc_req (http_conn_t *hc, u32 stream_id) +{ + http2_main_t *h2m = &http2_main; + http2_conn_ctx_t *h2c; + http2_req_t *req; + u32 req_index; + http_req_handle_t 
hr_handle; + + pool_get_aligned_safe (h2m->req_pool[hc->c_thread_index], req, + CLIB_CACHE_LINE_BYTES); + clib_memset (req, 0, sizeof (*req)); + req->base.hr_pa_session_handle = SESSION_INVALID_HANDLE; + req_index = req - h2m->req_pool[hc->c_thread_index]; + hr_handle.version = HTTP_VERSION_2; + hr_handle.req_index = req_index; + req->base.hr_req_handle = hr_handle.as_u32; + req->base.hr_hc_index = hc->hc_hc_index; + req->base.c_thread_index = hc->c_thread_index; + req->stream_id = stream_id; + req->stream_state = HTTP2_STREAM_STATE_IDLE; + h2c = http2_conn_ctx_get_w_thread (hc); + HTTP_DBG (1, "h2c [%u]%x req_index %x stream_id %u", hc->c_thread_index, + h2c - h2m->conn_pool[hc->c_thread_index], req_index, stream_id); + req->peer_window = h2c->peer_settings.initial_window_size; + hash_set (h2c->req_by_stream_id, stream_id, req_index); + return req; +} + +static inline void +http2_conn_free_req (http2_conn_ctx_t *h2c, http2_req_t *req, + clib_thread_index_t thread_index) +{ + http2_main_t *h2m = &http2_main; + + HTTP_DBG (1, "h2c [%u]%x req_index %x stream_id %u", thread_index, + h2c - h2m->conn_pool[thread_index], + ((http_req_handle_t) req->base.hr_req_handle).req_index, + req->stream_id); + vec_free (req->base.headers); + vec_free (req->base.target); + http_buffer_free (&req->base.tx_buf); + hash_unset (h2c->req_by_stream_id, req->stream_id); + if (CLIB_DEBUG) + memset (req, 0xba, sizeof (*req)); + pool_put (h2m->req_pool[thread_index], req); +} + +http2_req_t * +http2_conn_get_req (http_conn_t *hc, u32 stream_id) +{ + http2_main_t *h2m = &http2_main; + http2_conn_ctx_t *h2c; + uword *p; + + h2c = http2_conn_ctx_get_w_thread (hc); + p = hash_get (h2c->req_by_stream_id, stream_id); + if (p) + { + return pool_elt_at_index (h2m->req_pool[hc->c_thread_index], p[0]); + } + else + { + HTTP_DBG (1, "hc [%u]%x streamId %u not found", hc->c_thread_index, + hc->hc_hc_index, stream_id); + return 0; + } +} + +always_inline http2_req_t * +http2_req_get (u32 req_index, clib_thread_index_t thread_index) +{ + http2_main_t *h2m = &http2_main; + + return pool_elt_at_index (h2m->req_pool[thread_index], req_index); +} + +/* send GOAWAY frame and close TCP connection */ +always_inline void +http2_connection_error (http_conn_t *hc, http2_error_t error, + transport_send_params_t *sp) +{ + u8 *response; + u32 req_index, stream_id; + http2_conn_ctx_t *h2c; + http2_req_t *req; + + h2c = http2_conn_ctx_get_w_thread (hc); + + response = http_get_tx_buf (hc); + http2_frame_write_goaway (error, h2c->last_processed_stream_id, &response); + http_io_ts_write (hc, response, vec_len (response), sp); + http_io_ts_after_write (hc, 1); + + hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({ + req = http2_req_get (req_index, hc->c_thread_index); + if (req->stream_state != HTTP2_STREAM_STATE_CLOSED) + session_transport_reset_notify (&req->base.connection); + })); + http_shutdown_transport (hc); +} + +always_inline void +http2_send_stream_error (http_conn_t *hc, u32 stream_id, http2_error_t error, + transport_send_params_t *sp) +{ + u8 *response; + + response = http_get_tx_buf (hc); + http2_frame_write_rst_stream (error, stream_id, &response); + http_io_ts_write (hc, response, vec_len (response), sp); + http_io_ts_after_write (hc, 1); +} + +/* send RST_STREAM frame and notify app */ +always_inline void +http2_stream_error (http_conn_t *hc, http2_req_t *req, http2_error_t error, + transport_send_params_t *sp) +{ + ASSERT (req->stream_state > HTTP2_STREAM_STATE_IDLE); + + http2_send_stream_error (hc, req->stream_id, 
error, sp); + req->stream_state = HTTP2_STREAM_STATE_CLOSED; + if (req->flags & HTTP2_REQ_F_APP_CLOSED) + session_transport_closed_notify (&req->base.connection); + else + session_transport_closing_notify (&req->base.connection); +} + +always_inline void +http2_stream_close (http2_req_t *req) +{ + req->stream_state = HTTP2_STREAM_STATE_CLOSED; + if (req->flags & HTTP2_REQ_F_APP_CLOSED) + { + HTTP_DBG (1, "req [%u]%x app already closed, confirm", + req->base.c_thread_index, + ((http_req_handle_t) req->base.hr_req_handle).req_index); + session_transport_closed_notify (&req->base.connection); + } + else + { + HTTP_DBG (1, "req [%u]%x all done closing, notify app", + req->base.c_thread_index, + ((http_req_handle_t) req->base.hr_req_handle).req_index); + session_transport_closing_notify (&req->base.connection); + } +} + +always_inline void +http2_send_server_preface (http_conn_t *hc) +{ + u8 *response; + http2_main_t *h2m = &http2_main; + http2_settings_entry_t *setting, *settings_list = 0; + +#define _(v, label, member, min, max, default_value, err_code) \ + if (h2m->settings.member != default_value) \ + { \ + vec_add2 (settings_list, setting, 1); \ + setting->identifier = HTTP2_SETTINGS_##label; \ + setting->value = h2m->settings.member; \ + } + foreach_http2_settings +#undef _ + + response = http_get_tx_buf (hc); + http2_frame_write_settings (settings_list, &response); + http_io_ts_write (hc, response, vec_len (response), 0); + http_io_ts_after_write (hc, 0); +} + +/*************************************/ +/* request state machine handlers RX */ +/*************************************/ + +static http_sm_result_t +http2_req_state_wait_transport_method (http_conn_t *hc, http2_req_t *req, + transport_send_params_t *sp, + http2_error_t *error) +{ + http2_conn_ctx_t *h2c; + hpack_request_control_data_t control_data; + u8 *buf = 0; + http_msg_t msg; + int rv; + http_req_state_t new_state = HTTP_REQ_STATE_WAIT_APP_REPLY; + + h2c = http2_conn_ctx_get_w_thread (hc); + + /* TODO: configurable buf size with bigger default value */ + vec_validate_init_empty (buf, 1023, 0); + *error = hpack_parse_request (req->payload, req->payload_len, buf, 1023, + &control_data, &req->base.headers, + &h2c->decoder_dynamic_table); + if (*error != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "hpack_parse_request failed"); + return HTTP_SM_ERROR; + } + + if (!(control_data.parsed_bitmap & HPACK_PSEUDO_HEADER_METHOD_PARSED)) + { + HTTP_DBG (1, ":method pseudo-header missing in request"); + http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp); + return HTTP_SM_STOP; + } + if (control_data.method == HTTP_REQ_UNKNOWN || + control_data.method == HTTP_REQ_CONNECT) + { + HTTP_DBG (1, "unsupported method"); + http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp); + return HTTP_SM_STOP; + } + if (!(control_data.parsed_bitmap & HPACK_PSEUDO_HEADER_SCHEME_PARSED) && + control_data.method != HTTP_REQ_CONNECT) + { + HTTP_DBG (1, ":scheme pseudo-header missing in request"); + http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp); + return HTTP_SM_STOP; + } + if (control_data.scheme == HTTP_URL_SCHEME_UNKNOWN) + { + HTTP_DBG (1, "unsupported scheme"); + http2_stream_error (hc, req, HTTP2_ERROR_INTERNAL_ERROR, sp); + return HTTP_SM_STOP; + } + if (!(control_data.parsed_bitmap & HPACK_PSEUDO_HEADER_PATH_PARSED) && + control_data.method != HTTP_REQ_CONNECT) + { + HTTP_DBG (1, ":path pseudo-header missing in request"); + http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp); + return HTTP_SM_STOP; + } + if 
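Editor's note: http2_send_server_preface() above advertises only the settings that differ from their defaults; on the wire each SETTINGS entry is a 16-bit identifier followed by a 32-bit value (RFC9113 section 6.5.1), which is exactly the packed http2_settings_entry_t from frame.h. A standalone sketch of serializing such an entry list; the identifiers (0x3 = MAX_CONCURRENT_STREAMS, 0x4 = INITIAL_WINDOW_SIZE) and values in main() are example data only.

#include <stdint.h>
#include <stdio.h>

typedef struct
{
  uint16_t identifier;
  uint32_t value;
} setting_entry_t;

/* Serialize SETTINGS entries in network byte order, 6 octets each. */
static uint32_t
serialize_settings (uint8_t *dst, const setting_entry_t *e, uint32_t n)
{
  uint8_t *p = dst;

  for (uint32_t i = 0; i < n; i++)
    {
      *p++ = e[i].identifier >> 8;
      *p++ = e[i].identifier & 0xFF;
      *p++ = e[i].value >> 24;
      *p++ = (e[i].value >> 16) & 0xFF;
      *p++ = (e[i].value >> 8) & 0xFF;
      *p++ = e[i].value & 0xFF;
    }
  return (uint32_t) (p - dst);
}

int
main (void)
{
  setting_entry_t entries[] = { { 0x3, 100 }, { 0x4, 65535 } };
  uint8_t buf[32];
  uint32_t len = serialize_settings (buf, entries, 2);

  for (uint32_t i = 0; i < len; i++)
    printf ("%02x ", buf[i]);
  printf ("\n"); /* 00 03 00 00 00 64 00 04 00 00 ff ff */
  return 0;
}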
(!(control_data.parsed_bitmap & HPACK_PSEUDO_HEADER_AUTHORITY_PARSED) && + control_data.method != HTTP_REQ_CONNECT) + { + HTTP_DBG (1, ":path pseudo-header missing in request"); + http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp); + return HTTP_SM_STOP; + } + + req->base.control_data_len = control_data.control_data_len; + req->base.headers_offset = control_data.headers - buf; + req->base.headers_len = control_data.headers_len; + if (control_data.content_len_header_index != ~0) + { + req->base.content_len_header_index = + control_data.content_len_header_index; + rv = http_parse_content_length (&req->base, buf); + if (rv) + { + http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp); + return HTTP_SM_STOP; + } + new_state = HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA; + } + /* TODO: message framing without content length using END_STREAM flag */ + if (req->base.body_len == 0 && req->stream_state == HTTP2_STREAM_STATE_OPEN) + { + HTTP_DBG (1, "no content-length and DATA frame expected"); + *error = HTTP2_ERROR_INTERNAL_ERROR; + return HTTP_SM_ERROR; + } + req->base.to_recv = req->base.body_len; + + req->base.target_path_len = control_data.path_len; + req->base.target_path_offset = control_data.path - buf; + /* drop leading slash */ + req->base.target_path_offset++; + req->base.target_path_len--; + req->base.target_query_offset = 0; + req->base.target_query_len = 0; + http_identify_optional_query (&req->base, buf); + + msg.type = HTTP_MSG_REQUEST; + msg.method_type = control_data.method; + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.len = req->base.connection_header_index; + msg.data.scheme = control_data.scheme; + msg.data.target_authority_offset = control_data.authority - buf; + msg.data.target_authority_len = control_data.authority_len; + msg.data.target_path_offset = req->base.target_path_offset; + msg.data.target_path_len = req->base.target_path_len; + msg.data.target_query_offset = req->base.target_query_offset; + msg.data.target_query_len = req->base.target_query_len; + msg.data.headers_offset = req->base.headers_offset; + msg.data.headers_len = req->base.headers_len; + msg.data.headers_ctx = pointer_to_uword (req->base.headers); + msg.data.upgrade_proto = HTTP_UPGRADE_PROTO_NA; + msg.data.body_offset = req->base.control_data_len; + msg.data.body_len = req->base.body_len; + + svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, + { buf, req->base.control_data_len } }; + HTTP_DBG (3, "%U", format_http_bytes, buf, req->base.control_data_len); + http_io_as_write_segs (&req->base, segs, 2); + http_req_state_change (&req->base, new_state); + http_app_worker_rx_notify (&req->base); + + if (req->stream_id > h2c->last_processed_stream_id) + h2c->last_processed_stream_id = req->stream_id; + + return HTTP_SM_STOP; +} + +static http_sm_result_t +http2_req_state_transport_io_more_data (http_conn_t *hc, http2_req_t *req, + transport_send_params_t *sp, + http2_error_t *error) +{ + if (req->payload_len > req->base.to_recv) + { + HTTP_DBG (1, "received more data than expected"); + http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp); + return HTTP_SM_STOP; + } + req->base.to_recv -= req->payload_len; + if (req->stream_state == HTTP2_STREAM_STATE_HALF_CLOSED && + req->base.to_recv != 0) + { + HTTP_DBG (1, "peer closed stream but don't send all data"); + http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp); + return HTTP_SM_STOP; + } + if (req->base.to_recv == 0) + http_req_state_change (&req->base, HTTP_REQ_STATE_WAIT_APP_REPLY); + http_io_as_write (&req->base, 
req->payload, req->payload_len); + http_app_worker_rx_notify (&req->base); + + return HTTP_SM_STOP; +} + +/*************************************/ +/* request state machine handlers TX */ +/*************************************/ + +static http_sm_result_t +http2_req_state_wait_app_reply (http_conn_t *hc, http2_req_t *req, + transport_send_params_t *sp, + http2_error_t *error) +{ + http_msg_t msg; + u8 *response, *date, *app_headers = 0; + u8 fh[HTTP2_FRAME_HEADER_SIZE]; + hpack_response_control_data_t control_data; + u8 flags = HTTP2_FRAME_FLAG_END_HEADERS; + http_sm_result_t sm_result = HTTP_SM_ERROR; + u32 n_written; + + http_get_app_msg (&req->base, &msg); + ASSERT (msg.type == HTTP_MSG_REPLY); + + response = http_get_tx_buf (hc); + date = format (0, "%U", format_http_time_now, hc); + + control_data.sc = msg.code; + control_data.content_len = msg.data.body_len; + control_data.server_name = hc->app_name; + control_data.server_name_len = vec_len (hc->app_name); + control_data.date = date; + control_data.date_len = vec_len (date); + + if (msg.data.headers_len) + app_headers = http_get_app_header_list (&req->base, &msg); + + hpack_serialize_response (app_headers, msg.data.headers_len, &control_data, + &response); + vec_free (date); + + if (msg.data.body_len) + { + /* start sending the actual data */ + http_req_tx_buffer_init (&req->base, &msg); + http_req_state_change (&req->base, HTTP_REQ_STATE_APP_IO_MORE_DATA); + sm_result = HTTP_SM_CONTINUE; + } + else + { + /* no response body, we are done */ + flags |= HTTP2_FRAME_FLAG_END_STREAM; + sm_result = HTTP_SM_STOP; + http2_stream_close (req); + } + + http2_frame_write_headers_header (vec_len (response), req->stream_id, flags, + fh); + svm_fifo_seg_t segs[2] = { { fh, HTTP2_FRAME_HEADER_SIZE }, + { response, vec_len (response) } }; + n_written = http_io_ts_write_segs (hc, segs, 2, sp); + ASSERT (n_written == (HTTP2_FRAME_HEADER_SIZE + vec_len (response))); + http_io_ts_after_write (hc, 0); + + return sm_result; +} + +static http_sm_result_t +http2_req_state_app_io_more_data (http_conn_t *hc, http2_req_t *req, + transport_send_params_t *sp, + http2_error_t *error) +{ + u32 max_write, max_read, n_segs, n_read, n_written = 0; + svm_fifo_seg_t *app_segs, *segs = 0; + http_buffer_t *hb = &req->base.tx_buf; + u8 fh[HTTP2_FRAME_HEADER_SIZE]; + u8 finished = 0, flags = 0; + + ASSERT (http_buffer_bytes_left (hb) > 0); + max_write = http_io_ts_max_write (hc, sp); + if (max_write <= HTTP2_FRAME_HEADER_SIZE) + { + HTTP_DBG (1, "ts tx fifo full"); + goto check_fifo; + } + max_read = http_buffer_bytes_left (hb); + + n_read = http_buffer_get_segs (hb, max_write - HTTP2_FRAME_HEADER_SIZE, + &app_segs, &n_segs); + if (n_read == 0) + { + HTTP_DBG (1, "no data to deq"); + goto check_fifo; + } + + finished = (max_read - n_read) == 0; + flags = finished ? 
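Editor's note: http2_req_state_app_io_more_data() above frames each transmit burst as one DATA frame: it reserves 9 octets for the frame header, fills the remaining transport write space with body segments, and sets END_STREAM only on the burst that drains the buffer. A small standalone sketch of just that sizing decision (no VPP fifos or flow control, only the arithmetic):

#include <stdint.h>
#include <stdio.h>

#define FRAME_HEADER_SIZE 9

/* Given available transport space and body bytes still to send, compute the
 * DATA payload for this burst and whether END_STREAM is set; returns 0 when
 * there is no room beyond the frame header. */
static uint32_t
plan_data_frame (uint32_t max_write, uint64_t bytes_left, int *end_stream)
{
  uint32_t payload;

  *end_stream = 0;
  if (max_write <= FRAME_HEADER_SIZE)
    return 0; /* wait for transport space */
  payload = max_write - FRAME_HEADER_SIZE;
  if ((uint64_t) payload >= bytes_left)
    {
      payload = (uint32_t) bytes_left;
      *end_stream = 1; /* last DATA frame for this stream */
    }
  return payload;
}

int
main (void)
{
  int fin;
  uint32_t n;

  n = plan_data_frame (1500, 4000, &fin);
  printf ("payload=%u end_stream=%d\n", n, fin); /* payload=1491 end_stream=0 */
  n = plan_data_frame (1500, 300, &fin);
  printf ("payload=%u end_stream=%d\n", n, fin); /* payload=300 end_stream=1 */
  return 0;
}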
HTTP2_FRAME_FLAG_END_STREAM : 0; + http2_frame_write_data_header (n_read, req->stream_id, flags, fh); + vec_validate (segs, 0); + segs[0].len = HTTP2_FRAME_HEADER_SIZE; + segs[0].data = fh; + vec_append (segs, app_segs); + + n_written = http_io_ts_write_segs (hc, segs, n_segs + 1, sp); + ASSERT (n_written == (HTTP2_FRAME_HEADER_SIZE + n_read)); + vec_free (segs); + http_buffer_drain (hb, n_read); + + if (finished) + { + http_buffer_free (hb); + if (hc->flags & HTTP_CONN_F_IS_SERVER) + http2_stream_close (req); + else + req->stream_state = HTTP2_STREAM_STATE_HALF_CLOSED; + } + http_io_ts_after_write (hc, finished); + +check_fifo: + if (http_io_ts_check_write_thresh (hc)) + { + http_io_ts_add_want_deq_ntf (hc); + http_req_deschedule (&req->base, sp); + } + return HTTP_SM_STOP; +} + +/*************************/ +/* request state machine */ +/*************************/ + +typedef http_sm_result_t (*http2_sm_handler) (http_conn_t *hc, + http2_req_t *req, + transport_send_params_t *sp, + http2_error_t *error); + +static http2_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = { + 0, /* idle */ + 0, /* wait app method */ + 0, /* wait transport reply */ + 0, /* transport io more data */ + 0, /* wait transport method */ + http2_req_state_wait_app_reply, + http2_req_state_app_io_more_data, + 0, /* tunnel */ + 0, /* udp tunnel */ +}; + +static http2_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = { + 0, /* idle */ + 0, /* wait app method */ + 0, /* wait transport reply */ + http2_req_state_transport_io_more_data, + http2_req_state_wait_transport_method, + 0, /* wait app reply */ + 0, /* app io more data */ + 0, /* tunnel */ + 0, /* udp tunnel */ +}; + +static_always_inline int +http2_req_state_is_tx_valid (http2_req_t *req) +{ + return tx_state_funcs[req->base.state] ? 
1 : 0; +} + +static_always_inline http2_error_t +http2_req_run_state_machine (http_conn_t *hc, http2_req_t *req, + transport_send_params_t *sp, u8 is_tx) +{ + http_sm_result_t res; + http2_error_t error; + http2_conn_ctx_t *h2c; + + do + { + if (is_tx) + res = tx_state_funcs[req->base.state](hc, req, sp, &error); + else + res = rx_state_funcs[req->base.state](hc, req, 0, &error); + + if (res == HTTP_SM_ERROR) + { + HTTP_DBG (1, "protocol error %U", format_http2_error, error); + return error; + } + } + while (res == HTTP_SM_CONTINUE); + + if (req->stream_state == HTTP2_STREAM_STATE_CLOSED) + { + h2c = http2_conn_ctx_get_w_thread (hc); + session_transport_delete_notify (&req->base.connection); + http2_conn_free_req (h2c, req, hc->c_thread_index); + } + + return HTTP2_ERROR_NO_ERROR; +} + +/******************/ +/* frame handlers */ +/******************/ + +static http2_error_t +http2_handle_headers_frame (http_conn_t *hc, http2_frame_header_t *fh) +{ + http2_main_t *h2m = &http2_main; + http2_req_t *req; + u8 *rx_buf; + http2_error_t rv; + http2_conn_ctx_t *h2c; + + if (!(fh->flags & HTTP2_FRAME_FLAG_END_HEADERS)) + { + /* TODO: fragmented headers */ + return HTTP2_ERROR_INTERNAL_ERROR; + } + + if (hc->flags & HTTP_CONN_F_IS_SERVER) + { + h2c = http2_conn_ctx_get_w_thread (hc); + /* streams initiated by client must use odd-numbered stream id */ + if ((fh->stream_id & 1) == 0) + { + HTTP_DBG (1, "invalid stream id %u", fh->stream_id); + return HTTP2_ERROR_PROTOCOL_ERROR; + } + /* stream id must be greater than all streams that client has opened */ + if (fh->stream_id <= h2c->last_opened_stream_id) + { + HTTP_DBG (1, "closed stream id %u", fh->stream_id); + return HTTP2_ERROR_STREAM_CLOSED; + } + h2c->last_opened_stream_id = fh->stream_id; + if (hash_elts (h2c->req_by_stream_id) == + h2m->settings.max_concurrent_streams) + { + HTTP_DBG (1, "SETTINGS_MAX_CONCURRENT_STREAMS exceeded"); + http_io_ts_drain (hc, fh->length); + http2_send_stream_error (hc, fh->stream_id, + HTTP2_ERROR_REFUSED_STREAM, 0); + return HTTP2_ERROR_NO_ERROR; + } + req = http2_conn_alloc_req (hc, fh->stream_id); + http_conn_accept_request (hc, &req->base); + http_req_state_change (&req->base, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); + req->stream_state = HTTP2_STREAM_STATE_OPEN; + hc->flags &= ~HTTP_CONN_F_NO_APP_SESSION; + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + { + hc->flags |= HTTP_CONN_F_HAS_REQUEST; + hpack_dynamic_table_init ( + &h2c->decoder_dynamic_table, + http2_default_conn_settings.header_table_size); + } + if (fh->flags & HTTP2_FRAME_FLAG_END_STREAM) + req->stream_state = HTTP2_STREAM_STATE_HALF_CLOSED; + } + else + { + /* TODO: client */ + return HTTP2_ERROR_INTERNAL_ERROR; + } + + rx_buf = http_get_rx_buf (hc); + vec_validate (rx_buf, fh->length - 1); + http_io_ts_read (hc, rx_buf, fh->length, 0); + + rv = http2_frame_read_headers (&req->payload, &req->payload_len, rx_buf, + fh->length, fh->flags); + if (rv != HTTP2_ERROR_NO_ERROR) + return rv; + + HTTP_DBG (1, "run state machine"); + return http2_req_run_state_machine (hc, req, 0, 0); +} + +static http2_error_t +http2_handle_data_frame (http_conn_t *hc, http2_frame_header_t *fh) +{ + http2_req_t *req; + u8 *rx_buf; + http2_error_t rv; + http2_conn_ctx_t *h2c; + + req = http2_conn_get_req (hc, fh->stream_id); + if (!req) + { + if (fh->stream_id == 0) + { + HTTP_DBG (1, "DATA frame with stream id 0"); + return HTTP2_ERROR_PROTOCOL_ERROR; + } + h2c = http2_conn_ctx_get_w_thread (hc); + if (fh->stream_id <= h2c->last_opened_stream_id) + { + HTTP_DBG (1, 
"stream closed, ignoring frame"); + http2_send_stream_error (hc, fh->stream_id, + HTTP2_ERROR_STREAM_CLOSED, 0); + return HTTP2_ERROR_NO_ERROR; + } + else + return HTTP2_ERROR_PROTOCOL_ERROR; + } + + /* bogus state */ + if (hc->flags & HTTP_CONN_F_IS_SERVER && + req->stream_state != HTTP2_STREAM_STATE_OPEN) + { + HTTP_DBG (1, "error: stream already half-closed"); + http2_stream_error (hc, req, HTTP2_ERROR_STREAM_CLOSED, 0); + return HTTP2_ERROR_NO_ERROR; + } + + if (fh->flags & HTTP2_FRAME_FLAG_END_STREAM) + req->stream_state = HTTP2_STREAM_STATE_HALF_CLOSED; + + rx_buf = http_get_rx_buf (hc); + vec_validate (rx_buf, fh->length - 1); + http_io_ts_read (hc, rx_buf, fh->length, 0); + + rv = http2_frame_read_data (&req->payload, &req->payload_len, rx_buf, + fh->length, fh->flags); + if (rv != HTTP2_ERROR_NO_ERROR) + return rv; + + HTTP_DBG (1, "run state machine"); + return http2_req_run_state_machine (hc, req, 0, 0); +} + +static http2_error_t +http2_handle_window_update_frame (http_conn_t *hc, http2_frame_header_t *fh) +{ + u8 *rx_buf; + u32 win_increment; + http2_error_t rv; + + rx_buf = http_get_rx_buf (hc); + vec_validate (rx_buf, fh->length - 1); + http_io_ts_read (hc, rx_buf, fh->length, 0); + + rv = http2_frame_read_window_update (&win_increment, rx_buf, fh->length); + if (rv != HTTP2_ERROR_NO_ERROR) + return rv; + + /* TODO: flow control */ + return HTTP2_ERROR_NO_ERROR; +} + +static http2_error_t +http2_handle_settings_frame (http_conn_t *hc, http2_frame_header_t *fh) +{ + u8 *rx_buf, *resp = 0; + http2_error_t rv; + http2_conn_settings_t new_settings; + http2_conn_ctx_t *h2c; + + if (fh->stream_id != 0) + return HTTP2_ERROR_PROTOCOL_ERROR; + + if (fh->flags == HTTP2_FRAME_FLAG_ACK) + { + if (fh->length != 0) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + /* TODO: we can start using non-default settings */ + } + else + { + if (fh->length < sizeof (http2_settings_entry_t)) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + rx_buf = http_get_rx_buf (hc); + vec_validate (rx_buf, fh->length - 1); + http_io_ts_read (hc, rx_buf, fh->length, 0); + + h2c = http2_conn_ctx_get_w_thread (hc); + new_settings = h2c->peer_settings; + rv = http2_frame_read_settings (&new_settings, rx_buf, fh->length); + if (rv != HTTP2_ERROR_NO_ERROR) + return rv; + h2c->peer_settings = new_settings; + + /* ACK peer settings */ + http2_frame_write_settings_ack (&resp); + http_io_ts_write (hc, resp, vec_len (resp), 0); + vec_free (resp); + http_io_ts_after_write (hc, 0); + } + + return HTTP2_ERROR_NO_ERROR; +} + +static http2_error_t +http2_handle_rst_stream_frame (http_conn_t *hc, http2_frame_header_t *fh) +{ + u8 *rx_buf; + http2_error_t rv; + http2_req_t *req; + u32 error_code; + http2_conn_ctx_t *h2c; + + if (fh->stream_id == 0) + return HTTP2_ERROR_PROTOCOL_ERROR; + + rx_buf = http_get_rx_buf (hc); + vec_validate (rx_buf, fh->length - 1); + http_io_ts_read (hc, rx_buf, fh->length, 0); + + rv = http2_frame_read_rst_stream (&error_code, rx_buf, fh->length); + if (rv != HTTP2_ERROR_NO_ERROR) + return rv; + + req = http2_conn_get_req (hc, fh->stream_id); + if (!req) + { + h2c = http2_conn_ctx_get_w_thread (hc); + if (fh->stream_id <= h2c->last_opened_stream_id) + { + /* we reset stream, but peer might send something meanwhile */ + HTTP_DBG (1, "stream closed, ignoring frame"); + return HTTP2_ERROR_NO_ERROR; + } + else + return HTTP2_ERROR_PROTOCOL_ERROR; + } + + req->stream_state = HTTP2_STREAM_STATE_CLOSED; + session_transport_reset_notify (&req->base.connection); + + return HTTP2_ERROR_NO_ERROR; +} + +static http2_error_t 
+http2_handle_goaway_frame (http_conn_t *hc, http2_frame_header_t *fh) +{ + u8 *rx_buf; + http2_error_t rv; + u32 error_code, last_stream_id, req_index, stream_id; + http2_conn_ctx_t *h2c; + http2_req_t *req; + + if (fh->stream_id != 0) + return HTTP2_ERROR_PROTOCOL_ERROR; + + rx_buf = http_get_rx_buf (hc); + vec_validate (rx_buf, fh->length - 1); + http_io_ts_read (hc, rx_buf, fh->length, 0); + + rv = + http2_frame_read_goaway (&error_code, &last_stream_id, rx_buf, fh->length); + if (rv != HTTP2_ERROR_NO_ERROR) + return rv; + + if (error_code == HTTP2_ERROR_NO_ERROR) + { + /* TODO: graceful shutdown (no new streams) */ + } + else + { + /* connection error */ + h2c = http2_conn_ctx_get_w_thread (hc); + hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({ + req = http2_req_get (req_index, hc->c_thread_index); + session_transport_reset_notify (&req->base.connection); + })); + http_shutdown_transport (hc); + } + + return HTTP2_ERROR_NO_ERROR; +} + +static http2_error_t +http2_handle_ping_frame (http_conn_t *hc, http2_frame_header_t *fh) +{ + u8 *rx_buf, *resp = 0; + + if (fh->stream_id != 0 || fh->length != HTTP2_PING_PAYLOAD_LEN) + return HTTP2_ERROR_PROTOCOL_ERROR; + + rx_buf = http_get_rx_buf (hc); + vec_validate (rx_buf, fh->length - 1); + http_io_ts_read (hc, rx_buf, fh->length, 0); + + /* RFC9113 6.7: The endpoint MUST NOT respond to PING frames with ACK */ + if (fh->flags & HTTP2_FRAME_FLAG_ACK) + return HTTP2_ERROR_NO_ERROR; + + http2_frame_write_ping (1, rx_buf, &resp); + http_io_ts_write (hc, resp, vec_len (resp), 0); + vec_free (resp); + http_io_ts_after_write (hc, 1); + + return HTTP2_ERROR_NO_ERROR; +} + +static http2_error_t +http2_handle_push_promise (http_conn_t *hc, http2_frame_header_t *fh) +{ + if (hc->flags & HTTP_CONN_F_IS_SERVER) + { + HTTP_DBG (1, "error: server received PUSH_PROMISE"); + return HTTP2_ERROR_PROTOCOL_ERROR; + } + /* TODO: client */ + return HTTP2_ERROR_INTERNAL_ERROR; +} + +static_always_inline int +http2_expect_preface (http_conn_t *hc, http2_conn_ctx_t *h2c) +{ + u8 *rx_buf; + + ASSERT (hc->flags & HTTP_CONN_F_IS_SERVER); + h2c->flags &= ~HTTP2_CONN_F_EXPECT_PREFACE; + + /* already done in http core */ + if (h2c->flags & HTTP2_CONN_F_PREFACE_VERIFIED) + return 0; + + rx_buf = http_get_rx_buf (hc); + http_io_ts_read (hc, rx_buf, http2_conn_preface.len, 1); + return memcmp (rx_buf, http2_conn_preface.base, http2_conn_preface.len); +} + +/*****************/ +/* http core VFT */ +/*****************/ + +static u32 +http2_hc_index_get_by_req_index (u32 req_index, + clib_thread_index_t thread_index) +{ + http2_req_t *req; + + req = http2_req_get (req_index, thread_index); + return req->base.hr_hc_index; +} + +static transport_connection_t * +http2_req_get_connection (u32 req_index, clib_thread_index_t thread_index) +{ + http2_req_t *req; + req = http2_req_get (req_index, thread_index); + return &(req->base.connection); +} + +static u8 * +format_http2_req (u8 *s, va_list *args) +{ + http2_req_t *req = va_arg (*args, http2_req_t *); + http_conn_t *hc = va_arg (*args, http_conn_t *); + session_t *ts; + + ts = session_get_from_handle (hc->hc_tc_session_handle); + s = format (s, "[%d:%d][H2] stream_id %u app_wrk %u hc_index %u ts %d:%d", + req->base.c_thread_index, req->base.c_s_index, req->stream_id, + req->base.hr_pa_wrk_index, req->base.hr_hc_index, + ts->thread_index, ts->session_index); + + return s; +} + +static u8 * +http2_format_req (u8 *s, va_list *args) +{ + u32 req_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, 
u32); + http_conn_t *hc = va_arg (*args, http_conn_t *); + u32 verbose = va_arg (*args, u32); + http2_req_t *req; + + req = http2_req_get (req_index, thread_index); + + s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http2_req, req, hc); + if (verbose) + { + s = + format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc); + if (verbose > 1) + s = format (s, "\n"); + } + + return s; +} + +static void +http2_app_tx_callback (http_conn_t *hc, u32 req_index, + transport_send_params_t *sp) +{ + http2_req_t *req; + http2_error_t rv; + + HTTP_DBG (1, "hc [%u]%x req_index %u", hc->c_thread_index, hc->hc_hc_index, + req_index); + req = http2_req_get (req_index, hc->c_thread_index); + + if (!http2_req_state_is_tx_valid (req)) + { + if (req->base.state == HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA && + (hc->flags & HTTP_CONN_F_IS_SERVER)) + { + /* server app might send error earlier */ + http_req_state_change (&req->base, HTTP_REQ_STATE_WAIT_APP_REPLY); + } + else + { + clib_warning ("hc [%u]%x invalid tx state: http req state " + "'%U', session state '%U'", + hc->c_thread_index, hc->hc_hc_index, + format_http_req_state, req->base.state, + format_http_conn_state, hc); + http2_stream_error (hc, req, HTTP2_ERROR_INTERNAL_ERROR, sp); + return; + } + } + + /* peer reset stream, but app might send something meanwhile */ + if (req->stream_state == HTTP2_STREAM_STATE_CLOSED) + { + HTTP_DBG (1, "stream closed, ignoring app data"); + http_io_as_drain_all (&req->base); + return; + } + + HTTP_DBG (1, "run state machine"); + rv = http2_req_run_state_machine (hc, req, sp, 1); + if (rv != HTTP2_ERROR_NO_ERROR) + { + http2_connection_error (hc, rv, sp); + return; + } + + /* reset http connection expiration timer */ + http_conn_timer_update (hc); +} + +static void +http2_app_rx_evt_callback (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index) +{ + /* TODO: continue tunnel RX */ +} + +static void +http2_app_close_callback (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index) +{ + http2_req_t *req; + + HTTP_DBG (1, "hc [%u]%x req_index %u", hc->c_thread_index, hc->hc_hc_index, + req_index); + req = http2_req_get (req_index, thread_index); + if (!req) + { + HTTP_DBG (1, "req already deleted"); + return; + } + + if (req->stream_state == HTTP2_STREAM_STATE_CLOSED || + hc->state == HTTP_CONN_STATE_CLOSED) + { + HTTP_DBG (1, "nothing more to send, confirm close"); + session_transport_closed_notify (&req->base.connection); + } + else + { + HTTP_DBG (1, "wait for all data to be written to ts"); + req->flags |= HTTP2_REQ_F_APP_CLOSED; + } +} + +static void +http2_app_reset_callback (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index) +{ + http2_req_t *req; + + HTTP_DBG (1, "hc [%u]%x req_index %u", hc->c_thread_index, hc->hc_hc_index, + req_index); + req = http2_req_get (req_index, thread_index); + req->flags |= HTTP2_REQ_F_APP_CLOSED; + http2_stream_error (hc, req, HTTP2_ERROR_INTERNAL_ERROR, 0); +} + +static int +http2_transport_connected_callback (http_conn_t *hc) +{ + /* TODO */ + return -1; +} + +static void +http2_transport_rx_callback (http_conn_t *hc) +{ + http2_main_t *h2m = &http2_main; + http2_frame_header_t fh; + u32 to_deq; + u8 *rx_buf; + http2_error_t rv; + http2_conn_ctx_t *h2c; + + HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index); + + to_deq = http_io_ts_max_read (hc); + + if (PREDICT_FALSE (to_deq == 0)) + { + HTTP_DBG (1, "no data to deq"); + return; + } + + h2c = http2_conn_ctx_get_w_thread (hc); + if (h2c->flags & 
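+      /* A server connection starts by expecting the 24-octet client
+       * connection preface "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n" (see
+       * http2_conn_preface). Frame parsing begins only after the whole
+       * preface is read and verified; on mismatch or a short read the
+       * transport is simply disconnected. */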
HTTP2_CONN_F_EXPECT_PREFACE) + { + if (to_deq < http2_conn_preface.len) + { + HTTP_DBG (1, "to_deq %u is less than conn preface size", to_deq); + http_disconnect_transport (hc); + return; + } + if (http2_expect_preface (hc, h2c)) + { + HTTP_DBG (1, "conn preface verification failed"); + http_disconnect_transport (hc); + return; + } + http2_send_server_preface (hc); + http_io_ts_drain (hc, http2_conn_preface.len); + to_deq -= http2_conn_preface.len; + if (to_deq == 0) + return; + } + + if (PREDICT_FALSE (to_deq < HTTP2_FRAME_HEADER_SIZE)) + { + HTTP_DBG (1, "to_deq %u is less than frame header size", to_deq); + http2_connection_error (hc, HTTP2_ERROR_PROTOCOL_ERROR, 0); + return; + } + + while (to_deq >= HTTP2_FRAME_HEADER_SIZE) + { + rx_buf = http_get_rx_buf (hc); + http_io_ts_read (hc, rx_buf, HTTP2_FRAME_HEADER_SIZE, 1); + to_deq -= HTTP2_FRAME_HEADER_SIZE; + http2_frame_header_read (rx_buf, &fh); + if (fh.length > h2m->settings.max_frame_size) + { + HTTP_DBG (1, "frame length %lu exceeded SETTINGS_MAX_FRAME_SIZE %lu", + fh.length, h2m->settings.max_frame_size); + http2_connection_error (hc, HTTP2_ERROR_FRAME_SIZE_ERROR, 0); + return; + } + if (fh.length > to_deq) + { + HTTP_DBG ( + 1, "frame payload not yet received, to deq %lu, frame length %lu", + to_deq, fh.length); + if (http_io_ts_fifo_size (hc, 1) < + (fh.length + HTTP2_FRAME_HEADER_SIZE)) + { + clib_warning ("ts rx fifo too small to hold frame (%u)", + fh.length + HTTP2_FRAME_HEADER_SIZE); + http2_connection_error (hc, HTTP2_ERROR_PROTOCOL_ERROR, 0); + } + return; + } + http_io_ts_drain (hc, HTTP2_FRAME_HEADER_SIZE); + to_deq -= fh.length; + + HTTP_DBG (1, "frame type 0x%02x", fh.type); + switch (fh.type) + { + case HTTP2_FRAME_TYPE_HEADERS: + rv = http2_handle_headers_frame (hc, &fh); + break; + case HTTP2_FRAME_TYPE_DATA: + rv = http2_handle_data_frame (hc, &fh); + break; + case HTTP2_FRAME_TYPE_WINDOW_UPDATE: + rv = http2_handle_window_update_frame (hc, &fh); + break; + case HTTP2_FRAME_TYPE_SETTINGS: + rv = http2_handle_settings_frame (hc, &fh); + break; + case HTTP2_FRAME_TYPE_RST_STREAM: + rv = http2_handle_rst_stream_frame (hc, &fh); + break; + case HTTP2_FRAME_TYPE_GOAWAY: + rv = http2_handle_goaway_frame (hc, &fh); + break; + case HTTP2_FRAME_TYPE_PING: + rv = http2_handle_ping_frame (hc, &fh); + break; + case HTTP2_FRAME_TYPE_CONTINUATION: + /* TODO */ + rv = HTTP2_ERROR_INTERNAL_ERROR; + break; + case HTTP2_FRAME_TYPE_PUSH_PROMISE: + rv = http2_handle_push_promise (hc, &fh); + break; + case HTTP2_FRAME_TYPE_PRIORITY: /* deprecated */ + default: + /* ignore unknown frame type */ + http_io_ts_drain (hc, fh.length); + rv = HTTP2_ERROR_NO_ERROR; + break; + } + + if (rv != HTTP2_ERROR_NO_ERROR) + { + http2_connection_error (hc, rv, 0); + return; + } + } + + /* reset http connection expiration timer */ + http_conn_timer_update (hc); +} + +static void +http2_transport_close_callback (http_conn_t *hc) +{ + u32 req_index, stream_id, n_open_streams = 0; + http2_req_t *req; + http2_conn_ctx_t *h2c; + + HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index); + + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + { + HTTP_DBG (1, "no request"); + return; + } + + h2c = http2_conn_ctx_get_w_thread (hc); + hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({ + req = http2_req_get (req_index, hc->c_thread_index); + if (req->stream_state != HTTP2_STREAM_STATE_CLOSED) + { + HTTP_DBG (1, "req_index %u", req_index); + session_transport_closing_notify (&req->base.connection); + n_open_streams++; + } + })); + if (n_open_streams 
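+  /* streams still open were notified above and keep the connection alive
+   * until the apps finish; with nothing open the underlying transport can
+   * be disconnected right away */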
== 0) + { + HTTP_DBG (1, "no open stream disconnecting"); + http_disconnect_transport (hc); + } +} + +static void +http2_transport_reset_callback (http_conn_t *hc) +{ + u32 req_index, stream_id; + http2_req_t *req; + http2_conn_ctx_t *h2c; + + HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index); + + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + return; + + h2c = http2_conn_ctx_get_w_thread (hc); + hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({ + req = http2_req_get (req_index, hc->c_thread_index); + if (req->stream_state != HTTP2_STREAM_STATE_CLOSED) + { + HTTP_DBG (1, "req_index %u", req_index); + session_transport_reset_notify (&req->base.connection); + } + })); +} + +static void +http2_transport_conn_reschedule_callback (http_conn_t *hc) +{ + /* TODO */ +} + +static void +http2_conn_accept_callback (http_conn_t *hc) +{ + http2_conn_ctx_t *h2c; + + HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index); + h2c = http2_conn_ctx_alloc_w_thread (hc); + h2c->flags |= HTTP2_CONN_F_EXPECT_PREFACE; + /* already done in http core */ + if (http_get_transport_proto (hc) == TRANSPORT_PROTO_TCP) + h2c->flags |= HTTP2_CONN_F_PREFACE_VERIFIED; +} + +static void +http2_conn_cleanup_callback (http_conn_t *hc) +{ + u32 req_index, stream_id, *req_index_p, *req_indices = 0; + http2_req_t *req; + http2_conn_ctx_t *h2c; + + HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index); + h2c = http2_conn_ctx_get_w_thread (hc); + hash_foreach (stream_id, req_index, h2c->req_by_stream_id, + ({ vec_add1 (req_indices, req_index); })); + + vec_foreach (req_index_p, req_indices) + { + req = http2_req_get (*req_index_p, hc->c_thread_index); + if (req->stream_state != HTTP2_STREAM_STATE_CLOSED) + session_transport_delete_notify (&req->base.connection); + http2_conn_free_req (h2c, req, hc->c_thread_index); + } + + vec_free (req_indices); + http2_conn_ctx_free (hc); +} + +static void +http2_enable_callback (void) +{ + http2_main_t *h2m = &http2_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + + num_threads = 1 /* main thread */ + vtm->n_threads; + + vec_validate (h2m->conn_pool, num_threads - 1); + vec_validate (h2m->req_pool, num_threads - 1); +} + +static int +http2_update_settings (http_settings_t type, u32 value) +{ + http2_main_t *h2m = &http2_main; + + switch (type) + { +#define _(v, label, member, min, max, default_value, err_code) \ + case HTTP2_SETTINGS_##label: \ + if (!(value >= min && value <= max)) \ + return -1; \ + h2m->settings.member = value; \ + return 0; + foreach_http2_settings +#undef _ + default : return -1; + } +} + +static uword +http2_unformat_config_callback (unformat_input_t *input) +{ + u32 value; + + if (!input) + return 0; + + unformat_skip_white_space (input); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "initial-window-size %u", &value)) + { + if (http2_update_settings (HTTP2_SETTINGS_INITIAL_WINDOW_SIZE, + value)) + return 0; + } + else if (unformat (input, "max-frame-size %u", &value)) + { + if (http2_update_settings (HTTP2_SETTINGS_MAX_FRAME_SIZE, value)) + return 0; + } + else if (unformat (input, "max-header-list-size %u", &value)) + { + if (http2_update_settings (HTTP2_SETTINGS_MAX_HEADER_LIST_SIZE, + value)) + return 0; + } + else if (unformat (input, "header-table-size %u", &value)) + { + if (http2_update_settings (HTTP2_SETTINGS_HEADER_TABLE_SIZE, value)) + return 0; + } + else + return 0; + } + return 1; +} + +const static http_engine_vft_t http2_engine = { + .name = 
"http2", + .hc_index_get_by_req_index = http2_hc_index_get_by_req_index, + .req_get_connection = http2_req_get_connection, + .format_req = http2_format_req, + .app_tx_callback = http2_app_tx_callback, + .app_rx_evt_callback = http2_app_rx_evt_callback, + .app_close_callback = http2_app_close_callback, + .app_reset_callback = http2_app_reset_callback, + .transport_connected_callback = http2_transport_connected_callback, + .transport_rx_callback = http2_transport_rx_callback, + .transport_close_callback = http2_transport_close_callback, + .transport_reset_callback = http2_transport_reset_callback, + .transport_conn_reschedule_callback = + http2_transport_conn_reschedule_callback, + .conn_accept_callback = http2_conn_accept_callback, + .conn_cleanup_callback = http2_conn_cleanup_callback, + .enable_callback = http2_enable_callback, + .unformat_cfg_callback = http2_unformat_config_callback, +}; + +clib_error_t * +http2_init (vlib_main_t *vm) +{ + http2_main_t *h2m = &http2_main; + + clib_warning ("http/2 enabled"); + h2m->settings = http2_default_conn_settings; + h2m->settings.max_concurrent_streams = 100; /* by default unlimited */ + http_register_engine (&http2_engine, HTTP_VERSION_2); + + return 0; +} + +#if HTTP_2_ENABLE > 0 +VLIB_INIT_FUNCTION (http2_init) = { + .runs_after = VLIB_INITS ("http_transport_init"), +}; +#endif diff --git a/src/plugins/http/http2/http2.h b/src/plugins/http/http2/http2.h new file mode 100644 index 00000000000..9fc95344771 --- /dev/null +++ b/src/plugins/http/http2/http2.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#ifndef SRC_PLUGINS_HTTP_HTTP2_H_ +#define SRC_PLUGINS_HTTP_HTTP2_H_ + +#include <vppinfra/format.h> +#include <vppinfra/types.h> + +/* RFC9113 section 7 */ +#define foreach_http2_error \ + _ (NO_ERROR, "NO_ERROR") \ + _ (PROTOCOL_ERROR, "PROTOCOL_ERROR") \ + _ (INTERNAL_ERROR, "INTERNAL_ERROR") \ + _ (FLOW_CONTROL_ERROR, "FLOW_CONTROL_ERROR") \ + _ (SETTINGS_TIMEOUT, "SETTINGS_TIMEOUT") \ + _ (STREAM_CLOSED, "STREAM_CLOSED") \ + _ (FRAME_SIZE_ERROR, "FRAME_SIZE_ERROR") \ + _ (REFUSED_STREAM, "REFUSED_STREAM") \ + _ (CANCEL, "CANCEL") \ + _ (COMPRESSION_ERROR, "COMPRESSION_ERROR") \ + _ (CONNECT_ERROR, "CONNECT_ERROR") \ + _ (ENHANCE_YOUR_CALM, "ENHANCE_YOUR_CALM") \ + _ (INADEQUATE_SECURITY, "INADEQUATE_SECURITY") \ + _ (HTTP_1_1_REQUIRED, "HTTP_1_1_REQUIRED") + +typedef enum http2_error_ +{ +#define _(s, str) HTTP2_ERROR_##s, + foreach_http2_error +#undef _ +} http2_error_t; + +static inline u8 * +format_http2_error (u8 *s, va_list *va) +{ + http2_error_t e = va_arg (*va, http2_error_t); + u8 *t = 0; + + switch (e) + { +#define _(s, str) \ + case HTTP2_ERROR_##s: \ + t = (u8 *) str; \ + break; + foreach_http2_error +#undef _ + default : return format (s, "BUG: unknown"); + } + return format (s, "%s", t); +} + +#define foreach_http2_pseudo_header \ + _ (0, METHOD, "method") \ + _ (1, SCHEME, "scheme") \ + _ (2, AUTHORITY, "authority") \ + _ (3, PATH, "path") \ + _ (4, STATUS, "status") + +/* value, label, member, min, max, default_value, err_code */ +#define foreach_http2_settings \ + _ (1, HEADER_TABLE_SIZE, header_table_size, 0, CLIB_U32_MAX, 4096, \ + HTTP2_ERROR_NO_ERROR) \ + _ (2, ENABLE_PUSH, enable_push, 0, 1, 1, HTTP2_ERROR_PROTOCOL_ERROR) \ + _ (3, MAX_CONCURRENT_STREAMS, max_concurrent_streams, 0, CLIB_U32_MAX, \ + CLIB_U32_MAX, HTTP2_ERROR_NO_ERROR) \ + _ (4, INITIAL_WINDOW_SIZE, initial_window_size, 0, 0x7FFFFFFF, 65535, \ + HTTP2_ERROR_FLOW_CONTROL_ERROR) \ + _ (5, 
MAX_FRAME_SIZE, max_frame_size, 16384, 16777215, 16384, \ + HTTP2_ERROR_PROTOCOL_ERROR) \ + _ (6, MAX_HEADER_LIST_SIZE, max_header_list_size, 0, CLIB_U32_MAX, \ + CLIB_U32_MAX, HTTP2_ERROR_NO_ERROR) + +typedef enum +{ +#define _(value, label, member, min, max, default_value, err_code) \ + HTTP2_SETTINGS_##label = value, + foreach_http2_settings +#undef _ +} http_settings_t; + +typedef struct +{ +#define _(value, label, member, min, max, default_value, err_code) u32 member; + foreach_http2_settings +#undef _ +} http2_conn_settings_t; + +static const http2_conn_settings_t http2_default_conn_settings = { +#define _(value, label, member, min, max, default_value, err_code) \ + default_value, + foreach_http2_settings +#undef _ +}; + +#endif /* SRC_PLUGINS_HTTP_HTTP2_H_ */ diff --git a/src/plugins/http/http2/huffman_table.h b/src/plugins/http/http2/huffman_table.h new file mode 100644 index 00000000000..66afffbc54a --- /dev/null +++ b/src/plugins/http/http2/huffman_table.h @@ -0,0 +1,319 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +/* generated by mk_huffman_table.py */ + +#ifndef SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ +#define SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ + +#include <vppinfra/types.h> + +typedef struct +{ + u8 code_len; + u32 code; +} hpack_huffman_symbol_t; + +static hpack_huffman_symbol_t huff_sym_table[] = { + { 13, 0x1ff8 }, { 23, 0x7fffd8 }, { 28, 0xfffffe2 }, { 28, 0xfffffe3 }, + { 28, 0xfffffe4 }, { 28, 0xfffffe5 }, { 28, 0xfffffe6 }, { 28, 0xfffffe7 }, + { 28, 0xfffffe8 }, { 24, 0xffffea }, { 30, 0x3ffffffc }, { 28, 0xfffffe9 }, + { 28, 0xfffffea }, { 30, 0x3ffffffd }, { 28, 0xfffffeb }, { 28, 0xfffffec }, + { 28, 0xfffffed }, { 28, 0xfffffee }, { 28, 0xfffffef }, { 28, 0xffffff0 }, + { 28, 0xffffff1 }, { 28, 0xffffff2 }, { 30, 0x3ffffffe }, { 28, 0xffffff3 }, + { 28, 0xffffff4 }, { 28, 0xffffff5 }, { 28, 0xffffff6 }, { 28, 0xffffff7 }, + { 28, 0xffffff8 }, { 28, 0xffffff9 }, { 28, 0xffffffa }, { 28, 0xffffffb }, + { 6, 0x14 }, { 10, 0x3f8 }, { 10, 0x3f9 }, { 12, 0xffa }, + { 13, 0x1ff9 }, { 6, 0x15 }, { 8, 0xf8 }, { 11, 0x7fa }, + { 10, 0x3fa }, { 10, 0x3fb }, { 8, 0xf9 }, { 11, 0x7fb }, + { 8, 0xfa }, { 6, 0x16 }, { 6, 0x17 }, { 6, 0x18 }, + { 5, 0x0 }, { 5, 0x1 }, { 5, 0x2 }, { 6, 0x19 }, + { 6, 0x1a }, { 6, 0x1b }, { 6, 0x1c }, { 6, 0x1d }, + { 6, 0x1e }, { 6, 0x1f }, { 7, 0x5c }, { 8, 0xfb }, + { 15, 0x7ffc }, { 6, 0x20 }, { 12, 0xffb }, { 10, 0x3fc }, + { 13, 0x1ffa }, { 6, 0x21 }, { 7, 0x5d }, { 7, 0x5e }, + { 7, 0x5f }, { 7, 0x60 }, { 7, 0x61 }, { 7, 0x62 }, + { 7, 0x63 }, { 7, 0x64 }, { 7, 0x65 }, { 7, 0x66 }, + { 7, 0x67 }, { 7, 0x68 }, { 7, 0x69 }, { 7, 0x6a }, + { 7, 0x6b }, { 7, 0x6c }, { 7, 0x6d }, { 7, 0x6e }, + { 7, 0x6f }, { 7, 0x70 }, { 7, 0x71 }, { 7, 0x72 }, + { 8, 0xfc }, { 7, 0x73 }, { 8, 0xfd }, { 13, 0x1ffb }, + { 19, 0x7fff0 }, { 13, 0x1ffc }, { 14, 0x3ffc }, { 6, 0x22 }, + { 15, 0x7ffd }, { 5, 0x3 }, { 6, 0x23 }, { 5, 0x4 }, + { 6, 0x24 }, { 5, 0x5 }, { 6, 0x25 }, { 6, 0x26 }, + { 6, 0x27 }, { 5, 0x6 }, { 7, 0x74 }, { 7, 0x75 }, + { 6, 0x28 }, { 6, 0x29 }, { 6, 0x2a }, { 5, 0x7 }, + { 6, 0x2b }, { 7, 0x76 }, { 6, 0x2c }, { 5, 0x8 }, + { 5, 0x9 }, { 6, 0x2d }, { 7, 0x77 }, { 7, 0x78 }, + { 7, 0x79 }, { 7, 0x7a }, { 7, 0x7b }, { 15, 0x7ffe }, + { 11, 0x7fc }, { 14, 0x3ffd }, { 13, 0x1ffd }, { 28, 0xffffffc }, + { 20, 0xfffe6 }, { 22, 0x3fffd2 }, { 20, 0xfffe7 }, { 20, 0xfffe8 }, + { 22, 0x3fffd3 }, { 22, 0x3fffd4 }, { 22, 0x3fffd5 }, { 23, 0x7fffd9 }, + { 22, 0x3fffd6 }, { 23, 0x7fffda }, { 23, 0x7fffdb }, { 23, 
0x7fffdc }, + { 23, 0x7fffdd }, { 23, 0x7fffde }, { 24, 0xffffeb }, { 23, 0x7fffdf }, + { 24, 0xffffec }, { 24, 0xffffed }, { 22, 0x3fffd7 }, { 23, 0x7fffe0 }, + { 24, 0xffffee }, { 23, 0x7fffe1 }, { 23, 0x7fffe2 }, { 23, 0x7fffe3 }, + { 23, 0x7fffe4 }, { 21, 0x1fffdc }, { 22, 0x3fffd8 }, { 23, 0x7fffe5 }, + { 22, 0x3fffd9 }, { 23, 0x7fffe6 }, { 23, 0x7fffe7 }, { 24, 0xffffef }, + { 22, 0x3fffda }, { 21, 0x1fffdd }, { 20, 0xfffe9 }, { 22, 0x3fffdb }, + { 22, 0x3fffdc }, { 23, 0x7fffe8 }, { 23, 0x7fffe9 }, { 21, 0x1fffde }, + { 23, 0x7fffea }, { 22, 0x3fffdd }, { 22, 0x3fffde }, { 24, 0xfffff0 }, + { 21, 0x1fffdf }, { 22, 0x3fffdf }, { 23, 0x7fffeb }, { 23, 0x7fffec }, + { 21, 0x1fffe0 }, { 21, 0x1fffe1 }, { 22, 0x3fffe0 }, { 21, 0x1fffe2 }, + { 23, 0x7fffed }, { 22, 0x3fffe1 }, { 23, 0x7fffee }, { 23, 0x7fffef }, + { 20, 0xfffea }, { 22, 0x3fffe2 }, { 22, 0x3fffe3 }, { 22, 0x3fffe4 }, + { 23, 0x7ffff0 }, { 22, 0x3fffe5 }, { 22, 0x3fffe6 }, { 23, 0x7ffff1 }, + { 26, 0x3ffffe0 }, { 26, 0x3ffffe1 }, { 20, 0xfffeb }, { 19, 0x7fff1 }, + { 22, 0x3fffe7 }, { 23, 0x7ffff2 }, { 22, 0x3fffe8 }, { 25, 0x1ffffec }, + { 26, 0x3ffffe2 }, { 26, 0x3ffffe3 }, { 26, 0x3ffffe4 }, { 27, 0x7ffffde }, + { 27, 0x7ffffdf }, { 26, 0x3ffffe5 }, { 24, 0xfffff1 }, { 25, 0x1ffffed }, + { 19, 0x7fff2 }, { 21, 0x1fffe3 }, { 26, 0x3ffffe6 }, { 27, 0x7ffffe0 }, + { 27, 0x7ffffe1 }, { 26, 0x3ffffe7 }, { 27, 0x7ffffe2 }, { 24, 0xfffff2 }, + { 21, 0x1fffe4 }, { 21, 0x1fffe5 }, { 26, 0x3ffffe8 }, { 26, 0x3ffffe9 }, + { 28, 0xffffffd }, { 27, 0x7ffffe3 }, { 27, 0x7ffffe4 }, { 27, 0x7ffffe5 }, + { 20, 0xfffec }, { 24, 0xfffff3 }, { 20, 0xfffed }, { 21, 0x1fffe6 }, + { 22, 0x3fffe9 }, { 21, 0x1fffe7 }, { 21, 0x1fffe8 }, { 23, 0x7ffff3 }, + { 22, 0x3fffea }, { 22, 0x3fffeb }, { 25, 0x1ffffee }, { 25, 0x1ffffef }, + { 24, 0xfffff4 }, { 24, 0xfffff5 }, { 26, 0x3ffffea }, { 23, 0x7ffff4 }, + { 26, 0x3ffffeb }, { 27, 0x7ffffe6 }, { 26, 0x3ffffec }, { 26, 0x3ffffed }, + { 27, 0x7ffffe7 }, { 27, 0x7ffffe8 }, { 27, 0x7ffffe9 }, { 27, 0x7ffffea }, + { 27, 0x7ffffeb }, { 28, 0xffffffe }, { 27, 0x7ffffec }, { 27, 0x7ffffed }, + { 27, 0x7ffffee }, { 27, 0x7ffffef }, { 27, 0x7fffff0 }, { 26, 0x3ffffee }, +}; + +typedef struct +{ + u8 symbol; + u8 code_len; +} hpack_huffman_code_t; + +static hpack_huffman_code_t huff_code_table_fast[] = { + { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, + { 0x30, 5 }, { 0x30, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, + { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x32, 5 }, { 0x32, 5 }, + { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, + { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, + { 0x61, 5 }, { 0x61, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, + { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x65, 5 }, { 0x65, 5 }, + { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, + { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, + { 0x69, 5 }, { 0x69, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, + { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x73, 5 }, { 0x73, 5 }, + { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, + { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, + { 0x74, 5 }, { 0x74, 5 }, { 0x20, 6 }, { 0x20, 6 }, { 0x20, 6 }, { 0x20, 6 }, + { 0x25, 6 }, { 0x25, 6 }, { 0x25, 6 }, { 0x25, 6 }, { 0x2D, 6 }, { 0x2D, 6 }, + { 0x2D, 6 }, { 0x2D, 6 }, 
{ 0x2E, 6 }, { 0x2E, 6 }, { 0x2E, 6 }, { 0x2E, 6 }, + { 0x2F, 6 }, { 0x2F, 6 }, { 0x2F, 6 }, { 0x2F, 6 }, { 0x33, 6 }, { 0x33, 6 }, + { 0x33, 6 }, { 0x33, 6 }, { 0x34, 6 }, { 0x34, 6 }, { 0x34, 6 }, { 0x34, 6 }, + { 0x35, 6 }, { 0x35, 6 }, { 0x35, 6 }, { 0x35, 6 }, { 0x36, 6 }, { 0x36, 6 }, + { 0x36, 6 }, { 0x36, 6 }, { 0x37, 6 }, { 0x37, 6 }, { 0x37, 6 }, { 0x37, 6 }, + { 0x38, 6 }, { 0x38, 6 }, { 0x38, 6 }, { 0x38, 6 }, { 0x39, 6 }, { 0x39, 6 }, + { 0x39, 6 }, { 0x39, 6 }, { 0x3D, 6 }, { 0x3D, 6 }, { 0x3D, 6 }, { 0x3D, 6 }, + { 0x41, 6 }, { 0x41, 6 }, { 0x41, 6 }, { 0x41, 6 }, { 0x5F, 6 }, { 0x5F, 6 }, + { 0x5F, 6 }, { 0x5F, 6 }, { 0x62, 6 }, { 0x62, 6 }, { 0x62, 6 }, { 0x62, 6 }, + { 0x64, 6 }, { 0x64, 6 }, { 0x64, 6 }, { 0x64, 6 }, { 0x66, 6 }, { 0x66, 6 }, + { 0x66, 6 }, { 0x66, 6 }, { 0x67, 6 }, { 0x67, 6 }, { 0x67, 6 }, { 0x67, 6 }, + { 0x68, 6 }, { 0x68, 6 }, { 0x68, 6 }, { 0x68, 6 }, { 0x6C, 6 }, { 0x6C, 6 }, + { 0x6C, 6 }, { 0x6C, 6 }, { 0x6D, 6 }, { 0x6D, 6 }, { 0x6D, 6 }, { 0x6D, 6 }, + { 0x6E, 6 }, { 0x6E, 6 }, { 0x6E, 6 }, { 0x6E, 6 }, { 0x70, 6 }, { 0x70, 6 }, + { 0x70, 6 }, { 0x70, 6 }, { 0x72, 6 }, { 0x72, 6 }, { 0x72, 6 }, { 0x72, 6 }, + { 0x75, 6 }, { 0x75, 6 }, { 0x75, 6 }, { 0x75, 6 }, { 0x3A, 7 }, { 0x3A, 7 }, + { 0x42, 7 }, { 0x42, 7 }, { 0x43, 7 }, { 0x43, 7 }, { 0x44, 7 }, { 0x44, 7 }, + { 0x45, 7 }, { 0x45, 7 }, { 0x46, 7 }, { 0x46, 7 }, { 0x47, 7 }, { 0x47, 7 }, + { 0x48, 7 }, { 0x48, 7 }, { 0x49, 7 }, { 0x49, 7 }, { 0x4A, 7 }, { 0x4A, 7 }, + { 0x4B, 7 }, { 0x4B, 7 }, { 0x4C, 7 }, { 0x4C, 7 }, { 0x4D, 7 }, { 0x4D, 7 }, + { 0x4E, 7 }, { 0x4E, 7 }, { 0x4F, 7 }, { 0x4F, 7 }, { 0x50, 7 }, { 0x50, 7 }, + { 0x51, 7 }, { 0x51, 7 }, { 0x52, 7 }, { 0x52, 7 }, { 0x53, 7 }, { 0x53, 7 }, + { 0x54, 7 }, { 0x54, 7 }, { 0x55, 7 }, { 0x55, 7 }, { 0x56, 7 }, { 0x56, 7 }, + { 0x57, 7 }, { 0x57, 7 }, { 0x59, 7 }, { 0x59, 7 }, { 0x6A, 7 }, { 0x6A, 7 }, + { 0x6B, 7 }, { 0x6B, 7 }, { 0x71, 7 }, { 0x71, 7 }, { 0x76, 7 }, { 0x76, 7 }, + { 0x77, 7 }, { 0x77, 7 }, { 0x78, 7 }, { 0x78, 7 }, { 0x79, 7 }, { 0x79, 7 }, + { 0x7A, 7 }, { 0x7A, 7 }, { 0x26, 8 }, { 0x2A, 8 }, { 0x2C, 8 }, { 0x3B, 8 }, + { 0x58, 8 }, { 0x5A, 8 }, { 0x00, 0 }, { 0x00, 0 }, +}; + +typedef struct +{ + u32 first_code; + u8 code_len; + u8 symbols[29]; +} hpack_huffman_group_t; + +/* clang-format off */ + +static hpack_huffman_group_t huff_code_table_slow[] = { + { + 0x3f8, /* first_code */ + 10, /* code_len */ + { + 0x21, 0x22, 0x28, 0x29, 0x3F, + } /* symbols */ + }, + { + 0x7fa, /* first_code */ + 11, /* code_len */ + { + 0x27, 0x2B, 0x7C, + } /* symbols */ + }, + { + 0xffa, /* first_code */ + 12, /* code_len */ + { + 0x23, 0x3E, + } /* symbols */ + }, + { + 0x1ff8, /* first_code */ + 13, /* code_len */ + { + 0x00, 0x24, 0x40, 0x5B, 0x5D, 0x7E, + } /* symbols */ + }, + { + 0x3ffc, /* first_code */ + 14, /* code_len */ + { + 0x5E, 0x7D, + } /* symbols */ + }, + { + 0x7ffc, /* first_code */ + 15, /* code_len */ + { + 0x3C, 0x60, 0x7B, + } /* symbols */ + }, + { + 0x7fff0, /* first_code */ + 19, /* code_len */ + { + 0x5C, 0xC3, 0xD0, + } /* symbols */ + }, + { + 0xfffe6, /* first_code */ + 20, /* code_len */ + { + 0x80, 0x82, 0x83, 0xA2, 0xB8, 0xC2, 0xE0, 0xE2, + } /* symbols */ + }, + { + 0x1fffdc, /* first_code */ + 21, /* code_len */ + { + 0x99, 0xA1, 0xA7, 0xAC, 0xB0, 0xB1, 0xB3, 0xD1, 0xD8, 0xD9, + 0xE3, 0xE5, 0xE6, + } /* symbols */ + }, + { + 0x3fffd2, /* first_code */ + 22, /* code_len */ + { + 0x81, 0x84, 0x85, 0x86, 0x88, 0x92, 0x9A, 0x9C, 0xA0, 0xA3, + 0xA4, 0xA9, 0xAA, 0xAD, 0xB2, 0xB5, 0xB9, 0xBA, 0xBB, 
0xBD, + 0xBE, 0xC4, 0xC6, 0xE4, 0xE8, 0xE9, + } /* symbols */ + }, + { + 0x7fffd8, /* first_code */ + 23, /* code_len */ + { + 0x01, 0x87, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8F, 0x93, 0x95, + 0x96, 0x97, 0x98, 0x9B, 0x9D, 0x9E, 0xA5, 0xA6, 0xA8, 0xAE, + 0xAF, 0xB4, 0xB6, 0xB7, 0xBC, 0xBF, 0xC5, 0xE7, 0xEF, + } /* symbols */ + }, + { + 0xffffea, /* first_code */ + 24, /* code_len */ + { + 0x09, 0x8E, 0x90, 0x91, 0x94, 0x9F, 0xAB, 0xCE, 0xD7, 0xE1, + 0xEC, 0xED, + } /* symbols */ + }, + { + 0x1ffffec, /* first_code */ + 25, /* code_len */ + { + 0xC7, 0xCF, 0xEA, 0xEB, + } /* symbols */ + }, + { + 0x3ffffe0, /* first_code */ + 26, /* code_len */ + { + 0xC0, 0xC1, 0xC8, 0xC9, 0xCA, 0xCD, 0xD2, 0xD5, 0xDA, 0xDB, + 0xEE, 0xF0, 0xF2, 0xF3, 0xFF, + } /* symbols */ + }, + { + 0x7ffffde, /* first_code */ + 27, /* code_len */ + { + 0xCB, 0xCC, 0xD3, 0xD4, 0xD6, 0xDD, 0xDE, 0xDF, 0xF1, 0xF4, + 0xF5, 0xF6, 0xF7, 0xF8, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, + } /* symbols */ + }, + { + 0xfffffe2, /* first_code */ + 28, /* code_len */ + { + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, + 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x17, 0x18, 0x19, + 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x7F, 0xDC, 0xF9, + } /* symbols */ + }, + { + 0x3ffffffc, /* first_code */ + 30, /* code_len */ + { + 0x0A, 0x0D, 0x16, + } /* symbols */ + }, +}; + +/* clang format-on */ + +always_inline hpack_huffman_group_t * +hpack_huffman_get_group (u32 value) +{ + if (value < 0xFF400000) + return &huff_code_table_slow[0]; + else if (value < 0xFFA00000) + return &huff_code_table_slow[1]; + else if (value < 0xFFC00000) + return &huff_code_table_slow[2]; + else if (value < 0xFFF00000) + return &huff_code_table_slow[3]; + else if (value < 0xFFF80000) + return &huff_code_table_slow[4]; + else if (value < 0xFFFE0000) + return &huff_code_table_slow[5]; + else if (value < 0xFFFE6000) + return &huff_code_table_slow[6]; + else if (value < 0xFFFEE000) + return &huff_code_table_slow[7]; + else if (value < 0xFFFF4800) + return &huff_code_table_slow[8]; + else if (value < 0xFFFFB000) + return &huff_code_table_slow[9]; + else if (value < 0xFFFFEA00) + return &huff_code_table_slow[10]; + else if (value < 0xFFFFF600) + return &huff_code_table_slow[11]; + else if (value < 0xFFFFF800) + return &huff_code_table_slow[12]; + else if (value < 0xFFFFFBC0) + return &huff_code_table_slow[13]; + else if (value < 0xFFFFFE20) + return &huff_code_table_slow[14]; + else if (value < 0xFFFFFFF0) + return &huff_code_table_slow[15]; + else + return &huff_code_table_slow[16]; +} + +#endif /* SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ */ diff --git a/src/plugins/http/http_buffer.c b/src/plugins/http/http_buffer.c index bc1b8c08630..fd90fbfed8c 100644 --- a/src/plugins/http/http_buffer.c +++ b/src/plugins/http/http_buffer.c @@ -57,8 +57,9 @@ buf_fifo_free (http_buffer_t *hb) vec_free (bf->segs); } -static svm_fifo_seg_t * -buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs) +static u32 +buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, svm_fifo_seg_t **fs, + u32 *n_segs) { http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data; @@ -67,7 +68,7 @@ buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs) max_len = clib_min (bf->len - bf->offset, (u64) max_len); - vec_validate (bf->segs, _n_segs); + vec_validate (bf->segs, _n_segs - 1); len = svm_fifo_segments (bf->src, 0, bf->segs, &_n_segs, max_len); if (len < 0) @@ -77,7 +78,8 @@ buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs) HTTP_DBG (1, "available to send %u n_segs %u", len, *n_segs); - 
return bf->segs; + *fs = bf->segs; + return len; } static u32 @@ -92,13 +94,13 @@ buf_fifo_drain (http_buffer_t *hb, u32 len) return len; } -static u8 -buf_fifo_is_drained (http_buffer_t *hb) +static u64 +buf_fifo_bytes_left (http_buffer_t *hb) { http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data; ASSERT (bf->offset <= bf->len); - return (bf->offset == bf->len); + return (bf->len - bf->offset); } const static http_buffer_vft_t buf_fifo_vft = { @@ -106,7 +108,7 @@ const static http_buffer_vft_t buf_fifo_vft = { .free = buf_fifo_free, .get_segs = buf_fifo_get_segs, .drain = buf_fifo_drain, - .is_drained = buf_fifo_is_drained, + .bytes_left = buf_fifo_bytes_left, }; HTTP_BUFFER_REGISTER_VFT (HTTP_BUFFER_FIFO, buf_fifo_vft); @@ -115,6 +117,7 @@ typedef struct http_buffer_ptr_ { svm_fifo_seg_t *segs; svm_fifo_t *f; + u64 len; } http_buffer_ptr_t; STATIC_ASSERT (sizeof (http_buffer_ptr_t) <= HTTP_BUFFER_DATA_SZ, "buf data"); @@ -135,12 +138,11 @@ buf_ptr_init (http_buffer_t *hb, void *data, u64 len) bf->f = f; bf->segs = 0; - vec_validate (bf->segs, 1); + vec_validate (bf->segs, 0); bf->segs[0].data = uword_to_pointer (ptr, u8 *); - bf->segs[0].len = len; - bf->segs[1] = bf->segs[0]; + bf->len = len; } static void @@ -152,15 +154,17 @@ buf_ptr_free (http_buffer_t *hb) vec_free (bf->segs); } -static svm_fifo_seg_t * -buf_ptr_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs) +static u32 +buf_ptr_get_segs (http_buffer_t *hb, u32 max_len, svm_fifo_seg_t **fs, + u32 *n_segs) { http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data; *n_segs = 1; - bf->segs[1].len = clib_min (bf->segs[0].len, max_len); + bf->segs[0].len = clib_min (bf->len, (u64) max_len); - return &bf->segs[1]; + *fs = bf->segs; + return bf->segs[0].len; } static u32 @@ -168,14 +172,14 @@ buf_ptr_drain (http_buffer_t *hb, u32 len) { http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data; - ASSERT (bf->segs[0].len >= len); + ASSERT (bf->len >= len); - bf->segs[1].data += len; - bf->segs[0].len -= len; + bf->segs[0].data += len; + bf->len -= len; - HTTP_DBG (1, "drained %u left %u", len, bf->segs[0].len); + HTTP_DBG (1, "drained %u left %u", len, bf->len); - if (!bf->segs[0].len) + if (!bf->len) { svm_fifo_dequeue_drop (bf->f, sizeof (uword)); return sizeof (uword); @@ -184,12 +188,12 @@ buf_ptr_drain (http_buffer_t *hb, u32 len) return 0; } -static u8 -buf_ptr_is_drained (http_buffer_t *hb) +static u64 +buf_ptr_bytes_left (http_buffer_t *hb) { http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data; - return (bf->segs[0].len == 0); + return bf->len; } const static http_buffer_vft_t buf_ptr_vft = { @@ -197,7 +201,7 @@ const static http_buffer_vft_t buf_ptr_vft = { .free = buf_ptr_free, .get_segs = buf_ptr_get_segs, .drain = buf_ptr_drain, - .is_drained = buf_ptr_is_drained, + .bytes_left = buf_ptr_bytes_left, }; HTTP_BUFFER_REGISTER_VFT (HTTP_BUFFER_PTR, buf_ptr_vft); diff --git a/src/plugins/http/http_buffer.h b/src/plugins/http/http_buffer.h index 1140be42d6e..01b37d4173b 100644 --- a/src/plugins/http/http_buffer.h +++ b/src/plugins/http/http_buffer.h @@ -38,9 +38,10 @@ struct http_buffer_vft_ { void (*init) (http_buffer_t *, void *data, u64 len); void (*free) (http_buffer_t *); - svm_fifo_seg_t *(*get_segs) (http_buffer_t *, u32 max_len, u32 *n_segs); + u32 (*get_segs) (http_buffer_t *, u32 max_len, svm_fifo_seg_t **fs, + u32 *n_segs); u32 (*drain) (http_buffer_t *, u32 len); - u8 (*is_drained) (http_buffer_t *); + u64 (*bytes_left) (http_buffer_t *); }; void http_buffer_init (http_buffer_t *hb, http_buffer_type_t type, 
@@ -53,10 +54,11 @@ http_buffer_free (http_buffer_t *hb) hb->vft->free (hb); } -static inline svm_fifo_seg_t * -http_buffer_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs) +static inline u32 +http_buffer_get_segs (http_buffer_t *hb, u32 max_len, svm_fifo_seg_t **fs, + u32 *n_segs) { - return hb->vft->get_segs (hb, max_len, n_segs); + return hb->vft->get_segs (hb, max_len, fs, n_segs); } static inline u32 @@ -65,10 +67,10 @@ http_buffer_drain (http_buffer_t *hb, u32 len) return hb->vft->drain (hb, len); } -static inline u8 -http_buffer_is_drained (http_buffer_t *hb) +static inline u64 +http_buffer_bytes_left (http_buffer_t *hb) { - return hb->vft->is_drained (hb); + return hb->vft->bytes_left (hb); } #endif /* SRC_PLUGINS_HTTP_HTTP_BUFFER_H_ */ diff --git a/src/plugins/http/http_header_names.h b/src/plugins/http/http_header_names.h index 99acac786db..1778daf10d9 100644 --- a/src/plugins/http/http_header_names.h +++ b/src/plugins/http/http_header_names.h @@ -8,7 +8,8 @@ #include <http/http.h> static http_token_t http_header_names[] = { -#define _(sym, str) { http_token_lit (str) }, +#define _(sym, str_canonical, str_lower, hpack_index) \ + { http_token_lit (str_canonical) }, foreach_http_header_name #undef _ }; diff --git a/src/plugins/http/http_plugin.rst b/src/plugins/http/http_plugin.rst index 995e55e6f0f..4e799a57668 100644 --- a/src/plugins/http/http_plugin.rst +++ b/src/plugins/http/http_plugin.rst @@ -15,7 +15,7 @@ Usage ----- The plugin exposes following inline functions: ``http_validate_abs_path_syntax``, ``http_validate_query_syntax``, -``http_percent_decode``, ``http_path_remove_dot_segments``, ``http_build_header_table``, ``http_get_header``, +``http_percent_decode``, ``http_path_sanitize``, ``http_build_header_table``, ``http_get_header``, ``http_reset_header_table``, ``http_free_header_table``, ``http_init_headers_ctx``, ``http_add_header``, ``http_add_custom_header``, ``http_validate_target_syntax``, ``http_parse_authority``, ``http_serialize_authority``, ``http_parse_masque_host_port``, ``http_decap_udp_payload_datagram``, ``http_encap_udp_payload_datagram``, diff --git a/src/plugins/http/http_private.h b/src/plugins/http/http_private.h new file mode 100644 index 00000000000..1f9812de7fa --- /dev/null +++ b/src/plugins/http/http_private.h @@ -0,0 +1,885 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_ +#define SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_ + +#include <vppinfra/time_range.h> +#include <vnet/session/application.h> +#include <vnet/session/session.h> +#include <vnet/session/transport.h> +#include <http/http.h> +#include <http/http_buffer.h> + +#define HTTP_FIFO_THRESH (16 << 10) + +static const http_token_t http2_conn_preface = { http_token_lit ( + "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n") }; + +typedef union +{ + struct + { + u32 version : 3; + u32 conn_index : 29; + }; + u32 as_u32; +} http_conn_handle_t; + +STATIC_ASSERT (sizeof (http_conn_handle_t) == sizeof (u32), "must fit in u32"); + +typedef union +{ + struct + { + u32 version : 3; + u32 req_index : 29; + }; + u32 as_u32; +} http_req_handle_t; + +STATIC_ASSERT (sizeof (http_req_handle_t) == sizeof (u32), "must fit in u32"); + +#define foreach_http_conn_state \ + _ (LISTEN, "LISTEN") \ + _ (CONNECTING, "CONNECTING") \ + _ (ESTABLISHED, "ESTABLISHED") \ + _ (TRANSPORT_CLOSED, "TRANSPORT-CLOSED") \ + _ (APP_CLOSED, "APP-CLOSED") \ + _ (CLOSED, "CLOSED") + +typedef enum http_conn_state_ +{ +#define _(s, str) HTTP_CONN_STATE_##s, + foreach_http_conn_state +#undef _ +} http_conn_state_t; + +#define foreach_http_req_state \ + _ (0, IDLE, "idle") \ + _ (1, WAIT_APP_METHOD, "wait app method") \ + _ (2, WAIT_TRANSPORT_REPLY, "wait transport reply") \ + _ (3, TRANSPORT_IO_MORE_DATA, "transport io more data") \ + _ (4, WAIT_TRANSPORT_METHOD, "wait transport method") \ + _ (5, WAIT_APP_REPLY, "wait app reply") \ + _ (6, APP_IO_MORE_DATA, "app io more data") \ + _ (7, TUNNEL, "tunnel") \ + _ (8, UDP_TUNNEL, "udp tunnel") + +typedef enum http_req_state_ +{ +#define _(n, s, str) HTTP_REQ_STATE_##s = n, + foreach_http_req_state +#undef _ + HTTP_REQ_N_STATES +} http_req_state_t; + +typedef enum http_target_form_ +{ + HTTP_TARGET_ORIGIN_FORM, + HTTP_TARGET_ABSOLUTE_FORM, + HTTP_TARGET_AUTHORITY_FORM, + HTTP_TARGET_ASTERISK_FORM +} http_target_form_t; + +typedef enum http_version_ +{ + HTTP_VERSION_1, + HTTP_VERSION_2, + HTTP_VERSION_3, + HTTP_VERSION_NA = 7, +} http_version_t; + +typedef struct http_req_id_ +{ + session_handle_t app_session_handle; + u32 parent_app_wrk_index; + u32 hc_index; +} http_req_id_t; + +STATIC_ASSERT (sizeof (http_req_id_t) <= TRANSPORT_CONN_ID_LEN, + "ctx id must be less than TRANSPORT_CONN_ID_LEN"); + +typedef struct http_req_ +{ + union + { + transport_connection_t connection; + http_req_id_t c_http_req_id; + }; +#define hr_pa_wrk_index c_http_req_id.parent_app_wrk_index +#define hr_pa_session_handle c_http_req_id.app_session_handle +#define hr_hc_index c_http_req_id.hc_index +#define hr_req_handle connection.c_index + + u32 as_fifo_offset; /* for peek */ + + http_req_state_t state; /* state-machine state */ + + http_buffer_t tx_buf; /* message body from app to be sent */ + + /* + * for parsing of incoming message from transport + */ + u32 rx_buf_offset; /* current offset during parsing */ + u32 control_data_len; /* start line + headers + empty line */ + + union + { + u64 to_recv; /* remaining bytes of body to receive from transport */ + u64 to_skip; /* remaining bytes of capsule to skip */ + }; + + u8 is_tunnel; + + /* + * parsed metadata for app + */ + union + { + http_status_code_t status_code; + http_req_method_t method; + }; + + http_target_form_t target_form; + u8 *target; + http_url_scheme_t scheme; + u32 target_authority_offset; + u32 target_authority_len; + u32 target_path_offset; + u32 target_path_len; + u32 target_query_offset; + u32 target_query_len; + + u32 
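+  /* byte range of the raw header section in the incoming message; offsets
+   * stored in the parsed field lines ("headers" below) are relative to
+   * headers_offset */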
headers_offset; + u32 headers_len; + + u32 body_offset; + u64 body_len; + + http_field_line_t *headers; + uword content_len_header_index; + uword connection_header_index; + uword upgrade_header_index; + uword host_header_index; + + http_upgrade_proto_t upgrade_proto; +} http_req_t; + +#define foreach_http_conn_flags \ + _ (HO_DONE, "ho-done") \ + _ (NO_APP_SESSION, "no-app-session") \ + _ (PENDING_TIMER, "pending-timer") \ + _ (IS_SERVER, "is-server") \ + _ (HAS_REQUEST, "has-request") + +typedef enum http_conn_flags_bit_ +{ +#define _(sym, str) HTTP_CONN_F_BIT_##sym, + foreach_http_conn_flags +#undef _ +} http_conn_flags_bit_t; + +typedef enum http_conn_flags_ +{ +#define _(sym, str) HTTP_CONN_F_##sym = 1 << HTTP_CONN_F_BIT_##sym, + foreach_http_conn_flags +#undef _ +} __clib_packed http_conn_flags_t; + +typedef struct http_conn_id_ +{ + union + { + session_handle_t app_session_handle; + u32 parent_app_api_ctx; + }; + session_handle_t tc_session_handle; + u32 parent_app_wrk_index; +} http_conn_id_t; + +STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN, + "ctx id must be less than TRANSPORT_CONN_ID_LEN"); + +typedef struct http_tc_ +{ + union + { + transport_connection_t connection; + http_conn_id_t c_http_conn_id; + }; +#define hc_tc_session_handle c_http_conn_id.tc_session_handle +#define hc_pa_wrk_index c_http_conn_id.parent_app_wrk_index +#define hc_pa_session_handle c_http_conn_id.app_session_handle +#define hc_pa_app_api_ctx c_http_conn_id.parent_app_api_ctx +#define hc_hc_index connection.c_index + + http_version_t version; + http_conn_state_t state; + u32 timer_handle; + u32 timeout; + u8 *app_name; + u8 *host; + http_conn_flags_t flags; + http_udp_tunnel_mode_t udp_tunnel_mode; + + void *opaque; /* version specific data */ +} http_conn_t; + +typedef struct http_worker_ +{ + http_conn_t *conn_pool; +} http_worker_t; + +typedef struct http_main_ +{ + http_worker_t *wrk; + http_conn_t *listener_pool; + http_conn_t *ho_conn_pool; + u32 *postponed_ho_free; + u32 *ho_free_list; + u32 app_index; + + u8 **rx_bufs; + u8 **tx_bufs; + u8 **app_header_lists; + + clib_timebase_t timebase; + + http_status_code_t *sc_by_u16; + /* + * Runtime config + */ + u8 is_init; + + /* + * Config + */ + u64 first_seg_size; + u64 add_seg_size; + u32 fifo_size; +} http_main_t; + +typedef struct http_engine_vft_ +{ + const char *name; + u32 (*hc_index_get_by_req_index) (u32 req_index, + clib_thread_index_t thread_index); + transport_connection_t *(*req_get_connection) ( + u32 req_index, clib_thread_index_t thread_index); + u8 *(*format_req) (u8 *s, va_list *args); + void (*app_tx_callback) (http_conn_t *hc, u32 req_index, + transport_send_params_t *sp); + void (*app_rx_evt_callback) (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index); + void (*app_close_callback) (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index); + void (*app_reset_callback) (http_conn_t *hc, u32 req_index, + clib_thread_index_t thread_index); + int (*transport_connected_callback) (http_conn_t *hc); + void (*transport_rx_callback) (http_conn_t *hc); + void (*transport_close_callback) (http_conn_t *hc); + void (*transport_reset_callback) (http_conn_t *hc); + void (*transport_conn_reschedule_callback) (http_conn_t *hc); + void (*conn_accept_callback) (http_conn_t *hc); /* optional */ + void (*conn_cleanup_callback) (http_conn_t *hc); + void (*enable_callback) (void); /* optional */ + uword (*unformat_cfg_callback) (unformat_input_t *input); /* optional */ +} http_engine_vft_t; + +void 
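+/* Each HTTP version engine fills in this VFT and registers it at init time.
+ * A minimal sketch, with hypothetical names and the remaining mandatory
+ * callbacks omitted, mirroring what the HTTP/2 engine does in http2_init:
+ *
+ *   static const http_engine_vft_t my_engine = {
+ *     .name = "my-http",
+ *     .app_tx_callback = my_app_tx_callback,
+ *     .transport_rx_callback = my_transport_rx_callback,
+ *     ...
+ *   };
+ *   http_register_engine (&my_engine, HTTP_VERSION_2);
+ */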
http_register_engine (const http_engine_vft_t *vft, + http_version_t version); + +/* HTTP state machine result */ +typedef enum http_sm_result_t_ +{ + HTTP_SM_STOP = 0, + HTTP_SM_CONTINUE = 1, + HTTP_SM_ERROR = -1, +} http_sm_result_t; + +typedef http_sm_result_t (*http_sm_handler) (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp); + +#define expect_char(c) \ + if (*p++ != c) \ + { \ + clib_warning ("unexpected character"); \ + return -1; \ + } + +#define parse_int(val, mul) \ + do \ + { \ + if (!isdigit (*p)) \ + { \ + clib_warning ("expected digit"); \ + return -1; \ + } \ + val += mul * (*p++ - '0'); \ + } \ + while (0) + +#define http_field_line_value_token(_fl, _req, _rx_buf) \ + (const char *) ((_rx_buf) + (_req)->headers_offset + (_fl)->value_offset), \ + (_fl)->value_len + +u8 *format_http_req_state (u8 *s, va_list *va); +u8 *format_http_conn_state (u8 *s, va_list *args); +u8 *format_http_time_now (u8 *s, va_list *args); + +/** + * @brief Find the first occurrence of the string in the vector. + * + * @param vec The vector to be scanned. + * @param offset Search offset in the vector. + * @param num Maximum number of characters to be searched if non-zero. + * @param str The string to be searched. + * + * @return @c -1 if the string is not found within the vector; index otherwise. + */ +int http_v_find_index (u8 *vec, u32 offset, u32 num, char *str); + +/** + * Disconnect HTTP connection. + * + * @param hc HTTP connection to disconnect. + */ +void http_disconnect_transport (http_conn_t *hc); + +/** + * Shutdown HTTP connection. + * + * Close TX side of the underlying transport. + * + * @param hc HTTP connection to shutdown. + */ +void http_shutdown_transport (http_conn_t *hc); + +/** + * Convert numeric representation of status code to @c http_status_code_t. + * + * @param status_code Status code within the range of 100 to 599, inclusive. + * + * @return Registered status code or in case of unrecognized status code as + * equivalent to the x00 status code of that class. + */ +http_status_code_t http_sc_by_u16 (u16 status_code); + +/** + * Read header list sent by app. + * + * @param req HTTP request. + * @param msg HTTP msg sent by app. + * + * @return Pointer to the header list. + * + * @note For immediate processing, not for buffering. + */ +u8 *http_get_app_header_list (http_req_t *req, http_msg_t *msg); + +/** + * Get pre-allocated TX buffer/vector. + * + * @param hc HTTP connection. + * + * @return Pointer to the vector. + * + * @note Vector length is reset to zero, use as temporary storage. + */ +u8 *http_get_tx_buf (http_conn_t *hc); + +/** + * Get pre-allocated RX buffer/vector. + * + * @param hc HTTP connection. + * + * @return Pointer to the vector. + * + * @note Vector length is reset to zero, use as temporary storage. + */ +u8 *http_get_rx_buf (http_conn_t *hc); + +/** + * Read request target path sent by app. + * + * @param req HTTP request. + * @param msg HTTP msg sent by app. + * + * @return Pointer to the target path. + * + * @note Valid only with request lifetime. + */ +u8 *http_get_app_target (http_req_t *req, http_msg_t *msg); + +/** + * Initialize per-request HTTP TX buffer. + * + * @param req HTTP request. + * @param msg HTTP msg sent by app. + * + * @note Use for streaming of body sent by app. + */ +void http_req_tx_buffer_init (http_req_t *req, http_msg_t *msg); + +/** + * Change state of given HTTP request. + * + * @param req HTTP request. + * @param state New state. 
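+ *
+ * @note Must not be called on a request already in TUNNEL state (asserted).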
+ */ +always_inline void +http_req_state_change (http_req_t *req, http_req_state_t state) +{ + HTTP_DBG (1, "changing http req state: %U -> %U", format_http_req_state, + req->state, format_http_req_state, state); + ASSERT (req->state != HTTP_REQ_STATE_TUNNEL); + req->state = state; +} + +/** + * Send RX event to the app worker. + * + * @param req HTTP request. + */ +always_inline void +http_app_worker_rx_notify (http_req_t *req) +{ + session_t *as; + app_worker_t *app_wrk; + + as = session_get_from_handle (req->hr_pa_session_handle); + if (!(as->flags & SESSION_F_RX_EVT)) + { + app_wrk = app_worker_get_if_valid (as->app_wrk_index); + if (app_wrk) + { + as->flags |= SESSION_F_RX_EVT; + app_worker_rx_notify (app_wrk, as); + } + } +} + +/** + * Get underlying transport protocol of the HTTP connection. + * + * @param hc HTTP connection. + * + * @return Transport protocol, @ref transport_proto_t. + */ +always_inline transport_proto_t +http_get_transport_proto (http_conn_t *hc) +{ + return session_get_transport_proto ( + session_get_from_handle (hc->hc_tc_session_handle)); +} + +/** + * Read HTTP msg sent by app. + * + * @param req HTTP request. + * @param msg HTTP msq will be stored here. + */ +always_inline void +http_get_app_msg (http_req_t *req, http_msg_t *msg) +{ + session_t *as; + int rv; + + as = session_get_from_handle (req->hr_pa_session_handle); + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (*msg), (u8 *) msg); + ASSERT (rv == sizeof (*msg)); +} + +always_inline void +http_identify_optional_query (http_req_t *req, u8 *rx_buf) +{ + int i; + for (i = req->target_path_offset; + i < (req->target_path_offset + req->target_path_len); i++) + { + if (rx_buf[i] == '?') + { + req->target_query_offset = i + 1; + req->target_query_len = req->target_path_offset + + req->target_path_len - + req->target_query_offset; + req->target_path_len = + req->target_path_len - req->target_query_len - 1; + break; + } + } +} + +always_inline int +http_parse_content_length (http_req_t *req, u8 *rx_buf) +{ + int i; + http_field_line_t *field_line; + u8 *p; + u64 body_len = 0, digit; + + field_line = vec_elt_at_index (req->headers, req->content_len_header_index); + p = rx_buf + req->headers_offset + field_line->value_offset; + for (i = 0; i < field_line->value_len; i++) + { + /* check for digit */ + if (!isdigit (*p)) + { + HTTP_DBG (1, "expected digit"); + return -1; + } + digit = *p - '0'; + u64 new_body_len = body_len * 10 + digit; + /* check for overflow */ + if (new_body_len < body_len) + { + HTTP_DBG (1, "content-length value too big number, overflow"); + return -1; + } + body_len = new_body_len; + p++; + } + + req->body_len = body_len; + + return 0; +} + +always_inline void +http_req_deschedule (http_req_t *req, transport_send_params_t *sp) +{ + transport_connection_deschedule (&req->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; +} + +/* Abstraction of app session fifo operations */ + +always_inline void +http_io_as_add_want_deq_ntf (http_req_t *req) +{ + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); +} + +always_inline u32 +http_io_as_max_write (http_req_t *req) +{ + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + return svm_fifo_max_enqueue_prod (as->rx_fifo); +} + +always_inline u32 +http_io_as_max_read (http_req_t *req) +{ + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + return svm_fifo_max_dequeue_cons (as->tx_fifo); +} + +always_inline void +http_io_as_write 
(http_req_t *req, u8 *data, u32 len) +{ + int n_written; + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + + n_written = svm_fifo_enqueue (as->rx_fifo, len, data); + ASSERT (n_written == len); +} + +always_inline u32 +http_io_as_write_segs (http_req_t *req, const svm_fifo_seg_t segs[], + u32 n_segs) +{ + int n_written; + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + n_written = svm_fifo_enqueue_segments (as->rx_fifo, segs, n_segs, 0); + ASSERT (n_written > 0); + return (u32) n_written; +} + +always_inline u32 +http_io_as_read (http_req_t *req, u8 *buf, u32 len, u8 peek) +{ + int n_read; + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + + if (peek) + { + n_read = svm_fifo_peek (as->tx_fifo, req->as_fifo_offset, len, buf); + ASSERT (n_read > 0); + req->as_fifo_offset += len; + return (u32) n_read; + } + + n_read = svm_fifo_dequeue (as->tx_fifo, len, buf); + ASSERT (n_read == len); + return (u32) n_read; +} + +always_inline void +http_io_as_read_segs (http_req_t *req, svm_fifo_seg_t *segs, u32 *n_segs, + u32 max_bytes) +{ + int n_read; + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + n_read = svm_fifo_segments (as->tx_fifo, 0, segs, n_segs, max_bytes); + ASSERT (n_read > 0); +} + +always_inline void +http_io_as_drain (http_req_t *req, u32 len) +{ + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + svm_fifo_dequeue_drop (as->tx_fifo, len); + req->as_fifo_offset = 0; +} + +always_inline void +http_io_as_drain_all (http_req_t *req) +{ + session_t *as = session_get_from_handle (req->hr_pa_session_handle); + svm_fifo_dequeue_drop_all (as->tx_fifo); + req->as_fifo_offset = 0; +} + +/* Abstraction of transport session fifo operations */ + +always_inline u32 +http_io_ts_fifo_size (http_conn_t *hc, u8 is_rx) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + if (is_rx) + return svm_fifo_size (ts->rx_fifo); + else + return svm_fifo_size (ts->tx_fifo); +} + +always_inline u32 +http_io_ts_max_read (http_conn_t *hc) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + return svm_fifo_max_dequeue_cons (ts->rx_fifo); +} + +always_inline u32 +http_io_ts_max_write (http_conn_t *hc, transport_send_params_t *sp) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + return clib_min (svm_fifo_max_enqueue_prod (ts->tx_fifo), + sp->max_burst_size); +} + +always_inline int +http_io_ts_check_write_thresh (http_conn_t *hc) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + return (svm_fifo_max_enqueue_prod (ts->tx_fifo) < HTTP_FIFO_THRESH); +} + +always_inline void +http_io_ts_add_want_deq_ntf (http_conn_t *hc) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); +} + +always_inline u32 +http_io_ts_read (http_conn_t *hc, u8 *buf, u32 len, u8 peek) +{ + int n_read; + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + + if (peek) + { + n_read = svm_fifo_peek (ts->rx_fifo, 0, len, buf); + ASSERT (n_read > 0); + return (u32) n_read; + } + + n_read = svm_fifo_dequeue (ts->rx_fifo, len, buf); + ASSERT (n_read == len); + return (u32) n_read; +} + +always_inline void +http_io_ts_read_segs (http_conn_t *hc, svm_fifo_seg_t *segs, u32 *n_segs, + u32 max_bytes) +{ + int n_read; + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + n_read = svm_fifo_segments (ts->rx_fifo, 0, segs, n_segs, max_bytes); + 
ASSERT (n_read > 0); +} + +always_inline void +http_io_ts_drain (http_conn_t *hc, u32 len) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + svm_fifo_dequeue_drop (ts->rx_fifo, len); +} + +always_inline void +http_io_ts_drain_all (http_conn_t *hc) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + svm_fifo_dequeue_drop_all (ts->rx_fifo); +} + +always_inline void +http_io_ts_after_read (http_conn_t *hc, u8 clear_evt) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + if (clear_evt) + { + if (svm_fifo_is_empty_cons (ts->rx_fifo)) + svm_fifo_unset_event (ts->rx_fifo); + } + else + { + if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) + session_program_rx_io_evt (hc->hc_tc_session_handle); + } +} + +always_inline void +http_io_ts_write (http_conn_t *hc, u8 *data, u32 len, + transport_send_params_t *sp) +{ + int n_written; + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + + n_written = svm_fifo_enqueue (ts->tx_fifo, len, data); + ASSERT (n_written == len); + if (sp) + { + ASSERT (sp->max_burst_size >= len); + sp->bytes_dequeued += len; + sp->max_burst_size -= len; + } +} + +always_inline u32 +http_io_ts_write_segs (http_conn_t *hc, const svm_fifo_seg_t segs[], + u32 n_segs, transport_send_params_t *sp) +{ + int n_written; + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0); + ASSERT (n_written > 0); + sp->bytes_dequeued += n_written; + sp->max_burst_size -= n_written; + return (u32) n_written; +} + +always_inline void +http_io_ts_after_write (http_conn_t *hc, u8 flush) +{ + session_t *ts = session_get_from_handle (hc->hc_tc_session_handle); + + if (!flush) + { + if (svm_fifo_set_event (ts->tx_fifo)) + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); + } + else + { + if (svm_fifo_set_event (ts->tx_fifo)) + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX_FLUSH); + } +} + +always_inline int +http_conn_accept_request (http_conn_t *hc, http_req_t *req) +{ + session_t *as, *asl; + app_worker_t *app_wrk; + int rv; + + HTTP_DBG (1, "hc [%u]%x req %x", hc->hc_hc_index, hc->c_thread_index, + req->hr_req_handle); + + /* allocate app session and initialize */ + as = session_alloc (hc->c_thread_index); + HTTP_DBG (1, "allocated session 0x%lx", session_handle (as)); + req->c_s_index = as->session_index; + as->app_wrk_index = hc->hc_pa_wrk_index; + as->connection_index = req->hr_req_handle; + as->session_state = SESSION_STATE_ACCEPTING; + asl = listen_session_get_from_handle (hc->hc_pa_session_handle); + as->session_type = asl->session_type; + as->listener_handle = hc->hc_pa_session_handle; + + /* init session fifos and notify app */ + if ((rv = app_worker_init_accepted (as))) + { + HTTP_DBG (1, "failed to allocate fifos"); + req->hr_pa_session_handle = SESSION_INVALID_HANDLE; + session_free (as); + hc->flags |= HTTP_CONN_F_NO_APP_SESSION; + return rv; + } + + req->hr_pa_session_handle = session_handle (as); + req->hr_pa_wrk_index = as->app_wrk_index; + + app_wrk = app_worker_get (as->app_wrk_index); + + if ((rv = app_worker_accept_notify (app_wrk, as))) + { + HTTP_DBG (1, "app accept returned"); + req->hr_pa_session_handle = SESSION_INVALID_HANDLE; + session_free (as); + hc->flags |= HTTP_CONN_F_NO_APP_SESSION; + return rv; + } + + return 0; +} + +always_inline int +http_conn_established (http_conn_t *hc, http_req_t *req) +{ + session_t *as; + app_worker_t *app_wrk; + session_t *ts; + int rv; + + /* allocate app 
session and initialize */ + as = session_alloc (hc->c_thread_index); + HTTP_DBG (1, "allocated session 0x%lx", session_handle (as)); + req->c_s_index = as->session_index; + as->app_wrk_index = hc->hc_pa_wrk_index; + as->connection_index = req->hr_req_handle; + as->session_state = SESSION_STATE_READY; + as->opaque = hc->hc_pa_app_api_ctx; + ts = session_get_from_handle (hc->hc_tc_session_handle); + as->session_type = session_type_from_proto_and_ip ( + TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type)); + + /* init session fifos and notify app */ + app_wrk = app_worker_get_if_valid (hc->hc_pa_wrk_index); + if (!app_wrk) + { + HTTP_DBG (1, "no app worker"); + hc->flags |= HTTP_CONN_F_NO_APP_SESSION; + return -1; + } + + if ((rv = app_worker_init_connected (app_wrk, as))) + { + HTTP_DBG (1, "failed to allocate fifos"); + session_free (as); + hc->flags |= HTTP_CONN_F_NO_APP_SESSION; + return rv; + } + + app_worker_connect_notify (app_wrk, as, 0, hc->hc_pa_app_api_ctx); + + req->hr_pa_session_handle = session_handle (as); + req->hr_pa_wrk_index = as->app_wrk_index; + + return 0; +} + +#endif /* SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_ */ diff --git a/src/plugins/http/http_timer.h b/src/plugins/http/http_timer.h index 43d20d004d8..5ce42032f20 100644 --- a/src/plugins/http/http_timer.h +++ b/src/plugins/http/http_timer.h @@ -16,7 +16,7 @@ #ifndef SRC_PLUGINS_HTTP_HTTP_TIMER_H_ #define SRC_PLUGINS_HTTP_HTTP_TIMER_H_ -#include <http/http.h> +#include <http/http_private.h> #include <vppinfra/tw_timer_2t_1w_2048sl.h> #define HTTP_CONN_TIMEOUT 60 @@ -45,7 +45,8 @@ http_conn_timer_start (http_conn_t *hc) u32 hs_handle; ASSERT (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID); - hs_handle = hc->c_thread_index << 24 | hc->c_c_index; + ASSERT (hc->hc_hc_index <= 0x00FFFFFF); + hs_handle = hc->c_thread_index << 24 | hc->hc_hc_index; clib_spinlock_lock (&twc->tw_lock); hc->timer_handle = @@ -58,7 +59,7 @@ http_conn_timer_stop (http_conn_t *hc) { http_tw_ctx_t *twc = &http_tw_ctx; - hc->pending_timer = 0; + hc->flags &= ~HTTP_CONN_F_PENDING_TIMER; if (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID) return; @@ -79,7 +80,8 @@ http_conn_timer_update (http_conn_t *hc) tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, hc->timeout); else { - hs_handle = hc->c_thread_index << 24 | hc->c_c_index; + ASSERT (hc->hc_hc_index <= 0x00FFFFFF); + hs_handle = hc->c_thread_index << 24 | hc->hc_hc_index; hc->timer_handle = tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout); } diff --git a/src/plugins/http/test/http_test.c b/src/plugins/http/test/http_test.c index bfaa285eb35..f44d3cbd31b 100644 --- a/src/plugins/http/test/http_test.c +++ b/src/plugins/http/test/http_test.c @@ -6,6 +6,8 @@ #include <vpp/app/version.h> #include <http/http.h> #include <http/http_header_names.h> +#include <http/http2/hpack.h> +#include <http/http2/frame.h> #define HTTP_TEST_I(_cond, _comment, _args...) 
\ ({ \ @@ -533,6 +535,771 @@ http_test_http_header_table (vlib_main_t *vm) return 0; } +static int +http_test_parse_request (const char *first_req, uword first_req_len, + const char *second_req, uword second_req_len, + const char *third_req, uword third_req_len, + hpack_dynamic_table_t *dynamic_table) +{ + http2_error_t rv; + u8 *buf = 0; + hpack_request_control_data_t control_data; + http_field_line_t *headers = 0; + u16 parsed_bitmap = 0; + + static http2_error_t (*_hpack_parse_request) ( + u8 * src, u32 src_len, u8 * dst, u32 dst_len, + hpack_request_control_data_t * control_data, http_field_line_t * *headers, + hpack_dynamic_table_t * dynamic_table); + + _hpack_parse_request = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_parse_request"); + + parsed_bitmap = + HPACK_PSEUDO_HEADER_METHOD_PARSED | HPACK_PSEUDO_HEADER_SCHEME_PARSED | + HPACK_PSEUDO_HEADER_PATH_PARSED | HPACK_PSEUDO_HEADER_AUTHORITY_PARSED; + + /* first request */ + vec_validate_init_empty (buf, 254, 0); + memset (&control_data, 0, sizeof (control_data)); + rv = _hpack_parse_request ((u8 *) first_req, (u32) first_req_len, buf, 254, + &control_data, &headers, dynamic_table); + if (rv != HTTP2_ERROR_NO_ERROR || + control_data.parsed_bitmap != parsed_bitmap || + control_data.method != HTTP_REQ_GET || + control_data.scheme != HTTP_URL_SCHEME_HTTP || + control_data.path_len != 1 || control_data.authority_len != 15 || + dynamic_table->used != 57 || vec_len (headers) != 0) + return 1; + if (memcmp (control_data.path, "/", 1)) + return 1; + if (memcmp (control_data.authority, "www.example.com", 15)) + return 1; + vec_free (headers); + vec_free (buf); + + /* second request */ + vec_validate_init_empty (buf, 254, 0); + memset (&control_data, 0, sizeof (control_data)); + rv = _hpack_parse_request ((u8 *) second_req, (u32) second_req_len, buf, 254, + &control_data, &headers, dynamic_table); + if (rv != HTTP2_ERROR_NO_ERROR || + control_data.parsed_bitmap != parsed_bitmap || + control_data.method != HTTP_REQ_GET || + control_data.scheme != HTTP_URL_SCHEME_HTTP || + control_data.path_len != 1 || control_data.authority_len != 15 || + dynamic_table->used != 110 || vec_len (headers) != 1 || + control_data.headers_len != 21) + return 2; + if (memcmp (control_data.path, "/", 1)) + return 2; + if (memcmp (control_data.authority, "www.example.com", 15)) + return 2; + if (headers[0].name_len != 13 || headers[0].value_len != 8) + return 2; + if (memcmp (control_data.headers + headers[0].name_offset, "cache-control", + 13)) + return 2; + if (memcmp (control_data.headers + headers[0].value_offset, "no-cache", 8)) + return 2; + vec_free (headers); + vec_free (buf); + + /* third request */ + vec_validate_init_empty (buf, 254, 0); + memset (&control_data, 0, sizeof (control_data)); + rv = _hpack_parse_request ((u8 *) third_req, (u32) third_req_len, buf, 254, + &control_data, &headers, dynamic_table); + if (rv != HTTP2_ERROR_NO_ERROR || + control_data.parsed_bitmap != parsed_bitmap || + control_data.method != HTTP_REQ_GET || + control_data.scheme != HTTP_URL_SCHEME_HTTPS || + control_data.path_len != 11 || control_data.authority_len != 15 || + dynamic_table->used != 164 || vec_len (headers) != 1 || + control_data.headers_len != 22) + return 3; + if (memcmp (control_data.path, "/index.html", 11)) + return 3; + if (memcmp (control_data.authority, "www.example.com", 15)) + return 3; + if (headers[0].name_len != 10 || headers[0].value_len != 12) + return 3; + if (memcmp (control_data.headers + headers[0].name_offset, "custom-key", 10)) + return 3; + 
if (memcmp (control_data.headers + headers[0].value_offset, "custom-value", + 12)) + return 3; + vec_free (headers); + vec_free (buf); + + return 0; +} + +static int +http_test_hpack (vlib_main_t *vm) +{ + vlib_cli_output (vm, "hpack_decode_int"); + + static uword (*_hpack_decode_int) (u8 * *pos, u8 * end, u8 prefix_len); + _hpack_decode_int = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_int"); + + u8 *pos, *end, *input = 0; + uword value; +#define TEST(i, pl, e) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + end = vec_end (input); \ + value = _hpack_decode_int (&pos, end, (u8) pl); \ + HTTP_TEST ((value == (uword) e && pos == end), \ + "%U with prefix length %u is %llu", format_hex_bytes, input, \ + vec_len (input), (u8) pl, value); \ + vec_free (input); + + TEST ("\x00", 8, 0); + TEST ("\x2A", 8, 42); + TEST ("\x72", 4, 2); + TEST ("\x7F\x00", 7, 127); + TEST ("\x7F\x01", 7, 128); + TEST ("\x9F\x9A\x0A", 5, 1337); + TEST ("\xFF\x80\x01", 7, 255); + /* max value to decode is CLIB_WORD_MAX, CLIB_UWORD_MAX is error */ + TEST ("\x7F\x80\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 7, CLIB_WORD_MAX); + +#undef TEST + +#define N_TEST(i, pl) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + end = vec_end (input); \ + value = _hpack_decode_int (&pos, end, (u8) pl); \ + HTTP_TEST ((value == HPACK_INVALID_INT), \ + "%U with prefix length %u should be invalid", format_hex_bytes, \ + input, vec_len (input), (u8) pl); \ + vec_free (input); + + /* incomplete */ + N_TEST ("\x7F", 7); + N_TEST ("\x0F\xFF\xFF", 4); + /* overflow */ + N_TEST ("\x0F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00", 4); + N_TEST ("\x0F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00", 4); + +#undef N_TEST + + vlib_cli_output (vm, "hpack_encode_int"); + + static u8 *(*_hpack_encode_int) (u8 * dst, uword value, u8 prefix_len); + _hpack_encode_int = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_encode_int"); + + u8 *buf = 0; + u8 *p; + +#define TEST(v, pl, e) \ + vec_validate_init_empty (buf, 15, 0); \ + p = _hpack_encode_int (buf, v, (u8) pl); \ + HTTP_TEST (((p - buf) == (sizeof (e) - 1) && !memcmp (buf, e, p - buf)), \ + "%llu with prefix length %u is encoded as %U", v, (u8) pl, \ + format_hex_bytes, buf, p - buf); \ + vec_free (buf); + + TEST (0, 8, "\x00"); + TEST (2, 4, "\x02"); + TEST (42, 8, "\x2A"); + TEST (127, 7, "\x7F\x00"); + TEST (128, 7, "\x7F\x01"); + TEST (255, 7, "\x7F\x80\x01"); + TEST (1337, 5, "\x1F\x9A\x0A"); + TEST (CLIB_WORD_MAX, 7, "\x7F\x80\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F"); +#undef TEST + + vlib_cli_output (vm, "hpack_decode_string"); + + static http2_error_t (*_hpack_decode_string) (u8 * *src, u8 * end, u8 * *buf, + uword * buf_len); + _hpack_decode_string = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_string"); + + u8 *bp; + uword blen, len; + http2_error_t rv; + +#define TEST(i, e) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + vec_validate_init_empty (buf, 63, 0); \ + bp = buf; \ + blen = vec_len (buf); \ + rv = _hpack_decode_string (&pos, vec_end (input), &bp, &blen); \ + len = vec_len (buf) - blen; \ + HTTP_TEST ((len == strlen (e) && !memcmp (buf, e, len) && \ + pos == vec_end (input) && bp == buf + len && \ + rv == HTTP2_ERROR_NO_ERROR), \ + "%U is decoded as %U", format_hex_bytes, input, vec_len (input), \ + format_http_bytes, buf, len); \ + vec_free (input); \ + vec_free (buf); + + /* raw coding */ + TEST 
("\x07private", "private"); + /* Huffman coding */ + TEST ("\x85\xAE\xC3\x77\x1A\x4B", "private"); + TEST ("\x86\xA8\xEB\x10\x64\x9C\xBF", "no-cache"); + TEST ("\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF", + "www.example.com"); + TEST ("\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66" + "\xE0\x82\xA6\x2D\x1B\xFF", + "Mon, 21 Oct 2013 20:13:21 GMT") + TEST ("\xAD\x94\xE7\x82\x1D\xD7\xF2\xE6\xC7\xB3\x35\xDF\xDF\xCD\x5B\x39\x60" + "\xD5\xAF\x27\x08\x7F\x36\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31" + "\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50\x07", + "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"); + TEST ("\x8A\x9C\xB4\x50\x75\x3C\x1E\xCA\x24\xFE\x3F", "hello world!") + TEST ("\x8A\xFF\xFE\x03\x18\xC6\x31\x8C\x63\x18\xC7", "\\aaaaaaaaaaaa"); + TEST ("\x8C\x1F\xFF\xF0\x18\xC6\x31\x80\x03\x18\xC6\x31\x8F", + "a\\aaaaa00aaaaaaa"); + TEST ("\x87\x1F\xFF\xF0\xFF\xFE\x11\xFF", "a\\\\b"); + TEST ("\x84\x1F\xF9\xFE\xA3", "a?'b"); + TEST ("\x84\x1F\xFA\xFF\x23", "a'?b"); + TEST ("\x8D\x1F\xFF\xFF\xFF\x0C\x63\x18\xC0\x01\x8C\x63\x18\xC7", + "\x61\xF9\x61\x61\x61\x61\x61\x30\x30\x61\x61\x61\x61\x61\x61\x61") +#undef TEST + +#define N_TEST(i, e) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + vec_validate_init_empty (buf, 15, 0); \ + bp = buf; \ + blen = vec_len (buf); \ + rv = _hpack_decode_string (&pos, vec_end (input), &bp, &blen); \ + HTTP_TEST ((rv == e), "%U should be invalid (%U)", format_hex_bytes, input, \ + vec_len (input), format_http2_error, rv); \ + vec_free (input); \ + vec_free (buf); + + /* incomplete */ + N_TEST ("\x87", HTTP2_ERROR_COMPRESSION_ERROR); + N_TEST ("\x07priv", HTTP2_ERROR_COMPRESSION_ERROR); + /* invalid length */ + N_TEST ("\x7Fprivate", HTTP2_ERROR_COMPRESSION_ERROR); + /* invalid EOF */ + N_TEST ("\x81\x8C", HTTP2_ERROR_COMPRESSION_ERROR); + /* not enough space for decoding */ + N_TEST ( + "\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66" + "\xE0\x82\xA6\x2D\x1B\xFF", + HTTP2_ERROR_INTERNAL_ERROR); +#undef N_TEST + + vlib_cli_output (vm, "hpack_encode_string"); + + static u8 *(*_hpack_encode_string) (u8 * dst, const u8 *value, + uword value_len); + _hpack_encode_string = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_encode_string"); + +#define TEST(i, e) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + vec_validate_init_empty (buf, 63, 0); \ + p = _hpack_encode_string (buf, input, vec_len (input)); \ + HTTP_TEST (((p - buf) == (sizeof (e) - 1) && !memcmp (buf, e, p - buf)), \ + "%v is encoded as %U", input, format_hex_bytes, buf, p - buf); \ + vec_free (input); \ + vec_free (buf); + + /* Huffman coding */ + TEST ("private", "\x85\xAE\xC3\x77\x1A\x4B"); + TEST ("no-cache", "\x86\xA8\xEB\x10\x64\x9C\xBF"); + TEST ("www.example.com", + "\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF"); + TEST ("Mon, 21 Oct 2013 20:13:21 GMT", + "\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66" + "\xE0\x82\xA6\x2D\x1B\xFF") + TEST ("foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1", + "\xAD\x94\xE7\x82\x1D\xD7\xF2\xE6\xC7\xB3\x35\xDF\xDF\xCD\x5B\x39\x60" + "\xD5\xAF\x27\x08\x7F\x36\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31" + "\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50\x07"); + TEST ("hello world!", "\x8A\x9C\xB4\x50\x75\x3C\x1E\xCA\x24\xFE\x3F") + TEST ("\\aaaaaaaaaaaa", "\x8A\xFF\xFE\x03\x18\xC6\x31\x8C\x63\x18\xC7"); + /* raw coding */ + TEST ("[XZ]", "\x4[XZ]"); +#undef 
TEST + + vlib_cli_output (vm, "hpack_decode_header"); + + static http2_error_t (*_hpack_decode_header) ( + u8 * *src, u8 * end, u8 * *buf, uword * buf_len, u32 * name_len, + u32 * value_len, hpack_dynamic_table_t * dt); + + _hpack_decode_header = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_header"); + + static void (*_hpack_dynamic_table_init) (hpack_dynamic_table_t * table, + u32 max_size); + + _hpack_dynamic_table_init = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_dynamic_table_init"); + + static void (*_hpack_dynamic_table_free) (hpack_dynamic_table_t * table); + + _hpack_dynamic_table_free = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_dynamic_table_free"); + + u32 name_len, value_len; + hpack_dynamic_table_t table; + + _hpack_dynamic_table_init (&table, 128); + +#define TEST(i, e_name, e_value, dt_size) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + vec_validate_init_empty (buf, 63, 0); \ + bp = buf; \ + blen = vec_len (buf); \ + rv = _hpack_decode_header (&pos, vec_end (input), &bp, &blen, &name_len, \ + &value_len, &table); \ + len = vec_len (buf) - blen; \ + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && table.used == dt_size && \ + name_len == strlen (e_name) && value_len == strlen (e_value) && \ + !memcmp (buf, e_name, name_len) && \ + !memcmp (buf + name_len, e_value, value_len) && \ + vec_len (buf) == (blen + name_len + value_len) && \ + pos == vec_end (input) && bp == buf + name_len + value_len), \ + "%U is decoded as '%U: %U'", format_hex_bytes, input, \ + vec_len (input), format_http_bytes, buf, name_len, \ + format_http_bytes, buf + name_len, value_len); \ + vec_free (input); \ + vec_free (buf); + + /* C.2.1. Literal Header Field with Indexing */ + TEST ("\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B\x65\x79\x0D\x63\x75\x73\x74" + "\x6F\x6D\x2D\x68\x65\x61\x64\x65\x72", + "custom-key", "custom-header", 55); + /* C.2.2. Literal Header Field without Indexing */ + TEST ("\x04\x0C\x2F\x73\x61\x6D\x70\x6C\x65\x2F\x70\x61\x74\x68", ":path", + "/sample/path", 55); + /* C.2.3. Literal Header Field Never Indexed */ + TEST ("\x10\x08\x70\x61\x73\x73\x77\x6F\x72\x64\x06\x73\x65\x63\x72\x65\x74", + "password", "secret", 55); + /* C.2.4. 
Indexed Header Field */ + TEST ("\x82", ":method", "GET", 55); + TEST ("\xBE", "custom-key", "custom-header", 55); + /* Literal Header Field with Indexing - enough space in dynamic table */ + TEST ("\x41\x0F\x77\x77\x77\x2E\x65\x78\x61\x6D\x70\x6C\x65\x2E\x63\x6F\x6D", + ":authority", "www.example.com", 112); + /* verification */ + TEST ("\xBE", ":authority", "www.example.com", 112); + TEST ("\xBF", "custom-key", "custom-header", 112); + /* Literal Header Field with Indexing - eviction */ + TEST ("\x58\x08\x6E\x6F\x2D\x63\x61\x63\x68\x65", "cache-control", + "no-cache", 110); + /* verification */ + TEST ("\xBE", "cache-control", "no-cache", 110); + TEST ("\xBF", ":authority", "www.example.com", 110); + /* Literal Header Field with Indexing - eviction */ + TEST ("\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B\x65\x79\x0D\x63\x75\x73\x74" + "\x6F\x6D\x2D\x68\x65\x61\x64\x65\x72", + "custom-key", "custom-header", 108); + /* verification */ + TEST ("\xBE", "custom-key", "custom-header", 108); + TEST ("\xBF", "cache-control", "no-cache", 108); + /* Literal Header Field with Indexing - eviction */ + TEST ("\x41\x0F\x77\x77\x77\x2E\x65\x78\x61\x6D\x70\x6C\x65\x2E\x63\x6F\x6D", + ":authority", "www.example.com", 112); + /* verification */ + TEST ("\xBE", ":authority", "www.example.com", 112); + TEST ("\xBF", "custom-key", "custom-header", 112); + /* Literal Header Field with Indexing - eviction with reference */ + TEST ("\x7F\x00\x0C\x63\x75\x73\x74\x6F\x6D\x2D\x76\x61\x6C\x75\x65", + "custom-key", "custom-value", 111); + /* verification */ + TEST ("\xBE", "custom-key", "custom-value", 111); + TEST ("\xBF", ":authority", "www.example.com", 111); +#undef TEST + + _hpack_dynamic_table_free (&table); + + vlib_cli_output (vm, "hpack_parse_request"); + + int result; + /* C.3. Request Examples without Huffman Coding */ + _hpack_dynamic_table_init (&table, HPACK_DEFAULT_HEADER_TABLE_SIZE); + result = http_test_parse_request ( + http_token_lit ("\x82\x86\x84\x41\x0F\x77\x77\x77\x2E\x65\x78\x61" + "\x6D\x70\x6C\x65\x2E\x63\x6F\x6D"), + http_token_lit ( + "\x82\x86\x84\xBE\x58\x08\x6E\x6F\x2D\x63\x61\x63\x68\x65"), + http_token_lit ( + "\x82\x87\x85\xBF\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B" + "\x65\x79\x0C\x63\x75\x73\x74\x6F\x6D\x2D\x76\x61\x6C\x75\x65"), + &table); + _hpack_dynamic_table_free (&table); + HTTP_TEST ((result == 0), "request without Huffman Coding (result=%d)", + result); + /* C.4. 
Request Examples with Huffman Coding */ + _hpack_dynamic_table_init (&table, HPACK_DEFAULT_HEADER_TABLE_SIZE); + result = http_test_parse_request ( + http_token_lit ( + "\x82\x86\x84\x41\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF"), + http_token_lit ("\x82\x86\x84\xBE\x58\x86\xA8\xEB\x10\x64\x9C\xBF"), + http_token_lit ("\x82\x87\x85\xBF\x40\x88\x25\xA8\x49\xE9\x5B\xA9\x7D\x7F" + "\x89\x25\xA8\x49\xE9\x5B\xB8\xE8\xB4\xBF"), + &table); + _hpack_dynamic_table_free (&table); + HTTP_TEST ((result == 0), "request with Huffman Coding (result=%d)", result); + + vlib_cli_output (vm, "hpack_serialize_response"); + + hpack_response_control_data_t resp_cd; + u8 *server_name; + u8 *date; + + static void (*_hpack_serialize_response) ( + u8 * app_headers, u32 app_headers_len, + hpack_response_control_data_t * control_data, u8 * *dst); + + _hpack_serialize_response = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_serialize_response"); + + server_name = format (0, "http unit tests"); + date = format (0, "Mon, 21 Oct 2013 20:13:21 GMT"); + + vec_validate (buf, 127); + vec_reset_length (buf); + resp_cd.sc = HTTP_STATUS_GATEWAY_TIMEOUT; + resp_cd.content_len = HPACK_ENCODER_SKIP_CONTENT_LEN; + resp_cd.server_name = server_name; + resp_cd.server_name_len = vec_len (server_name); + resp_cd.date = date; + resp_cd.date_len = vec_len (date); + u8 expected1[] = + "\x08\x03\x35\x30\x34\x0F\x27\x8B\x9D\x29\xAD\x4B\x6A\x32\x54\x49\x50\x94" + "\x7F\x0F\x12\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B" + "\x81\x66\xE0\x82\xA6\x2D\x1B\xFF"; + _hpack_serialize_response (0, 0, &resp_cd, &buf); + HTTP_TEST ((vec_len (buf) == (sizeof (expected1) - 1) && + !memcmp (buf, expected1, sizeof (expected1) - 1)), + "response encoded as %U", format_hex_bytes, buf, vec_len (buf)); + vec_reset_length (buf); + + resp_cd.sc = HTTP_STATUS_OK; + resp_cd.content_len = 1024; + http_headers_ctx_t headers; + u8 *headers_buf = 0; + vec_validate (headers_buf, 127); + http_init_headers_ctx (&headers, headers_buf, vec_len (headers_buf)); + http_add_header (&headers, HTTP_HEADER_CONTENT_TYPE, + http_token_lit ("text/plain")); + http_add_header (&headers, HTTP_HEADER_CACHE_STATUS, + http_token_lit ("ExampleCache; hit")); + http_add_custom_header (&headers, http_token_lit ("sandwich"), + http_token_lit ("spam")); + u8 expected2[] = + "\x88\x0F\x27\x8B\x9D\x29\xAD\x4B\x6A\x32\x54\x49\x50\x94\x7F\x0F\x12\x96" + "\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66\xE0\x82" + "\xA6\x2D\x1B\xFF\x0F\x0D\x83\x08\x04\xD7\x0F\x10\x87\x49\x7C\xA5\x8A\xE8" + "\x19\xAA\x00\x88\x20\xC9\x39\x56\x42\x46\x9B\x51\x8D\xC1\xE4\x74\xD7\x41" + "\x6F\x0C\x93\x97\xED\x49\xCC\x9F\x00\x86\x40\xEA\x93\xC1\x89\x3F\x83\x45" + "\x63\xA7"; + _hpack_serialize_response (headers_buf, headers.tail_offset, &resp_cd, &buf); + HTTP_TEST ((vec_len (buf) == (sizeof (expected2) - 1) && + !memcmp (buf, expected2, sizeof (expected2) - 1)), + "response encoded as %U", format_hex_bytes, buf, vec_len (buf)); + vec_free (buf); + vec_free (headers_buf); + vec_free (server_name); + vec_free (date); + + return 0; +} + +static int +http_test_h2_frame (vlib_main_t *vm) +{ + static void (*_http2_frame_header_read) (u8 * src, + http2_frame_header_t * fh); + + _http2_frame_header_read = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_header_read"); + + vlib_cli_output (vm, "http2_frame_read_settings"); + + static http2_error_t (*_http2_frame_read_settings) ( + http2_conn_settings_t * settings, u8 * payload, u32 payload_len); + + 
_http2_frame_read_settings = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_settings"); + + http2_error_t rv; + http2_frame_header_t fh = { 0 }; + http2_conn_settings_t conn_settings = http2_default_conn_settings; + + u8 settings[] = { 0x0, 0x0, 0x12, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x3, 0x0, 0x0, 0x0, 0x64, 0x0, 0x4, 0x40, + 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0 }; + _http2_frame_header_read (settings, &fh); + HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_SETTINGS && + fh.stream_id == 0 && fh.length == 18), + "frame identified as SETTINGS"); + + rv = _http2_frame_read_settings ( + &conn_settings, settings + HTTP2_FRAME_HEADER_SIZE, fh.length); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && + conn_settings.max_concurrent_streams == 100 && + conn_settings.initial_window_size == 1073741824 && + conn_settings.enable_push == 0), + "SETTINGS frame payload parsed") + + u8 settings_ack[] = { 0x0, 0x0, 0x0, 0x4, 0x1, 0x0, 0x0, 0x0, 0x0 }; + _http2_frame_header_read (settings_ack, &fh); + HTTP_TEST ((fh.flags == HTTP2_FRAME_FLAG_ACK && + fh.type == HTTP2_FRAME_TYPE_SETTINGS && fh.stream_id == 0 && + fh.length == 0), + "frame identified as SETTINGS ACK"); + + vlib_cli_output (vm, "http2_frame_write_settings_ack"); + + static void (*_http2_frame_write_settings_ack) (u8 * *dst); + + _http2_frame_write_settings_ack = vlib_get_plugin_symbol ( + "http_plugin.so", "http2_frame_write_settings_ack"); + + u8 *buf = 0; + + _http2_frame_write_settings_ack (&buf); + HTTP_TEST ((vec_len (buf) == sizeof (settings_ack)) && + !memcmp (buf, settings_ack, sizeof (settings_ack)), + "SETTINGS ACK frame written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_write_settings"); + + static void (*_http2_frame_write_settings) ( + http2_settings_entry_t * settings, u8 * *dst); + + _http2_frame_write_settings = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_settings"); + + http2_settings_entry_t *settings_list = 0; + vec_validate (settings_list, 2); + settings_list[0].identifier = HTTP2_SETTINGS_MAX_CONCURRENT_STREAMS; + settings_list[0].value = 100; + settings_list[1].identifier = HTTP2_SETTINGS_INITIAL_WINDOW_SIZE; + settings_list[1].value = 1073741824; + settings_list[2].identifier = HTTP2_SETTINGS_ENABLE_PUSH; + settings_list[2].value = 0; + + _http2_frame_write_settings (settings_list, &buf); + HTTP_TEST ((vec_len (buf) == sizeof (settings) && + !memcmp (buf, settings, sizeof (settings))), + "SETTINGS frame written"); + vec_free (settings_list); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_window_update"); + + static http2_error_t (*_http2_frame_read_window_update) ( + u32 * increment, u8 * payload, u32 payload_len); + + _http2_frame_read_window_update = vlib_get_plugin_symbol ( + "http_plugin.so", "http2_frame_read_window_update"); + + u32 win_increment; + u8 win_update[] = { 0x0, 0x0, 0x4, 0x8, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x3f, 0xff, 0x0, 0x1 }; + _http2_frame_header_read (win_update, &fh); + HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_WINDOW_UPDATE && + fh.stream_id == 0 && fh.length == 4), + "frame identified as WINDOW_UPDATE"); + + rv = _http2_frame_read_window_update ( + &win_increment, win_update + HTTP2_FRAME_HEADER_SIZE, fh.length); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && win_increment == 1073676289), + "WINDOW_UPDATE frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_window_update"); + + static void (*_http2_frame_write_window_update) (u32 increment, + u32 stream_id, u8 * *dst); + + 
_http2_frame_write_window_update = vlib_get_plugin_symbol ( + "http_plugin.so", "http2_frame_write_window_update"); + + _http2_frame_write_window_update (1073676289, 0, &buf); + HTTP_TEST ((vec_len (buf) == sizeof (win_update) && + !memcmp (buf, win_update, sizeof (win_update))), + "WINDOW_UPDATE frame written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_rst_stream"); + + static http2_error_t (*_http2_frame_read_rst_stream) ( + u32 * error_code, u8 * payload, u32 payload_len); + + _http2_frame_read_rst_stream = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_rst_stream"); + + u32 error_code; + u8 rst_stream[] = { 0x0, 0x0, 0x4, 0x3, 0x0, 0x0, 0x0, + 0x0, 0x5, 0x0, 0x0, 0x0, 0x01 }; + _http2_frame_header_read (rst_stream, &fh); + HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_RST_STREAM && + fh.stream_id == 5 && fh.length == 4), + "frame identified as RST_STREAM"); + + rv = _http2_frame_read_rst_stream ( + &error_code, rst_stream + HTTP2_FRAME_HEADER_SIZE, fh.length); + HTTP_TEST ( + (rv == HTTP2_ERROR_NO_ERROR && error_code == HTTP2_ERROR_PROTOCOL_ERROR), + "RST_STREAM frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_rst_stream"); + + static void (*_http2_frame_write_rst_stream) (u32 increment, u32 stream_id, + u8 * *dst); + + _http2_frame_write_rst_stream = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_rst_stream"); + + _http2_frame_write_rst_stream (HTTP2_ERROR_PROTOCOL_ERROR, 5, &buf); + HTTP_TEST ((vec_len (buf) == sizeof (rst_stream) && + !memcmp (buf, rst_stream, sizeof (rst_stream))), + "RST_STREAM frame written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_goaway"); + + static http2_error_t (*_http2_frame_read_goaway) ( + u32 * error_code, u32 * last_stream_id, u8 * payload, u32 payload_len); + + _http2_frame_read_goaway = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_goaway"); + + u32 last_stream_id; + u8 goaway[] = { 0x0, 0x0, 0x8, 0x7, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2 }; + + _http2_frame_header_read (goaway, &fh); + HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_GOAWAY && + fh.stream_id == 0 && fh.length == 8), + "frame identified as GOAWAY"); + + rv = _http2_frame_read_goaway (&error_code, &last_stream_id, + goaway + HTTP2_FRAME_HEADER_SIZE, fh.length); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && + error_code == HTTP2_ERROR_INTERNAL_ERROR && last_stream_id == 5), + "GOAWAY frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_goaway"); + + static void (*_http2_frame_write_goaway) (http2_error_t error_code, + u32 last_stream_id, u8 * *dst); + + _http2_frame_write_goaway = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_goaway"); + + _http2_frame_write_goaway (HTTP2_ERROR_INTERNAL_ERROR, 5, &buf); + HTTP_TEST ((vec_len (buf) == sizeof (goaway) && + !memcmp (buf, goaway, sizeof (goaway))), + "GOAWAY frame written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_headers"); + + static http2_error_t (*_http2_frame_read_headers) ( + u8 * *headers, u32 * headers_len, u8 * payload, u32 payload_len, u8 flags); + + _http2_frame_read_headers = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_headers"); + + u8 *h; + u32 h_len; + u8 headers[] = { 0x0, 0x0, 0x28, 0x1, 0x5, 0x0, 0x0, 0x0, 0x3, 0x3f, + 0xe1, 0x1f, 0x82, 0x4, 0x88, 0x62, 0x7b, 0x69, 0x1d, 0x48, + 0x5d, 0x3e, 0x53, 0x86, 0x41, 0x88, 0xaa, 0x69, 0xd2, 0x9a, + 0xc4, 0xb9, 0xec, 0x9b, 0x7a, 0x88, 0x25, 0xb6, 0x50, 0xc3, + 
0xab, 0xb8, 0x15, 0xc1, 0x53, 0x3, 0x2a, 0x2f, 0x2a }; + + _http2_frame_header_read (headers, &fh); + HTTP_TEST ((fh.flags == + (HTTP2_FRAME_FLAG_END_HEADERS | HTTP2_FRAME_FLAG_END_STREAM) && + fh.type == HTTP2_FRAME_TYPE_HEADERS && fh.stream_id == 3 && + fh.length == 40), + "frame identified as HEADERS"); + + rv = _http2_frame_read_headers ( + &h, &h_len, headers + HTTP2_FRAME_HEADER_SIZE, fh.length, fh.flags); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && h_len == 40 && + *h == headers[HTTP2_FRAME_HEADER_SIZE]), + "HEADERS frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_headers_header"); + + static void (*_http2_frame_write_headers_header) ( + u32 headers_len, u32 stream_id, u8 flags, u8 * dst); + + _http2_frame_write_headers_header = vlib_get_plugin_symbol ( + "http_plugin.so", "http2_frame_write_headers_header"); + + u8 *p = http2_frame_header_alloc (&buf); + _http2_frame_write_headers_header ( + 40, 3, HTTP2_FRAME_FLAG_END_HEADERS | HTTP2_FRAME_FLAG_END_STREAM, p); + HTTP_TEST ((vec_len (buf) == HTTP2_FRAME_HEADER_SIZE && + !memcmp (buf, headers, HTTP2_FRAME_HEADER_SIZE)), + "HEADERS frame header written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_data"); + + static http2_error_t (*_http2_frame_read_data) ( + u8 * *data, u32 * data_len, u8 * payload, u32 payload_len, u8 flags); + + _http2_frame_read_data = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_data"); + + u8 *d; + u32 d_len; + u8 data[] = { 0x0, 0x0, 0x9, 0x0, 0x1, 0x0, 0x0, 0x0, 0x3, + 0x6e, 0x6f, 0x74, 0x20, 0x66, 0x6f, 0x75, 0x6e, 0x64 }; + + _http2_frame_header_read (data, &fh); + HTTP_TEST ((fh.flags == HTTP2_FRAME_FLAG_END_STREAM && + fh.type == HTTP2_FRAME_TYPE_DATA && fh.stream_id == 3 && + fh.length == 9), + "frame identified as DATA"); + + rv = _http2_frame_read_data (&d, &d_len, data + HTTP2_FRAME_HEADER_SIZE, + fh.length, fh.flags); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && d_len == 9 && + *d == data[HTTP2_FRAME_HEADER_SIZE]), + "DATA frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_data_header"); + + static void (*_http2_frame_write_data_header) ( + u32 headers_len, u32 stream_id, u8 flags, u8 * dst); + + _http2_frame_write_data_header = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_data_header"); + + p = http2_frame_header_alloc (&buf); + _http2_frame_write_data_header (9, 3, HTTP2_FRAME_FLAG_END_STREAM, p); + HTTP_TEST ((vec_len (buf) == HTTP2_FRAME_HEADER_SIZE && + !memcmp (buf, data, HTTP2_FRAME_HEADER_SIZE)), + "DATA frame header written"); + vec_free (buf); + + return 0; +} + static clib_error_t * test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) @@ -550,6 +1317,10 @@ test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, res = http_test_http_token_is_case (vm); else if (unformat (input, "header-table")) res = http_test_http_header_table (vm); + else if (unformat (input, "hpack")) + res = http_test_hpack (vm); + else if (unformat (input, "h2-frame")) + res = http_test_h2_frame (vm); else if (unformat (input, "all")) { if ((res = http_test_parse_authority (vm))) @@ -562,6 +1333,10 @@ test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; if ((res = http_test_http_header_table (vm))) goto done; + if ((res = http_test_hpack (vm))) + goto done; + if ((res = http_test_h2_frame (vm))) + goto done; } else break; diff --git a/src/plugins/http_static/http_cache.c b/src/plugins/http_static/http_cache.c index 2e63e335d47..61f1f50ea3b 100644 --- 
a/src/plugins/http_static/http_cache.c +++ b/src/plugins/http_static/http_cache.c @@ -400,6 +400,14 @@ hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level) hc->first_index = hc->last_index = ~0; } +void +hss_cache_free (hss_cache_t *hc) +{ + hss_cache_clear (hc); + BV (clib_bihash_free) (&hc->name_to_data); + clib_spinlock_free (&hc->cache_lock); +} + /** \brief format a file cache entry */ static u8 * diff --git a/src/plugins/http_static/http_cache.h b/src/plugins/http_static/http_cache.h index 21f71a924d5..c1e363443ee 100644 --- a/src/plugins/http_static/http_cache.h +++ b/src/plugins/http_static/http_cache.h @@ -67,6 +67,7 @@ u32 hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, void hss_cache_detach_entry (hss_cache_t *hc, u32 ce_index); u32 hss_cache_clear (hss_cache_t *hc); void hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level); +void hss_cache_free (hss_cache_t *hc); u8 *format_hss_cache (u8 *s, va_list *args); diff --git a/src/plugins/http_static/http_static.api b/src/plugins/http_static/http_static.api index bd0cebc45d2..5c1eaf7b9d2 100644 --- a/src/plugins/http_static/http_static.api +++ b/src/plugins/http_static/http_static.api @@ -3,41 +3,7 @@ This file defines static http server control-plane API messages */ -option version = "2.4.0"; - -/** \brief Configure and enable the static http server - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param fifo_size - size (in bytes) of the session FIFOs - @param cache_size_limit - size (in bytes) of the in-memory file data cache - @param max_age - how long a response is considered fresh (in seconds) - @param prealloc_fifos - number of preallocated fifos (usually 0) - @param private_segment_size - fifo segment size (usually 0) - @param www_root - html root path - @param uri - bind URI, defaults to "tcp://0.0.0.0/80" -*/ - -autoreply define http_static_enable_v2 { - option deprecated; - - /* Client identifier, set from api_main.my_client_index */ - u32 client_index; - - /* Arbitrary context, so client can match reply to request */ - u32 context; - /* Typical options */ - u32 fifo_size; - u32 cache_size_limit; - u32 max_age [default=600]; - /* Unusual options */ - u32 prealloc_fifos; - u32 private_segment_size; - - /* Root of the html path */ - string www_root[256]; - /* The bind URI */ - string uri[256]; -}; +option version = "2.5.0"; /** \brief Configure and enable the static http server @param client_index - opaque cookie to identify the sender @@ -45,6 +11,7 @@ autoreply define http_static_enable_v2 { @param fifo_size - size (in bytes) of the session FIFOs @param cache_size_limit - size (in bytes) of the in-memory file data cache @param max_age - how long a response is considered fresh (in seconds) + @param max_body_size - maximum size of a request body (in bytes) @param keepalive_timeout - timeout during which client connection will stay open (in seconds) @param prealloc_fifos - number of preallocated fifos (usually 0) @param private_segment_size - fifo segment size (usually 0) @@ -52,7 +19,7 @@ autoreply define http_static_enable_v2 { @param uri - bind URI, defaults to "tcp://0.0.0.0/80" */ -autoreply define http_static_enable_v3 { +autoreply define http_static_enable_v4 { option deprecated; /* Client identifier, set from api_main.my_client_index */ @@ -65,6 +32,7 @@ autoreply define http_static_enable_v3 { u32 cache_size_limit; u32 max_age [default=600]; u32 keepalive_timeout [default=60]; + u64 max_body_size 
[default=8000]; /* Unusual options */ u32 prealloc_fifos; u32 private_segment_size; @@ -76,12 +44,14 @@ autoreply define http_static_enable_v3 { }; /** \brief Configure and enable the static http server + @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param fifo_size - size (in bytes) of the session FIFOs @param cache_size_limit - size (in bytes) of the in-memory file data cache @param max_age - how long a response is considered fresh (in seconds) @param max_body_size - maximum size of a request body (in bytes) + @param rx_buff_thresh - maximum size of a large memory allocation (in bytes) @param keepalive_timeout - timeout during which client connection will stay open (in seconds) @param prealloc_fifos - number of preallocated fifos (usually 0) @param private_segment_size - fifo segment size (usually 0) @@ -89,7 +59,7 @@ autoreply define http_static_enable_v3 { @param uri - bind URI, defaults to "tcp://0.0.0.0/80" */ -autoreply define http_static_enable_v4 { +autoreply define http_static_enable_v5 { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -100,7 +70,8 @@ autoreply define http_static_enable_v4 { u32 cache_size_limit; u32 max_age [default=600]; u32 keepalive_timeout [default=60]; - u64 max_body_size [default=8000]; + u64 max_body_size [default=8192]; + u32 rx_buff_thresh [default=1048576]; /* Unusual options */ u32 prealloc_fifos; u32 private_segment_size; diff --git a/src/plugins/http_static/http_static.c b/src/plugins/http_static/http_static.c index 7a12f37b8d3..85b044fb860 100644 --- a/src/plugins/http_static/http_static.c +++ b/src/plugins/http_static/http_static.c @@ -67,22 +67,25 @@ hss_register_url_handler (hss_url_handler_fn fp, const char *url, static int hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, u32 private_segment_size, u8 *www_root, u8 *uri, u32 max_age, - u32 keepalive_timeout, u64 max_body_size) + u32 keepalive_timeout, u64 max_body_size, u32 rx_buff_thresh) { hss_main_t *hsm = &hss_main; int rv; hsm->fifo_size = fifo_size; - hsm->cache_size = cache_limit; hsm->prealloc_fifos = prealloc_fifos; hsm->private_segment_size = private_segment_size; - hsm->www_root = format (0, "%s%c", www_root, 0); - hsm->uri = format (0, "%s%c", uri, 0); - hsm->max_age = max_age; - hsm->max_body_size = max_body_size; - hsm->keepalive_timeout = keepalive_timeout; - - if (vec_len (hsm->www_root) < 2) + if (uri && parse_uri ((char *) uri, &hsm->default_listener.sep)) + return VNET_API_ERROR_INVALID_VALUE; + hsm->default_listener.www_root = format (0, "%s%c", www_root, 0); + hsm->default_listener.cache_size = cache_limit; + hsm->default_listener.max_age = max_age; + hsm->default_listener.max_body_size = max_body_size; + hsm->default_listener.rx_buff_thresh = rx_buff_thresh; + hsm->default_listener.keepalive_timeout = keepalive_timeout; + hsm->have_default_listener = 1; + + if (vec_len (hsm->default_listener.www_root) < 2) return VNET_API_ERROR_INVALID_VALUE; if (hsm->app_index != ~0) @@ -99,8 +102,7 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, case 0: break; default: - vec_free (hsm->www_root); - vec_free (hsm->uri); + vec_free (hsm->default_listener.www_root); return VNET_API_ERROR_INIT_FAILED; } return 0; @@ -108,49 +110,29 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, /* API message handler */ static void -vl_api_http_static_enable_v2_t_handler (vl_api_http_static_enable_v2_t *mp) -{ - 
vl_api_http_static_enable_v2_reply_t *rmp; - hss_main_t *hsm = &hss_main; - int rv; - - mp->uri[ARRAY_LEN (mp->uri) - 1] = 0; - mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0; - - rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), - ntohl (mp->prealloc_fifos), - ntohl (mp->private_segment_size), mp->www_root, mp->uri, - ntohl (mp->max_age), HSS_DEFAULT_KEEPALIVE_TIMEOUT, - HSS_DEFAULT_MAX_BODY_SIZE); - - REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V2_REPLY); -} - -/* API message handler */ -static void -vl_api_http_static_enable_v3_t_handler (vl_api_http_static_enable_v3_t *mp) +vl_api_http_static_enable_v4_t_handler (vl_api_http_static_enable_v4_t *mp) { - vl_api_http_static_enable_v3_reply_t *rmp; + vl_api_http_static_enable_v4_reply_t *rmp; hss_main_t *hsm = &hss_main; int rv; mp->uri[ARRAY_LEN (mp->uri) - 1] = 0; mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0; - rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), - ntohl (mp->prealloc_fifos), - ntohl (mp->private_segment_size), mp->www_root, mp->uri, - ntohl (mp->max_age), ntohl (mp->keepalive_timeout), - HSS_DEFAULT_MAX_BODY_SIZE); + rv = hss_enable_api ( + ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), + ntohl (mp->prealloc_fifos), ntohl (mp->private_segment_size), mp->www_root, + mp->uri, ntohl (mp->max_age), ntohl (mp->keepalive_timeout), + ntohl (mp->max_body_size), HSS_DEFAULT_RX_BUFFER_THRESH); - REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V3_REPLY); + REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V4_REPLY); } /* API message handler */ static void -vl_api_http_static_enable_v4_t_handler (vl_api_http_static_enable_v4_t *mp) +vl_api_http_static_enable_v5_t_handler (vl_api_http_static_enable_v5_t *mp) { - vl_api_http_static_enable_v4_reply_t *rmp; + vl_api_http_static_enable_v5_reply_t *rmp; hss_main_t *hsm = &hss_main; int rv; @@ -161,9 +143,9 @@ vl_api_http_static_enable_v4_t_handler (vl_api_http_static_enable_v4_t *mp) ntohl (mp->prealloc_fifos), ntohl (mp->private_segment_size), mp->www_root, mp->uri, ntohl (mp->max_age), ntohl (mp->keepalive_timeout), - ntohl (mp->max_body_size)); + ntohl (mp->max_body_size), ntohl (mp->rx_buff_thresh)); - REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V4_REPLY); + REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V5_REPLY); } #include <http_static/http_static.api.c> diff --git a/src/plugins/http_static/http_static.h b/src/plugins/http_static/http_static.h index e158a32dbc9..2b5c065e287 100644 --- a/src/plugins/http_static/http_static.h +++ b/src/plugins/http_static/http_static.h @@ -25,6 +25,7 @@ #define HSS_DEFAULT_MAX_AGE 600 #define HSS_DEFAULT_MAX_BODY_SIZE 8192 +#define HSS_DEFAULT_RX_BUFFER_THRESH 1 << 20 #define HSS_DEFAULT_KEEPALIVE_TIMEOUT 60 /** @file http_static.h @@ -33,15 +34,20 @@ /** \brief Application session */ -typedef struct +typedef struct hss_session_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 session_index; /** rx thread index */ - u32 thread_index; + clib_thread_index_t thread_index; /** vpp session index, handle */ u32 vpp_session_index; session_handle_t vpp_session_handle; + /** Index of listener for which connection was accepted */ + u32 listener_index; + u8 *target_path; + u8 *target_query; + http_req_method_t rt; /** Fully-resolved file path */ u8 *path; /** Data to send */ @@ -58,6 +64,15 @@ typedef struct http_headers_ctx_t resp_headers; /** Response header buffer */ u8 *headers_buf; + /** RX buffer (POST body) */ + u8 *rx_buff; + /** Current RX buffer offset */ + u64 rx_buff_offset; + /** POST body left to receive */ + u64 left_recv; + /** 
threshold for switching to pointers */ + u64 use_ptr_thresh; + int (*read_body_handler) (struct hss_session_ *hs, session_t *ts); } hss_session_t; typedef struct hss_session_handle_ @@ -67,7 +82,7 @@ typedef struct hss_session_handle_ struct { u32 session_index; - u32 thread_index; + clib_thread_index_t thread_index; }; u64 as_u64; }; @@ -113,6 +128,36 @@ typedef hss_url_handler_rc_t (*hss_url_handler_fn) (hss_url_handler_args_t *); typedef void (*hss_register_url_fn) (hss_url_handler_fn, char *, int); typedef void (*hss_session_send_fn) (hss_url_handler_args_t *args); +typedef struct hss_listener_ +{ + /** Path to file hash table */ + hss_cache_t cache; + /** The bind session endpoint e.g., tcp://0.0.0.0:80 */ + session_endpoint_cfg_t sep; + /** root path to be served */ + u8 *www_root; + /** Threshold for switching to ptr data in http msgs */ + u64 use_ptr_thresh; + /** Max cache size before LRU occurs */ + u64 cache_size; + /** Maximum size of a request body (in bytes) **/ + u64 max_body_size; + /** Maximum size of a large memory allocation */ + u32 rx_buff_thresh; + /** Timeout during which client connection will stay open */ + u32 keepalive_timeout; + /** How long a response is considered fresh (in seconds) */ + u32 max_age; + /** Formatted max_age: "max-age=xyz" */ + u8 *max_age_formatted; + /** Enable the use of builtinurls */ + u8 enable_url_handlers; + /** Index in listener pool */ + u32 l_index; + /** Listener session handle */ + session_handle_t session_handle; +} hss_listener_t; + /** \brief Main data structure */ typedef struct @@ -120,15 +165,13 @@ typedef struct /** Per thread vector of session pools */ hss_session_t **sessions; + /** Listeners pool */ + hss_listener_t *listeners; + /** Hash tables for built-in GET and POST handlers */ uword *get_url_handlers; uword *post_url_handlers; - hss_cache_t cache; - - /** root path to be served */ - u8 *www_root; - /** Application index */ u32 app_index; @@ -144,6 +187,11 @@ typedef struct * Config */ + /** Listener configured with server, if any */ + hss_listener_t default_listener; + u8 have_default_listener; + u8 is_init; + /** Enable debug messages */ int debug_level; /** Number of preallocated fifos, usually 0 */ @@ -152,22 +200,6 @@ typedef struct u64 private_segment_size; /** Size of the allocated rx, tx fifos, roughly 8K or so */ u32 fifo_size; - /** The bind URI, defaults to tcp://0.0.0.0/80 */ - u8 *uri; - /** Threshold for switching to ptr data in http msgs */ - u64 use_ptr_thresh; - /** Enable the use of builtinurls */ - u8 enable_url_handlers; - /** Max cache size before LRU occurs */ - u64 cache_size; - /** How long a response is considered fresh (in seconds) */ - u32 max_age; - /** Maximum size of a request body (in bytes) **/ - u64 max_body_size; - /** Formatted max_age: "max-age=xyz" */ - u8 *max_age_formatted; - /** Timeout during which client connection will stay open */ - u32 keepalive_timeout; /** hash table of file extensions to mime types string indices */ uword *mime_type_indices_by_file_extensions; @@ -177,6 +209,16 @@ extern hss_main_t hss_main; int hss_create (vlib_main_t *vm); +static inline hss_listener_t * +hss_listener_get (u32 l_index) +{ + hss_main_t *hsm = &hss_main; + + if (pool_is_free_index (hsm->listeners, l_index)) + return 0; + return pool_elt_at_index (hsm->listeners, l_index); +} + /** * Register a GET or POST URL handler */ @@ -184,7 +226,8 @@ void hss_register_url_handler (hss_url_handler_fn fp, const char *url, http_req_method_t type); void hss_session_send_data (hss_url_handler_args_t 
*args); void hss_builtinurl_json_handlers_init (void); -hss_session_t *hss_session_get (u32 thread_index, u32 hs_index); +hss_session_t *hss_session_get (clib_thread_index_t thread_index, + u32 hs_index); #endif /* __included_http_static_h__ */ diff --git a/src/plugins/http_static/http_static_test.c b/src/plugins/http_static/http_static_test.c index 56487893220..aba7bc4ffbf 100644 --- a/src/plugins/http_static/http_static_test.c +++ b/src/plugins/http_static/http_static_test.c @@ -39,100 +39,10 @@ http_static_test_main_t http_static_test_main; #include <vlibapi/vat_helper_macros.h> static int -api_http_static_enable_v2 (vat_main_t *vam) -{ - unformat_input_t *line_input = vam->input; - vl_api_http_static_enable_v2_t *mp; - u64 tmp; - u8 *www_root = 0; - u8 *uri = 0; - u32 prealloc_fifos = 0; - u32 private_segment_size = 0; - u32 fifo_size = 8 << 10; - u32 cache_size_limit = 1 << 20; - u32 max_age = HSS_DEFAULT_MAX_AGE; - int ret; - - /* Parse args required to build the message */ - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "www-root %s", &www_root)) - ; - else if (unformat (line_input, "prealloc-fifos %d", &prealloc_fifos)) - ; - else if (unformat (line_input, "private-segment-size %U", - unformat_memory_size, &tmp)) - { - if (tmp >= 0x100000000ULL) - { - errmsg ("private segment size %llu, too large", tmp); - return -99; - } - private_segment_size = (u32) tmp; - } - else if (unformat (line_input, "fifo-size %U", unformat_memory_size, - &tmp)) - { - if (tmp >= 0x100000000ULL) - { - errmsg ("fifo-size %llu, too large", tmp); - return -99; - } - fifo_size = (u32) tmp; - } - else if (unformat (line_input, "cache-size %U", unformat_memory_size, - &tmp)) - { - if (tmp < (128ULL << 10)) - { - errmsg ("cache-size must be at least 128kb"); - return -99; - } - cache_size_limit = (u32) tmp; - } - else if (unformat (line_input, "max-age %d", &max_age)) - ; - else if (unformat (line_input, "uri %s", &uri)) - ; - else - { - errmsg ("unknown input `%U'", format_unformat_error, line_input); - return -99; - } - } - - if (www_root == 0) - { - errmsg ("Must specify www-root"); - return -99; - } - - if (uri == 0) - uri = format (0, "tcp://0.0.0.0/80%c", 0); - - /* Construct the API message */ - M (HTTP_STATIC_ENABLE_V2, mp); - strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256); - strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256); - mp->fifo_size = ntohl (fifo_size); - mp->cache_size_limit = ntohl (cache_size_limit); - mp->prealloc_fifos = ntohl (prealloc_fifos); - mp->private_segment_size = ntohl (private_segment_size); - mp->max_age = ntohl (max_age); - - /* send it... */ - S (mp); - - /* Wait for a reply... 
*/ - W (ret); - return ret; -} - -static int -api_http_static_enable_v3 (vat_main_t *vam) +api_http_static_enable_v4 (vat_main_t *vam) { unformat_input_t *line_input = vam->input; - vl_api_http_static_enable_v3_t *mp; + vl_api_http_static_enable_v4_t *mp; u64 tmp; u8 *www_root = 0; u8 *uri = 0; @@ -142,6 +52,7 @@ api_http_static_enable_v3 (vat_main_t *vam) u32 cache_size_limit = 1 << 20; u32 max_age = HSS_DEFAULT_MAX_AGE; u32 keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; + u64 max_body_size = HSS_DEFAULT_MAX_BODY_SIZE; int ret; /* Parse args required to build the message */ @@ -188,6 +99,8 @@ api_http_static_enable_v3 (vat_main_t *vam) ; else if (unformat (line_input, "uri %s", &uri)) ; + else if (unformat (line_input, "max-body-size %llu", &max_body_size)) + ; else { errmsg ("unknown input `%U'", format_unformat_error, line_input); @@ -205,7 +118,7 @@ api_http_static_enable_v3 (vat_main_t *vam) uri = format (0, "tcp://0.0.0.0/80%c", 0); /* Construct the API message */ - M (HTTP_STATIC_ENABLE_V3, mp); + M (HTTP_STATIC_ENABLE_V4, mp); strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256); strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256); mp->fifo_size = ntohl (fifo_size); @@ -214,6 +127,8 @@ api_http_static_enable_v3 (vat_main_t *vam) mp->private_segment_size = ntohl (private_segment_size); mp->max_age = ntohl (max_age); mp->keepalive_timeout = ntohl (keepalive_timeout); + mp->max_body_size = ntohl (max_body_size); + /* send it... */ S (mp); @@ -223,10 +138,10 @@ api_http_static_enable_v3 (vat_main_t *vam) } static int -api_http_static_enable_v4 (vat_main_t *vam) +api_http_static_enable_v5 (vat_main_t *vam) { unformat_input_t *line_input = vam->input; - vl_api_http_static_enable_v4_t *mp; + vl_api_http_static_enable_v5_t *mp; u64 tmp; u8 *www_root = 0; u8 *uri = 0; @@ -237,6 +152,7 @@ api_http_static_enable_v4 (vat_main_t *vam) u32 max_age = HSS_DEFAULT_MAX_AGE; u32 keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; u64 max_body_size = HSS_DEFAULT_MAX_BODY_SIZE; + u32 rx_buff_thresh = HSS_DEFAULT_RX_BUFFER_THRESH; int ret; /* Parse args required to build the message */ @@ -283,7 +199,11 @@ api_http_static_enable_v4 (vat_main_t *vam) ; else if (unformat (line_input, "uri %s", &uri)) ; - else if (unformat (line_input, "max-body-size %llu", &max_body_size)) + else if (unformat (line_input, "max-body-size %U", unformat_memory_size, + &max_body_size)) + ; + else if (unformat (line_input, "rx-buff-thresh %U", unformat_memory_size, + &rx_buff_thresh)) ; else { diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c index 074416873e3..692cb53abe3 100644 --- a/src/plugins/http_static/static_server.c +++ b/src/plugins/http_static/static_server.c @@ -14,12 +14,14 @@ */ #include <http_static/http_static.h> +#include <vnet/session/application.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <http/http_content_types.h> +#include <http/http_status_codes.h> /** @file static_server.c * Static http server, sufficient to serve .html / .css / .js content. @@ -27,11 +29,59 @@ /*? 
%%clicmd:group_label Static HTTP Server %% ?*/ #define HSS_FIFO_THRESH (16 << 10) - +#define HSS_HEADER_BUF_MAX_SIZE 16192 hss_main_t hss_main; +static int file_handler_discard_body (hss_session_t *hs, session_t *ts); +static int url_handler_read_body (hss_session_t *hs, session_t *ts); + +static int +hss_add_header (hss_session_t *hs, http_header_name_t name, const char *value, + uword value_len) +{ + u32 needed_size = 0; + while (http_add_header (&hs->resp_headers, name, value, value_len) == -1) + { + if (needed_size) + { + http_truncate_headers_list (&hs->resp_headers); + hs->data_len = 0; + return -1; + } + else + needed_size = hs->resp_headers.tail_offset + + sizeof (http_app_header_t) + value_len; + if (needed_size < HSS_HEADER_BUF_MAX_SIZE) + { + vec_resize (hs->headers_buf, sizeof (http_app_header_t) + value_len); + hs->resp_headers.len = needed_size; + hs->resp_headers.buf = hs->headers_buf; + } + else + { + http_truncate_headers_list (&hs->resp_headers); + hs->data_len = 0; + return -1; + } + } + return 0; +} + +static_always_inline void +hss_confirm_data_read (hss_session_t *hs, u32 n_last_deq) +{ + session_t *ts; + + ts = session_get (hs->vpp_session_index, hs->thread_index); + if (svm_fifo_needs_deq_ntf (ts->rx_fifo, n_last_deq)) + { + svm_fifo_clear_deq_ntf (ts->rx_fifo); + session_program_transport_io_evt (ts->handle, SESSION_IO_EVT_RX); + } +} + static hss_session_t * -hss_session_alloc (u32 thread_index) +hss_session_alloc (clib_thread_index_t thread_index) { hss_main_t *hsm = &hss_main; hss_session_t *hs; @@ -46,7 +96,7 @@ hss_session_alloc (u32 thread_index) } __clib_export hss_session_t * -hss_session_get (u32 thread_index, u32 hs_index) +hss_session_get (clib_thread_index_t thread_index, u32 hs_index) { hss_main_t *hsm = &hss_main; if (pool_is_free_index (hsm->sessions[thread_index], hs_index)) @@ -85,6 +135,7 @@ hss_session_disconnect_transport (hss_session_t *hs) static void start_send_data (hss_session_t *hs, http_status_code_t status) { + hss_main_t *hsm = &hss_main; http_msg_t msg; session_t *ts; u32 n_enq; @@ -93,6 +144,9 @@ start_send_data (hss_session_t *hs, http_status_code_t status) ts = session_get (hs->vpp_session_index, hs->thread_index); + if (hsm->debug_level > 0) + clib_warning ("status code: %U", format_http_status_code, status); + msg.type = HTTP_MSG_REPLY; msg.code = status; msg.data.body_len = hs->data_len; @@ -100,7 +154,7 @@ start_send_data (hss_session_t *hs, http_status_code_t status) msg.data.headers_len = hs->resp_headers.tail_offset; msg.data.len = msg.data.body_len + msg.data.headers_len; - if (msg.data.len > hss_main.use_ptr_thresh) + if (msg.data.len > hs->use_ptr_thresh) { msg.data.type = HTTP_MSG_DATA_PTR; rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg); @@ -175,8 +229,9 @@ hss_session_send_data (hss_url_handler_args_t *args) /* Set content type only if we have some response data */ if (hs->data_len) - http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (args->ct)); + if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (args->ct))) + args->sc = HTTP_STATUS_INTERNAL_ERROR; start_send_data (hs, args->sc); } @@ -247,15 +302,20 @@ content_type_from_request (u8 *request) } static int -try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, - u8 *target_path, u8 *target_query, u8 *data) +try_url_handler (hss_session_t *hs) { + hss_main_t *hsm = &hss_main; http_status_code_t sc = HTTP_STATUS_OK; hss_url_handler_args_t args = {}; uword *p, *url_table; + 
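hss_add_header (added above) wraps http_add_header with a grow-exactly-once policy: on the first failure it computes the missing space, resizes headers_buf as long as the result stays under HSS_HEADER_BUF_MAX_SIZE, and retries; any second failure truncates the header list and signals an error. A minimal standalone sketch of that pattern, with hypothetical names rather than the VPP vector/header API:

#include <stdlib.h>
#include <string.h>

typedef struct { char *buf; size_t len, cap; } hdrs_t;

/* Append n bytes, growing the buffer at most once, bounded by max_cap. */
static int
append_grow_once (hdrs_t *h, const char *s, size_t n, size_t max_cap)
{
  int grown = 0;
  while (h->len + n > h->cap)
    {
      size_t needed = h->len + n;
      char *p;
      if (grown || needed > max_cap || !(p = realloc (h->buf, needed)))
        return -1;              /* give up after a single grow attempt */
      h->buf = p;
      h->cap = needed;
      grown = 1;
    }
  memcpy (h->buf + h->len, s, n);
  h->len += n;
  return 0;
}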
session_t *ts; + u32 max_deq; + u8 *target_path; int rv; - if (!hsm->enable_url_handlers || !target_path) + target_path = hs->target_path; + + if (!target_path) return -1; /* zero-length? try "index.html" */ @@ -266,28 +326,69 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, /* Look for built-in GET / POST handlers */ url_table = - (rt == HTTP_REQ_GET) ? hsm->get_url_handlers : hsm->post_url_handlers; + (hs->rt == HTTP_REQ_GET) ? hsm->get_url_handlers : hsm->post_url_handlers; p = hash_get_mem (url_table, target_path); if (!p) return -1; + hs->rx_buff = 0; + + /* Read request body */ + if (hs->left_recv) + { + hss_listener_t *l = hss_listener_get (hs->listener_index); + if (hs->left_recv > l->rx_buff_thresh) + { + /* TODO: large body (not buffered in memory) */ + clib_warning ("data length %u above threshold %u", hs->left_recv, + l->rx_buff_thresh); + hs->left_recv = 0; + start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR); + hss_session_disconnect_transport (hs); + return 0; + } + hs->rx_buff_offset = 0; + vec_validate (hs->rx_buff, hs->left_recv - 1); + ts = session_get (hs->vpp_session_index, hs->thread_index); + max_deq = svm_fifo_max_dequeue (ts->rx_fifo); + if (max_deq < hs->left_recv) + { + hs->read_body_handler = url_handler_read_body; + if (max_deq == 0) + return 0; + rv = svm_fifo_dequeue (ts->rx_fifo, max_deq, hs->rx_buff); + ASSERT (rv == max_deq); + hs->rx_buff_offset = max_deq; + hs->left_recv -= max_deq; + hss_confirm_data_read (hs, max_deq); + return 0; + } + rv = svm_fifo_dequeue (ts->rx_fifo, hs->left_recv, + hs->rx_buff + hs->rx_buff_offset); + ASSERT (rv == hs->left_recv); + hss_confirm_data_read (hs, hs->left_recv); + hs->left_recv = 0; + } + hs->path = 0; hs->data_offset = 0; hs->cache_pool_index = ~0; if (hsm->debug_level > 0) - clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", + clib_warning ("%s '%s'", (hs->rt == HTTP_REQ_GET) ? "GET" : "POST", target_path); - args.req_type = rt; - args.query = target_query; - args.req_data = data; + args.req_type = hs->rt; + args.query = hs->target_query; + args.req_data = hs->rx_buff; args.sh.thread_index = hs->thread_index; args.sh.session_index = hs->session_index; rv = ((hss_url_handler_fn) p[0]) (&args); + vec_free (hs->rx_buff); + /* Wait for data from handler */ if (rv == HSS_URL_HANDLER_ASYNC) return 0; @@ -295,7 +396,7 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, if (rv == HSS_URL_HANDLER_ERROR) { clib_warning ("builtin handler %llx hit on %s '%s' but failed!", p[0], - (rt == HTTP_REQ_GET) ? "GET" : "POST", target_path); + (hs->rt == HTTP_REQ_GET) ? 
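The body-buffering path above sizes rx_buff to the full advertised body, rejects anything above the listener's rx_buff_thresh (with a 500 for now), dequeues whatever already sits in the rx fifo, and leaves the remainder to url_handler_read_body. A condensed sketch of the offset/remaining bookkeeping, with hypothetical names instead of the svm_fifo API:

#include <string.h>

typedef struct
{
  unsigned char *buf;   /* sized to the full body up front */
  unsigned int off;     /* bytes copied so far */
  unsigned int left;    /* bytes still expected */
} body_rx_t;

/* Copy up to 'avail' bytes from 'src'; returns 1 once the body is complete. */
static int
body_rx_drain (body_rx_t *b, const unsigned char *src, unsigned int avail)
{
  unsigned int n = avail < b->left ? avail : b->left;
  if (n)
    memcpy (b->buf + b->off, src, n);
  b->off += n;
  b->left -= n;
  return b->left == 0;
}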
"GET" : "POST", target_path); sc = HTTP_STATUS_BAD_GATEWAY; } @@ -305,8 +406,9 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, /* Set content type only if we have some response data */ if (hs->data_len) - http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (args.ct)); + if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (args.ct))) + sc = HTTP_STATUS_INTERNAL_ERROR; start_send_data (hs, sc); @@ -329,8 +431,9 @@ file_path_is_valid (u8 *path) } static u32 -try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path) +try_index_file (hss_listener_t *l, hss_session_t *hs, u8 *path) { + hss_main_t *hsm = &hss_main; u8 *port_str = 0, *redirect; transport_endpoint_t endpt; transport_proto_t proto; @@ -358,7 +461,7 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path) /* * We found an index.html file, build a redirect */ - vec_delete (path, vec_len (hsm->www_root) - 1, 0); + vec_delete (path, vec_len (l->www_root) - 1, 0); ts = session_get (hs->vpp_session_index, hs->thread_index); session_get_endpoint (ts, &endpt, 1 /* is_local */); @@ -383,8 +486,10 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path) vec_free (port_str); - http_add_header (&hs->resp_headers, HTTP_HEADER_LOCATION, - (const char *) redirect, vec_len (redirect)); + if (hss_add_header (hs, HTTP_HEADER_LOCATION, (const char *) redirect, + vec_len (redirect))) + return HTTP_STATUS_INTERNAL_ERROR; + vec_free (redirect); hs->data_len = 0; hs->free_data = 1; @@ -393,39 +498,61 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path) } static int -try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, - u8 *target) +try_file_handler (hss_session_t *hs) { + hss_main_t *hsm = &hss_main; http_status_code_t sc = HTTP_STATUS_OK; u8 *path, *sanitized_path; - u32 ce_index; + u32 ce_index, max_dequeue; http_content_type_t type; u8 *last_modified; + hss_listener_t *l; + session_t *ts; + + l = hss_listener_get (hs->listener_index); /* Feature not enabled */ - if (!hsm->www_root) + if (!l->www_root) return -1; - /* Remove dot segments to prevent path traversal */ - sanitized_path = http_path_remove_dot_segments (target); + /* Discard request body */ + if (hs->left_recv) + { + ts = session_get (hs->vpp_session_index, hs->thread_index); + max_dequeue = svm_fifo_max_dequeue (ts->rx_fifo); + if (max_dequeue < hs->left_recv) + { + svm_fifo_dequeue_drop (ts->rx_fifo, max_dequeue); + hs->left_recv -= max_dequeue; + hs->read_body_handler = file_handler_discard_body; + hss_confirm_data_read (hs, max_dequeue); + return 0; + } + svm_fifo_dequeue_drop (ts->rx_fifo, hs->left_recv); + hss_confirm_data_read (hs, hs->left_recv); + hs->left_recv = 0; + } + + /* Sanitize received path */ + sanitized_path = http_path_sanitize (hs->target_path); /* * Construct the file to open */ - if (!target) - path = format (0, "%s%c", hsm->www_root, 0); + if (!sanitized_path) + path = format (0, "%s%c", l->www_root, 0); else - path = format (0, "%s/%s%c", hsm->www_root, sanitized_path, 0); + path = format (0, "%s/%s%c", l->www_root, sanitized_path, 0); if (hsm->debug_level > 0) - clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", path); + clib_warning ("%s '%s'", (hs->rt == HTTP_REQ_GET) ? 
"GET" : "POST", path); if (hs->data && hs->free_data) vec_free (hs->data); hs->data_offset = 0; - ce_index = hss_cache_lookup_and_attach (&hsm->cache, path, &hs->data, + ce_index = hss_cache_lookup_and_attach (&l->cache, path, &hs->data, &hs->data_len, &last_modified); if (ce_index == ~0) { @@ -442,10 +569,10 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, sc = HTTP_STATUS_NOT_FOUND; goto done; } - sc = try_index_file (hsm, hs, path); + sc = try_index_file (l, hs, path); goto done; } - ce_index = hss_cache_add_and_attach (&hsm->cache, path, &hs->data, + ce_index = hss_cache_add_and_attach (&l->cache, path, &hs->data, &hs->data_len, &last_modified); if (ce_index == ~0) { @@ -462,14 +589,17 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, * Cache-Control max-age * Last-Modified */ - type = content_type_from_request (target); - http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (type)); - http_add_header (&hs->resp_headers, HTTP_HEADER_CACHE_CONTROL, - (const char *) hsm->max_age_formatted, - vec_len (hsm->max_age_formatted)); - http_add_header (&hs->resp_headers, HTTP_HEADER_LAST_MODIFIED, - (const char *) last_modified, vec_len (last_modified)); + type = content_type_from_request (sanitized_path); + if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (type)) || + hss_add_header (hs, HTTP_HEADER_CACHE_CONTROL, + (const char *) l->max_age_formatted, + vec_len (l->max_age_formatted)) || + hss_add_header (hs, HTTP_HEADER_LAST_MODIFIED, + (const char *) last_modified, vec_len (last_modified))) + { + sc = HTTP_STATUS_INTERNAL_ERROR; + } done: vec_free (sanitized_path); @@ -481,15 +611,23 @@ done: } static void -handle_request (hss_session_t *hs, http_req_method_t rt, u8 *target_path, - u8 *target_query, u8 *data) +handle_request (hss_session_t *hs) { - hss_main_t *hsm = &hss_main; + hss_listener_t *l; + + l = hss_listener_get (hs->listener_index); + + if (hs->left_recv > l->max_body_size) + { + start_send_data (hs, HTTP_STATUS_CONTENT_TOO_LARGE); + hss_session_disconnect_transport (hs); + return; + } - if (!try_url_handler (hsm, hs, rt, target_path, target_query, data)) + if (l->enable_url_handlers && !try_url_handler (hs)) return; - if (!try_file_handler (hsm, hs, rt, target_path)) + if (!try_file_handler (hs)) return; /* Handler did not find anything return 404 */ @@ -498,18 +636,60 @@ handle_request (hss_session_t *hs, http_req_method_t rt, u8 *target_path, } static int +file_handler_discard_body (hss_session_t *hs, session_t *ts) +{ + u32 max_dequeue, to_discard; + + max_dequeue = svm_fifo_max_dequeue (ts->rx_fifo); + to_discard = clib_min (max_dequeue, hs->left_recv); + svm_fifo_dequeue_drop (ts->rx_fifo, to_discard); + hs->left_recv -= to_discard; + hss_confirm_data_read (hs, to_discard); + if (hs->left_recv == 0) + return try_file_handler (hs); + return 0; +} + +static int +url_handler_read_body (hss_session_t *hs, session_t *ts) +{ + u32 max_dequeue, to_read; + int rv; + + max_dequeue = svm_fifo_max_dequeue (ts->rx_fifo); + to_read = clib_min (max_dequeue, hs->left_recv); + rv = + svm_fifo_dequeue (ts->rx_fifo, to_read, hs->rx_buff + hs->rx_buff_offset); + ASSERT (rv == to_read); + hs->rx_buff_offset += to_read; + hs->left_recv -= to_read; + hss_confirm_data_read (hs, to_read); + if (hs->left_recv == 0) + return try_url_handler (hs); + return 0; +} + +static int hss_ts_rx_callback (session_t *ts) { - hss_main_t *hsm = &hss_main; hss_session_t *hs; - u8 *target_path = 
0, *target_query = 0, *data = 0; http_msg_t msg; int rv; hs = hss_session_get (ts->thread_index, ts->opaque); + if (hs->left_recv != 0) + { + ASSERT (hs->read_body_handler); + return hs->read_body_handler (hs, ts); + } + if (hs->free_data) vec_free (hs->data); + hs->data = 0; + hs->data_len = 0; + vec_free (hs->target_path); + vec_free (hs->target_query); http_init_headers_ctx (&hs->resp_headers, hs->headers_buf, vec_len (hs->headers_buf)); @@ -520,72 +700,59 @@ hss_ts_rx_callback (session_t *ts) if (msg.type != HTTP_MSG_REQUEST || (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST)) { - http_add_header (&hs->resp_headers, HTTP_HEADER_ALLOW, - http_token_lit ("GET, POST")); - start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); + if (hss_add_header (hs, HTTP_HEADER_ALLOW, http_token_lit ("GET, POST"))) + start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR); + else + start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); goto err_done; } + hs->rt = msg.method_type; + /* Read target path */ if (msg.data.target_path_len) { - vec_validate (target_path, msg.data.target_path_len - 1); + vec_validate (hs->target_path, msg.data.target_path_len - 1); rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset, - msg.data.target_path_len, target_path); + msg.data.target_path_len, hs->target_path); ASSERT (rv == msg.data.target_path_len); - if (http_validate_abs_path_syntax (target_path, 0)) + if (http_validate_abs_path_syntax (hs->target_path, 0)) { start_send_data (hs, HTTP_STATUS_BAD_REQUEST); goto err_done; } /* Target path must be a proper C-string in addition to a vector */ - vec_add1 (target_path, 0); + vec_add1 (hs->target_path, 0); } /* Read target query */ if (msg.data.target_query_len) { - vec_validate (target_query, msg.data.target_query_len - 1); + vec_validate (hs->target_query, msg.data.target_query_len - 1); rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_query_offset, - msg.data.target_query_len, target_query); + msg.data.target_query_len, hs->target_query); ASSERT (rv == msg.data.target_query_len); - if (http_validate_query_syntax (target_query, 0)) + if (http_validate_query_syntax (hs->target_query, 0)) { start_send_data (hs, HTTP_STATUS_BAD_REQUEST); goto err_done; } } - /* Read request body for POST requests */ if (msg.data.body_len && msg.method_type == HTTP_REQ_POST) { - if (msg.data.body_len > hsm->max_body_size) - { - start_send_data (hs, HTTP_STATUS_CONTENT_TOO_LARGE); - goto err_done; - } - if (svm_fifo_max_dequeue (ts->rx_fifo) - msg.data.body_offset < - msg.data.body_len) - { - start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR); - goto err_done; - } - vec_validate (data, msg.data.body_len - 1); - rv = svm_fifo_peek (ts->rx_fifo, msg.data.body_offset, msg.data.body_len, - data); - ASSERT (rv == msg.data.body_len); + hs->left_recv = msg.data.body_len; + /* drop everything up to body */ + svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset); } /* Find and send data */ - handle_request (hs, msg.method_type, target_path, target_query, data); + handle_request (hs); goto done; err_done: hss_session_disconnect_transport (hs); done: - vec_free (target_path); - vec_free (target_query); - vec_free (data); svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.len); return 0; } @@ -631,6 +798,7 @@ static int hss_ts_accept_callback (session_t *ts) { hss_session_t *hs; + session_t *ls; u32 thresh; hs = hss_session_alloc (ts->thread_index); @@ -638,6 +806,11 @@ hss_ts_accept_callback (session_t *ts) hs->vpp_session_index = ts->session_index; hs->vpp_session_handle = 
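With the changes above, hss_ts_rx_callback is re-entered across rx events: when a previous request's body is still outstanding (left_recv != 0) it resumes through the stored read_body_handler instead of parsing a new http_msg_t, and the per-request target path/query now live in the session so they survive between events. A sketch of that dispatch shape, using hypothetical types:

typedef struct request_ctx request_ctx_t;
typedef int (*read_body_fn) (request_ctx_t *ctx);

struct request_ctx
{
  unsigned int left_recv;         /* body bytes still expected */
  read_body_fn read_body_handler; /* continuation set by the first pass */
};

static int
on_rx_event (request_ctx_t *ctx)
{
  if (ctx->left_recv)             /* resume an in-flight body read */
    return ctx->read_body_handler (ctx);
  /* ...otherwise dequeue and parse the next request message... */
  return 0;
}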
session_handle (ts); + /* Link to listener context */ + ls = listen_session_get_from_handle (ts->listener_handle); + hs->listener_index = ls->opaque; + hs->use_ptr_thresh = hss_listener_get (hs->listener_index)->use_ptr_thresh; + /* The application sets a threshold for it's fifo to get notified when * additional data can be enqueued. We want to keep the TX fifo reasonably * full, however avoid entering a state where the @@ -693,7 +866,6 @@ hss_add_segment_callback (u32 client_index, u64 segment_handle) static void hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf) { - hss_main_t *hsm = &hss_main; hss_session_t *hs; if (ntf == SESSION_CLEANUP_TRANSPORT) @@ -705,7 +877,9 @@ hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf) if (hs->cache_pool_index != ~0) { - hss_cache_detach_entry (&hsm->cache, hs->cache_pool_index); + hss_listener_t *l = hss_listener_get (hs->listener_index); + if (l) + hss_cache_detach_entry (&l->cache, hs->cache_pool_index); hs->cache_pool_index = ~0; } @@ -716,6 +890,8 @@ hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf) hs->free_data = 0; vec_free (hs->headers_buf); vec_free (hs->path); + vec_free (hs->target_path); + vec_free (hs->target_query); hss_session_free (hs); } @@ -788,30 +964,22 @@ hss_transport_needs_crypto (transport_proto_t proto) } static int -hss_listen (void) +hss_listen (hss_listener_t *l, session_handle_t *lh) { hss_main_t *hsm = &hss_main; - session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; vnet_listen_args_t _a, *a = &_a; - char *uri = "tcp://0.0.0.0/80"; u8 need_crypto; transport_endpt_ext_cfg_t *ext_cfg; int rv; - transport_endpt_cfg_http_t http_cfg = { hsm->keepalive_timeout, 0 }; + transport_endpt_cfg_http_t http_cfg = { l->keepalive_timeout, 0 }; clib_memset (a, 0, sizeof (*a)); a->app_index = hsm->app_index; - if (hsm->uri) - uri = (char *) hsm->uri; - - if (parse_uri (uri, &sep)) - return -1; - - need_crypto = hss_transport_needs_crypto (sep.transport_proto); + need_crypto = hss_transport_needs_crypto (l->sep.transport_proto); - sep.transport_proto = TRANSPORT_PROTO_HTTP; - clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); + l->sep.transport_proto = TRANSPORT_PROTO_HTTP; + clib_memcpy (&a->sep_ext, &l->sep, sizeof (l->sep)); ext_cfg = session_endpoint_add_ext_cfg ( &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); @@ -825,7 +993,8 @@ hss_listen (void) ext_cfg->crypto.ckpair_index = hsm->ckpair_index; } - rv = vnet_listen (a); + if (!(rv = vnet_listen (a))) + *lh = a->handle; session_endpoint_free_ext_cfgs (&a->sep_ext); @@ -835,13 +1004,75 @@ hss_listen (void) static void hss_url_handlers_init (hss_main_t *hsm) { - if (!hsm->get_url_handlers) + if (hsm->get_url_handlers) + return; + + hsm->get_url_handlers = hash_create_string (0, sizeof (uword)); + hsm->post_url_handlers = hash_create_string (0, sizeof (uword)); + hss_builtinurl_json_handlers_init (); +} + +int +hss_listener_add (hss_listener_t *l_cfg) +{ + hss_main_t *hsm = &hss_main; + session_handle_t lh; + app_listener_t *al; + hss_listener_t *l; + session_t *ls; + + if (hss_listen (l_cfg, &lh)) { - hsm->get_url_handlers = hash_create_string (0, sizeof (uword)); - hsm->post_url_handlers = hash_create_string (0, sizeof (uword)); + clib_warning ("failed to start listening"); + return -1; } - hss_builtinurl_json_handlers_init (); + pool_get (hsm->listeners, l); + *l = *l_cfg; + l->l_index = l - hsm->listeners; + l->session_handle = lh; + + al = app_listener_get_w_handle (lh); + ls = app_listener_get_session (al); + ls->opaque = l->l_index; + + if 
(l->www_root) + hss_cache_init (&l->cache, l->cache_size, hsm->debug_level); + if (l->enable_url_handlers) + hss_url_handlers_init (hsm); + + l->max_age_formatted = format (0, "max-age=%d", l->max_age); + + return 0; +} + +int +hss_listener_del (hss_listener_t *l_cfg) +{ + hss_main_t *hsm = &hss_main; + hss_listener_t *l; + u8 found = 0; + + pool_foreach (l, hsm->listeners) + { + if (clib_memcmp (&l_cfg->sep, &l->sep, sizeof (l_cfg->sep)) == 0) + { + found = 1; + break; + } + } + + if (!found) + return -1; + + vnet_unlisten_args_t args = { .handle = l->session_handle, hsm->app_index }; + + vec_free (l->www_root); + vec_free (l->max_age_formatted); + hss_cache_free (&l->cache); + pool_put (hsm->listeners, l); + + return vnet_unlisten (&args); } int @@ -854,24 +1085,25 @@ hss_create (vlib_main_t *vm) num_threads = 1 /* main thread */ + vtm->n_threads; vec_validate (hsm->sessions, num_threads - 1); + /* Make sure session layer is enabled */ + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, &args); + if (hss_attach ()) { clib_warning ("failed to attach server"); return -1; } - if (hss_listen ()) + + if (hsm->have_default_listener && hss_listener_add (&hsm->default_listener)) { clib_warning ("failed to start listening"); return -1; } - if (hsm->www_root) - hss_cache_init (&hsm->cache, hsm->cache_size, hsm->debug_level); - - if (hsm->enable_url_handlers) - hss_url_handlers_init (hsm); - - hsm->max_age_formatted = format (0, "max-age=%d", hsm->max_age); + hsm->is_init = 1; return 0; } @@ -882,20 +1114,24 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input, { unformat_input_t _line_input, *line_input = &_line_input; hss_main_t *hsm = &hss_main; + hss_listener_t *l = &hsm->default_listener; clib_error_t *error = 0; + char *uri = 0; u64 seg_size; int rv; if (hsm->app_index != (u32) ~0) - return clib_error_return (0, "http server already running..."); + return clib_error_return (0, "http static server already initialized..."); hsm->prealloc_fifos = 0; hsm->private_segment_size = 0; hsm->fifo_size = 0; - hsm->cache_size = 10 << 20; - hsm->max_age = HSS_DEFAULT_MAX_AGE; - hsm->max_body_size = HSS_DEFAULT_MAX_BODY_SIZE; - hsm->keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; + + l->cache_size = 10 << 20; + l->max_age = HSS_DEFAULT_MAX_AGE; + l->max_body_size = HSS_DEFAULT_MAX_BODY_SIZE; + l->rx_buff_thresh = HSS_DEFAULT_RX_BUFFER_THRESH; + l->keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; /* Get a line of input. 
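hss_listener_add above now does per-listener what used to happen once globally: it listens on the configured endpoint, copies the config into the listener pool, stores the pool index in the listen session's opaque, and initializes the per-listener cache and max-age string. A sketch of creating a listener programmatically, filled in the same way the CLI handler does it (the URI and www-root values are made up, error handling elided):

static int
add_example_listener (void)
{
  hss_listener_t l = {};

  l.cache_size = 10 << 20;
  l.max_age = HSS_DEFAULT_MAX_AGE;
  l.max_body_size = HSS_DEFAULT_MAX_BODY_SIZE;
  l.rx_buff_thresh = HSS_DEFAULT_RX_BUFFER_THRESH;
  l.keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT;
  l.www_root = format (0, "/var/www%c", 0);
  l.enable_url_handlers = 1;

  if (parse_uri ("tcp://0.0.0.0/8080", &l.sep))
    return -1;

  return hss_listener_add (&l);
}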
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -903,37 +1139,43 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "www-root %s", &hsm->www_root)) - ; - else - if (unformat (line_input, "prealloc-fifos %d", &hsm->prealloc_fifos)) - ; - else if (unformat (line_input, "private-segment-size %U", - unformat_memory_size, &seg_size)) + /* Server config */ + if (unformat (line_input, "private-segment-size %U", + unformat_memory_size, &seg_size)) hsm->private_segment_size = seg_size; - else if (unformat (line_input, "fifo-size %d", &hsm->fifo_size)) - hsm->fifo_size <<= 10; - else if (unformat (line_input, "cache-size %U", unformat_memory_size, - &hsm->cache_size)) + else if (unformat (line_input, "fifo-size %U", unformat_memory_size, + &hsm->fifo_size)) ; - else if (unformat (line_input, "uri %s", &hsm->uri)) + else if (unformat (line_input, "prealloc-fifos %d", + &hsm->prealloc_fifos)) ; else if (unformat (line_input, "debug %d", &hsm->debug_level)) ; - else if (unformat (line_input, "keepalive-timeout %d", - &hsm->keepalive_timeout)) - ; else if (unformat (line_input, "debug")) hsm->debug_level = 1; - else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size, - &hsm->use_ptr_thresh)) + /* Default listener parameters */ + else if (unformat (line_input, "uri %s", &uri)) + ; + else if (unformat (line_input, "www-root %s", &l->www_root)) ; else if (unformat (line_input, "url-handlers")) - hsm->enable_url_handlers = 1; - else if (unformat (line_input, "max-age %d", &hsm->max_age)) + l->enable_url_handlers = 1; + else if (unformat (line_input, "cache-size %U", unformat_memory_size, + &l->cache_size)) + ; + else if (unformat (line_input, "max-age %d", &l->max_age)) ; else if (unformat (line_input, "max-body-size %U", unformat_memory_size, - &hsm->max_body_size)) + &l->max_body_size)) + ; + else if (unformat (line_input, "rx-buff-thresh %U", unformat_memory_size, + &l->rx_buff_thresh)) + ; + else if (unformat (line_input, "keepalive-timeout %d", + &l->keepalive_timeout)) + ; + else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size, + &l->use_ptr_thresh)) ; else { @@ -950,28 +1192,33 @@ no_input: if (error) goto done; - if (hsm->www_root == 0 && !hsm->enable_url_handlers) + if (l->www_root) { - error = clib_error_return (0, "Must set www-root or url-handlers"); - goto done; + /* Maintain legacy default uri behavior */ + if (!uri) + uri = "tcp://0.0.0.0:80"; + if (l->cache_size < (128 << 10)) + { + error = clib_error_return (0, "cache-size must be at least 128kb"); + vec_free (l->www_root); + goto done; + } } - if (hsm->cache_size < (128 << 10)) + if (uri) { - error = clib_error_return (0, "cache-size must be at least 128kb"); - vec_free (hsm->www_root); - goto done; + if (parse_uri (uri, &l->sep)) + { + error = clib_error_return (0, "failed to parse uri %s", uri); + goto done; + } + hsm->have_default_listener = 1; } - session_enable_disable_args_t args = { .is_en = 1, - .rt_engine_type = - RT_BACKEND_ENGINE_RULE_TABLE }; - vnet_session_enable_disable (vm, &args); - if ((rv = hss_create (vm))) { error = clib_error_return (0, "server_create returned %d", rv); - vec_free (hsm->www_root); + vec_free (l->www_root); } done: @@ -995,13 +1242,123 @@ done: VLIB_CLI_COMMAND (hss_create_command, static) = { .path = "http static server", .short_help = - "http static server www-root <path> [prealloc-fifos <nn>]\n" + "http static server [www-root <path>] 
[url-handlers]\n" "[private-segment-size <nnMG>] [fifo-size <nbytes>] [max-age <nseconds>]\n" - "[uri <uri>] [ptr-thresh <nn>] [url-handlers] [debug [nn]]\n" + "[uri <uri>] [ptr-thresh <nn>] [prealloc-fifos <nn>] [debug [nn]]\n" "[keepalive-timeout <nn>] [max-body-size <nn>]\n", .function = hss_create_command_fn, }; +static clib_error_t * +hss_add_del_listener_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + hss_main_t *hsm = &hss_main; + clib_error_t *error = 0; + hss_listener_t _l = {}, *l = &_l; + u8 is_add = 1; + char *uri = 0; + + if (!hsm->is_init) + return clib_error_return (0, "Static server not initialized"); + + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "No input provided"); + + l->cache_size = 10 << 20; + l->max_age = HSS_DEFAULT_MAX_AGE; + l->max_body_size = HSS_DEFAULT_MAX_BODY_SIZE; + l->rx_buff_thresh = HSS_DEFAULT_RX_BUFFER_THRESH; + l->keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "add")) + is_add = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "uri %s", &uri)) + ; + else if (unformat (line_input, "www-root %s", &l->www_root)) + ; + else if (unformat (line_input, "url-handlers")) + l->enable_url_handlers = 1; + else if (unformat (line_input, "cache-size %U", unformat_memory_size, + &l->cache_size)) + ; + else if (unformat (line_input, "keepalive-timeout %d", + &l->keepalive_timeout)) + ; + else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size, + &l->use_ptr_thresh)) + ; + else if (unformat (line_input, "max-age %d", &l->max_age)) + ; + else if (unformat (line_input, "max-body-size %U", unformat_memory_size, + &l->max_body_size)) + ; + else if (unformat (line_input, "rx-buff-thresh %U", unformat_memory_size, + &l->rx_buff_thresh)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + break; + } + } + unformat_free (line_input); + + if (!uri) + { + error = clib_error_return (0, "Must set uri"); + goto done; + } + + if (parse_uri (uri, &l->sep)) + { + error = clib_error_return (0, "failed to parse uri %s", uri); + goto done; + } + + if (!is_add) + { + hss_listener_del (l); + goto done; + } + + if (l->www_root == 0 && !l->enable_url_handlers) + { + error = clib_error_return (0, "Must set www-root or url-handlers"); + goto done; + } + + if (l->cache_size < (128 << 10)) + { + error = clib_error_return (0, "cache-size must be at least 128kb"); + goto done; + } + + if (hss_listener_add (l)) + { + error = clib_error_return (0, "failed to create listener"); + goto done; + } + +done: + + vec_free (uri); + return error; +} + +VLIB_CLI_COMMAND (hss_add_del_listener_command, static) = { + .path = "http static listener", + .short_help = "http static listener [add|del] uri <uri>\n" + "[www-root <path>] [url-handlers] \n", + .function = hss_add_del_listener_command_fn, +}; + static u8 * format_hss_session (u8 *s, va_list *args) { @@ -1014,14 +1371,29 @@ format_hss_session (u8 *s, va_list *args) return s; } +static u8 * +format_hss_listener (u8 *s, va_list *args) +{ + hss_listener_t *l = va_arg (*args, hss_listener_t *); + int __clib_unused verbose = va_arg (*args, int); + + s = format ( + s, "listener %d, uri %U:%u, www-root %s, cache-size %U url-handlers %d", + l->l_index, format_ip46_address, &l->sep.ip, l->sep.is_ip4, + 
clib_net_to_host_u16 (l->sep.port), l->www_root, format_memory_size, + l->cache_size, l->enable_url_handlers); + return s; +} + static clib_error_t * hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { - int verbose = 0, show_cache = 0, show_sessions = 0; + int verbose = 0, show_cache = 0, show_sessions = 0, show_listeners = 0; + u32 l_index = 0; hss_main_t *hsm = &hss_main; - if (hsm->www_root == 0) + if (!hsm->is_init) return clib_error_return (0, "Static server disabled"); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -1032,17 +1404,26 @@ hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input, verbose = 1; else if (unformat (input, "cache")) show_cache = 1; + else if (unformat (input, "cache %u", &l_index)) + show_cache = 1; else if (unformat (input, "sessions")) show_sessions = 1; + else if (unformat (input, "listeners")) + show_listeners = 1; else break; } - if ((show_cache + show_sessions) == 0) + if ((show_cache + show_sessions + show_listeners) == 0) return clib_error_return (0, "specify one or more of cache, sessions"); if (show_cache) - vlib_cli_output (vm, "%U", format_hss_cache, &hsm->cache, verbose); + { + hss_listener_t *l = hss_listener_get (l_index); + if (l == 0) + return clib_error_return (0, "listener %d not found", l_index); + vlib_cli_output (vm, "%U", format_hss_cache, &l->cache, verbose); + } if (show_sessions) { @@ -1067,6 +1448,15 @@ hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input, } vec_free (session_indices); } + + if (show_listeners) + { + hss_listener_t *l; + pool_foreach (l, hsm->listeners) + { + vlib_cli_output (vm, "%U", format_hss_listener, l, verbose); + } + } return 0; } @@ -1082,7 +1472,8 @@ hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input, ?*/ VLIB_CLI_COMMAND (hss_show_command, static) = { .path = "show http static server", - .short_help = "show http static server sessions cache [verbose [<nn>]]", + .short_help = "show http static server [sessions] [cache] [listeners] " + "[verbose [<nn>]]", .function = hss_show_command_fn, }; @@ -1091,12 +1482,28 @@ hss_clear_cache_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { hss_main_t *hsm = &hss_main; - u32 busy_items = 0; + u32 busy_items = 0, l_index = 0; + hss_listener_t *l; - if (hsm->www_root == 0) + if (!hsm->is_init) return clib_error_return (0, "Static server disabled"); - busy_items = hss_cache_clear (&hsm->cache); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "index %u", &l_index)) + ; + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + + l = hss_listener_get (l_index); + if (l == 0) + return clib_error_return (0, "listener %d not found", l_index); + + busy_items = hss_cache_clear (&l->cache); if (busy_items > 0) vlib_cli_output (vm, "Note: %d busy items still in cache...", busy_items); @@ -1118,7 +1525,7 @@ hss_clear_cache_command_fn (vlib_main_t *vm, unformat_input_t *input, ?*/ VLIB_CLI_COMMAND (clear_hss_cache_command, static) = { .path = "clear http static cache", - .short_help = "clear http static cache", + .short_help = "clear http static cache [index <index>]", .function = hss_clear_cache_command_fn, }; diff --git a/src/plugins/ikev2/ikev2.c b/src/plugins/ikev2/ikev2.c index 0e6751ce851..c8183feddfd 100644 --- a/src/plugins/ikev2/ikev2.c +++ b/src/plugins/ikev2/ikev2.c @@ -2223,7 +2223,7 @@ ikev2_create_tunnel_interface (vlib_main_t *vm, ikev2_sa_t *sa, ikev2_child_sa_t 
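Together with the listener CLI added above, a multi-listener setup and the reworked introspection commands look roughly like this (paths and ports are examples only):

http static server www-root /var/www uri tcp://0.0.0.0/80 url-handlers
http static listener add uri tcp://0.0.0.0/8080 www-root /srv/alt-site
http static listener del uri tcp://0.0.0.0/8080
show http static server listeners
show http static server cache verbose
clear http static cache index 0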
*child, u32 sa_index, u32 child_index, u8 is_rekey, u8 kex) { - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); ikev2_main_t *km = &ikev2_main; ipsec_crypto_alg_t encr_type; ipsec_integ_alg_t integ_type; @@ -3190,7 +3190,7 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 nexts[VLIB_FRAME_SIZE], *next = nexts; ikev2_main_per_thread_data_t *ptd = ikev2_get_per_thread_data (); - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; ikev2_stats_t _stats, *stats = &_stats; int res; @@ -5551,6 +5551,7 @@ static uword ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { + ipsec_main_t *im = &ipsec_main; ikev2_main_t *km = &ikev2_main; ikev2_profile_t *p; ikev2_child_sa_t *c; @@ -5631,10 +5632,10 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, /* process ipsec sas */ ipsec_sa_t *sa; - pool_foreach (sa, ipsec_sa_pool) - { - ikev2_mngr_process_ipsec_sa (sa); - } + pool_foreach (sa, im->sa_pool) + { + ikev2_mngr_process_ipsec_sa (sa); + } ikev2_process_pending_sa_init (vm, km); } diff --git a/src/plugins/ikev2/ikev2_priv.h b/src/plugins/ikev2/ikev2_priv.h index 2751657bff9..58da36d9d59 100644 --- a/src/plugins/ikev2/ikev2_priv.h +++ b/src/plugins/ikev2/ikev2_priv.h @@ -661,7 +661,7 @@ clib_error_t *ikev2_profile_natt_disable (u8 * name); static_always_inline ikev2_main_per_thread_data_t * ikev2_get_per_thread_data () { - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); return vec_elt_at_index (ikev2_main.per_thread_data, thread_index); } #endif /* __included_ikev2_priv_h__ */ diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c index 1606f72224f..ca6483b3329 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c @@ -21,7 +21,7 @@ #include <vnet/vnet.h> #include <vnet/plugin/plugin.h> #include <ioam/export-common/ioam_export.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <vlibapi/api.h> #include <vlibmemory/api.h> diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c index 839fd80b443..17084767c1e 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c @@ -16,8 +16,8 @@ #include <vnet/vnet.h> #include <vppinfra/error.h> #include <vnet/ip/ip.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <ioam/export-common/ioam_export.h> typedef struct diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c index 61476ebd85c..a4deae2ca60 100644 --- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c +++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c @@ -646,7 +646,7 @@ vlib_node_registration_t ioam_cache_ts_timer_tick_node; typedef struct { - u32 thread_index; + clib_thread_index_t thread_index; } ioam_cache_ts_timer_tick_trace_t; /* packet trace format function */ @@ -696,7 +696,7 @@ expired_cache_ts_timer_callback (u32 * expired_timers) ioam_cache_main_t *cm = &ioam_cache_main; int i; u32 pool_index; - 
u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 count = 0; for (i = 0; i < vec_len (expired_timers); i++) diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c index 801faa98066..d8d52e9f0a1 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c @@ -17,8 +17,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> @@ -68,8 +67,8 @@ vxlan_gpe_decap_ioam (vlib_main_t * vm, vlib_frame_t * from_frame, u8 is_ipv6) { u32 n_left_from, next_index, *from, *to_next; - vxlan_gpe_main_t *ngm = &vxlan_gpe_main; vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *ngm = hm->gpe_main; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c index de375df4f7c..9c742d8c293 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c @@ -17,7 +17,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> @@ -71,7 +71,8 @@ vxlan_gpe_encap_ioam_v4 (vlib_main_t * vm, vlib_frame_t * from_frame) { u32 n_left_from, next_index, *from, *to_next; - vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *ngm = sm->gpe_main; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c index 2fa0aa29450..a80662b9d12 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c @@ -17,7 +17,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> /* Statistics (not really errors) */ @@ -231,7 +231,8 @@ vxlan_gpe_pop_ioam (vlib_main_t * vm, vlib_frame_t * from_frame, u8 is_ipv6) { u32 n_left_from, next_index, *from, *to_next; - vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *ngm = sm->gpe_main; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c index e3c82725e26..02233cf9841 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c @@ -18,7 +18,7 @@ #include <vnet/ip/ip.h> #include <vnet/udp/udp_local.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> diff --git 
a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c index d61832d975a..6de1760b6b7 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c @@ -80,9 +80,9 @@ static void vl_api_vxlan_gpe_ioam_vni_enable_t_handler clib_error_t *error; vxlan4_gpe_tunnel_key_t key4; uword *p = NULL; - vxlan_gpe_main_t *gm = &vxlan_gpe_main; vxlan_gpe_tunnel_t *t = 0; vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *gm = hm->gpe_main; u32 vni; @@ -130,7 +130,8 @@ static void vl_api_vxlan_gpe_ioam_vni_disable_t_handler clib_error_t *error; vxlan4_gpe_tunnel_key_t key4; uword *p = NULL; - vxlan_gpe_main_t *gm = &vxlan_gpe_main; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *gm = hm->gpe_main; vxlan_gpe_tunnel_t *t = 0; u32 vni; @@ -214,6 +215,13 @@ ioam_vxlan_gpe_init (vlib_main_t * vm) vlib_node_t *vxlan_gpe_decap_node = NULL; uword next_node = 0; + sm->gpe_main = + vlib_get_plugin_symbol ("vxlan-gpe_plugin.so", "vxlan_gpe_main"); + if (sm->gpe_main == 0) + { + return clib_error_return (0, "vxlan-gpe_plugin.so is not loaded"); + } + sm->vlib_main = vm; sm->vnet_main = vnet_get_main (); sm->unix_time_0 = (u32) time (0); /* Store starting time */ @@ -231,7 +239,7 @@ ioam_vxlan_gpe_init (vlib_main_t * vm) vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input"); next_node = vlib_node_add_next (vm, vxlan_gpe_decap_node->index, decap_node_index); - vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node); + sm->gpe_main->register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node); vec_new (vxlan_gpe_ioam_sw_interface_t, pool_elts (sm->sw_interfaces)); sm->dst_by_ip4 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword)); @@ -243,7 +251,9 @@ ioam_vxlan_gpe_init (vlib_main_t * vm) return 0; } -VLIB_INIT_FUNCTION (ioam_vxlan_gpe_init); +VLIB_INIT_FUNCTION (ioam_vxlan_gpe_init) = { + .runs_after = VLIB_INITS ("vxlan_gpe_init"), +}; /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c index 327afc3fb61..f83c6e1ecc3 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c @@ -12,8 +12,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
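The ioam vxlan-gpe code above no longer links against vxlan_gpe_main directly: it resolves the symbol at init time with vlib_get_plugin_symbol, performs the decap registration through a function pointer, and orders its init after vxlan_gpe_init. Isolated, the pattern looks like the sketch below (example_init is a placeholder name):

#include <vlib/vlib.h>
#include <vlib/unix/plugin.h>
#include <plugins/vxlan-gpe/vxlan_gpe.h>

static clib_error_t *
example_init (vlib_main_t *vm)
{
  vxlan_gpe_main_t *gm =
    vlib_get_plugin_symbol ("vxlan-gpe_plugin.so", "vxlan_gpe_main");

  if (gm == 0)
    return clib_error_return (0, "vxlan-gpe_plugin.so is not loaded");

  /* gm and its registration hooks are safe to use from here on */
  return 0;
}

VLIB_INIT_FUNCTION (example_init) = {
  .runs_after = VLIB_INITS ("vxlan_gpe_init"),
};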
*/ -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <vnet/ip/format.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> #include <vnet/dpo/load_balance.h> @@ -423,7 +423,7 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t * vxlan4_gpe_tunnel_key_t key4; vxlan6_gpe_tunnel_key_t key6; uword *p; - vxlan_gpe_main_t *gm = &vxlan_gpe_main; + vxlan_gpe_main_t *gm = hm->gpe_main; vxlan_gpe_tunnel_t *t = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h index 0711b87abbe..f9374c9bb95 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h @@ -15,12 +15,11 @@ #ifndef __included_vxlan_gpe_ioam_h__ #define __included_vxlan_gpe_ioam_h__ -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> #include <vnet/ip/ip.h> - typedef struct vxlan_gpe_sw_interface_ { u32 sw_if_index; @@ -100,7 +99,8 @@ typedef struct vxlan_gpe_ioam_main_ vlib_main_t *vlib_main; /** State convenience vnet_main_t */ vnet_main_t *vnet_main; - + /** State convenience vxlan_gpe_main_t */ + vxlan_gpe_main_t *gpe_main; } vxlan_gpe_ioam_main_t; extern vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main; diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h index a7ef859ec58..515529ce794 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h @@ -15,8 +15,8 @@ #ifndef __included_vxlan_gpe_ioam_packet_h__ #define __included_vxlan_gpe_ioam_packet_h__ -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <vnet/ip/ip.h> diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c index 9c783c747d0..9b1b8b824ff 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c @@ -16,8 +16,8 @@ #include <vnet/vnet.h> #include <vppinfra/error.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <vppinfra/hash.h> #include <vppinfra/error.h> diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h index c0ad8d9d03a..db7fd5651b1 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h @@ -15,8 +15,8 @@ #ifndef __included_vxlan_gpe_ioam_util_h__ #define __included_vxlan_gpe_ioam_util_h__ -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <vnet/ip/ip.h> diff --git a/src/plugins/l2tp/l2tp.c b/src/plugins/l2tp/l2tp.c index 907468b5900..cada9dc2656 100644 --- a/src/plugins/l2tp/l2tp.c +++ b/src/plugins/l2tp/l2tp.c @@ -151,7 +151,7 @@ test_counters_command_fn (vlib_main_t * vm, u32 
session_index; u32 counter_index; u32 nincr = 0; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; pool_foreach (session, lm->sessions) { diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c index 7ae1884ff31..0c4f21a4a78 100644 --- a/src/plugins/lb/lb.c +++ b/src/plugins/lb/lb.c @@ -108,7 +108,7 @@ u8 *format_lb_main (u8 * s, va_list * args) s = format(s, " #vips: %u\n", pool_elts(lbm->vips)); s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1); - u32 thread_index; + clib_thread_index_t thread_index; for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) { lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht; if (h) { @@ -764,7 +764,7 @@ next: int lb_flush_vip_as (u32 vip_index, u32 as_index) { - u32 thread_index; + clib_thread_index_t thread_index; vlib_thread_main_t *tm = vlib_get_thread_main(); lb_main_t *lbm = &lb_main; diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c index a37fe11a9b4..1ddc556a8bf 100644 --- a/src/plugins/lb/node.c +++ b/src/plugins/lb/node.c @@ -124,7 +124,7 @@ format_lb_nat_trace (u8 * s, va_list * args) } lb_hash_t * -lb_get_sticky_table (u32 thread_index) +lb_get_sticky_table (clib_thread_index_t thread_index) { lb_main_t *lbm = &lb_main; lb_hash_t *sticky_ht = lbm->per_cpu[thread_index].sticky_ht; @@ -282,7 +282,7 @@ lb_node_fn (vlib_main_t * vm, { lb_main_t *lbm = &lb_main; u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 lb_time = lb_hash_time_now (vm); lb_hash_t *sticky_ht = lb_get_sticky_table (thread_index); diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api index e7eaa5a3669..8b0fdb5eb53 100644 --- a/src/plugins/linux-cp/lcp.api +++ b/src/plugins/linux-cp/lcp.api @@ -177,6 +177,42 @@ autoendian define lcp_itf_pair_details option in_progress; }; +/** \brief Enable linux-cp-punt-xc for a given ethertype + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ethertype - the ethertype to enable +*/ +autoreply define lcp_ethertype_enable +{ + u32 client_index; + u32 context; + u16 ethertype; +}; + +/** \brief Get the enabled ethertypes for linux-cp-punt-xc + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define lcp_ethertype_get +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply to get the enabled ethertypes for linux-cp-punt-xc + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param count - number of enabled ethertypes + @param ethertypes - array of enabled ethertypes +*/ +define lcp_ethertype_get_reply +{ + u32 context; + i32 retval; + u16 count; + u16 ethertypes[count]; +}; + service { rpc lcp_itf_pair_get returns lcp_itf_pair_get_reply stream lcp_itf_pair_details; diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c index 74421230e9d..0db502988d7 100644 --- a/src/plugins/linux-cp/lcp_api.c +++ b/src/plugins/linux-cp/lcp_api.c @@ -280,6 +280,40 @@ vl_api_lcp_itf_pair_replace_end_t_handler ( REPLY_MACRO (VL_API_LCP_ITF_PAIR_REPLACE_END_REPLY); } +static void +vl_api_lcp_ethertype_enable_t_handler (vl_api_lcp_ethertype_enable_t *mp) +{ + vl_api_lcp_ethertype_enable_reply_t *rmp; + int rv; + + rv = lcp_ethertype_enable (mp->ethertype); + + REPLY_MACRO (VL_API_LCP_ETHERTYPE_ENABLE_REPLY); +} + +static void 
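lcp_ethertype_get_reply above carries a variable-length u16 array; since these messages are not declared autoendian, count, retval and every array entry travel in network byte order. A client-side decode sketch (the function name is illustrative):

static void
example_handle_ethertype_get_reply (vl_api_lcp_ethertype_get_reply_t *rmp)
{
  u16 count;

  if (clib_net_to_host_u32 (rmp->retval) != 0)
    return;

  count = clib_net_to_host_u16 (rmp->count);
  for (u16 i = 0; i < count; i++)
    clib_warning ("linux-cp-punt-xc enabled for ethertype 0x%04x",
                  clib_net_to_host_u16 (rmp->ethertypes[i]));
}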
+vl_api_lcp_ethertype_get_t_handler (vl_api_lcp_ethertype_get_t *mp) +{ + vl_api_lcp_ethertype_get_reply_t *rmp; + ethernet_type_t *ethertypes = vec_new (ethernet_type_t, 0); + u16 count = 0; + int rv = 0; + + rv = lcp_ethertype_get_enabled (&ethertypes); + if (!rv) + count = vec_len (ethertypes); + + REPLY_MACRO3 (VL_API_LCP_ETHERTYPE_GET_REPLY, sizeof (u16) * count, ({ + rmp->count = htons (count); + for (int i = 0; i < count; i++) + { + rmp->ethertypes[i] = htons (ethertypes[i]); + } + })); + + vec_free (ethertypes); +} + /* * Set up the API message handling tables */ diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c index 0dcf600b301..e89afd2a753 100644 --- a/src/plugins/linux-cp/lcp_cli.c +++ b/src/plugins/linux-cp/lcp_cli.c @@ -337,6 +337,62 @@ VLIB_CLI_COMMAND (lcp_itf_pair_show_cmd_node, static) = { .is_mp_safe = 1, }; +static clib_error_t * +lcp_ethertype_enable_cmd (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + ethernet_type_t ethertype; + int rv; + + if (!unformat (input, "%U", unformat_ethernet_type_host_byte_order, + &ethertype)) + return clib_error_return (0, "Invalid ethertype"); + + rv = lcp_ethertype_enable (ethertype); + if (rv) + return clib_error_return (0, "Failed to enable ethertype (%d)", rv); + + return 0; +} + +VLIB_CLI_COMMAND (lcp_ethertype_enable_command, static) = { + .path = "lcp ethertype enable", + .short_help = + "lcp ethertype enable (<hex_ethertype_num>|<uc_ethertype_name>)", + .function = lcp_ethertype_enable_cmd, +}; + +static clib_error_t * +lcp_ethertype_show_cmd (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + ethernet_type_t *ethertypes = vec_new (ethernet_type_t, 0); + ethernet_type_t *etype; + int rv; + + rv = lcp_ethertype_get_enabled (&ethertypes); + if (rv) + { + vec_free (ethertypes); + return clib_error_return (0, "Failed to get enabled ethertypes (%d)", + rv); + } + + vec_foreach (etype, ethertypes) + { + vlib_cli_output (vm, "0x%04x", *etype); + } + + vec_free (ethertypes); + return 0; +} + +VLIB_CLI_COMMAND (lcp_ethertype_show_command, static) = { + .path = "show lcp ethertype", + .short_help = "show lcp ethertype", + .function = lcp_ethertype_show_cmd, +}; + clib_error_t * lcp_cli_init (vlib_main_t *vm) { diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c index 61665ad4146..31864f791af 100644 --- a/src/plugins/linux-cp/lcp_interface.c +++ b/src/plugins/linux-cp/lcp_interface.c @@ -162,6 +162,22 @@ lcp_itf_pair_get (u32 index) return pool_elt_at_index (lcp_itf_pair_pool, index); } +/* binary-direct API: for access from other plugins, bypassing VAPI. + * Important for parameters and return types to be simple C types, rather + * than structures. See src/plugins/sflow/sflow_dlapi.h for an example. 
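With the two debug CLI commands above, enabling the cross-connect punt path for an extra ethertype and inspecting the result would look like the lines below; the value 0x88b5 is an example only (the upper-case name form accepted by unformat_ethernet_type_host_byte_order also works), and whether a given ethertype can be claimed depends on lcp_ethertype_enable finding it unregistered or already pointing at linux-cp-punt-xc:

lcp ethertype enable 0x88b5
show lcp ethertype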
+ */ +u32 +lcp_itf_pair_get_vif_index_by_phy (u32 phy_sw_if_index) +{ + if (phy_sw_if_index < vec_len (lip_db_by_phy)) + { + lcp_itf_pair_t *lip = lcp_itf_pair_get (lip_db_by_phy[phy_sw_if_index]); + if (lip) + return lip->lip_vif_index; + } + return INDEX_INVALID; +} + index_t lcp_itf_pair_find_by_vif (u32 vif_index) { @@ -1214,6 +1230,53 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags) return 0; } +int +lcp_ethertype_enable (ethernet_type_t ethertype) +{ + ethernet_main_t *em = &ethernet_main; + ethernet_type_info_t *eti; + vlib_main_t *vm = vlib_get_main (); + vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "linux-cp-punt-xc"); + + if (!node) + return VNET_API_ERROR_UNIMPLEMENTED; + + eti = ethernet_get_type_info (em, ethertype); + if (!eti) + return VNET_API_ERROR_INVALID_VALUE; + + if (eti->node_index != ~0 && eti->node_index != node->index) + return VNET_API_ERROR_INVALID_REGISTRATION; + + ethernet_register_input_type (vm, ethertype, node->index); + return 0; +} + +int +lcp_ethertype_get_enabled (ethernet_type_t **ethertypes_vec) +{ + ethernet_main_t *em = &ethernet_main; + ethernet_type_info_t *eti; + vlib_main_t *vm = vlib_get_main (); + vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "linux-cp-punt-xc"); + + if (!ethertypes_vec) + return VNET_API_ERROR_INVALID_ARGUMENT; + + if (!node) + return VNET_API_ERROR_UNIMPLEMENTED; + + vec_foreach (eti, em->type_infos) + { + if (eti->node_index == node->index) + { + vec_add1 (*ethertypes_vec, eti->type); + } + } + + return 0; +} + VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down); static clib_error_t * diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h index cfcd3925a15..8cf6d3f4da1 100644 --- a/src/plugins/linux-cp/lcp_interface.h +++ b/src/plugins/linux-cp/lcp_interface.h @@ -18,6 +18,7 @@ #include <vnet/dpo/dpo.h> #include <vnet/adj/adj.h> #include <vnet/ip/ip_types.h> +#include <vnet/ethernet/ethernet.h> #include <plugins/linux-cp/lcp.h> @@ -198,6 +199,18 @@ void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip); void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi); void lcp_itf_pair_sync_state_all (); +/** + * Enable linux-cp-punt-xc for a given ethertype. + * @param ethertype - ethertype to enable + */ +int lcp_ethertype_enable (ethernet_type_t ethertype); + +/** + * Get the list of ethertypes enabled for linux-cp-punt-xc. 
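lcp_itf_pair_get_vif_index_by_phy above is meant to be resolved by symbol lookup rather than called through the message API. A consumer-side sketch, assuming the plugin object is named linux_cp_plugin.so (the typedef and init function are illustrative, not part of the patch):

#include <vlib/vlib.h>
#include <vlib/unix/plugin.h>

typedef u32 (*lcp_get_vif_by_phy_fn) (u32 phy_sw_if_index);

static lcp_get_vif_by_phy_fn get_vif_by_phy;

static clib_error_t *
example_consumer_init (vlib_main_t *vm)
{
  get_vif_by_phy = (lcp_get_vif_by_phy_fn) vlib_get_plugin_symbol (
    "linux_cp_plugin.so", "lcp_itf_pair_get_vif_index_by_phy");
  /* get_vif_by_phy stays 0 when the linux-cp plugin is not loaded */
  return 0;
}

VLIB_INIT_FUNCTION (example_consumer_init);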
+ * @param ethertypes_vec - pointer to a vector to store the list of ethertypes + */ +int lcp_ethertype_get_enabled (ethernet_type_t **ethertypes_vec); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/linux-cp/lcp_nl.c b/src/plugins/linux-cp/lcp_nl.c index 916877939f0..55d2ea54245 100644 --- a/src/plugins/linux-cp/lcp_nl.c +++ b/src/plugins/linux-cp/lcp_nl.c @@ -29,7 +29,7 @@ #include <netlink/route/addr.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vppinfra/error.h> #include <vppinfra/linux/netns.h> diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c index 241cc5e4bff..9fa1aa5bd66 100644 --- a/src/plugins/linux-cp/lcp_node.c +++ b/src/plugins/linux-cp/lcp_node.c @@ -39,40 +39,51 @@ typedef enum { -#define _(sym, str) LIP_PUNT_NEXT_##sym, +#define _(sym, str) LIP_PUNT_XC_NEXT_##sym, foreach_lip_punt #undef _ - LIP_PUNT_N_NEXT, -} lip_punt_next_t; + LIP_PUNT_XC_N_NEXT, +} lip_punt_xc_next_t; -typedef struct lip_punt_trace_t_ +typedef struct lip_punt_xc_trace_t_ { + bool is_xc; u32 phy_sw_if_index; u32 host_sw_if_index; -} lip_punt_trace_t; +} lip_punt_xc_trace_t; /* packet trace format function */ static u8 * -format_lip_punt_trace (u8 *s, va_list *args) +format_lip_punt_xc_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - lip_punt_trace_t *t = va_arg (*args, lip_punt_trace_t *); + lip_punt_xc_trace_t *t = va_arg (*args, lip_punt_xc_trace_t *); - s = - format (s, "lip-punt: %u -> %u", t->phy_sw_if_index, t->host_sw_if_index); + if (t->is_xc) + { + s = format (s, "lip-xc: %u -> %u", t->host_sw_if_index, + t->phy_sw_if_index); + } + else + { + s = format (s, "lip-punt: %u -> %u", t->phy_sw_if_index, + t->host_sw_if_index); + } return s; } /** * Pass punted packets from the PHY to the HOST. + * Conditionally x-connect packets from the HOST to the PHY. */ -VLIB_NODE_FN (lip_punt_node) -(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +static_always_inline u32 +lip_punt_xc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool check_xc) { u32 n_left_from, *from, *to_next, n_left_to_next; - lip_punt_next_t next_index; + lip_punt_xc_next_t next_index; next_index = node->cached_next_index; n_left_from = frame->n_vectors; @@ -89,6 +100,7 @@ VLIB_NODE_FN (lip_punt_node) u32 next0 = ~0; u32 bi0, lipi0; u32 sw_if_index0; + bool is_xc0 = 0; u8 len0; bi0 = to_next[0] = from[0]; @@ -97,18 +109,33 @@ VLIB_NODE_FN (lip_punt_node) to_next += 1; n_left_from -= 1; n_left_to_next -= 1; - next0 = LIP_PUNT_NEXT_DROP; + next0 = LIP_PUNT_XC_NEXT_DROP; b0 = vlib_get_buffer (vm, bi0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; lipi0 = lcp_itf_pair_find_by_phy (sw_if_index0); - if (PREDICT_FALSE (lipi0 == INDEX_INVALID)) - goto trace0; + + /* + * lip_punt_node: expect sw_if_index0 is phy in an itf pair + * lip_punt_xc_node: if sw_if_index0 is not phy, expect it is host + */ + if (!check_xc && (PREDICT_FALSE (lipi0 == INDEX_INVALID))) + { + goto trace0; + } + else if (check_xc && (lipi0 == INDEX_INVALID)) + { + is_xc0 = 1; + lipi0 = lcp_itf_pair_find_by_host (sw_if_index0); + if (PREDICT_FALSE (lipi0 == INDEX_INVALID)) + goto trace0; + } lip0 = lcp_itf_pair_get (lipi0); - next0 = LIP_PUNT_NEXT_IO; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip0->lip_host_sw_if_index; + next0 = LIP_PUNT_XC_NEXT_IO; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + is_xc0 ? 
lip0->lip_phy_sw_if_index : lip0->lip_host_sw_if_index; if (PREDICT_TRUE (lip0->lip_host_type == LCP_ITF_HOST_TAP)) { @@ -129,10 +156,22 @@ VLIB_NODE_FN (lip_punt_node) trace0: if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) { - lip_punt_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - t->phy_sw_if_index = sw_if_index0; - t->host_sw_if_index = - (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_host_sw_if_index; + lip_punt_xc_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + + t->is_xc = is_xc0; + if (is_xc0) + { + t->phy_sw_if_index = + (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_phy_sw_if_index; + t->host_sw_if_index = sw_if_index0; + } + else + { + t->phy_sw_if_index = sw_if_index0; + t->host_sw_if_index = + (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_host_sw_if_index; + } } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, @@ -145,16 +184,41 @@ VLIB_NODE_FN (lip_punt_node) return frame->n_vectors; } +VLIB_NODE_FN (lip_punt_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lip_punt_xc_inline (vm, node, frame, false /* xc */)); +} + +VLIB_NODE_FN (lip_punt_xc_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lip_punt_xc_inline (vm, node, frame, true /* xc */)); +} + VLIB_REGISTER_NODE (lip_punt_node) = { .name = "linux-cp-punt", .vector_size = sizeof (u32), - .format_trace = format_lip_punt_trace, + .format_trace = format_lip_punt_xc_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = LIP_PUNT_XC_N_NEXT, + .next_nodes = { + [LIP_PUNT_XC_NEXT_DROP] = "error-drop", + [LIP_PUNT_XC_NEXT_IO] = "interface-output", + }, +}; + +VLIB_REGISTER_NODE (lip_punt_xc_node) = { + .name = "linux-cp-punt-xc", + .vector_size = sizeof (u32), + .format_trace = format_lip_punt_xc_trace, .type = VLIB_NODE_TYPE_INTERNAL, - .n_next_nodes = LIP_PUNT_N_NEXT, + .n_next_nodes = LIP_PUNT_XC_N_NEXT, .next_nodes = { - [LIP_PUNT_NEXT_DROP] = "error-drop", - [LIP_PUNT_NEXT_IO] = "interface-output", + [LIP_PUNT_XC_NEXT_DROP] = "error-drop", + [LIP_PUNT_XC_NEXT_IO] = "interface-output", }, }; @@ -190,7 +254,7 @@ VLIB_NODE_FN (lcp_punt_l3_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { u32 n_left_from, *from, *to_next, n_left_to_next; - lip_punt_next_t next_index; + lip_punt_xc_next_t next_index; next_index = node->cached_next_index; n_left_from = frame->n_vectors; diff --git a/src/plugins/linux-cp/lcp_router.c b/src/plugins/linux-cp/lcp_router.c index 0efd53e64ef..27f53357a4d 100644 --- a/src/plugins/linux-cp/lcp_router.c +++ b/src/plugins/linux-cp/lcp_router.c @@ -17,7 +17,6 @@ #include <linux/if.h> #include <linux/mpls.h> -//#include <vlib/vlib.h> #include <vlib/unix/plugin.h> #include <linux-cp/lcp_nl.h> #include <linux-cp/lcp_interface.h> @@ -1543,6 +1542,12 @@ const nl_vft_t lcp_router_vft = { .cb = lcp_router_route_sync_end }, }; +static void +lcp_lcp_router_interface_del_cb (lcp_itf_pair_t *lip) +{ + lcp_router_ip6_mroutes_add_del (lip->lip_phy_sw_if_index, 0); +} + static clib_error_t * lcp_router_init (vlib_main_t *vm) { @@ -1550,6 +1555,12 @@ lcp_router_init (vlib_main_t *vm) nl_register_vft (&lcp_router_vft); + lcp_itf_pair_vft_t lcp_router_interface_del_vft = { + .pair_del_fn = lcp_lcp_router_interface_del_cb, + }; + + lcp_itf_pair_register_vft (&lcp_router_interface_del_vft); + /* * allocate 2 route sources. The low priority source will be for * dynamic routes. 
If a dynamic route daemon (FRR) tries to remove its diff --git a/src/plugins/lisp/lisp-gpe/decap.c b/src/plugins/lisp/lisp-gpe/decap.c index 18e32675a32..b568fef24fa 100644 --- a/src/plugins/lisp/lisp-gpe/decap.c +++ b/src/plugins/lisp/lisp-gpe/decap.c @@ -102,9 +102,9 @@ next_index_to_iface (lisp_gpe_main_t * lgm, u32 next_index) } static_always_inline void -incr_decap_stats (vnet_main_t * vnm, u32 thread_index, u32 length, - u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets, - u32 * n_bytes) +incr_decap_stats (vnet_main_t *vnm, clib_thread_index_t thread_index, + u32 length, u32 sw_if_index, u32 *last_sw_if_index, + u32 *n_packets, u32 *n_bytes) { vnet_interface_main_t *im; diff --git a/src/plugins/lisp/lisp-gpe/interface.c b/src/plugins/lisp/lisp-gpe/interface.c index ed2b08f9aaf..5d3ad9463ea 100644 --- a/src/plugins/lisp/lisp-gpe/interface.c +++ b/src/plugins/lisp/lisp-gpe/interface.c @@ -233,7 +233,7 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, { u32 n_left_from, next_index, *from, *to_next; lisp_gpe_main_t *lgm = &lisp_gpe_main; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; from = vlib_frame_vector_args (from_frame); diff --git a/src/plugins/mactime/node.c b/src/plugins/mactime/node.c index fad487e666e..dfe7a26c2af 100644 --- a/src/plugins/mactime/node.c +++ b/src/plugins/mactime/node.c @@ -87,7 +87,7 @@ mactime_node_inline (vlib_main_t * vm, clib_bihash_8_8_t *lut = &mm->lookup_table; u32 packets_ok = 0; f64 now; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; u8 arc = im->output_feature_arc_index; diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c index 652808e6d37..1ff585ceb3a 100644 --- a/src/plugins/map/ip4_map.c +++ b/src/plugins/map/ip4_map.c @@ -154,7 +154,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next_index = node->cached_next_index; map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 *buffer0 = 0; while (n_left_from > 0) diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c index fe29af458a2..ec89056e0cc 100644 --- a/src/plugins/map/ip4_map_t.c +++ b/src/plugins/map/ip4_map_t.c @@ -117,7 +117,7 @@ ip4_map_t_icmp (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { @@ -549,7 +549,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c index 3d9b21dfcd9..33d5a0ebbd3 100644 --- a/src/plugins/map/ip6_map.c +++ b/src/plugins/map/ip6_map.c @@ -166,7 +166,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_node_get_runtime (vm, ip6_map_node.index); map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = 
mm->domain_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -559,7 +559,7 @@ ip6_map_post_ip4_reass (vlib_main_t * vm, vlib_node_get_runtime (vm, ip6_map_post_ip4_reass_node.index); map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -651,7 +651,7 @@ ip6_map_icmp_relay (vlib_main_t * vm, vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index); map_main_t *mm = &map_main; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u16 *fragment_ids, *fid; from = vlib_frame_vector_args (frame); diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c index f8d894a013a..854410eb462 100644 --- a/src/plugins/map/ip6_map_t.c +++ b/src/plugins/map/ip6_map_t.c @@ -118,7 +118,7 @@ ip6_map_t_icmp (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { @@ -494,7 +494,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_node_get_runtime (vm, ip6_map_t_node.index); map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 7e3dd44db2c..ad8512ac81e 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -31,7 +31,7 @@ #include <limits.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vnet/plugin/plugin.h> #include <vnet/ethernet/ethernet.h> #include <vnet/interface/rx_queue_funcs.h> @@ -379,6 +379,12 @@ memif_connect (memif_if_t * mif) CLIB_CACHE_LINE_BYTES); vec_foreach (dma_info, mq->dma_info) { + vlib_buffer_t *bt = &dma_info->data.buffer_template; + + clib_memset (bt, 0, sizeof (*bt)); + bt->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + bt->total_length_not_including_first_buffer = 0; + vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~0; vec_validate_aligned (dma_info->data.desc_data, pow2_mask (max_log2_ring_sz), CLIB_CACHE_LINE_BYTES); diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index d483f92b2fe..08b248df534 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -504,7 +504,7 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 n_left_to_next; u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; vlib_buffer_t *buffer_ptrs[MEMIF_RX_VECTOR_SZ]; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data, thread_index); u16 cur_slot, ring_size, n_slots, mask; @@ -763,7 +763,7 @@ memif_device_input_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u16 slot, s0; memif_desc_t *d0; vlib_buffer_t *b0, *b1, *b2, *b3; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; memif_per_thread_data_t *ptd = vec_elt_at_index 
(mm->per_thread_data, thread_index); u16 cur_slot, last_slot, ring_size, n_slots, mask, head; @@ -1061,7 +1061,7 @@ CLIB_MARCH_FN (memif_dma_completion_cb, void, vlib_main_t *vm, { memif_main_t *mm = &memif_main; memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16); - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 n_left_to_next = 0; u16 nexts[MEMIF_RX_VECTOR_SZ], *next; u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi; diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h index 43455d00522..af82a8bfaa3 100644 --- a/src/plugins/memif/private.h +++ b/src/plugins/memif/private.h @@ -76,7 +76,7 @@ #define memif_file_del(a) \ do \ { \ - memif_log_debug (0, "clib_file_del idx %u", a - file_main.file_pool); \ + memif_log_debug (0, "clib_file_del idx %u", (a)->index); \ clib_file_del (&file_main, a); \ } \ while (0) diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c index 001f26f13ef..c2b11fc2ecb 100644 --- a/src/plugins/memif/socket.c +++ b/src/plugins/memif/socket.c @@ -30,7 +30,7 @@ #include <limits.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vnet/plugin/plugin.h> #include <vnet/ethernet/ethernet.h> #include <vpp/app/version.h> diff --git a/src/plugins/nat/det44/det44.h b/src/plugins/nat/det44/det44.h index e576bfb65e8..683f554f03c 100644 --- a/src/plugins/nat/det44/det44.h +++ b/src/plugins/nat/det44/det44.h @@ -38,7 +38,6 @@ #include <vnet/ip/reass/ip4_sv_reass.h> #include <nat/lib/lib.h> -#include <nat/lib/inlines.h> #include <nat/lib/ipfix_logging.h> #include <nat/lib/nat_proto.h> diff --git a/src/plugins/nat/det44/det44_in2out.c b/src/plugins/nat/det44/det44_in2out.c index 3f5e05a064c..39a9ecabac7 100644 --- a/src/plugins/nat/det44/det44_in2out.c +++ b/src/plugins/nat/det44/det44_in2out.c @@ -21,6 +21,7 @@ #include <vlib/vlib.h> #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <vnet/ip/ip4_to_ip6.h> #include <vnet/fib/ip4_fib.h> #include <vppinfra/error.h> #include <vppinfra/elog.h> @@ -29,7 +30,6 @@ #include <nat/det44/det44_inlines.h> #include <nat/lib/lib.h> -#include <nat/lib/inlines.h> #include <nat/lib/nat_inlines.h> typedef enum diff --git a/src/plugins/nat/det44/det44_out2in.c b/src/plugins/nat/det44/det44_out2in.c index ab6acd4f8e9..dd89606ff10 100644 --- a/src/plugins/nat/det44/det44_out2in.c +++ b/src/plugins/nat/det44/det44_out2in.c @@ -21,6 +21,7 @@ #include <vlib/vlib.h> #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <vnet/ip/ip4_to_ip6.h> #include <vnet/fib/ip4_fib.h> #include <vppinfra/error.h> #include <vppinfra/elog.h> @@ -29,7 +30,6 @@ #include <nat/det44/det44_inlines.h> #include <nat/lib/lib.h> -#include <nat/lib/inlines.h> #include <nat/lib/nat_inlines.h> typedef enum diff --git a/src/plugins/nat/dslite/dslite.h b/src/plugins/nat/dslite/dslite.h index f05670c9bf5..979afb476b7 100644 --- a/src/plugins/nat/dslite/dslite.h +++ b/src/plugins/nat/dslite/dslite.h @@ -22,7 +22,6 @@ #include <nat/lib/lib.h> #include <nat/lib/alloc.h> -#include <nat/lib/inlines.h> typedef struct { diff --git a/src/plugins/nat/dslite/dslite_in2out.c b/src/plugins/nat/dslite/dslite_in2out.c index 522c3cf4123..806969f5f4d 100644 --- a/src/plugins/nat/dslite/dslite_in2out.c +++ b/src/plugins/nat/dslite/dslite_in2out.c @@ -12,6 +12,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include <vnet/ip/ip4_to_ip6.h> #include <nat/dslite/dslite.h> #include <nat/lib/nat_syslog.h> diff --git a/src/plugins/nat/dslite/dslite_out2in.c b/src/plugins/nat/dslite/dslite_out2in.c index 531bbb468bb..9ec48d458e5 100644 --- a/src/plugins/nat/dslite/dslite_out2in.c +++ b/src/plugins/nat/dslite/dslite_out2in.c @@ -12,6 +12,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include <vnet/ip/ip4_to_ip6.h> #include <nat/dslite/dslite.h> typedef enum diff --git a/src/plugins/nat/lib/ipfix_logging.c b/src/plugins/nat/lib/ipfix_logging.c index 593fa09f7e2..f569ccd1918 100644 --- a/src/plugins/nat/lib/ipfix_logging.c +++ b/src/plugins/nat/lib/ipfix_logging.c @@ -22,7 +22,6 @@ #include <vlibmemory/api.h> #include <vppinfra/atomics.h> #include <nat/lib/ipfix_logging.h> -#include <nat/lib/inlines.h> vlib_node_registration_t nat_ipfix_flush_node; nat_ipfix_logging_main_t nat_ipfix_logging_main; diff --git a/src/plugins/nat/lib/nat_syslog.c b/src/plugins/nat/lib/nat_syslog.c index 98777ebf280..93756a561bc 100644 --- a/src/plugins/nat/lib/nat_syslog.c +++ b/src/plugins/nat/lib/nat_syslog.c @@ -21,7 +21,6 @@ #include <vnet/syslog/syslog.h> #include <nat/lib/nat_syslog.h> -#include <nat/lib/inlines.h> #include <nat/lib/nat_syslog_constants.h> diff --git a/src/plugins/nat/nat44-ed/nat44_ed.h b/src/plugins/nat/nat44-ed/nat44_ed.h index 706511475cf..c3a959b0635 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed.h +++ b/src/plugins/nat/nat44-ed/nat44_ed.h @@ -31,7 +31,6 @@ #include <vlibapi/api.h> #include <nat/lib/lib.h> -#include <nat/lib/inlines.h> /* default number of worker handoff frame queue elements */ #define NAT_FQ_NELTS_DEFAULT 64 diff --git a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h index 04e5236b7f9..8cd93f263c6 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h +++ b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h @@ -27,6 +27,7 @@ #include <nat/lib/log.h> #include <nat/lib/ipfix_logging.h> #include <nat/nat44-ed/nat44_ed.h> +#include <vnet/ip/ip4_to_ip6.h> always_inline void init_ed_k (clib_bihash_kv_16_8_t *kv, u32 l_addr, u16 l_port, u32 r_addr, diff --git a/src/plugins/nat/nat44-ei/nat44_ei.c b/src/plugins/nat/nat44-ei/nat44_ei.c index e16625a2946..d1959f72ae7 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei.c +++ b/src/plugins/nat/nat44-ei/nat44_ei.c @@ -21,6 +21,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include <vnet/ip/ip4.h> +#include <vnet/ip/ip4_to_ip6.h> #include <vnet/ip/ip_table.h> #include <vnet/ip/reass/ip4_sv_reass.h> #include <vnet/fib/fib_table.h> diff --git a/src/plugins/nat/nat44-ei/nat44_ei.h b/src/plugins/nat/nat44-ei/nat44_ei.h index b4aa0f26c0b..786fb0cfc2c 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei.h +++ b/src/plugins/nat/nat44-ei/nat44_ei.h @@ -35,7 +35,6 @@ #include <vppinfra/hash.h> #include <nat/lib/lib.h> -#include <nat/lib/inlines.h> #include <nat/lib/nat_proto.h> /* default number of worker handoff frame queue elements */ diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c index 3b981d69986..2fbf2832d5e 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c @@ -21,6 +21,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <vnet/ip/ip4_to_ip6.h> #include <vnet/ethernet/ethernet.h> #include <vnet/udp/udp_local.h> #include <vnet/fib/ip4_fib.h> diff --git a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c 
b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c index 5d91cb04f7c..805a6962868 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c @@ -21,6 +21,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <vnet/ip/ip4_to_ip6.h> #include <vnet/ethernet/ethernet.h> #include <vnet/udp/udp_local.h> #include <vnet/fib/ip4_fib.h> diff --git a/src/plugins/nat/nat64/nat64.c b/src/plugins/nat/nat64/nat64.c index 950eea60e5e..c59cfbbbd55 100644 --- a/src/plugins/nat/nat64/nat64.c +++ b/src/plugins/nat/nat64/nat64.c @@ -15,6 +15,7 @@ #include <vppinfra/crc32.h> #include <vnet/fib/ip4_fib.h> +#include <vnet/ip/ip4_to_ip6.h> #include <vnet/ip/reass/ip4_sv_reass.h> #include <vnet/ip/reass/ip6_sv_reass.h> diff --git a/src/plugins/nat/nat64/nat64.h b/src/plugins/nat/nat64/nat64.h index 9eb8d915390..2577880c7a4 100644 --- a/src/plugins/nat/nat64/nat64.h +++ b/src/plugins/nat/nat64/nat64.h @@ -30,7 +30,6 @@ #include <vnet/ip/reass/ip4_sv_reass.h> #include <nat/lib/lib.h> -#include <nat/lib/inlines.h> #include <nat/lib/nat_inlines.h> #include <nat/nat64/nat64_db.h> diff --git a/src/plugins/nat/nat64/nat64_db.c b/src/plugins/nat/nat64/nat64_db.c index e4e9febcb12..6ba77c58965 100644 --- a/src/plugins/nat/nat64/nat64_db.c +++ b/src/plugins/nat/nat64/nat64_db.c @@ -16,7 +16,6 @@ #include <vnet/fib/fib_table.h> #include <nat/lib/ipfix_logging.h> #include <nat/lib/nat_syslog.h> -#include <nat/lib/inlines.h> #include <nat/nat64/nat64_db.h> int diff --git a/src/plugins/nat/pnat/pnat.api b/src/plugins/nat/pnat/pnat.api index de555c41412..82c2de49682 100644 --- a/src/plugins/nat/pnat/pnat.api +++ b/src/plugins/nat/pnat/pnat.api @@ -165,6 +165,23 @@ define pnat_interfaces_details vl_api_pnat_mask_t lookup_mask[2]; /* PNAT_ATTACHMENT_POINT_MAX */ }; + +autoendian define pnat_flow_lookup +{ + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; + vl_api_pnat_attachment_point_t attachment; + vl_api_pnat_match_tuple_t match; +}; + +autoendian define pnat_flow_lookup_reply +{ + u32 context; + i32 retval; + u32 binding_index; +}; + counters pnat { none { severity info; diff --git a/src/plugins/nat/pnat/pnat_api.c b/src/plugins/nat/pnat/pnat_api.c index a4e7ff192bf..f627307628d 100644 --- a/src/plugins/nat/pnat/pnat_api.c +++ b/src/plugins/nat/pnat/pnat_api.c @@ -93,6 +93,20 @@ static void vl_api_pnat_binding_del_t_handler(vl_api_pnat_binding_del_t *mp) { REPLY_MACRO_END(VL_API_PNAT_BINDING_DEL_REPLY); } +static void vl_api_pnat_flow_lookup_t_handler(vl_api_pnat_flow_lookup_t *mp) { + pnat_main_t *pm = &pnat_main; + vl_api_pnat_flow_lookup_reply_t *rmp; + u32 binding_index; + int rv = 0; + binding_index = + pnat_flow_lookup(mp->sw_if_index, mp->attachment, &mp->match); + if (binding_index == ~0) { + rv = -1; + } + REPLY_MACRO2_END(VL_API_PNAT_FLOW_LOOKUP_REPLY, + ({ rmp->binding_index = binding_index; })); +} + /* * Workaround for a bug in vppapigen that doesn't register the endian handler * for _details messages. 
When that's fixed it should be possible to use diff --git a/src/plugins/netmap/netmap.c b/src/plugins/netmap/netmap.c index ebef215eb3b..0d92d03151c 100644 --- a/src/plugins/netmap/netmap.c +++ b/src/plugins/netmap/netmap.c @@ -22,7 +22,7 @@ #include <fcntl.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vnet/ethernet/ethernet.h> #include <netmap/net_netmap.h> @@ -53,7 +53,7 @@ close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) { if (nif->clib_file_index != ~0) { - clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index); + clib_file_del_by_index (&file_main, nif->clib_file_index); nif->clib_file_index = ~0; } else if (nif->fd > -1) diff --git a/src/plugins/netmap/node.c b/src/plugins/netmap/node.c index 6169847fa79..85e7db5808b 100644 --- a/src/plugins/netmap/node.c +++ b/src/plugins/netmap/node.c @@ -98,7 +98,7 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_free_bufs; struct netmap_ring *ring; int cur_ring; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); if (nif->per_interface_next_index != ~0) @@ -255,7 +255,7 @@ VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm, { int i; u32 n_rx_packets = 0; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; netmap_main_t *nm = &netmap_main; netmap_if_t *nmi; diff --git a/src/plugins/nsh/nsh.c b/src/plugins/nsh/nsh.c index a2c24e27b26..06dd45be944 100644 --- a/src/plugins/nsh/nsh.c +++ b/src/plugins/nsh/nsh.c @@ -20,7 +20,7 @@ #include <nsh/nsh.h> #include <gre/gre.h> #include <vxlan/vxlan.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <vnet/l2/l2_classify.h> #include <vnet/adj/adj.h> #include <vpp/app/version.h> @@ -182,7 +182,8 @@ nsh_md2_set_next_ioam_export_override (uword next) clib_error_t * nsh_init (vlib_main_t * vm) { - vlib_node_t *node, *gre4_input, *gre6_input; + vlib_node_t *node, *gre4_input, *gre6_input, *vxlan4_gpe_input, + *vxlan6_gpe_input; nsh_main_t *nm = &nsh_main; clib_error_t *error = 0; uword next_node; @@ -222,20 +223,24 @@ nsh_init (vlib_main_t * vm) /* Add dispositions to nodes that feed nsh-input */ //alagalah - validate we don't really need to use the node value + vxlan4_gpe_input = vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input"); + vxlan6_gpe_input = vlib_get_node_by_name (vm, (u8 *) "vxlan6-gpe-input"); + nm->vgm = vlib_get_plugin_symbol ("vxlan-gpe_plugin.so", "vxlan_gpe_main"); + if (vxlan4_gpe_input == 0 || vxlan6_gpe_input == 0 || nm->vgm == 0) + { + error = clib_error_return (0, "vxlan_gpe_plugin.so is not loaded"); + return error; + } next_node = - vlib_node_add_next (vm, vxlan4_gpe_input_node.index, - nm->nsh_input_node_index); - vlib_node_add_next (vm, vxlan4_gpe_input_node.index, - nm->nsh_proxy_node_index); - vlib_node_add_next (vm, vxlan4_gpe_input_node.index, + vlib_node_add_next (vm, vxlan4_gpe_input->index, nm->nsh_input_node_index); + vlib_node_add_next (vm, vxlan4_gpe_input->index, nm->nsh_proxy_node_index); + vlib_node_add_next (vm, vxlan4_gpe_input->index, nsh_aware_vnf_proxy_node.index); - vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_NSH, next_node); + nm->vgm->register_decap_protocol (VXLAN_GPE_PROTOCOL_NSH, next_node); - vlib_node_add_next (vm, vxlan6_gpe_input_node.index, - nm->nsh_input_node_index); - vlib_node_add_next (vm, vxlan6_gpe_input_node.index, - nm->nsh_proxy_node_index); - 
vlib_node_add_next (vm, vxlan6_gpe_input_node.index, + vlib_node_add_next (vm, vxlan6_gpe_input->index, nm->nsh_input_node_index); + vlib_node_add_next (vm, vxlan6_gpe_input->index, nm->nsh_proxy_node_index); + vlib_node_add_next (vm, vxlan6_gpe_input->index, nsh_aware_vnf_proxy_node.index); gre4_input = vlib_get_node_by_name (vm, (u8 *) "gre4-input"); @@ -280,7 +285,9 @@ nsh_init (vlib_main_t * vm) return error; } -VLIB_INIT_FUNCTION (nsh_init); +VLIB_INIT_FUNCTION (nsh_init) = { + .runs_after = VLIB_INITS ("vxlan_gpe_init"), +}; VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, diff --git a/src/plugins/nsh/nsh.h b/src/plugins/nsh/nsh.h index 86a9a7e95c3..c408ddb99a2 100644 --- a/src/plugins/nsh/nsh.h +++ b/src/plugins/nsh/nsh.h @@ -18,6 +18,7 @@ #include <vnet/vnet.h> #include <nsh/nsh_packet.h> #include <vnet/ip/ip4_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> typedef struct { u16 class; @@ -166,6 +167,10 @@ typedef struct { /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; + + /* vxlan gpe plugin */ + vxlan_gpe_main_t *vgm; + } nsh_main_t; extern nsh_main_t nsh_main; diff --git a/src/plugins/nsh/nsh_pop.c b/src/plugins/nsh/nsh_pop.c index 8de319e158b..d66cfc9de27 100644 --- a/src/plugins/nsh/nsh_pop.c +++ b/src/plugins/nsh/nsh_pop.c @@ -19,7 +19,7 @@ #include <vnet/plugin/plugin.h> #include <nsh/nsh.h> #include <vnet/gre/packet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <vnet/l2/l2_classify.h> #include <vlibapi/api.h> diff --git a/src/plugins/ping/ping_api.c b/src/plugins/ping/ping_api.c index 5578fa560f2..a5af1033d0e 100644 --- a/src/plugins/ping/ping_api.c +++ b/src/plugins/ping/ping_api.c @@ -122,16 +122,22 @@ vl_api_want_ping_finished_events_t_handler ( while ((sleep_interval = time_ping_sent + ping_interval - vlib_time_now (vm)) > 0.0) { - uword event_type; + uword event_count; vlib_process_wait_for_event_or_clock (vm, sleep_interval); - event_type = vlib_process_get_events (vm, 0); - if (event_type == ~0) + if (dst_addr.version == AF_IP4) + event_count = + vlib_process_get_events_with_type (vm, 0, PING_RESPONSE_IP4); + else if (dst_addr.version == AF_IP6) + event_count = + vlib_process_get_events_with_type (vm, 0, PING_RESPONSE_IP6); + else break; - if (event_type == PING_RESPONSE_IP4 || - event_type == PING_RESPONSE_IP6) - reply_count += 1; + if (event_count == 0) + break; + + reply_count += 1; } } diff --git a/src/plugins/pppoe/pppoe_cp_node.c b/src/plugins/pppoe/pppoe_cp_node.c index c96559679f0..efafc448f98 100644 --- a/src/plugins/pppoe/pppoe_cp_node.c +++ b/src/plugins/pppoe/pppoe_cp_node.c @@ -73,7 +73,7 @@ VLIB_NODE_FN (pppoe_cp_dispatch_node) (vlib_main_t * vm, vnet_main_t * vnm = pem->vnet_main; vnet_interface_main_t * im = &vnm->interface_main; u32 pkts_decapsulated = 0; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; pppoe_entry_key_t cached_key; pppoe_entry_result_t cached_result; diff --git a/src/plugins/pppoe/pppoe_decap.c b/src/plugins/pppoe/pppoe_decap.c index 854364b1aca..17fbeaabb43 100644 --- a/src/plugins/pppoe/pppoe_decap.c +++ b/src/plugins/pppoe/pppoe_decap.c @@ -54,7 +54,7 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm, vnet_main_t * vnm = pem->vnet_main; vnet_interface_main_t * im = &vnm->interface_main; u32 pkts_decapsulated = 0; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 
stats_sw_if_index, stats_n_packets, stats_n_bytes; pppoe_entry_key_t cached_key; pppoe_entry_result_t cached_result; diff --git a/src/plugins/prom/prom.c b/src/plugins/prom/prom.c index 475e98b1038..0ddc96f7474 100644 --- a/src/plugins/prom/prom.c +++ b/src/plugins/prom/prom.c @@ -382,13 +382,16 @@ prom_stat_segment_client_init (void) stat_segment_adjust (scm, (void *) scm->shared_header->directory_vector); } -void +clib_error_t * prom_enable (vlib_main_t *vm) { prom_main_t *pm = &prom_main; pm->register_url = vlib_get_plugin_symbol ("http_static_plugin.so", "hss_register_url_handler"); + if (pm->register_url == 0) + return clib_error_return (0, "http_static_plugin.so not loaded"); + pm->send_data = vlib_get_plugin_symbol ("http_static_plugin.so", "hss_session_send_data"); pm->register_url (prom_stats_dump, "stats.prom", HTTP_REQ_GET); @@ -400,6 +403,8 @@ prom_enable (vlib_main_t *vm) prom_scraper_process_enable (vm); prom_stat_segment_client_init (); + + return 0; } static clib_error_t * diff --git a/src/plugins/prom/prom.h b/src/plugins/prom/prom.h index 898e4c209d1..a06302c1ff9 100644 --- a/src/plugins/prom/prom.h +++ b/src/plugins/prom/prom.h @@ -44,7 +44,7 @@ typedef enum prom_process_evt_codes_ PROM_SCRAPER_EVT_RUN, } prom_process_evt_codes_t; -void prom_enable (vlib_main_t *vm); +clib_error_t *prom_enable (vlib_main_t *vm); prom_main_t *prom_get_main (void); void prom_stat_patterns_set (u8 **patterns); diff --git a/src/plugins/prom/prom_cli.c b/src/plugins/prom/prom_cli.c index 705e54ac1b8..09407d46235 100644 --- a/src/plugins/prom/prom_cli.c +++ b/src/plugins/prom/prom_cli.c @@ -131,7 +131,7 @@ prom_command_fn (vlib_main_t *vm, unformat_input_t *input, no_input: if (is_enable && !pm->is_enabled) - prom_enable (vm); + return prom_enable (vm); return 0; } diff --git a/src/plugins/pvti/input.c b/src/plugins/pvti/input.c index 6a8806e2795..d7727153aa7 100644 --- a/src/plugins/pvti/input.c +++ b/src/plugins/pvti/input.c @@ -124,7 +124,7 @@ pvti_input_node_common (vlib_main_t *vm, vlib_node_runtime_t *node, pvti_main_t *pvm = &pvti_main; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); pvti_per_thread_data_t *ptd = vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index); diff --git a/src/plugins/pvti/output.c b/src/plugins/pvti/output.c index 1939c6f585a..5fb6263795e 100644 --- a/src/plugins/pvti/output.c +++ b/src/plugins/pvti/output.c @@ -340,7 +340,7 @@ pvti_output_node_common (vlib_main_t *vm, vlib_node_runtime_t *node, u8 stream_index = pvti_get_stream_index (is_ip6); - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); pvti_per_thread_data_t *ptd = vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index); diff --git a/src/plugins/pvti/pvti.h b/src/plugins/pvti/pvti.h index ac097c5ecca..608610362d7 100644 --- a/src/plugins/pvti/pvti.h +++ b/src/plugins/pvti/pvti.h @@ -223,7 +223,7 @@ extern vlib_node_registration_t pvti_periodic_node; always_inline u8 pvti_get_stream_index (int is_ip6) { - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); ASSERT ((thread_index & 0xffffff80) == 0); diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c index 10651f10e7e..77d35634fa9 100644 --- a/src/plugins/quic/quic.c +++ b/src/plugins/quic/quic.c @@ -51,7 +51,8 @@ static void quic_update_timer (quic_ctx_t * ctx); static void quic_check_quic_session_connected (quic_ctx_t * ctx); static int 
quic_reset_connection (u64 udp_session_handle, quic_rx_packet_ctx_t * pctx); -static void quic_proto_on_close (u32 ctx_index, u32 thread_index); +static void quic_proto_on_close (u32 ctx_index, + clib_thread_index_t thread_index); static quicly_stream_open_t on_stream_open; static quicly_closed_by_remote_t on_closed_by_remote; @@ -133,7 +134,7 @@ quic_crypto_context_alloc (u8 thread_index) } static crypto_context_t * -quic_crypto_context_get (u32 cr_index, u32 thread_index) +quic_crypto_context_get (u32 cr_index, clib_thread_index_t thread_index) { quic_main_t *qm = &quic_main; ASSERT (cr_index >> 24 == thread_index); @@ -381,7 +382,7 @@ error: /* Helper functions */ static u32 -quic_ctx_alloc (u32 thread_index) +quic_ctx_alloc (clib_thread_index_t thread_index) { quic_main_t *qm = &quic_main; quic_ctx_t *ctx; @@ -401,7 +402,7 @@ static void quic_ctx_free (quic_ctx_t * ctx) { QUIC_DBG (2, "Free ctx %u %x", ctx->c_thread_index, ctx->c_c_index); - u32 thread_index = ctx->c_thread_index; + clib_thread_index_t thread_index = ctx->c_thread_index; QUIC_ASSERT (ctx->timer_handle == QUIC_TIMER_HANDLE_INVALID); if (CLIB_DEBUG) clib_memset (ctx, 0xfb, sizeof (*ctx)); @@ -409,13 +410,13 @@ quic_ctx_free (quic_ctx_t * ctx) } static quic_ctx_t * -quic_ctx_get (u32 ctx_index, u32 thread_index) +quic_ctx_get (u32 ctx_index, clib_thread_index_t thread_index) { return pool_elt_at_index (quic_main.ctx_pool[thread_index], ctx_index); } static quic_ctx_t * -quic_ctx_get_if_valid (u32 ctx_index, u32 thread_index) +quic_ctx_get_if_valid (u32 ctx_index, clib_thread_index_t thread_index) { if (pool_is_free_index (quic_main.ctx_pool[thread_index], ctx_index)) return 0; @@ -1100,7 +1101,7 @@ quic_get_time (quicly_now_t * self) } static u32 -quic_set_time_now (u32 thread_index) +quic_set_time_now (clib_thread_index_t thread_index) { vlib_main_t *vlib_main = vlib_get_main (); f64 time = vlib_time_now (vlib_main); @@ -1396,7 +1397,7 @@ quic_connect (transport_endpoint_cfg_t * tep) } static void -quic_proto_on_close (u32 ctx_index, u32 thread_index) +quic_proto_on_close (u32 ctx_index, clib_thread_index_t thread_index) { int err; quic_ctx_t *ctx = quic_ctx_get_if_valid (ctx_index, thread_index); @@ -1548,7 +1549,7 @@ quic_stop_listen (u32 lctx_index) } static transport_connection_t * -quic_connection_get (u32 ctx_index, u32 thread_index) +quic_connection_get (u32 ctx_index, clib_thread_index_t thread_index) { quic_ctx_t *ctx; ctx = quic_ctx_get (ctx_index, thread_index); @@ -1600,7 +1601,7 @@ static u8 * format_quic_connection (u8 * s, va_list * args) { u32 qc_index = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); quic_ctx_t *ctx = quic_ctx_get (qc_index, thread_index); s = format (s, "%U", format_quic_ctx, ctx, verbose); @@ -1611,7 +1612,7 @@ static u8 * format_quic_half_open (u8 * s, va_list * args) { u32 qc_index = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); quic_ctx_t *ctx = quic_ctx_get (qc_index, thread_index); s = format (s, "[#%d][Q] half-open app %u", thread_index, ctx->parent_app_id); @@ -1623,7 +1624,7 @@ static u8 * format_quic_listener (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); quic_ctx_t *ctx = quic_ctx_get (tci, thread_index); s = format (s, "%U", format_quic_ctx, ctx, 
verbose); @@ -1660,7 +1661,7 @@ quic_on_quic_session_connected (quic_ctx_t * ctx) session_t *quic_session; app_worker_t *app_wrk; u32 ctx_id = ctx->c_c_index; - u32 thread_index = ctx->c_thread_index; + clib_thread_index_t thread_index = ctx->c_thread_index; int rv; quic_session = session_alloc (thread_index); @@ -1775,7 +1776,7 @@ static void quic_transfer_connection (u32 ctx_index, u32 dest_thread) { quic_ctx_t *ctx, *temp_ctx; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); QUIC_DBG (2, "Transferring conn %u to thread %u", ctx_index, dest_thread); @@ -1811,7 +1812,7 @@ quic_udp_session_connected_callback (u32 quic_app_index, u32 ctx_index, app_worker_t *app_wrk; quicly_conn_t *conn; quic_ctx_t *ctx; - u32 thread_index; + clib_thread_index_t thread_index; int ret; quicly_context_t *quicly_ctx; @@ -1918,7 +1919,7 @@ quic_udp_session_accepted_callback (session_t * udp_session) u32 ctx_index; quic_ctx_t *ctx, *lctx; session_t *udp_listen_session; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); udp_listen_session = listen_session_get_from_handle (udp_session->listener_handle); @@ -2199,7 +2200,7 @@ quic_process_one_rx_packet (u64 udp_session_handle, svm_fifo_t * f, { size_t plen; u32 full_len, ret; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 cur_deq = svm_fifo_max_dequeue (f) - fifo_offset; quicly_context_t *quicly_ctx; session_t *udp_session; @@ -2281,7 +2282,7 @@ quic_udp_session_rx_callback (session_t * udp_session) u32 max_deq; u64 udp_session_handle = session_handle (udp_session); int rv = 0; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 cur_deq, fifo_offset, max_packets, i; quic_rx_packet_ctx_t packets_ctx[QUIC_RCV_MAX_PACKETS]; @@ -2306,7 +2307,7 @@ rx_start: #endif for (i = 0; i < max_packets; i++) { - packets_ctx[i].thread_index = UINT32_MAX; + packets_ctx[i].thread_index = CLIB_INVALID_THREAD_INDEX; packets_ctx[i].ctx_index = UINT32_MAX; packets_ctx[i].ptype = QUIC_PACKET_TYPE_DROP; @@ -2421,8 +2422,8 @@ quic_get_transport_listener_endpoint (u32 listener_index, } static void -quic_get_transport_endpoint (u32 ctx_index, u32 thread_index, - transport_endpoint_t * tep, u8 is_lcl) +quic_get_transport_endpoint (u32 ctx_index, clib_thread_index_t thread_index, + transport_endpoint_t *tep, u8 is_lcl) { quic_ctx_t *ctx; ctx = quic_ctx_get (ctx_index, thread_index); diff --git a/src/plugins/quic/quic.h b/src/plugins/quic/quic.h index 081bcb120e9..4474aa15e75 100644 --- a/src/plugins/quic/quic.h +++ b/src/plugins/quic/quic.h @@ -205,7 +205,7 @@ typedef struct quic_session_cache_ typedef struct quic_stream_data_ { u32 ctx_id; - u32 thread_index; + clib_thread_index_t thread_index; u32 app_rx_data_len; /**< bytes received, to be read by external app */ u32 app_tx_data_len; /**< bytes sent */ } quic_stream_data_t; @@ -232,7 +232,7 @@ typedef struct quic_rx_packet_ctx_ quicly_decoded_packet_t packet; u8 data[QUIC_MAX_PACKET_SIZE]; u32 ctx_index; - u32 thread_index; + clib_thread_index_t thread_index; union { struct sockaddr sa; diff --git a/src/plugins/quic/quic_crypto.c b/src/plugins/quic/quic_crypto.c index 9e2c915daaa..4e11eff2431 100644 --- a/src/plugins/quic/quic_crypto.c +++ b/src/plugins/quic/quic_crypto.c @@ -248,8 +248,7 @@ quic_crypto_decrypt_packet (quic_ctx_t *qctx, quic_rx_packet_ctx_t *pctx) pctx->packet.octets.len - 
aead_off, pn, pctx->packet.octets.base, aead_off)) == SIZE_MAX) { - fprintf (stderr, "%s: aead decryption failure (pn: %d)\n", __FUNCTION__, - pn); + fprintf (stderr, "%s: aead decryption failure (pn: %d)\n", __func__, pn); return; } @@ -349,8 +348,7 @@ quic_crypto_cipher_setup_crypto (ptls_cipher_context_t *_ctx, int is_enc, } else { - QUIC_DBG (1, "%s, Invalid crypto cipher : ", __FUNCTION__, - _ctx->algo->name); + QUIC_DBG (1, "%s, Invalid crypto cipher : ", __func__, _ctx->algo->name); assert (0); } @@ -405,8 +403,7 @@ quic_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc, } else { - QUIC_DBG (1, "%s, invalied aead cipher %s", __FUNCTION__, - _ctx->algo->name); + QUIC_DBG (1, "%s, invalied aead cipher %s", __func__, _ctx->algo->name); assert (0); } diff --git a/src/plugins/rdma/device.c b/src/plugins/rdma/device.c index 8aeb586a42d..a4dbdb02831 100644 --- a/src/plugins/rdma/device.c +++ b/src/plugins/rdma/device.c @@ -23,7 +23,7 @@ #include <vppinfra/linux/sysfs.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vlib/pci/pci.h> #include <vnet/ethernet/ethernet.h> #include <vnet/interface/rx_queue_funcs.h> diff --git a/src/plugins/sflow/CMakeLists.txt b/src/plugins/sflow/CMakeLists.txt index 35433bd24df..c966fcc4480 100644 --- a/src/plugins/sflow/CMakeLists.txt +++ b/src/plugins/sflow/CMakeLists.txt @@ -12,39 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -vpp_find_path(NETLINK_INCLUDE_DIR NAMES linux/netlink.h) -if (NOT NETLINK_INCLUDE_DIR) - message(WARNING "netlink headers not found - sflow plugin disabled") - return() -endif() - if ("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") message(WARNING "sflow is not supported on FreeBSD - sflow plugin disabled") return() endif() -LIST(FIND excluded_plugins linux-cp exc_index) -if(${exc_index} EQUAL "-1") - message(WARNING "sflow plugin - linux-cp plugin included: compiling VAPI calls") - add_compile_definitions(SFLOW_USE_VAPI) -else() - message(WARNING "sflow plugin - linux-cp plugin excluded: not compiling VAPI calls") -endif() - -include_directories(${CMAKE_SOURCE_DIR}/vpp-api ${CMAKE_CURRENT_BINARY_DIR}/../../vpp-api) add_vpp_plugin(sflow SOURCES sflow.c node.c sflow_common.h sflow.h + sflow_dlapi.h sflow_psample.c sflow_psample.h sflow_psample_fields.h sflow_usersock.c sflow_usersock.h - sflow_vapi.c - sflow_vapi.h MULTIARCH_SOURCES node.c diff --git a/src/plugins/sflow/sflow.c b/src/plugins/sflow/sflow.c index 5aa65062330..14d07d69233 100644 --- a/src/plugins/sflow/sflow.c +++ b/src/plugins/sflow/sflow.c @@ -25,6 +25,7 @@ #include <sflow/sflow.api_enum.h> #include <sflow/sflow.api_types.h> #include <sflow/sflow_psample.h> +#include <sflow/sflow_dlapi.h> #include <vpp-api/client/stat_client.h> #include <vlib/stats/stats.h> @@ -181,8 +182,15 @@ retry: SFLOWUSSpec_setMsgType (&spec, SFLOW_VPP_MSG_IF_COUNTERS); SFLOWUSSpec_setAttr (&spec, SFLOW_VPP_ATTR_PORTNAME, hw->name, vec_len (hw->name)); - SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_IFINDEX, sfif->hw_if_index); - if (sfif->linux_if_index) + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_IFINDEX, sfif->sw_if_index); + + if (smp->lcp_itf_pair_get_vif_index_by_phy) + { + sfif->linux_if_index = + (*smp->lcp_itf_pair_get_vif_index_by_phy) (sfif->sw_if_index); + } + + if (sfif->linux_if_index != INDEX_INVALID) { // We know the corresponding Linux ifIndex for this interface, so include // that here. 
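The sflow.c hunks around this point replace the old VAPI helper thread with a direct dynamic-symbol lookup into the linux-cp plugin: sflow_init() resolves lcp_itf_pair_get_vif_index_by_phy once via vlib_get_plugin_symbol() and stores it in the new IfIndexLookupFn pointer, and the counter path calls it per interface, reporting only the VPP sw_if_index when no Linux vif mapping is available. A minimal sketch of that lookup pattern, using only the names introduced in this diff (the fallback-to-sw_if_index helper below is illustrative, not the plugin's exact reporting logic), might look like:

#include <vlib/vlib.h>
#include <vlib/unix/plugin.h>

/* mirrors the typedef added to sflow.h */
typedef u32 (*IfIndexLookupFn) (u32);

static IfIndexLookupFn lookup_vif_by_phy;

static void
resolve_linux_cp_symbol (void)
{
  /* NULL if linux_cp_plugin.so is not loaded or does not export the symbol */
  lookup_vif_by_phy = vlib_get_plugin_symbol (
    "linux_cp_plugin.so", "lcp_itf_pair_get_vif_index_by_phy");
}

static u32
linux_if_index_for (u32 sw_if_index)
{
  /* lcp_itf_pair_get_vif_index_by_phy () returns INDEX_INVALID (~0)
   * when the interface has no LIP; fall back to the VPP sw_if_index. */
  if (lookup_vif_by_phy)
    {
      u32 vif = lookup_vif_by_phy (sw_if_index);
      if (vif != INDEX_INVALID)
	return vif;
    }
  return sw_if_index;
}

This keeps the sflow plugin free of any build-time dependency on linux-cp (hence the removal of SFLOW_USE_VAPI and sflow_vapi.[ch] later in this diff), degrading gracefully when the plugin is absent.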
@@ -238,7 +246,8 @@ total_drops (sflow_main_t *smp) { // sum sendmsg and worker-fifo drops u32 all_drops = smp->psample_send_drops; - for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++) + for (clib_thread_index_t thread_index = 0; thread_index < smp->total_threads; + thread_index++) { sflow_per_thread_data_t *sfwk = vec_elt_at_index (smp->per_thread_data, thread_index); @@ -313,8 +322,8 @@ read_worker_fifos (sflow_main_t *smp) for (; batch < SFLOW_READ_BATCH; batch++) { u32 psample_send = 0, psample_send_fail = 0; - for (u32 thread_index = 0; thread_index < smp->total_threads; - thread_index++) + for (clib_thread_index_t thread_index = 0; + thread_index < smp->total_threads; thread_index++) { sflow_per_thread_data_t *sfwk = vec_elt_at_index (smp->per_thread_data, thread_index); @@ -380,7 +389,8 @@ read_node_counters (sflow_main_t *smp, sflow_err_ctrs_t *ctrs) { for (u32 ec = 0; ec < SFLOW_N_ERROR; ec++) ctrs->counters[ec] = 0; - for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++) + for (clib_thread_index_t thread_index = 0; thread_index < smp->total_threads; + thread_index++) { sflow_per_thread_data_t *sfwk = vec_elt_at_index (smp->per_thread_data, thread_index); @@ -433,15 +443,6 @@ sflow_process_samples (vlib_main_t *vm, vlib_node_runtime_t *node, continue; } -#ifdef SFLOW_USE_VAPI -#ifdef SFLOW_TEST_HAMMER_VAPI - sflow_vapi_check_for_linux_if_index_results (&smp->vac, - smp->per_interface_data); - sflow_vapi_read_linux_if_index_numbers (&smp->vac, - smp->per_interface_data); -#endif -#endif - // PSAMPLE channel may need extra step (e.g. to learn family_id) // before it is ready to send EnumSFLOWPSState psState = SFLOWPS_state (&smp->sflow_psample); @@ -458,23 +459,6 @@ sflow_process_samples (vlib_main_t *vm, vlib_node_runtime_t *node, { // second rollover smp->now_mono_S = tnow_S; -#ifdef SFLOW_USE_VAPI - if (!smp->vac.vapi_unavailable) - { - // look up linux if_index numbers - sflow_vapi_check_for_linux_if_index_results ( - &smp->vac, smp->per_interface_data); - if (smp->vapi_requests == 0 || - (tnow_S % SFLOW_VAPI_POLL_INTERVAL) == 0) - { - if (sflow_vapi_read_linux_if_index_numbers ( - &smp->vac, smp->per_interface_data)) - { - smp->vapi_requests++; - } - } - } -#endif // send status info send_sampling_status_info (smp); // poll counters for interfaces that are due @@ -505,7 +489,8 @@ sflow_set_worker_sampling_state (sflow_main_t *smp) vlib_thread_main_t *tm = &vlib_thread_main; smp->total_threads = 1 + tm->n_threads; vec_validate (smp->per_thread_data, smp->total_threads); - for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++) + for (clib_thread_index_t thread_index = 0; thread_index < smp->total_threads; + thread_index++) { sflow_per_thread_data_t *sfwk = vec_elt_at_index (smp->per_thread_data, thread_index); @@ -539,11 +524,6 @@ sflow_sampling_start (sflow_main_t *smp) smp->psample_seq_egress = 0; smp->psample_send_drops = 0; -#ifdef SFLOW_USE_VAPI - // reset vapi request count so that we make a request the first time - smp->vapi_requests = 0; -#endif - /* open PSAMPLE netlink channel for writing packet samples */ SFLOWPS_open (&smp->sflow_psample); /* open USERSOCK netlink channel for writing counters */ @@ -1027,6 +1007,18 @@ sflow_init (vlib_main_t *vm) /* access to counters - TODO: should this only happen on sflow enable? 
*/ sflow_stat_segment_client_init (); + + smp->lcp_itf_pair_get_vif_index_by_phy = + vlib_get_plugin_symbol (SFLOW_LCP_LIB, SFLOW_LCP_SYM_GET_VIF_BY_PHY); + if (smp->lcp_itf_pair_get_vif_index_by_phy) + { + SFLOW_NOTICE ("linux-cp found - using LIP vif_index, where available"); + } + else + { + SFLOW_NOTICE ("linux-cp not found - using VPP sw_if_index"); + } + return error; } diff --git a/src/plugins/sflow/sflow.h b/src/plugins/sflow/sflow.h index 609ff723816..0ec5ac90688 100644 --- a/src/plugins/sflow/sflow.h +++ b/src/plugins/sflow/sflow.h @@ -22,7 +22,6 @@ #include <vppinfra/hash.h> #include <vppinfra/error.h> #include <sflow/sflow_common.h> -#include <sflow/sflow_vapi.h> #include <sflow/sflow_psample.h> #include <sflow/sflow_usersock.h> @@ -124,6 +123,8 @@ typedef struct sflow_fifo_t fifo; } sflow_per_thread_data_t; +typedef u32 (*IfIndexLookupFn) (u32); + typedef struct { /* API message ID base */ @@ -164,12 +165,7 @@ typedef struct u32 csample_send; u32 csample_send_drops; u32 unixsock_seq; -#ifdef SFLOW_USE_VAPI - /* vapi query helper thread (transient) */ - CLIB_CACHE_LINE_ALIGN_MARK (_vapi); - sflow_vapi_client_t vac; - int vapi_requests; -#endif + IfIndexLookupFn lcp_itf_pair_get_vif_index_by_phy; } sflow_main_t; extern sflow_main_t sflow_main; diff --git a/src/plugins/sflow/sflow_common.h b/src/plugins/sflow/sflow_common.h index 29784638bb9..26f306b5741 100644 --- a/src/plugins/sflow/sflow_common.h +++ b/src/plugins/sflow/sflow_common.h @@ -15,8 +15,6 @@ #ifndef __included_sflow_common_h__ #define __included_sflow_common_h__ -// #define SFLOW_USE_VAPI (set by CMakeLists.txt) - extern vlib_log_class_t sflow_logger; #define SFLOW_DBG(...) vlib_log_debug (sflow_logger, __VA_ARGS__); #define SFLOW_INFO(...) vlib_log_info (sflow_logger, __VA_ARGS__); diff --git a/src/plugins/nat/lib/inlines.h b/src/plugins/sflow/sflow_dlapi.h index 24e3ba83a5b..e983bc8f6fe 100644 --- a/src/plugins/nat/lib/inlines.h +++ b/src/plugins/sflow/sflow_dlapi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Cisco and/or its affiliates. + * Copyright (c) 2025 InMon Corp. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -12,29 +12,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/** - * @brief Common NAT inline functions +#ifndef __included_sflow_dlapi_h__ +#define __included_sflow_dlapi_h__ +/* Dynamic-link API + * If present, linux-cp plugin will be queried to learn the + * Linux if_index for each VPP if_index. If that plugin is not + * compiled and loaded, or if the function symbol is not found, + * then the interfaces will be reported to NETLINK_USERSOCK + * without this extra mapping. 
*/ -#ifndef included_nat_inlines_h__ -#define included_nat_inlines_h__ - -#include <vnet/ip/icmp46_packet.h> - -static_always_inline u64 -icmp_type_is_error_message (u8 icmp_type) -{ - int bmp = 0; - bmp |= 1 << ICMP4_destination_unreachable; - bmp |= 1 << ICMP4_time_exceeded; - bmp |= 1 << ICMP4_parameter_problem; - bmp |= 1 << ICMP4_source_quench; - bmp |= 1 << ICMP4_redirect; - bmp |= 1 << ICMP4_alternate_host_address; - - return (1ULL << icmp_type) & bmp; -} - -#endif /* included_nat_inlines_h__ */ +#define SFLOW_LCP_LIB "linux_cp_plugin.so" +#define SFLOW_LCP_SYM_GET_VIF_BY_PHY "lcp_itf_pair_get_vif_index_by_phy" +#endif /* __included_sflow_dyn_api_h__ */ /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/sflow/sflow_psample.c b/src/plugins/sflow/sflow_psample.c index 0e4fcfbe790..41df454d999 100644 --- a/src/plugins/sflow/sflow_psample.c +++ b/src/plugins/sflow/sflow_psample.c @@ -13,11 +13,6 @@ * limitations under the License. */ -#if defined(__cplusplus) -extern "C" -{ -#endif - #include <vlib/vlib.h> #include <vnet/vnet.h> #include <vnet/pg/pg.h> diff --git a/src/plugins/sflow/sflow_vapi.c b/src/plugins/sflow/sflow_vapi.c deleted file mode 100644 index cdc89a54c80..00000000000 --- a/src/plugins/sflow/sflow_vapi.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2024 InMon Corp. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <sflow/sflow_vapi.h> - -#ifdef SFLOW_USE_VAPI - -#include <vlibapi/api.h> -#include <vlibmemory/api.h> -#include <vpp/app/version.h> -#include <stdbool.h> - -#include <vapi/vapi.h> -#include <vapi/memclnt.api.vapi.h> -#include <vapi/vlib.api.vapi.h> - -#ifdef included_interface_types_api_types_h -#define defined_vapi_enum_if_status_flags -#define defined_vapi_enum_mtu_proto -#define defined_vapi_enum_link_duplex -#define defined_vapi_enum_sub_if_flags -#define defined_vapi_enum_rx_mode -#define defined_vapi_enum_if_type -#define defined_vapi_enum_direction -#endif -#include <vapi/lcp.api.vapi.h> - -DEFINE_VAPI_MSG_IDS_LCP_API_JSON; - -static vapi_error_e -my_pair_get_cb (struct vapi_ctx_s *ctx, void *callback_ctx, vapi_error_e rv, - bool is_last, vapi_payload_lcp_itf_pair_get_v2_reply *reply) -{ - // this is a no-op, but it seems like it's presence is still required. For - // example, it is called if the pair lookup does not find anything. - return VAPI_OK; -} - -static vapi_error_e -my_pair_details_cb (struct vapi_ctx_s *ctx, void *callback_ctx, - vapi_error_e rv, bool is_last, - vapi_payload_lcp_itf_pair_details *details) -{ - sflow_per_interface_data_t *sfif = - (sflow_per_interface_data_t *) callback_ctx; - // Setting this here will mean it is sent to hsflowd with the interface - // counters. - sfif->linux_if_index = details->vif_index; - return VAPI_OK; -} - -static vapi_error_e -sflow_vapi_connect (sflow_vapi_client_t *vac) -{ - vapi_error_e rv = VAPI_OK; - vapi_ctx_t ctx = vac->vapi_ctx; - if (ctx == NULL) - { - // first time - open and connect. 
- if ((rv = vapi_ctx_alloc (&ctx)) != VAPI_OK) - { - SFLOW_ERR ("vap_ctx_alloc() returned %d", rv); - } - else - { - vac->vapi_ctx = ctx; - if ((rv = vapi_connect_from_vpp ( - ctx, "api_from_sflow_plugin", SFLOW_VAPI_MAX_REQUEST_Q, - SFLOW_VAPI_MAX_RESPONSE_Q, VAPI_MODE_BLOCKING, true)) != - VAPI_OK) - { - SFLOW_ERR ("vapi_connect_from_vpp() returned %d", rv); - } - else - { - // Connected - but is there a handler for the request we want to - // send? - if (!vapi_is_msg_available (ctx, - vapi_msg_id_lcp_itf_pair_add_del_v2)) - { - SFLOW_WARN ("vapi_is_msg_available() returned false => " - "linux-cp plugin not loaded"); - rv = VAPI_EUSER; - } - } - } - } - return rv; -} - -// in forked thread -static void * -get_lcp_itf_pairs (void *magic) -{ - sflow_vapi_client_t *vac = magic; - vapi_error_e rv = VAPI_OK; - - sflow_per_interface_data_t *intfs = vac->vapi_itfs; - vlib_set_thread_name (SFLOW_VAPI_THREAD_NAME); - if ((rv = sflow_vapi_connect (vac)) != VAPI_OK) - { - vac->vapi_unavailable = true; - } - else - { - vapi_ctx_t ctx = vac->vapi_ctx; - - for (int ii = 1; ii < vec_len (intfs); ii++) - { - sflow_per_interface_data_t *sfif = vec_elt_at_index (intfs, ii); - if (sfif && sfif->sflow_enabled) - { - // TODO: if we try non-blocking we might not be able to just pour - // all the requests in here. Might be better to do them one at a - // time - e.g. when we poll for counters. - vapi_msg_lcp_itf_pair_get_v2 *msg = - vapi_alloc_lcp_itf_pair_get_v2 (ctx); - if (msg) - { - msg->payload.sw_if_index = sfif->sw_if_index; - if ((rv = vapi_lcp_itf_pair_get_v2 (ctx, msg, my_pair_get_cb, - sfif, my_pair_details_cb, - sfif)) != VAPI_OK) - { - SFLOW_ERR ("vapi_lcp_itf_pair_get_v2 returned %d", rv); - // vapi.h: "message must be freed by vapi_msg_free if not - // consumed by vapi_send" - vapi_msg_free (ctx, msg); - } - } - } - } - // We no longer disconnect or free the client structures - // vapi_disconnect_from_vpp (ctx); - // vapi_ctx_free (ctx); - } - // indicate that we are done - more portable that using pthread_tryjoin_np() - vac->vapi_request_status = (int) rv; - clib_atomic_store_rel_n (&vac->vapi_request_active, false); - // TODO: how to tell if heap-allocated data is stored separately per thread? - // And if so, how to tell the allocator to GC all data for the thread when it - // exits? - return (void *) rv; -} - -int -sflow_vapi_read_linux_if_index_numbers (sflow_vapi_client_t *vac, - sflow_per_interface_data_t *itfs) -{ - -#ifdef SFLOW_VAPI_TEST_PLUGIN_SYMBOL - // don't even fork the query thread if the symbol is not there - if (!vlib_get_plugin_symbol ("linux_cp_plugin.so", "lcp_itf_pair_get")) - { - return false; - } -#endif - // previous query is done and results extracted? - int req_active = clib_atomic_load_acq_n (&vac->vapi_request_active); - if (req_active == false && vac->vapi_itfs == NULL) - { - // make a copy of the current interfaces vector for the lookup thread to - // write into - vac->vapi_itfs = vec_dup (itfs); - pthread_attr_t attr; - pthread_attr_init (&attr); - pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); - pthread_attr_setstacksize (&attr, VLIB_THREAD_STACK_SIZE); - vac->vapi_request_active = true; - pthread_create (&vac->vapi_thread, &attr, get_lcp_itf_pairs, vac); - pthread_attr_destroy (&attr); - return true; - } - return false; -} - -int -sflow_vapi_check_for_linux_if_index_results (sflow_vapi_client_t *vac, - sflow_per_interface_data_t *itfs) -{ - // request completed? 
- // TODO: if we use non-blocking mode do we have to call something here to - // receive results? - int req_active = clib_atomic_load_acq_n (&vac->vapi_request_active); - if (req_active == false && vac->vapi_itfs != NULL) - { - // yes, extract what we learned - // TODO: would not have to do this if vector were array of pointers - // to sflow_per_interface_data_t rather than an actual array, but - // it does mean we have very clear separation between the threads. - for (int ii = 1; ii < vec_len (vac->vapi_itfs); ii++) - { - sflow_per_interface_data_t *sfif1 = - vec_elt_at_index (vac->vapi_itfs, ii); - sflow_per_interface_data_t *sfif2 = vec_elt_at_index (itfs, ii); - if (sfif1 && sfif2 && sfif1->sflow_enabled && sfif2->sflow_enabled) - sfif2->linux_if_index = sfif1->linux_if_index; - } - vec_free (vac->vapi_itfs); - vac->vapi_itfs = NULL; - return true; - } - return false; -} - -#endif /* SFLOW_USE_VAPI */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/sflow/sflow_vapi.h b/src/plugins/sflow/sflow_vapi.h deleted file mode 100644 index 640fe997684..00000000000 --- a/src/plugins/sflow/sflow_vapi.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2024 InMon Corp. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __included_sflow_vapi_h__ -#define __included_sflow_vapi_h__ - -#include <vnet/vnet.h> -#include <sflow/sflow_common.h> - -#ifdef SFLOW_USE_VAPI - -#define SFLOW_VAPI_POLL_INTERVAL 5 -#define SFLOW_VAPI_MAX_REQUEST_Q 8 -#define SFLOW_VAPI_MAX_RESPONSE_Q 16 -#define SFLOW_VAPI_THREAD_NAME "sflow_vapi" // must be <= 15 characters - -// #define SFLOW_VAPI_TEST_PLUGIN_SYMBOL - -typedef struct -{ - volatile int vapi_request_active; // to sync main <-> vapi_thread - pthread_t vapi_thread; - sflow_per_interface_data_t *vapi_itfs; - int vapi_unavailable; - int vapi_request_status; // written by vapi_thread - void *vapi_ctx; -} sflow_vapi_client_t; - -int sflow_vapi_read_linux_if_index_numbers (sflow_vapi_client_t *vac, - sflow_per_interface_data_t *itfs); -int -sflow_vapi_check_for_linux_if_index_results (sflow_vapi_client_t *vac, - sflow_per_interface_data_t *itfs); - -#endif /* SFLOW_USE_VAPI */ -#endif /* __included_sflow_vapi_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/snort/enqueue.c b/src/plugins/snort/enqueue.c index 84efb4d432f..ae04c58bba0 100644 --- a/src/plugins/snort/enqueue.c +++ b/src/plugins/snort/enqueue.c @@ -93,7 +93,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, snort_main_t *sm = &snort_main; snort_instance_t *si = 0; snort_qpair_t *qp = 0; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 n_left = frame->n_vectors; u32 n_trace = 0; u32 total_enq = 0, n_unprocessed = 0; diff --git a/src/plugins/snort/main.c b/src/plugins/snort/main.c index 9bab1185b60..c87ecfd7ebd 100644 --- a/src/plugins/snort/main.c +++ b/src/plugins/snort/main.c @@ -392,6 +392,18 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz, u8 align = CLIB_CACHE_LINE_BYTES; int rv = 0; + if (sm->listener == 0) + { + clib_error_t *err; + err = snort_listener_init (vm); + if (err) + { + log_err ("listener init failed: %U", format_clib_error, err); + clib_error_free (err); + return VNET_API_ERROR_INIT_FAILED; + } + } + if (snort_get_instance_by_name (name)) return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; @@ -831,7 +843,7 @@ snort_init (vlib_main_t *vm) if (!sm->socket_name) snort_set_default_socket (sm, 0); - return snort_listener_init (vm); + return 0; } VLIB_INIT_FUNCTION (snort_init); diff --git a/src/plugins/srtp/srtp.c b/src/plugins/srtp/srtp.c index 5426b7aa03f..f86b7be980e 100644 --- a/src/plugins/srtp/srtp.c +++ b/src/plugins/srtp/srtp.c @@ -19,11 +19,11 @@ static srtp_main_t srtp_main; -static void srtp_disconnect (u32 ctx_handle, u32 thread_index); +static void srtp_disconnect (u32 ctx_handle, clib_thread_index_t thread_index); static void srtp_disconnect_transport (srtp_tc_t *ctx); static inline u32 -srtp_ctx_alloc_w_thread (u32 thread_index) +srtp_ctx_alloc_w_thread (clib_thread_index_t thread_index) { srtp_tc_t *ctx; pool_get_aligned_safe (srtp_main.ctx_pool[thread_index], ctx, @@ -36,7 +36,7 @@ srtp_ctx_alloc_w_thread (u32 thread_index) } static inline srtp_tc_t * -srtp_ctx_get_w_thread (u32 ctx_index, u32 thread_index) +srtp_ctx_get_w_thread (u32 ctx_index, clib_thread_index_t thread_index) { return pool_elt_at_index (srtp_main.ctx_pool[thread_index], ctx_index); } @@ -82,7 +82,7 @@ srtp_ctx_free (srtp_tc_t *ctx) } static inline u32 -srtp_ctx_attach (u32 thread_index, void *ctx_ptr) +srtp_ctx_attach (clib_thread_index_t thread_index, void *ctx_ptr) { srtp_tc_t *ctx; @@ -688,7 +688,7 @@ 
srtp_disconnect_transport (srtp_tc_t *ctx) } static void -srtp_disconnect (u32 ctx_handle, u32 thread_index) +srtp_disconnect (u32 ctx_handle, clib_thread_index_t thread_index) { session_t *app_session; srtp_tc_t *ctx; @@ -801,7 +801,7 @@ srtp_stop_listen (u32 lctx_index) } transport_connection_t * -srtp_connection_get (u32 ctx_index, u32 thread_index) +srtp_connection_get (u32 ctx_index, clib_thread_index_t thread_index) { srtp_tc_t *ctx; ctx = srtp_ctx_get_w_thread (ctx_index, thread_index); @@ -895,7 +895,7 @@ u8 * format_srtp_connection (u8 *s, va_list *args) { u32 ctx_index = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); srtp_tc_t *ctx; @@ -935,7 +935,7 @@ format_srtp_half_open (u8 *s, va_list *args) } static void -srtp_transport_endpoint_get (u32 ctx_handle, u32 thread_index, +srtp_transport_endpoint_get (u32 ctx_handle, clib_thread_index_t thread_index, transport_endpoint_t *tep, u8 is_lcl) { srtp_tc_t *ctx = srtp_ctx_get_w_thread (ctx_handle, thread_index); diff --git a/src/plugins/srv6-ad-flow/node.c b/src/plugins/srv6-ad-flow/node.c index 66be2dc7972..28fbc105d84 100644 --- a/src/plugins/srv6-ad-flow/node.c +++ b/src/plugins/srv6-ad-flow/node.c @@ -583,7 +583,7 @@ srv6_ad_flow_localsid_fn (vlib_main_t *vm, vlib_node_runtime_t *node, ip6_sr_main_t *srm = &sr_main; f64 now = vlib_time_now (vm); u32 n_left_from, next_index, *from, *to_next, n_left_to_next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/plugins/srv6-am/node.c b/src/plugins/srv6-am/node.c index beef6a30910..012afda581b 100644 --- a/src/plugins/srv6-am/node.c +++ b/src/plugins/srv6-am/node.c @@ -147,7 +147,7 @@ srv6_am_localsid_fn (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { diff --git a/src/plugins/srv6-mobile/node.c b/src/plugins/srv6-mobile/node.c index ed0697a8009..c8f619cf044 100644 --- a/src/plugins/srv6-mobile/node.c +++ b/src/plugins/srv6-mobile/node.c @@ -325,7 +325,7 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) srv6_end_main_v4_t *sm = &srv6_end_main_v4; ip6_sr_main_t *sm2 = &sr_main; u32 n_left_from, next_index, *from, *to_next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 good_n = 0, bad_n = 0; @@ -1327,7 +1327,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) srv6_end_main_v6_t *sm = &srv6_end_main_v6; ip6_sr_main_t *sm2 = &sr_main; u32 n_left_from, next_index, *from, *to_next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 good_n = 0, bad_n = 0; @@ -2088,7 +2088,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d) srv6_end_main_v6_decap_t *sm = &srv6_end_main_v6_decap; ip6_sr_main_t *sm2 = &sr_main; u32 n_left_from, next_index, *from, *to_next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; ip6_sr_localsid_t *ls0; srv6_end_gtp6_d_param_t *ls_param; @@ -2238,7 +2238,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) srv6_end_main_v6_decap_di_t *sm = &srv6_end_main_v6_decap_di; ip6_sr_main_t *sm2 = &sr_main; u32 n_left_from, next_index, *from, *to_next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; srv6_end_gtp6_d_param_t *ls_param; u32 good_n = 0, bad_n = 
0; @@ -2686,7 +2686,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) srv6_end_main_v6_dt_t *sm = &srv6_end_main_v6_dt; ip6_sr_main_t *sm2 = &sr_main; u32 n_left_from, next_index, *from, *to_next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 good_n = 0, bad_n = 0; diff --git a/src/plugins/tlsmbedtls/tls_mbedtls.c b/src/plugins/tlsmbedtls/tls_mbedtls.c index 2f4757e28a1..44d48b1edb4 100644 --- a/src/plugins/tlsmbedtls/tls_mbedtls.c +++ b/src/plugins/tlsmbedtls/tls_mbedtls.c @@ -127,7 +127,7 @@ mbedtls_ctx_get_w_thread (u32 ctx_index, u8 thread_index) static int tls_init_ctr_seed_drbgs (void) { - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); mbedtls_main_t *tm = &mbedtls_main; u8 *pers; int rv; diff --git a/src/plugins/tlsopenssl/tls_async.c b/src/plugins/tlsopenssl/tls_async.c index cd08da5d9ea..e28d730e307 100644 --- a/src/plugins/tlsopenssl/tls_async.c +++ b/src/plugins/tlsopenssl/tls_async.c @@ -114,8 +114,8 @@ openssl_async_t openssl_async_main; static vlib_node_registration_t tls_async_process_node; /* to avoid build warning */ -void session_send_rpc_evt_to_thread (u32 thread_index, void *fp, - void *rpc_args); +void session_send_rpc_evt_to_thread (clib_thread_index_t thread_index, + void *fp, void *rpc_args); void evt_pool_init (vlib_main_t * vm) @@ -528,7 +528,7 @@ openssl_async_node_enable_disable (u8 is_en) } int -tls_async_do_job (int eidx, u32 thread_index) +tls_async_do_job (int eidx, clib_thread_index_t thread_index) { tls_ctx_t *ctx; openssl_evt_t *event; diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c index f0be025a207..5b57e706586 100644 --- a/src/plugins/tlsopenssl/tls_openssl.c +++ b/src/plugins/tlsopenssl/tls_openssl.c @@ -35,7 +35,7 @@ openssl_main_t openssl_main; static u32 -openssl_ctx_alloc_w_thread (u32 thread_index) +openssl_ctx_alloc_w_thread (clib_thread_index_t thread_index) { openssl_main_t *om = &openssl_main; openssl_ctx_t **ctx; @@ -102,7 +102,7 @@ openssl_ctx_detach (tls_ctx_t *ctx) } static u32 -openssl_ctx_attach (u32 thread_index, void *ctx_ptr) +openssl_ctx_attach (clib_thread_index_t thread_index, void *ctx_ptr) { openssl_main_t *om = &openssl_main; session_handle_t sh; diff --git a/src/plugins/tlspicotls/pico_vpp_crypto.c b/src/plugins/tlspicotls/pico_vpp_crypto.c index 3d28d50b352..e8e4a875e33 100644 --- a/src/plugins/tlspicotls/pico_vpp_crypto.c +++ b/src/plugins/tlspicotls/pico_vpp_crypto.c @@ -107,8 +107,7 @@ ptls_vpp_crypto_cipher_setup_crypto (ptls_cipher_context_t * _ctx, int is_enc, } else { - TLS_DBG (1, "%s, Invalid crypto cipher : ", __FUNCTION__, - _ctx->algo->name); + TLS_DBG (1, "%s, Invalid crypto cipher : ", __func__, _ctx->algo->name); assert (0); } @@ -226,8 +225,7 @@ ptls_vpp_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc, } else { - TLS_DBG (1, "%s, invalied aead cipher %s", __FUNCTION__, - _ctx->algo->name); + TLS_DBG (1, "%s, invalied aead cipher %s", __func__, _ctx->algo->name); return -1; } diff --git a/src/plugins/unittest/ipsec_test.c b/src/plugins/unittest/ipsec_test.c index 98253eeb12a..869d53367b6 100644 --- a/src/plugins/unittest/ipsec_test.c +++ b/src/plugins/unittest/ipsec_test.c @@ -40,19 +40,26 @@ test_ipsec_command_fn (vlib_main_t *vm, unformat_input_t *input, if (~0 != sa_id) { ipsec_sa_t *sa; + ipsec_sa_inb_rt_t *irt; + ipsec_sa_outb_rt_t *ort; u32 sa_index; sa_index = ipsec_sa_find_and_lock (sa_id); sa = ipsec_sa_get (sa_index); + irt = ipsec_sa_get_inb_rt 
(sa); + ort = ipsec_sa_get_outb_rt (sa); - sa->seq = seq_num & 0xffffffff; - sa->seq_hi = seq_num >> 32; + if (ort) + ort->seq64 = seq_num; - /* clear the window */ - if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) - clib_bitmap_zero (sa->replay_window_huge); - else - sa->replay_window = 0; + if (irt) + { + irt->seq64 = seq_num; + + /* clear the window */ + uword_bitmap_clear (irt->replay_window, + irt->anti_replay_window_size / uword_bits); + } ipsec_sa_unlock (sa_index); } diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c index 993f1be41a9..667851901c4 100644 --- a/src/plugins/unittest/session_test.c +++ b/src/plugins/unittest/session_test.c @@ -16,6 +16,7 @@ #include <arpa/inet.h> #include <vnet/session/application.h> #include <vnet/session/session.h> +#include <vnet/session/transport.h> #include <sys/epoll.h> #include <vnet/session/session_rules_table.h> @@ -50,6 +51,11 @@ placeholder_session_reset_callback (session_t * s) volatile u32 connected_session_index = ~0; volatile u32 connected_session_thread = ~0; +static u32 placeholder_accept; +volatile u32 accepted_session_index; +volatile u32 accepted_session_thread; +volatile int app_session_error = 0; + int placeholder_session_connected_callback (u32 app_index, u32 api_context, session_t * s, session_error_t err) @@ -81,13 +87,22 @@ placeholder_del_segment_callback (u32 client_index, u64 segment_handle) void placeholder_session_disconnect_callback (session_t * s) { - clib_warning ("called..."); + if (!(s->session_index == connected_session_index && + s->thread_index == connected_session_thread) && + !(s->session_index == accepted_session_index && + s->thread_index == accepted_session_thread)) + { + clib_warning (0, "unexpected disconnect s %u thread %u", + s->session_index, s->thread_index); + app_session_error = 1; + } + vnet_disconnect_args_t da = { + .handle = session_handle (s), + .app_index = app_worker_get (s->app_wrk_index)->app_index + }; + vnet_disconnect_session (&da); } -static u32 placeholder_accept; -volatile u32 accepted_session_index; -volatile u32 accepted_session_thread; - int placeholder_session_accept_callback (session_t * s) { @@ -105,12 +120,39 @@ placeholder_server_rx_callback (session_t * s) return -1; } +void +placeholder_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf) +{ + if (ntf == SESSION_CLEANUP_TRANSPORT) + return; + + if (s->session_index == connected_session_index && + s->thread_index == connected_session_thread) + { + connected_session_index = ~0; + connected_session_thread = ~0; + } + else if (s->session_index == accepted_session_index && + s->thread_index == accepted_session_thread) + { + accepted_session_index = ~0; + accepted_session_thread = ~0; + } + else + { + clib_warning (0, "unexpected cleanup s %u thread %u", s->session_index, + s->thread_index); + app_session_error = 1; + } +} + static session_cb_vft_t placeholder_session_cbs = { .session_reset_callback = placeholder_session_reset_callback, .session_connected_callback = placeholder_session_connected_callback, .session_accept_callback = placeholder_session_accept_callback, .session_disconnect_callback = placeholder_session_disconnect_callback, .builtin_app_rx_callback = placeholder_server_rx_callback, + .session_cleanup_callback = placeholder_cleanup_callback, .add_segment_callback = placeholder_add_segment_callback, .del_segment_callback = placeholder_del_segment_callback, }; @@ -278,6 +320,7 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) u64 
options[APP_OPTIONS_N_OPTIONS], placeholder_secret = 1234; u16 placeholder_server_port = 1234, placeholder_client_port = 5678; session_endpoint_cfg_t server_sep = SESSION_ENDPOINT_CFG_NULL; + u32 client_vrf = 0, server_vrf = 1; ip4_address_t intf_addr[3]; transport_connection_t *tc; session_t *s; @@ -288,25 +331,25 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) * Create the loopbacks */ intf_addr[0].as_u32 = clib_host_to_net_u32 (0x01010101); - session_create_lookpback (0, &sw_if_index[0], &intf_addr[0]); + session_create_lookpback (client_vrf, &sw_if_index[0], &intf_addr[0]); intf_addr[1].as_u32 = clib_host_to_net_u32 (0x02020202); - session_create_lookpback (1, &sw_if_index[1], &intf_addr[1]); + session_create_lookpback (server_vrf, &sw_if_index[1], &intf_addr[1]); - session_add_del_route_via_lookup_in_table (0, 1, &intf_addr[1], 32, - 1 /* is_add */ ); - session_add_del_route_via_lookup_in_table (1, 0, &intf_addr[0], 32, - 1 /* is_add */ ); + session_add_del_route_via_lookup_in_table ( + client_vrf, server_vrf, &intf_addr[1], 32, 1 /* is_add */); + session_add_del_route_via_lookup_in_table ( + server_vrf, client_vrf, &intf_addr[0], 32, 1 /* is_add */); /* * Insert namespace */ - appns_id = format (0, "appns1"); + appns_id = format (0, "appns_server"); vnet_app_namespace_add_del_args_t ns_args = { .ns_id = appns_id, .secret = placeholder_secret, - .sw_if_index = sw_if_index[1], - .ip4_fib_id = 0, + .sw_if_index = sw_if_index[1], /* server interface*/ + .ip4_fib_id = 0, /* sw_if_index takes precedence */ .is_add = 1 }; error = vnet_app_namespace_add_del (&ns_args); @@ -357,10 +400,10 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) * Connect and force lcl ip */ client_sep.is_ip4 = 1; - client_sep.ip.ip4.as_u32 = clib_host_to_net_u32 (0x02020202); + client_sep.ip.ip4.as_u32 = intf_addr[1].as_u32; client_sep.port = placeholder_server_port; client_sep.peer.is_ip4 = 1; - client_sep.peer.ip.ip4.as_u32 = clib_host_to_net_u32 (0x01010101); + client_sep.peer.ip.ip4.as_u32 = intf_addr[0].as_u32; client_sep.peer.port = placeholder_client_port; client_sep.transport_proto = TRANSPORT_PROTO_TCP; @@ -401,6 +444,35 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) SESSION_TEST ((tc->lcl_port == placeholder_client_port), "ports should be equal"); + /* Disconnect server session, should lead to faster port cleanup on client */ + vnet_disconnect_args_t disconnect_args = { + .handle = + session_make_handle (accepted_session_index, accepted_session_thread), + .app_index = server_index, + }; + + error = vnet_disconnect_session (&disconnect_args); + SESSION_TEST ((error == 0), "disconnect should work"); + + /* wait for stuff to happen */ + tries = 0; + while (connected_session_index != ~0 && ++tries < 100) + { + vlib_worker_thread_barrier_release (vm); + vlib_process_suspend (vm, 100e-3); + vlib_worker_thread_barrier_sync (vm); + } + + /* Active closes take longer to cleanup, don't wait */ + + clib_warning ("waited %.1f seconds for disconnect", tries / 10.0); + SESSION_TEST ((connected_session_index == ~0), "session should not exist"); + SESSION_TEST ((connected_session_thread == ~0), "thread should not exist"); + SESSION_TEST (transport_port_local_in_use () == 0, + "port should be cleaned up"); + SESSION_TEST ((app_session_error == 0), "no app session errors"); + + /* Start cleanup by detaching apps */ vnet_app_detach_args_t detach_args = { .app_index = server_index, .api_client_index = ~0, @@ -416,13 +488,167 @@ 
session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) /* Allow the disconnects to finish before removing the routes. */ vlib_process_suspend (vm, 10e-3); - session_add_del_route_via_lookup_in_table (0, 1, &intf_addr[1], 32, - 0 /* is_add */ ); - session_add_del_route_via_lookup_in_table (1, 0, &intf_addr[0], 32, - 0 /* is_add */ ); + session_add_del_route_via_lookup_in_table ( + client_vrf, server_vrf, &intf_addr[1], 32, 0 /* is_add */); + session_add_del_route_via_lookup_in_table ( + server_vrf, client_vrf, &intf_addr[0], 32, 0 /* is_add */); + + session_delete_loopback (sw_if_index[0]); + session_delete_loopback (sw_if_index[1]); + + /* + * Redo the test but with client in the non-default namespace + */ + + /* Create the loopbacks */ + client_vrf = 1; + server_vrf = 0; + session_create_lookpback (client_vrf, &sw_if_index[0], &intf_addr[0]); + session_create_lookpback (server_vrf, &sw_if_index[1], &intf_addr[1]); + + session_add_del_route_via_lookup_in_table ( + client_vrf, server_vrf, &intf_addr[1], 32, 1 /* is_add */); + session_add_del_route_via_lookup_in_table ( + server_vrf, client_vrf, &intf_addr[0], 32, 1 /* is_add */); + + /* Insert new client namespace */ + vec_free (appns_id); + appns_id = format (0, "appns_client"); + ns_args.ns_id = appns_id; + ns_args.sw_if_index = sw_if_index[0]; /* client interface*/ + ns_args.is_add = 1; + + error = vnet_app_namespace_add_del (&ns_args); + SESSION_TEST ((error == 0), "app ns insertion should succeed: %U", + format_session_error, error); + + /* Attach client */ + attach_args.name = format (0, "session_test_client"); + attach_args.namespace_id = appns_id; + attach_args.options[APP_OPTIONS_ADD_SEGMENT_SIZE] = 0; + attach_args.options[APP_OPTIONS_NAMESPACE_SECRET] = placeholder_secret; + attach_args.api_client_index = ~0; + + error = vnet_application_attach (&attach_args); + SESSION_TEST ((error == 0), "client app attached: %U", format_session_error, + error); + client_index = attach_args.app_index; + vec_free (attach_args.name); + + /* Attach server */ + attach_args.name = format (0, "session_test_server"); + attach_args.namespace_id = 0; + attach_args.options[APP_OPTIONS_ADD_SEGMENT_SIZE] = 32 << 20; + attach_args.options[APP_OPTIONS_NAMESPACE_SECRET] = 0; + attach_args.api_client_index = ~0; + error = vnet_application_attach (&attach_args); + SESSION_TEST ((error == 0), "server app attached: %U", format_session_error, + error); + vec_free (attach_args.name); + server_index = attach_args.app_index; + + /* Bind server */ + clib_memset (&server_sep, 0, sizeof (server_sep)); + server_sep.is_ip4 = 1; + server_sep.port = placeholder_server_port; + bind_args.sep_ext = server_sep; + bind_args.app_index = server_index; + error = vnet_listen (&bind_args); + SESSION_TEST ((error == 0), "server bind should work: %U", + format_session_error, error); + + /* Connect client */ + connected_session_index = connected_session_thread = ~0; + accepted_session_index = accepted_session_thread = ~0; + clib_memset (&client_sep, 0, sizeof (client_sep)); + client_sep.is_ip4 = 1; + client_sep.ip.ip4.as_u32 = intf_addr[1].as_u32; + client_sep.port = placeholder_server_port; + client_sep.peer.is_ip4 = 1; + client_sep.peer.ip.ip4.as_u32 = intf_addr[0].as_u32; + client_sep.peer.port = placeholder_client_port; + client_sep.transport_proto = TRANSPORT_PROTO_TCP; + + connect_args.sep_ext = client_sep; + connect_args.app_index = client_index; + error = vnet_connect (&connect_args); + SESSION_TEST ((error == 0), "connect should work"); + + /* wait for stuff to 
happen */ + while (connected_session_index == ~0 && ++tries < 100) + { + vlib_worker_thread_barrier_release (vm); + vlib_process_suspend (vm, 100e-3); + vlib_worker_thread_barrier_sync (vm); + } + while (accepted_session_index == ~0 && ++tries < 100) + { + vlib_worker_thread_barrier_release (vm); + vlib_process_suspend (vm, 100e-3); + vlib_worker_thread_barrier_sync (vm); + } + + clib_warning ("waited %.1f seconds for connections", tries / 10.0); + SESSION_TEST ((connected_session_index != ~0), "session should exist"); + SESSION_TEST ((connected_session_thread != ~0), "thread should exist"); + SESSION_TEST ((accepted_session_index != ~0), "session should exist"); + SESSION_TEST ((accepted_session_thread != ~0), "thread should exist"); + s = session_get (connected_session_index, connected_session_thread); + tc = session_get_transport (s); + SESSION_TEST ((tc != 0), "transport should exist"); + SESSION_TEST ( + (memcmp (&tc->lcl_ip, &client_sep.peer.ip, sizeof (tc->lcl_ip)) == 0), + "ips should be equal"); + SESSION_TEST ((tc->lcl_port == placeholder_client_port), + "ports should be equal"); + + /* Disconnect server session, for faster port cleanup on client */ + disconnect_args.app_index = server_index; + disconnect_args.handle = + session_make_handle (accepted_session_index, accepted_session_thread); + + error = vnet_disconnect_session (&disconnect_args); + SESSION_TEST ((error == 0), "disconnect should work"); + + /* wait for stuff to happen */ + tries = 0; + while (connected_session_index != ~0 && ++tries < 100) + { + vlib_worker_thread_barrier_release (vm); + vlib_process_suspend (vm, 100e-3); + vlib_worker_thread_barrier_sync (vm); + } + + /* Active closes take longer to cleanup, don't wait */ + + clib_warning ("waited %.1f seconds for disconnect", tries / 10.0); + SESSION_TEST ((connected_session_index == ~0), "session should not exist"); + SESSION_TEST ((connected_session_thread == ~0), "thread should not exist"); + SESSION_TEST ((app_session_error == 0), "no app session errors"); + SESSION_TEST (transport_port_local_in_use () == 0, + "port should be cleaned up"); + + /* Start cleanup by detaching apps */ + detach_args.app_index = server_index; + vnet_application_detach (&detach_args); + detach_args.app_index = client_index; + vnet_application_detach (&detach_args); + + ns_args.is_add = 0; + error = vnet_app_namespace_add_del (&ns_args); + SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error); + + /* Allow the disconnects to finish before removing the routes. 
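The endpoint-cfg test above repeats the same polling idiom several times: release the worker barrier, suspend the process for 100 ms, re-sync the barrier, then re-check the condition. A small sketch of that idiom factored into a helper; the helper name is illustrative and not part of the patch:

#include <vlib/vlib.h>

/* Poll a volatile u32 until it reaches 'target' or the retry budget runs
 * out. The barrier must be released so worker threads can make progress
 * while this process sleeps. Returns the number of tries used. */
static int
test_wait_for_u32 (vlib_main_t *vm, volatile u32 *val, u32 target,
                   int max_tries)
{
  int tries = 0;
  while (*val != target && ++tries < max_tries)
    {
      vlib_worker_thread_barrier_release (vm);
      vlib_process_suspend (vm, 100e-3);
      vlib_worker_thread_barrier_sync (vm);
    }
  return tries;
}

The disconnect wait above would then read, for example, tries = test_wait_for_u32 (vm, &connected_session_index, ~0, 100).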
*/ + vlib_process_suspend (vm, 10e-3); + + session_add_del_route_via_lookup_in_table ( + client_vrf, server_vrf, &intf_addr[1], 32, 0 /* is_add */); + session_add_del_route_via_lookup_in_table ( + server_vrf, client_vrf, &intf_addr[0], 32, 0 /* is_add */); session_delete_loopback (sw_if_index[0]); session_delete_loopback (sw_if_index[1]); + return 0; } @@ -1781,6 +2007,11 @@ session_test_proxy (vlib_main_t * vm, unformat_input_t * input) unformat_free (&tmp_input); vec_free (attach_args.name); session_delete_loopback (sw_if_index); + + /* Revert default appns sw_if_index */ + app_ns = app_namespace_get_default (); + app_ns->sw_if_index = ~0; + return 0; } @@ -2131,7 +2362,10 @@ session_get_memory_usage (void) s = format (s, "%U\n", format_clib_mem_heap, heap, 0); ss = strstr ((char *) s, "used:"); if (ss) - sscanf (ss, "used: %f", &used); + { + if (sscanf (ss, "used: %f", &used) != 1) + clib_warning ("invalid 'used' value"); + } else clib_warning ("substring 'used:' not found from show memory"); vec_free (s); @@ -2501,6 +2735,8 @@ session_test (vlib_main_t * vm, done: if (res) return clib_error_return (0, "Session unit test failed"); + + vlib_cli_output (vm, "SUCCESS"); return 0; } diff --git a/src/plugins/unittest/svm_fifo_test.c b/src/plugins/unittest/svm_fifo_test.c index 9feb37cbc25..c6031c59987 100644 --- a/src/plugins/unittest/svm_fifo_test.c +++ b/src/plugins/unittest/svm_fifo_test.c @@ -2856,6 +2856,8 @@ svm_fifo_test (vlib_main_t * vm, unformat_input_t * input, done: if (res) return clib_error_return (0, "svm fifo unit test failed"); + + vlib_cli_output (vm, "SUCCESS"); return 0; } diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index bd39474ce93..6236ccdfe08 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -1002,16 +1002,16 @@ tbt_seq_lt (u32 a, u32 b) } static void -tcp_test_set_time (u32 thread_index, u32 val) +tcp_test_set_time (clib_thread_index_t thread_index, u32 val) { session_main.wrk[thread_index].last_vlib_time = val; - tcp_set_time_now (&tcp_main.wrk_ctx[thread_index], val); + tcp_set_time_now (&tcp_main.wrk[thread_index], val); } static int tcp_test_delivery (vlib_main_t * vm, unformat_input_t * input) { - u32 thread_index = 0, snd_una, *min_seqs = 0; + clib_thread_index_t thread_index = 0, snd_una, *min_seqs = 0; tcp_rate_sample_t _rs = { 0 }, *rs = &_rs; tcp_connection_t _tc, *tc = &_tc; sack_scoreboard_t *sb = &tc->sack_sb; @@ -1337,7 +1337,7 @@ tcp_test_delivery (vlib_main_t * vm, unformat_input_t * input) static int tcp_test_bt (vlib_main_t * vm, unformat_input_t * input) { - u32 thread_index = 0; + clib_thread_index_t thread_index = 0; tcp_rate_sample_t _rs = { 0 }, *rs = &_rs; tcp_connection_t _tc, *tc = &_tc; int __clib_unused verbose = 0, i; @@ -1594,6 +1594,8 @@ tcp_test (vlib_main_t * vm, done: if (res) return clib_error_return (0, "TCP unit test failed"); + + vlib_cli_output (vm, "SUCCESS"); return 0; } diff --git a/src/plugins/urpf/urpf_dp.h b/src/plugins/urpf/urpf_dp.h index b17fed7e04b..edb4ec79171 100644 --- a/src/plugins/urpf/urpf_dp.h +++ b/src/plugins/urpf/urpf_dp.h @@ -98,8 +98,8 @@ urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir, lb_index = ip4_fib_forwarding_lookup (fib_index, &ip->src_address); /* Pass multicast. 
*/ - lpass = (ip4_address_is_multicast (&ip->src_address) || - ip4_address_is_global_broadcast (&ip->src_address)); + lpass = (ip4_address_is_multicast (&ip->dst_address) || + ip4_address_is_global_broadcast (&ip->dst_address)); } else { @@ -108,7 +108,7 @@ urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir, ip = (ip6_header_t *) h; lb_index = ip6_fib_table_fwding_lookup (fib_index, &ip->src_address); - lpass = ip6_address_is_multicast (&ip->src_address); + lpass = ip6_address_is_multicast (&ip->dst_address); } llb = load_balance_get (lb_index); @@ -157,10 +157,10 @@ urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir, ip4_fib_forwarding_lookup_x2 (fib_index0, fib_index1, &ip0->src_address, &ip1->src_address, &lb_index0, &lb_index1); /* Pass multicast. */ - lpass0 = (ip4_address_is_multicast (&ip0->src_address) || - ip4_address_is_global_broadcast (&ip0->src_address)); - lpass1 = (ip4_address_is_multicast (&ip1->src_address) || - ip4_address_is_global_broadcast (&ip1->src_address)); + lpass0 = (ip4_address_is_multicast (&ip0->dst_address) || + ip4_address_is_global_broadcast (&ip0->dst_address)); + lpass1 = (ip4_address_is_multicast (&ip1->dst_address) || + ip4_address_is_global_broadcast (&ip1->dst_address)); } else { @@ -171,8 +171,8 @@ urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir, lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, &ip0->src_address); lb_index1 = ip6_fib_table_fwding_lookup (fib_index1, &ip1->src_address); - lpass0 = ip6_address_is_multicast (&ip0->src_address); - lpass1 = ip6_address_is_multicast (&ip1->src_address); + lpass0 = ip6_address_is_multicast (&ip0->dst_address); + lpass1 = ip6_address_is_multicast (&ip1->dst_address); } llb0 = load_balance_get (lb_index0); diff --git a/src/plugins/vhost/vhost_user.c b/src/plugins/vhost/vhost_user.c index fdee984f97b..592a126c683 100644 --- a/src/plugins/vhost/vhost_user.c +++ b/src/plugins/vhost/vhost_user.c @@ -31,7 +31,7 @@ #include <linux/if_tun.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vnet/ethernet/ethernet.h> #include <vnet/devices/devices.h> @@ -325,15 +325,13 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) if (vring->kickfd_idx != ~0) { - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vring->kickfd_idx); + clib_file_t *uf = clib_file_get (&file_main, vring->kickfd_idx); clib_file_del (&file_main, uf); vring->kickfd_idx = ~0; } if (vring->callfd_idx != ~0) { - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vring->callfd_idx); + clib_file_t *uf = clib_file_get (&file_main, vring->callfd_idx); clib_file_del (&file_main, uf); vring->callfd_idx = ~0; } @@ -349,7 +347,7 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) u16 q = vui->vrings[qid].qid; u32 queue_index = vui->vrings[qid].queue_index; u32 mode = vui->vrings[qid].mode; - u32 thread_index = vui->vrings[qid].thread_index; + clib_thread_index_t thread_index = vui->vrings[qid].thread_index; vhost_user_vring_init (vui, qid); vui->vrings[qid].qid = q; vui->vrings[qid].queue_index = queue_index; @@ -367,7 +365,7 @@ vhost_user_if_disconnect (vhost_user_intf_t * vui) if (vui->clib_file_index != ~0) { - clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index); + clib_file_del_by_index (&file_main, vui->clib_file_index); vui->clib_file_index = ~0; } @@ -750,8 +748,8 @@ vhost_user_socket_read (clib_file_t * uf) /* if there is old fd, delete and close it */ if (vui->vrings[q].callfd_idx != ~0) { - clib_file_t *uf = 
pool_elt_at_index (file_main.file_pool, - vui->vrings[q].callfd_idx); + clib_file_t *uf = + clib_file_get (&file_main, vui->vrings[q].callfd_idx); clib_file_del (&file_main, uf); vui->vrings[q].callfd_idx = ~0; } @@ -823,8 +821,8 @@ vhost_user_socket_read (clib_file_t * uf) if (vui->vrings[q].kickfd_idx != ~0) { - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vui->vrings[q].kickfd_idx); + clib_file_t *uf = + clib_file_get (&file_main, vui->vrings[q].kickfd_idx); clib_file_del (&file_main, uf); vui->vrings[q].kickfd_idx = ~0; } @@ -1148,7 +1146,7 @@ vhost_user_socksvr_accept_ready (clib_file_t * uf) { vu_log_debug (vui, "Close client socket for vhost interface %d, fd %d", vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index)); - clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index); + clib_file_del_by_index (&file_main, vui->clib_file_index); } vu_log_debug (vui, "New client socket for vhost interface %d, fd %d", @@ -1408,8 +1406,7 @@ vhost_user_term_if (vhost_user_intf_t * vui) if (vui->unix_server_index != ~0) { //Close server socket - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vui->unix_server_index); + clib_file_t *uf = clib_file_get (&file_main, vui->unix_server_index); clib_file_del (&file_main, uf); vui->unix_server_index = ~0; unlink (vui->sock_filename); @@ -1444,7 +1441,7 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vhost_user_vring_t *txvq = &vui->vrings[qid]; if ((txvq->mode == VNET_HW_IF_RX_MODE_POLLING) && - (txvq->thread_index != ~0)) + (txvq->thread_index != CLIB_INVALID_THREAD_INDEX)) { vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index); ASSERT (cpu->polling_q_count != 0); diff --git a/src/plugins/vhost/vhost_user.h b/src/plugins/vhost/vhost_user.h index a3582affb4b..9e461979007 100644 --- a/src/plugins/vhost/vhost_user.h +++ b/src/plugins/vhost/vhost_user.h @@ -62,11 +62,13 @@ dev->hw_if_index, ##__VA_ARGS__); \ }; -#define UNIX_GET_FD(unixfd_idx) ({ \ - typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \ - (__unixfd_idx != ~0) ? \ - pool_elt_at_index (file_main.file_pool, \ - __unixfd_idx)->file_descriptor : -1; }) +#define UNIX_GET_FD(unixfd_idx) \ + ({ \ + typeof (unixfd_idx) __unixfd_idx = (unixfd_idx); \ + (__unixfd_idx != ~0) ? 
\ + clib_file_get (&file_main, __unixfd_idx)->file_descriptor : \ + -1; \ + }) #define foreach_virtio_trace_flags \ _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \ @@ -229,7 +231,7 @@ typedef struct u16 last_kick; u8 first_kick; u32 queue_index; - u32 thread_index; + clib_thread_index_t thread_index; } vhost_user_vring_t; #define VHOST_USER_EVENT_START_TIMER 1 diff --git a/src/plugins/vhost/vhost_user_input.c b/src/plugins/vhost/vhost_user_input.c index ca5072485ff..5dc1eedf52a 100644 --- a/src/plugins/vhost/vhost_user_input.c +++ b/src/plugins/vhost/vhost_user_input.c @@ -31,7 +31,7 @@ #include <linux/if_tun.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vnet/ethernet/ethernet.h> #include <vnet/devices/devices.h> diff --git a/src/plugins/vhost/vhost_user_output.c b/src/plugins/vhost/vhost_user_output.c index 58fd4309f8c..3052ae39ec1 100644 --- a/src/plugins/vhost/vhost_user_output.c +++ b/src/plugins/vhost/vhost_user_output.c @@ -32,7 +32,7 @@ #include <linux/if_tun.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vnet/ethernet/ethernet.h> #include <vnet/devices/devices.h> @@ -382,7 +382,7 @@ vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node, vhost_user_main_t *vum = &vhost_user_main; u32 qid = rxvq->qid; u8 error; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vhost_cpu_t *cpu = &vum->cpus[thread_index]; u32 map_hint = 0; u8 retry = 8; @@ -698,7 +698,7 @@ VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm, u32 qid; vhost_user_vring_t *rxvq; u8 error; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vhost_cpu_t *cpu = &vum->cpus[thread_index]; u32 map_hint = 0; u8 retry = 8; @@ -1051,7 +1051,7 @@ vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, return clib_error_return (0, "unsupported"); } - if (txvq->thread_index == ~0) + if (txvq->thread_index == CLIB_INVALID_THREAD_INDEX) return clib_error_return (0, "Queue initialization is not finished yet"); cpu = vec_elt_at_index (vum->cpus, txvq->thread_index); diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c index 25632546b6d..55fb418e501 100644 --- a/src/plugins/vmxnet3/input.c +++ b/src/plugins/vmxnet3/input.c @@ -203,7 +203,7 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vmxnet3_rx_comp *rx_comp; u32 desc_idx; vmxnet3_rxq_t *rxq; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 buffer_indices[VLIB_FRAME_SIZE], *bi; u16 nexts[VLIB_FRAME_SIZE], *next; vmxnet3_rx_ring *ring; diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h index 89602f8ee9e..8de992eaffe 100644 --- a/src/plugins/vmxnet3/vmxnet3.h +++ b/src/plugins/vmxnet3/vmxnet3.h @@ -523,7 +523,7 @@ typedef struct u32 mode; u8 buffer_pool_index; u32 queue_index; - u32 thread_index; + clib_thread_index_t thread_index; vmxnet3_rx_ring rx_ring[VMXNET3_RX_RING_SIZE]; vmxnet3_rx_desc *rx_desc[VMXNET3_RX_RING_SIZE]; vmxnet3_rx_comp *rx_comp; diff --git a/src/plugins/vrrp/vrrp_periodic.c b/src/plugins/vrrp/vrrp_periodic.c index 5f9d7ae938e..e3a374a112d 100644 --- a/src/plugins/vrrp/vrrp_periodic.c +++ b/src/plugins/vrrp/vrrp_periodic.c @@ -187,7 +187,19 @@ vrrp_periodic_process (vlib_main_t * vm, timer = pool_elt_at_index (pm->vr_timers, next_timer); timeout = timer->expire_time - now; - vlib_process_wait_for_event_or_clock (vm, 
timeout); + /* + * Adding a virtual MAC to some NICs can take a significant amount + * of time (~1s). If a lot of VRs enter the master state around the + * same time, the process node can stay active for a very long time + * processing all of the transitions. + * + * Try to force a 10us sleep between processing events to ensure + * that the process node does not prevent API messages and RPCs + * from being handled for an extended period. This prevents + * vlib_process_wait_for_event_or_clock() from returning + * immediately. + */ + vlib_process_wait_for_event_or_clock (vm, clib_max (timeout, 10e-6)); } event_type = vlib_process_get_events (vm, (uword **) & event_data); diff --git a/src/plugins/vxlan-gpe/CMakeLists.txt b/src/plugins/vxlan-gpe/CMakeLists.txt new file mode 100644 index 00000000000..987ebcc2df9 --- /dev/null +++ b/src/plugins/vxlan-gpe/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright (c) 2024 OpenInfra Foundation Europe +# Copyright (c) 2025 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(vxlan-gpe + SOURCES + encap.c + decap.c + vxlan_gpe.c + vxlan_gpe_api.c + vxlan_gpe_packet.h + plugin.c + + INSTALL_HEADERS + vxlan_gpe.h + + MULTIARCH_SOURCES + decap.c + + API_FILES + vxlan_gpe.api +) diff --git a/src/vnet/vxlan-gpe/FEATURE.yaml b/src/plugins/vxlan-gpe/FEATURE.yaml index f4ec2f4c517..f4ec2f4c517 100644 --- a/src/vnet/vxlan-gpe/FEATURE.yaml +++ b/src/plugins/vxlan-gpe/FEATURE.yaml diff --git a/src/vnet/vxlan-gpe/decap.c b/src/plugins/vxlan-gpe/decap.c index d4c7424630d..80f2facef29 100644 --- a/src/vnet/vxlan-gpe/decap.c +++ b/src/plugins/vxlan-gpe/decap.c @@ -22,7 +22,7 @@ #include <vlib/vlib.h> #include <vnet/udp/udp_local.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vxlan-gpe/vxlan_gpe.h> /** * @brief Struct for VXLAN GPE decap packet tracing @@ -210,7 +210,7 @@ vxlan_gpe_input (vlib_main_t * vm, vxlan4_gpe_tunnel_cache_t last4; vxlan6_gpe_tunnel_cache_t last6; u32 pkts_decapsulated = 0; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; if (is_ip4) @@ -617,7 +617,7 @@ VLIB_NODE_FN (vxlan6_gpe_input_node) (vlib_main_t * vm, */ static char *vxlan_gpe_error_strings[] = { #define vxlan_gpe_error(n,s) s, -#include <vnet/vxlan-gpe/vxlan_gpe_error.def> +#include <vxlan-gpe/vxlan_gpe_error.def> #undef vxlan_gpe_error #undef _ }; diff --git a/src/vnet/vxlan-gpe/dir.dox b/src/plugins/vxlan-gpe/dir.dox index c154733b21f..c154733b21f 100644 --- a/src/vnet/vxlan-gpe/dir.dox +++ b/src/plugins/vxlan-gpe/dir.dox diff --git a/src/vnet/vxlan-gpe/encap.c b/src/plugins/vxlan-gpe/encap.c index a769861577d..701c3af55b5 100644 --- a/src/vnet/vxlan-gpe/encap.c +++ b/src/plugins/vxlan-gpe/encap.c @@ -23,7 +23,7 @@ #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> #include <vnet/udp/udp_inlines.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vxlan-gpe/vxlan_gpe.h> /** Statistics (not really errors) */ #define foreach_vxlan_gpe_encap_error \ 
@@ -156,7 +156,7 @@ vxlan_gpe_encap (vlib_main_t * vm, vnet_main_t *vnm = ngm->vnet_main; vnet_interface_main_t *im = &vnm->interface_main; u32 pkts_encapsulated = 0; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; diff --git a/src/plugins/vxlan-gpe/plugin.c b/src/plugins/vxlan-gpe/plugin.c new file mode 100644 index 00000000000..5a711a39d78 --- /dev/null +++ b/src/plugins/vxlan-gpe/plugin.c @@ -0,0 +1,26 @@ +/* + * plugin.c: vxlan-gpe + * + * Copyright (c) OpenInfra Foundation Europe. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> +// register a plugin + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "VxLan GPE Tunnels", +}; diff --git a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt b/src/plugins/vxlan-gpe/vxlan-gpe-rfc.txt index 35cee50f573..35cee50f573 100644 --- a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt +++ b/src/plugins/vxlan-gpe/vxlan-gpe-rfc.txt diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.api b/src/plugins/vxlan-gpe/vxlan_gpe.api index 3cbd7ab7f71..3cbd7ab7f71 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.api +++ b/src/plugins/vxlan-gpe/vxlan_gpe.api diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/plugins/vxlan-gpe/vxlan_gpe.c index 5a5262ea9db..abb2049a356 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/plugins/vxlan-gpe/vxlan_gpe.c @@ -17,7 +17,7 @@ * @brief Common utility functions for IPv4 and IPv6 VXLAN GPE tunnels * */ -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vxlan-gpe/vxlan_gpe.h> #include <vnet/fib/fib.h> #include <vnet/ip/format.h> #include <vnet/fib/fib_entry.h> @@ -44,7 +44,7 @@ * You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE segment. 
*/ -vxlan_gpe_main_t vxlan_gpe_main; +vxlan_gpe_main_t vxlan_gpe_main __clib_export; static u8 * format_decap_next (u8 * s, va_list * args) @@ -1212,11 +1212,13 @@ VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) = * @return error * */ -clib_error_t * -vxlan_gpe_init (vlib_main_t * vm) +__clib_export clib_error_t * +vxlan_gpe_init (vlib_main_t *vm) { vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + ngm->register_decap_protocol = vxlan_gpe_register_decap_protocol; + ngm->unregister_decap_protocol = vxlan_gpe_unregister_decap_protocol; ngm->vnet_main = vnet_get_main (); ngm->vlib_main = vm; diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/plugins/vxlan-gpe/vxlan_gpe.h index aabaafeee6f..138ae840ef5 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.h +++ b/src/plugins/vxlan-gpe/vxlan_gpe.h @@ -29,7 +29,7 @@ #include <vnet/l2/l2_output.h> #include <vnet/l2/l2_bd.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <vxlan-gpe/vxlan_gpe_packet.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> #include <vnet/udp/udp_packet.h> @@ -196,11 +196,16 @@ typedef enum typedef enum { #define vxlan_gpe_error(n,s) VXLAN_GPE_ERROR_##n, -#include <vnet/vxlan-gpe/vxlan_gpe_error.def> +#include <plugins/vxlan-gpe/vxlan_gpe_error.def> #undef vxlan_gpe_error VXLAN_GPE_N_ERROR, } vxlan_gpe_input_error_t; +typedef void (*vxlan_gpe_register_decap_protocol_callback_t) ( + u8 protocol_id, uword next_node_index); +typedef void (*vxlan_gpe_unregister_decap_protocol_callback_t) ( + u8 protocol_id, uword next_node_index); + /** Struct for VXLAN GPE node state */ typedef struct { @@ -233,6 +238,10 @@ typedef struct /** List of next nodes for the decap indexed on protocol */ uword decap_next_node_list[VXLAN_GPE_PROTOCOL_MAX]; + + /* export callbacks to register/unregister decapsulation protocol */ + vxlan_gpe_register_decap_protocol_callback_t register_decap_protocol; + vxlan_gpe_unregister_decap_protocol_callback_t unregister_decap_protocol; } vxlan_gpe_main_t; extern vxlan_gpe_main_t vxlan_gpe_main; @@ -279,13 +288,10 @@ typedef enum VXLAN_GPE_ENCAP_N_NEXT } vxlan_gpe_encap_next_t; - +void vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index); void vxlan_gpe_unregister_decap_protocol (u8 protocol_id, uword next_node_index); -void vxlan_gpe_register_decap_protocol (u8 protocol_id, - uword next_node_index); - void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable); diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/plugins/vxlan-gpe/vxlan_gpe_api.c index cc74e1f58d4..e82445498e8 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c +++ b/src/plugins/vxlan-gpe/vxlan_gpe_api.c @@ -23,13 +23,13 @@ #include <vnet/interface.h> #include <vnet/api_errno.h> #include <vnet/feature/feature.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vxlan-gpe/vxlan_gpe.h> #include <vnet/fib/fib_table.h> #include <vnet/format_fns.h> #include <vnet/ip/ip_types_api.h> -#include <vnet/vxlan-gpe/vxlan_gpe.api_enum.h> -#include <vnet/vxlan-gpe/vxlan_gpe.api_types.h> +#include <vxlan-gpe/vxlan_gpe.api_enum.h> +#include <vxlan-gpe/vxlan_gpe.api_types.h> #define REPLY_MSG_ID_BASE msg_id_base #include <vlibapi/api_helper_macros.h> diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_error.def b/src/plugins/vxlan-gpe/vxlan_gpe_error.def index 9cf1b1cb656..9cf1b1cb656 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe_error.def +++ b/src/plugins/vxlan-gpe/vxlan_gpe_error.def diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h b/src/plugins/vxlan-gpe/vxlan_gpe_packet.h index 
f5e5ddc2347..f5e5ddc2347 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h +++ b/src/plugins/vxlan-gpe/vxlan_gpe_packet.h diff --git a/src/plugins/vxlan/decap.c b/src/plugins/vxlan/decap.c index 5f28c5e97bb..4ad35bc2d5d 100644 --- a/src/plugins/vxlan/decap.c +++ b/src/plugins/vxlan/decap.c @@ -193,7 +193,7 @@ vxlan_input (vlib_main_t * vm, last_tunnel_cache4 last4; last_tunnel_cache6 last6; u32 pkts_dropped = 0; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); if (is_ip4) clib_memset (&last4, 0xff, sizeof last4); @@ -1039,7 +1039,7 @@ VLIB_NODE_FN (vxlan4_flow_input_node) (vlib_main_t * vm, [VXLAN_FLOW_NEXT_L2_INPUT] = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, }; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 *from = vlib_frame_vector_args (f); u32 n_left_from = f->n_vectors; diff --git a/src/plugins/vxlan/encap.c b/src/plugins/vxlan/encap.c index 98464d809ba..60181bff451 100644 --- a/src/plugins/vxlan/encap.c +++ b/src/plugins/vxlan/encap.c @@ -78,7 +78,7 @@ vxlan_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_combined_counter_main_t *tx_counter = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX; u32 pkts_encapsulated = 0; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 sw_if_index0 = 0, sw_if_index1 = 0; u32 next0 = 0, next1 = 0; vxlan_tunnel_t *t0 = NULL, *t1 = NULL; diff --git a/src/plugins/wireguard/wireguard_input.c b/src/plugins/wireguard/wireguard_input.c index 1eb7fbfed0b..0ae0480fc2c 100644 --- a/src/plugins/wireguard/wireguard_input.c +++ b/src/plugins/wireguard/wireguard_input.c @@ -698,7 +698,7 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; vlib_buffer_t *lb; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vnet_crypto_op_t **crypto_ops; const u16 drop_next = WG_INPUT_NEXT_PUNT; message_type_t header_type; diff --git a/src/plugins/wireguard/wireguard_output_tun.c b/src/plugins/wireguard/wireguard_output_tun.c index c9411f6ff20..7bbec11fdcb 100644 --- a/src/plugins/wireguard/wireguard_output_tun.c +++ b/src/plugins/wireguard/wireguard_output_tun.c @@ -436,7 +436,7 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_crypto_op_t **crypto_ops; u16 nexts[VLIB_FRAME_SIZE], *next = nexts; vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE]; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u16 n_sync = 0; const u16 drop_next = WG_OUTPUT_NEXT_ERROR; const u8 is_async = wg_op_mode_is_set_ASYNC (); diff --git a/src/scripts/generate_version_h b/src/scripts/generate_version_h index e8379550186..42359b7aa9c 100755 --- a/src/scripts/generate_version_h +++ b/src/scripts/generate_version_h @@ -2,6 +2,11 @@ : ${VPP_BUILD_USER:=$(whoami)} : ${VPP_BUILD_HOST:=$(hostname)} : ${VPP_BUILD_TOPDIR:=$(git rev-parse --show-toplevel 2> /dev/null)} +if [ -n "${2}" ] && [ "${2}" != "default" ] ; then + VPP_PLATFORM="-${2}" +else + VPP_PLATFORM="" +fi DATE_FMT="+%Y-%m-%dT%H:%M:%S" SOURCE_DATE_EPOCH="${SOURCE_DATE_EPOCH:-$(date +%s)}" VPP_BUILD_DATE=$(date -u -d "@$SOURCE_DATE_EPOCH" "$DATE_FMT" 2>/dev/null || date -u -r "$SOURCE_DATE_EPOCH" "$DATE_FMT" 2>/dev/null || date -u "$DATE_FMT") @@ -29,6 +34,6 @@ cat > ${1} << __EOF__ #define VPP_BUILD_USER "$VPP_BUILD_USER" #define VPP_BUILD_HOST "$VPP_BUILD_HOST" 
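A change that recurs throughout this diff (snort, srtp, srv6, tls, vhost, vmxnet3, vxlan, vxlan-gpe, wireguard) replaces plain u32 thread indices with clib_thread_index_t and bare ~0 comparisons with CLIB_INVALID_THREAD_INDEX. The typedef itself is not part of the hunks shown here; the sketch below assumes it is a 32-bit unsigned type with an all-ones invalid sentinel, which is consistent with how the vhost hunks use it:

#include <stdint.h>

/* Assumed shape of the type; the real definition lives in vppinfra. */
typedef uint32_t clib_thread_index_t;
#define CLIB_INVALID_THREAD_INDEX ((clib_thread_index_t) ~0)

typedef struct
{
  /* was: u32 thread_index; */
  clib_thread_index_t thread_index;
} example_queue_t;

static inline int
example_queue_is_assigned (const example_queue_t *q)
{
  /* compare against the named sentinel rather than a bare ~0 */
  return q->thread_index != CLIB_INVALID_THREAD_INDEX;
}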
#define VPP_BUILD_TOPDIR "$VPP_BUILD_TOPDIR" -#define VPP_BUILD_VER "$(scripts/version)" +#define VPP_BUILD_VER "$(scripts/version)${VPP_PLATFORM}" #endif __EOF__ diff --git a/src/scripts/host-stack/cc_plots.py b/src/scripts/host-stack/cc_plots.py deleted file mode 100755 index f7953f223d4..00000000000 --- a/src/scripts/host-stack/cc_plots.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import re -import argparse -import matplotlib.pyplot as plt -from matplotlib.lines import Line2D - - -class Point: - "CC event" - - def __init__(self, x, y): - self.x = x - self.y = y - - -def listx(points): - return list(map(lambda pt: pt.x, points)) - - -def listy(points): - return list(map(lambda pt: pt.y, points)) - - -def plot_data(d): - plt.figure(1) - - cwndx = listx(d["cwnd"]) - cwndy = listy(d["cwnd"]) - congx = listx(d["congestion"]) - congy = listy(d["congestion"]) - rcvrdx = listx(d["recovered"]) - rcvrdy = listy(d["recovered"]) - rxttx = listx(d["rxtTimeout"]) - rxtty = listy(d["rxtTimeout"]) - - # cwnd/ssthresh/cc events - plt.subplot(311) - plt.title("cwnd/ssthresh") - pcwnd = plt.plot(cwndx, cwndy, "r") - psst = plt.plot(cwndx, d["ssthresh"], "y-") - pcong = plt.plot(congx, congy, "yo") - precov = plt.plot(rcvrdx, rcvrdy, "co") - prxtt = plt.plot(rxttx, rxtty, "mo") - - marker1 = Line2D(range(1), range(1), color="r") - marker2 = Line2D(range(1), range(1), color="y") - marker3 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="y") - marker4 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="c") - marker5 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="m") - plt.legend( - (marker1, marker2, marker3, marker4, marker5), - ("cwnd", "ssthresh", "congestion", "recovered", "rxt-timeout"), - loc=4, - ) - axes = plt.gca() - axes.set_ylim([-20e4, max(cwndy) + 20e4]) - - # snd variables - plt.subplot(312) - plt.title("cc variables") - plt.plot(cwndx, d["space"], "g-", markersize=1) - plt.plot(cwndx, d["flight"], "b-", markersize=1) - plt.plot(cwndx, d["sacked"], "m:", markersize=1) - plt.plot(cwndx, d["lost"], "y:", markersize=1) - plt.plot(cwndx, d["cc-space"], "k:", markersize=1) - plt.plot(cwndx, cwndy, "ro", markersize=2) - - plt.plot(congx, congy, "y^", markersize=10, markerfacecolor="y") - plt.plot(rcvrdx, rcvrdy, "c^", markersize=10, markerfacecolor="c") - plt.plot(rxttx, rxtty, "m^", markersize=10, markerfacecolor="m") - - # plt.plot(cwndx, d["snd_wnd"], 'ko', markersize=1) - plt.legend( - ( - "snd-space", - "flight", - "sacked", - "lost", - "cc-space", - "cwnd", - "congestion", - "recovered", - "rxt-timeout", - ), - loc=1, - ) - - # rto/srrt/rttvar - plt.subplot(313) - plt.title("rtt") - plt.plot(cwndx, d["srtt"], "g-") - plt.plot(cwndx, [x / 1000 for x in d["mrtt-us"]], "r-") - plt.plot(cwndx, d["rttvar"], "b-") - plt.legend(["srtt", "mrtt-us", "rttvar"]) - axes = plt.gca() - # plt.plot(cwndx, rto, 'r-') - # axes.set_ylim([0, int(max(rto[2:len(rto)])) + 50]) - - # show - plt.show() - - -def find_pattern(file_path, session_idx): - is_active_open = 1 - listener_pattern = "l\[\d\]" - if is_active_open: - initial_pattern = "\[\d\](\.\d+:\d+\->\.\d+:\d+)\s+open:\s" - else: - initial_pattern = "\[\d\](\.\d+:\d+\->\.\d+:\d+)\s" - idx = 0 - f = open(file_path, "r") - for line in f: - # skip listener lines (server) - if re.search(listener_pattern, line) != None: - continue - match = re.search(initial_pattern, line) - if match == None: - continue - if idx < session_idx: - idx += 1 - continue - filter_pattern = 
str(match.group(1)) + "\s+(.+)" - print("pattern is %s" % filter_pattern) - f.close() - return filter_pattern - raise Exception("Could not find initial pattern") - - -def compute_time(min, sec, msec): - return int(min) * 60 + int(sec) + int(msec) / 1000.0 - - -def run(file_path, session_idx): - filter_sessions = 1 - filter_pattern = "" - - patterns = { - "time": "^\d+:(\d+):(\d+):(\d+):\d+", - "listener": "l\[\d\]", - "cc": "cwnd (\d+) flight (\d+) space (\d+) ssthresh (\d+) snd_wnd (\d+)", - "cc-snd": "cc_space (\d+) sacked (\d+) lost (\d+)", - "rtt": "rto (\d+) srtt (\d+) mrtt-us (\d+) rttvar (\d+)", - "rxtt": "rxt-timeout", - "congestion": "congestion", - "recovered": "recovered", - } - d = { - "cwnd": [], - "space": [], - "flight": [], - "ssthresh": [], - "snd_wnd": [], - "cc-space": [], - "lost": [], - "sacked": [], - "rto": [], - "srtt": [], - "mrtt-us": [], - "rttvar": [], - "rxtTimeout": [], - "congestion": [], - "recovered": [], - } - - if filter_sessions: - filter_pattern = find_pattern(file_path, session_idx) - f = open(file_path, "r") - - stats_index = 0 - start_time = 0 - - for line in f: - # skip listener lines (server) - if re.search(patterns["listener"], line) != None: - continue - # filter sessions - if filter_sessions: - match = re.search(filter_pattern, line) - if match == None: - continue - - original_line = line - line = match.group(1) - match = re.search(patterns["time"], original_line) - if match == None: - print("something went wrong! no time!") - continue - time = compute_time(match.group(1), match.group(2), match.group(3)) - if start_time == 0: - start_time = time - - time = time - start_time - match = re.search(patterns["cc"], line) - if match != None: - d["cwnd"].append(Point(time, int(match.group(1)))) - d["flight"].append(int(match.group(2))) - d["space"].append(int(match.group(3))) - d["ssthresh"].append(int(match.group(4))) - d["snd_wnd"].append(int(match.group(5))) - stats_index += 1 - continue - match = re.search(patterns["cc-snd"], line) - if match != None: - d["cc-space"].append(int(match.group(1))) - d["sacked"].append(int(match.group(2))) - d["lost"].append(int(match.group(3))) - match = re.search(patterns["rtt"], line) - if match != None: - d["rto"].append(int(match.group(1))) - d["srtt"].append(int(match.group(2))) - d["mrtt-us"].append(int(match.group(3))) - d["rttvar"].append(int(match.group(4))) - if stats_index == 0: - continue - match = re.search(patterns["rxtt"], line) - if match != None: - d["rxtTimeout"].append(Point(time, d["cwnd"][stats_index - 1].y + 1e4)) - continue - match = re.search(patterns["congestion"], line) - if match != None: - d["congestion"].append(Point(time, d["cwnd"][stats_index - 1].y - 1e4)) - continue - match = re.search(patterns["recovered"], line) - if match != None: - d["recovered"].append(Point(time, d["cwnd"][stats_index - 1].y)) - continue - - plot_data(d) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Plot tcp cc logs") - parser.add_argument( - "-f", action="store", dest="file", required=True, help="elog file in txt format" - ) - parser.add_argument( - "-s", - action="store", - dest="session_index", - default=0, - help="session index for which to plot cc logs", - ) - results = parser.parse_args() - run(results.file, int(results.session_index)) diff --git a/src/scripts/host-stack/convert_evt b/src/scripts/host-stack/convert_evt deleted file mode 100755 index 1aba67d0268..00000000000 --- a/src/scripts/host-stack/convert_evt +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash - -# This 
depends on c2cpel and cpeldump. Enable their compilation by: -# ccmake build-root/build-vpp-native/vpp/ -# and turning on VPP_BUILD_PERFTOOL - -BIN_PATH=../../../build-root/install-vpp-native/vpp/bin -C2CPEL_BIN=$BIN_PATH/c2cpel -CPELDUMP_BIN=$BIN_PATH/cpeldump - -$C2CPEL_BIN --in $1 --out /tmp/tmp_file.cpel -$CPELDUMP_BIN --in /tmp/tmp_file.cpel --out $2 diff --git a/src/vat2/main.c b/src/vat2/main.c index bf415854db1..2949c4899aa 100644 --- a/src/vat2/main.c +++ b/src/vat2/main.c @@ -253,16 +253,15 @@ print_help (void) "Send API message to VPP and print reply\n" "\n" "-d, --debug Print additional information\n" - "-p, --prefix <prefix> Specify shared memory prefix to connect " - "to a given VPP instance\n" + "--dump-apis List all APIs available in VAT2 (might " + "not reflect running VPP)\n" "-f, --file <filename> File containing a JSON object with the " "arguments for the message to send\n" + "-p, --plugin-path Plugin path\n" + "-s, --prefix <prefix> Specify shared memory prefix to connect " + "to a given VPP instance\n" "-t, --template <message-name> Print a template JSON object for given API" - " message\n" - "--dump-apis List all APIs available in VAT2 (might " - "not reflect running VPP)\n" - "--plugin-path Pluing path" - "\n"; + " message\n"; printf ("%s", help_string); } @@ -281,38 +280,38 @@ main (int argc, char **argv) char *msgname = 0; static struct option long_options[] = { { "debug", no_argument, 0, 'd' }, - { "prefix", required_argument, 0, 's' }, - { "file", required_argument, 0, 'f' }, { "dump-apis", no_argument, 0, 0 }, - { "template", required_argument, 0, 't' }, + { "file", required_argument, 0, 'f' }, { "plugin-path", required_argument, 0, 'p' }, + { "prefix", required_argument, 0, 's' }, + { "template", required_argument, 0, 't' }, { 0, 0, 0, 0 } }; - while ((c = getopt_long (argc, argv, "hdp:f:t:", long_options, + while ((c = getopt_long (argc, argv, "df:p:s:t:", long_options, &option_index)) != -1) { switch (c) { case 0: - if (option_index == 3) + if (option_index == 1) dump_api = true; break; case 'd': vat2_debug = true; break; - case 't': - template = optarg; - break; - case 's': - prefix = optarg; - break; case 'f': filename = optarg; break; case 'p': pluginpath = optarg; break; + case 's': + prefix = optarg; + break; + case 't': + template = optarg; + break; case '?': print_help (); return 1; diff --git a/src/vcl/vcl_locked.c b/src/vcl/vcl_locked.c index f38df8fbf47..7ba9fab25fa 100644 --- a/src/vcl/vcl_locked.c +++ b/src/vcl/vcl_locked.c @@ -743,6 +743,7 @@ vls_listener_wrk_start_listen (vcl_locked_session_t * vls, u32 wrk_index) if (ls->flags & VCL_SESSION_F_PENDING_LISTEN) return; + ls->flags &= ~VCL_SESSION_F_LISTEN_NO_MQ; vcl_send_session_listen (wrk, ls); vls_listener_wrk_set (vls, wrk_index, 1 /* is_active */); @@ -759,7 +760,7 @@ vls_listener_wrk_stop_listen (vcl_locked_session_t * vls, u32 wrk_index) if (s->session_state != VCL_STATE_LISTEN) return; vcl_send_session_unlisten (wrk, s); - s->session_state = VCL_STATE_LISTEN_NO_MQ; + s->flags |= VCL_SESSION_F_LISTEN_NO_MQ; vls_listener_wrk_set (vls, wrk_index, 0 /* is_active */ ); } @@ -912,7 +913,7 @@ vls_share_session (vls_worker_t * vls_wrk, vcl_locked_session_t * vls) if (s->session_state == VCL_STATE_LISTEN) { - s->session_state = VCL_STATE_LISTEN_NO_MQ; + s->flags |= VCL_SESSION_F_LISTEN_NO_MQ; s->rx_fifo = s->tx_fifo = 0; } else if (s->rx_fifo) @@ -1384,36 +1385,41 @@ vls_mp_checks (vcl_locked_session_t * vls, int is_add) switch (s->session_state) { case VCL_STATE_LISTEN: - if (is_add) + if 
(!(s->flags & VCL_SESSION_F_LISTEN_NO_MQ)) { - vls_listener_wrk_set (vls, vls->vcl_wrk_index, 1 /* is_active */); - break; + if (is_add) + { + vls_listener_wrk_set (vls, vls->vcl_wrk_index, + 1 /* is_active */); + break; + } + /* Although removal from epoll means listener no longer accepts new + * sessions, the accept queue built by vpp cannot be drained by + * stopping the listener. Morover, some applications, e.g., nginx, + * might constantly remove and add listeners to their epfds. Removing + * listeners in such situations causes a lot of churn in vpp as + * segments and segment managers need to be recreated. */ + /* vls_listener_wrk_stop_listen (vls, vls->vcl_wrk_index); */ + } + else + { + if (!is_add) + break; + + /* Register worker as listener */ + vls_listener_wrk_start_listen (vls, vls->vcl_wrk_index); + + /* If owner worker did not attempt to accept/xpoll on the session, + * force a listen stop for it, since it may not be interested in + * accepting new sessions. + * This is pretty much a hack done to give app workers the illusion + * that it is fine to listen and not accept new sessions for a + * given listener. Without it, we would accumulate unhandled + * accepts on the passive worker message queue. */ + owner_wrk = vls_shared_get_owner (vls); + if (!vls_listener_wrk_is_active (vls, owner_wrk)) + vls_listener_wrk_stop_listen (vls, owner_wrk); } - /* Although removal from epoll means listener no longer accepts new - * sessions, the accept queue built by vpp cannot be drained by stopping - * the listener. Morover, some applications, e.g., nginx, might - * constantly remove and add listeners to their epfds. Removing - * listeners in such situations causes a lot of churn in vpp as segments - * and segment managers need to be recreated. */ - /* vls_listener_wrk_stop_listen (vls, vls->vcl_wrk_index); */ - break; - case VCL_STATE_LISTEN_NO_MQ: - if (!is_add) - break; - - /* Register worker as listener */ - vls_listener_wrk_start_listen (vls, vls->vcl_wrk_index); - - /* If owner worker did not attempt to accept/xpoll on the session, - * force a listen stop for it, since it may not be interested in - * accepting new sessions. - * This is pretty much a hack done to give app workers the illusion - * that it is fine to listen and not accept new sessions for a - * given listener. Without it, we would accumulate unhandled - * accepts on the passive worker message queue. 
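In the vat2 hunk above, the long_options table is re-sorted, so the index tested for the flag-style --dump-apis option moves from 3 to 1. getopt_long returns 0 for a long option whose val is 0 and reports the matched entry's position in the table through the option_index argument, so the check must follow the table order. A small standalone illustration of that mechanism:

#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>

int
main (int argc, char **argv)
{
  bool dump_api = false;
  int c, option_index = 0;
  /* "dump-apis" has no short form (val == 0), so getopt_long returns 0
   * for it and option_index identifies which table entry matched. */
  static struct option long_options[] = { { "debug", no_argument, 0, 'd' },
                                          { "dump-apis", no_argument, 0, 0 },
                                          { 0, 0, 0, 0 } };

  while ((c = getopt_long (argc, argv, "d", long_options, &option_index)) !=
         -1)
    {
      if (c == 0 && option_index == 1) /* index of "dump-apis" above */
        dump_api = true;
    }

  printf ("dump_api=%d\n", dump_api);
  return 0;
}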
*/ - owner_wrk = vls_shared_get_owner (vls); - if (!vls_listener_wrk_is_active (vls, owner_wrk)) - vls_listener_wrk_stop_listen (vls, owner_wrk); break; default: break; diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c index d9814394f0d..80fbd8b4c7e 100644 --- a/src/vcl/vcl_private.c +++ b/src/vcl/vcl_private.c @@ -49,7 +49,7 @@ vcl_mq_epoll_add_api_sock (vcl_worker_t *wrk) struct epoll_event e = { 0 }; int rv; - e.data.u32 = ~0; + e.data.u32 = VCL_EP_SAPIFD_EVT; rv = epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, cs->fd, &e); if (rv != EEXIST && rv < 0) return -1; @@ -189,6 +189,55 @@ vcl_worker_cleanup_cb (void *arg) } void +vcl_worker_detached_start_signal_mq (vcl_worker_t *wrk) +{ + /* Generate mq epfd events using pipes to hopefully force + * calls into epoll_wait which retries attaching to vpp */ + if (!wrk->detached_pipefds[0]) + { + if (pipe (wrk->detached_pipefds)) + { + VDBG (0, "failed to add mq eventfd to mq epoll fd"); + exit (1); + } + } + + struct epoll_event evt = {}; + evt.events = EPOLLIN; + evt.data.u32 = VCL_EP_PIPEFD_EVT; + if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, wrk->detached_pipefds[0], + &evt) < 0) + { + VDBG (0, "failed to add mq eventfd to mq epoll fd"); + exit (1); + } + + int __clib_unused rv; + u8 sig = 1; + rv = write (wrk->detached_pipefds[1], &sig, 1); +} + +void +vcl_worker_detached_signal_mq (vcl_worker_t *wrk) +{ + int __clib_unused rv; + u8 buf; + rv = read (wrk->detached_pipefds[0], &buf, 1); + rv = write (wrk->detached_pipefds[1], &buf, 1); +} + +void +vcl_worker_detached_stop_signal_mq (vcl_worker_t *wrk) +{ + if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_DEL, wrk->detached_pipefds[0], 0) < + 0) + { + VDBG (0, "failed to del mq eventfd to mq epoll fd"); + exit (1); + } +} + +void vcl_worker_detach_sessions (vcl_worker_t *wrk) { session_event_t *e; @@ -201,17 +250,17 @@ vcl_worker_detach_sessions (vcl_worker_t *wrk) { if (s->session_state == VCL_STATE_LISTEN) { - s->session_state = VCL_STATE_LISTEN_NO_MQ; + s->flags |= VCL_SESSION_F_LISTEN_NO_MQ; continue; } if ((s->flags & VCL_SESSION_F_IS_VEP) || - s->session_state == VCL_STATE_LISTEN_NO_MQ || s->session_state == VCL_STATE_CLOSED) continue; hash_set (seg_indices_map, s->tx_fifo->segment_index, 1); s->session_state = VCL_STATE_DETACHED; + s->flags |= VCL_SESSION_F_APP_CLOSING; vec_add2 (wrk->unhandled_evts_vector, e, 1); e->event_type = SESSION_CTRL_EVT_DISCONNECTED; e->session_index = s->session_index; @@ -221,13 +270,26 @@ vcl_worker_detach_sessions (vcl_worker_t *wrk) hash_foreach (seg_index, val, seg_indices_map, ({ vec_add1 (seg_indices, seg_index); })); + /* If multi-threaded apps, wait for all threads to hopefully finish + * their blocking operations */ + if (wrk->pre_wait_fn) + wrk->pre_wait_fn (VCL_INVALID_SESSION_INDEX); + sleep (1); + if (wrk->post_wait_fn) + wrk->post_wait_fn (VCL_INVALID_SESSION_INDEX); + vcl_segment_detach_segments (seg_indices); /* Detach worker's mqs segment */ vcl_segment_detach (vcl_vpp_worker_segment_handle (wrk->wrk_index)); + wrk->app_event_queue = 0; + wrk->ctrl_mq = 0; + vec_free (seg_indices); hash_free (seg_indices_map); + + vcl_worker_detached_start_signal_mq (wrk); } void @@ -364,8 +426,8 @@ vcl_session_read_ready (vcl_session_t * s) } else { - return (s->session_state == VCL_STATE_DISCONNECT) ? - VPPCOM_ECONNRESET : VPPCOM_ENOTCONN; + return (s->session_state == VCL_STATE_DISCONNECT) ? 
VPPCOM_ECONNRESET : + VPPCOM_ENOTCONN; } } @@ -773,9 +835,6 @@ vcl_session_state_str (vcl_session_state_t state) case VCL_STATE_UPDATED: st = "STATE_UPDATED"; break; - case VCL_STATE_LISTEN_NO_MQ: - st = "STATE_LISTEN_NO_MQ"; - break; default: st = "UNKNOWN_STATE"; break; diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index c98e1cde9b1..609653f20a4 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -32,6 +32,8 @@ #endif #define VPPCOM_DEBUG vcm->debug +#define VCL_EP_SAPIFD_EVT ((u32) ~0) +#define VCL_EP_PIPEFD_EVT ((u32) (~0 - 1)) extern __thread uword __vcl_worker_index; @@ -71,7 +73,6 @@ typedef enum vcl_session_state_ VCL_STATE_DISCONNECT, VCL_STATE_DETACHED, VCL_STATE_UPDATED, - VCL_STATE_LISTEN_NO_MQ, } vcl_session_state_t; typedef struct epoll_event vppcom_epoll_event_t; @@ -144,6 +145,7 @@ typedef enum vcl_session_flags_ VCL_SESSION_F_PENDING_FREE = 1 << 7, VCL_SESSION_F_PENDING_LISTEN = 1 << 8, VCL_SESSION_F_APP_CLOSING = 1 << 9, + VCL_SESSION_F_LISTEN_NO_MQ = 1 << 10, } __clib_packed vcl_session_flags_t; typedef enum vcl_worker_wait_ @@ -325,6 +327,9 @@ typedef struct vcl_worker_ /* functions to be called pre/post wait if vcl managed by vls */ vcl_worker_wait_mq_fn pre_wait_fn; vcl_worker_wait_mq_fn post_wait_fn; + + /* mq_epfd signal pipes when wrk detached from vpp */ + int detached_pipefds[2]; } vcl_worker_t; STATIC_ASSERT (sizeof (session_disconnected_msg_t) <= 16, @@ -563,9 +568,8 @@ vcl_session_table_lookup_listener (vcl_worker_t * wrk, u64 handle) return 0; } - ASSERT (s->session_state == VCL_STATE_LISTEN - || s->session_state == VCL_STATE_LISTEN_NO_MQ - || vcl_session_is_connectable_listener (wrk, s)); + ASSERT (s->session_state == VCL_STATE_LISTEN || + vcl_session_is_connectable_listener (wrk, s)); return s; } @@ -800,6 +804,9 @@ void vcl_worker_detach_sessions (vcl_worker_t *wrk); void vcl_worker_set_wait_mq_fns (vcl_worker_wait_mq_fn pre_wait, vcl_worker_wait_mq_fn post_wait); +void vcl_worker_detached_start_signal_mq (vcl_worker_t *wrk); +void vcl_worker_detached_signal_mq (vcl_worker_t *wrk); +void vcl_worker_detached_stop_signal_mq (vcl_worker_t *wrk); /* * VCL Binary API */ diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 19d58c349b7..6f84178de79 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -519,8 +519,7 @@ vcl_session_reset_handler (vcl_worker_t * wrk, } /* Caught a reset before actually accepting the session */ - if (session->session_state == VCL_STATE_LISTEN || - session->session_state == VCL_STATE_LISTEN_NO_MQ) + if (session->session_state == VCL_STATE_LISTEN) { if (!vcl_flag_accepted_session (session, reset_msg->handle, VCL_ACCEPTED_F_RESET)) @@ -712,8 +711,7 @@ vcl_session_disconnected_handler (vcl_worker_t * wrk, return 0; /* Caught a disconnect before actually accepting the session */ - if (session->session_state == VCL_STATE_LISTEN || - session->session_state == VCL_STATE_LISTEN_NO_MQ) + if (session->session_state == VCL_STATE_LISTEN) { if (!vcl_flag_accepted_session (session, msg->handle, VCL_ACCEPTED_F_CLOSED)) @@ -1085,8 +1083,7 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e) * VPP_CLOSING state instead can been marked as ACCEPTED_F_CLOSED. 
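[Editor's note] The new vcl_worker_detached_start_signal_mq / vcl_worker_detached_signal_mq helpers above keep a detached worker's epoll loop spinning: they register a pipe read end in the mq epoll fd under the VCL_EP_PIPEFD_EVT sentinel and keep one byte queued in it, so epoll_wait keeps returning and the worker keeps retrying the attach. A stand-alone sketch of that self-pipe wakeup trick, written with plain POSIX calls rather than the VCL wrappers:

#include <sys/epoll.h>
#include <unistd.h>
#include <stdint.h>

#define PIPEFD_EVT ((uint32_t) (~0u - 1)) /* sentinel, mirrors the idea above */

/* arm: create pipe, register read end, prime it with one byte */
static int self_pipe_arm (int epfd, int pipefds[2])
{
  uint8_t one = 1;
  if (pipe (pipefds) < 0)
    return -1;
  struct epoll_event e = { .events = EPOLLIN, .data.u32 = PIPEFD_EVT };
  if (epoll_ctl (epfd, EPOLL_CTL_ADD, pipefds[0], &e) < 0)
    return -1;
  return write (pipefds[1], &one, 1) == 1 ? 0 : -1;
}

/* on wakeup: consume the byte and immediately re-arm so the next
 * epoll_wait call also returns instead of blocking forever */
static void self_pipe_rearm (int pipefds[2])
{
  uint8_t b;
  if (read (pipefds[0], &b, 1) == 1)
    (void) write (pipefds[1], &b, 1);
}
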
*/ if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK) && - !(s->session_state == VCL_STATE_LISTEN || - s->session_state == VCL_STATE_LISTEN_NO_MQ)) + !(s->session_state == VCL_STATE_LISTEN)) { s->session_state = VCL_STATE_VPP_CLOSING; s->flags |= VCL_SESSION_F_PENDING_DISCONNECT; @@ -1114,8 +1111,7 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e) * DISCONNECT state instead can been marked as ACCEPTED_F_RESET. */ if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK) && - !(s->session_state == VCL_STATE_LISTEN || - s->session_state == VCL_STATE_LISTEN_NO_MQ)) + !(s->session_state == VCL_STATE_LISTEN)) { s->flags |= VCL_SESSION_F_PENDING_DISCONNECT; s->session_state = VCL_STATE_DISCONNECT; @@ -1331,6 +1327,12 @@ vppcom_session_unbind (u32 session_handle) } clib_fifo_free (session->accept_evts_fifo); + if (session->flags & VCL_SESSION_F_LISTEN_NO_MQ) + { + vcl_session_free (wrk, session); + return VPPCOM_OK; + } + vcl_send_session_unlisten (wrk, session); VDBG (0, "session %u [0x%llx]: sending unbind!", session->session_index, @@ -1402,6 +1404,8 @@ vcl_api_retry_attach (vcl_worker_t *wrk) { vcl_session_t *s; + vcl_worker_detached_signal_mq (wrk); + clib_spinlock_lock (&vcm->workers_lock); if (vcl_is_first_reattach_to_execute ()) { @@ -1410,12 +1414,14 @@ vcl_api_retry_attach (vcl_worker_t *wrk) clib_spinlock_unlock (&vcm->workers_lock); return; } + vcl_worker_detached_stop_signal_mq (wrk); vcl_set_reattach_counter (); clib_spinlock_unlock (&vcm->workers_lock); } else { vcl_set_reattach_counter (); + vcl_worker_detached_stop_signal_mq (wrk); clib_spinlock_unlock (&vcm->workers_lock); vcl_worker_register_with_vpp (); } @@ -1425,10 +1431,11 @@ vcl_api_retry_attach (vcl_worker_t *wrk) { if (s->flags & VCL_SESSION_F_IS_VEP) continue; - if (s->session_state == VCL_STATE_LISTEN_NO_MQ) + if (s->session_state == VCL_STATE_LISTEN) vppcom_session_listen (vcl_session_handle (s), 10); else - VDBG (0, "internal error: unexpected state %d", s->session_state); + VDBG (0, "reattach error: %u unexpected state %d", s->session_index, + s->session_state); } } @@ -1769,12 +1776,20 @@ vppcom_session_listen (uint32_t listen_sh, uint32_t q_len) return VPPCOM_EBADFD; listen_vpp_handle = listen_session->vpp_handle; - if (listen_session->session_state == VCL_STATE_LISTEN) + if (listen_session->session_state == VCL_STATE_LISTEN && + !(listen_session->flags & VCL_SESSION_F_LISTEN_NO_MQ)) + { + VDBG (0, "session %u [0x%llx]: already in listen state!", listen_sh, + listen_vpp_handle); + return VPPCOM_OK; + } + if (PREDICT_FALSE (!wrk->ctrl_mq)) { - VDBG (0, "session %u [0x%llx]: already in listen state!", - listen_sh, listen_vpp_handle); + listen_session->session_state = VCL_STATE_LISTEN; + listen_session->flags |= VCL_SESSION_F_LISTEN_NO_MQ; return VPPCOM_OK; } + listen_session->flags &= ~VCL_SESSION_F_LISTEN_NO_MQ; VDBG (0, "session %u: sending vpp listen request...", listen_sh); @@ -1851,7 +1866,6 @@ again: return VPPCOM_EBADFD; if ((ls->session_state != VCL_STATE_LISTEN) && - (ls->session_state != VCL_STATE_LISTEN_NO_MQ) && (!vcl_session_is_connectable_listener (wrk, ls))) { VDBG (0, "ERROR: session [0x%llx]: not in listen state! 
state (%s)", @@ -2653,6 +2667,9 @@ vcl_select_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, *bits_set += 1; } break; + case SESSION_CTRL_EVT_BOUND: + vcl_session_bound_handler (wrk, (session_bound_msg_t *) e->data); + break; case SESSION_CTRL_EVT_UNLISTEN_REPLY: vcl_session_unlisten_reply_handler (wrk, e->data); break; @@ -3588,8 +3605,13 @@ vppcom_epoll_wait_eventfd (vcl_worker_t *wrk, struct epoll_event *events, for (i = 0; i < n_mq_evts; i++) { - if (PREDICT_FALSE (wrk->mq_events[i].data.u32 == ~0)) + if (PREDICT_FALSE (wrk->mq_events[i].data.u32 >= VCL_EP_PIPEFD_EVT)) { + if (wrk->mq_events[i].data.u32 == VCL_EP_PIPEFD_EVT) + { + vcl_api_retry_attach (wrk); + continue; + } /* api socket was closed */ vcl_api_handle_disconnect (wrk); continue; @@ -4544,17 +4566,31 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer, if (ep->app_tlvs) vcl_handle_ep_app_tlvs (s, ep); - /* Session not connected/bound in vpp. Create it by 'connecting' it */ + /* Session not connected/bound in vpp. Create it by binding it */ if (PREDICT_FALSE (s->session_state == VCL_STATE_CLOSED)) { u32 session_index = s->session_index; f64 timeout = vcm->cfg.session_timeout; int rv; - vcl_send_session_connect (wrk, s); - rv = vppcom_wait_for_session_state_change (session_index, - VCL_STATE_READY, - timeout); + /* VPP assumes sockets are bound, not ideal, but for now + * connect socket, grab lcl ip:port pair and use it to bind */ + if (s->transport.rmt_port == 0 || + ip46_address_is_zero (&s->transport.lcl_ip)) + { + vcl_send_session_connect (wrk, s); + rv = vppcom_wait_for_session_state_change ( + session_index, VCL_STATE_READY, timeout); + if (rv < 0) + return rv; + vcl_send_session_disconnect (wrk, s); + rv = vppcom_wait_for_session_state_change ( + session_index, VCL_STATE_DETACHED, timeout); + s->session_state = VCL_STATE_CLOSED; + } + vcl_send_session_listen (wrk, s); + rv = vppcom_wait_for_session_state_change ( + session_index, VCL_STATE_LISTEN, timeout); if (rv < 0) return rv; s = vcl_session_get (wrk, session_index); diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt index 3c354b764dd..b4fc1775194 100644 --- a/src/vlib/CMakeLists.txt +++ b/src/vlib/CMakeLists.txt @@ -71,6 +71,19 @@ set(PLATFORM_SOURCES ) endif() +set(VLIB_LIBS vppinfra svm ${CMAKE_DL_LIBS} ${EPOLL_LIB}) + +vpp_find_path(LIBIBERTY_INCLUDE_DIR libiberty/demangle.h) +vpp_find_library(LIBIBERTY_LIB NAMES iberty libiberty) + +if (LIBIBERTY_INCLUDE_DIR AND LIBUNWIND_LIB) + message(STATUS "libiberty found at ${LIBIBERTY_LIB}") + list(APPEND VLIB_LIBS ${LIBIBERTY_LIB}) + add_definitions(-DHAVE_LIBIBERTY) +else() + message(WARNING "libiberty not found - stack trace demangle disabled") +endif() + add_vpp_library(vlib SOURCES buffer.c @@ -79,6 +92,7 @@ add_vpp_library(vlib counter.c drop.c error.c + file.c format.c handoff_trace.c init.c @@ -104,7 +118,6 @@ add_vpp_library(vlib time.c trace.c unix/cli.c - unix/input.c unix/main.c unix/plugin.c unix/util.c @@ -130,6 +143,7 @@ add_vpp_library(vlib dma/dma.h error_funcs.h error.h + file.h format_funcs.h global_funcs.h init.h @@ -149,6 +163,7 @@ add_vpp_library(vlib time.h trace_funcs.h trace.h + tw_funcs.h unix/mc_socket.h unix/plugin.h unix/unix.h @@ -159,7 +174,7 @@ add_vpp_library(vlib API_FILES pci/pci_types.api - LINK_LIBRARIES vppinfra svm ${CMAKE_DL_LIBS} ${EPOLL_LIB} + LINK_LIBRARIES ${VLIB_LIBS} DEPENDS api_headers ) diff --git a/src/vlib/buffer_funcs.c b/src/vlib/buffer_funcs.c index d910b25afac..ce09a51c600 100644 --- a/src/vlib/buffer_funcs.c +++ 
b/src/vlib/buffer_funcs.c @@ -297,7 +297,7 @@ vlib_buffer_enqueue_to_thread_inline (vlib_main_t *vm, u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0; vlib_frame_bitmap_t mask, used_elts = {}; vlib_frame_queue_elt_t *hf = 0; - u16 thread_index; + clib_thread_index_t thread_index; u32 n_comp, off = 0, n_left = n_packets; thread_index = thread_indices[0]; diff --git a/src/vlib/cli.c b/src/vlib/cli.c index 4198b4b0976..38a8c2aa19c 100644 --- a/src/vlib/cli.c +++ b/src/vlib/cli.c @@ -39,6 +39,7 @@ #include <vlib/vlib.h> #include <vlib/stats/stats.h> +#include <vlib/file.h> #include <vlib/unix/unix.h> #include <vppinfra/callback.h> #include <vppinfra/cpu.h> @@ -1226,20 +1227,20 @@ restart_cmd_fn (vlib_main_t * vm, unformat_input_t * input, { vlib_global_main_t *vgm = vlib_get_global_main (); clib_file_main_t *fm = &file_main; - clib_file_t *f; /* environ(7) does not indicate a header for this */ extern char **environ; /* Close all known open files */ - pool_foreach (f, fm->file_pool) - { + pool_foreach_pointer (f, fm->file_pool) + { if (f->file_descriptor > 2) close(f->file_descriptor); } /* Exec ourself */ - execve (vgm->name, (char **) vgm->argv, environ); + if (execve ((void *) vgm->argv[0], (char **) vgm->argv, environ)) + return clib_error_return_unix (0, "execve failed"); return 0; } diff --git a/src/vlib/counter.h b/src/vlib/counter.h index f9da576a5f2..a9c261770d4 100644 --- a/src/vlib/counter.h +++ b/src/vlib/counter.h @@ -68,7 +68,7 @@ u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm); /** Pre-fetch a per-thread simple counter for the given object index */ always_inline void vlib_prefetch_simple_counter (const vlib_simple_counter_main_t *cm, - u32 thread_index, u32 index) + clib_thread_index_t thread_index, u32 index) { counter_t *my_counters; @@ -86,8 +86,9 @@ vlib_prefetch_simple_counter (const vlib_simple_counter_main_t *cm, @param increment - (u64) quantitiy to add to the counter */ always_inline void -vlib_increment_simple_counter (vlib_simple_counter_main_t * cm, - u32 thread_index, u32 index, u64 increment) +vlib_increment_simple_counter (vlib_simple_counter_main_t *cm, + clib_thread_index_t thread_index, u32 index, + u64 increment) { counter_t *my_counters; @@ -102,8 +103,9 @@ vlib_increment_simple_counter (vlib_simple_counter_main_t * cm, @param increment - (u64) quantitiy remove from the counter value */ always_inline void -vlib_decrement_simple_counter (vlib_simple_counter_main_t * cm, - u32 thread_index, u32 index, u64 decrement) +vlib_decrement_simple_counter (vlib_simple_counter_main_t *cm, + clib_thread_index_t thread_index, u32 index, + u64 decrement) { counter_t *my_counters; @@ -121,8 +123,9 @@ vlib_decrement_simple_counter (vlib_simple_counter_main_t * cm, @param value - (u64) quantitiy to set to the counter */ always_inline void -vlib_set_simple_counter (vlib_simple_counter_main_t * cm, - u32 thread_index, u32 index, u64 value) +vlib_set_simple_counter (vlib_simple_counter_main_t *cm, + clib_thread_index_t thread_index, u32 index, + u64 value) { counter_t *my_counters; @@ -246,9 +249,9 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm); */ always_inline void -vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, - u32 thread_index, - u32 index, u64 n_packets, u64 n_bytes) +vlib_increment_combined_counter (vlib_combined_counter_main_t *cm, + clib_thread_index_t thread_index, u32 index, + u64 n_packets, u64 n_bytes) { vlib_counter_t *my_counters; @@ -261,8 +264,8 @@ vlib_increment_combined_counter 
(vlib_combined_counter_main_t * cm, /** Pre-fetch a per-thread combined counter for the given object index */ always_inline void -vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm, - u32 thread_index, u32 index) +vlib_prefetch_combined_counter (const vlib_combined_counter_main_t *cm, + clib_thread_index_t thread_index, u32 index) { vlib_counter_t *cpu_counters; diff --git a/src/vlib/file.c b/src/vlib/file.c new file mode 100644 index 00000000000..286b0d1f2ad --- /dev/null +++ b/src/vlib/file.c @@ -0,0 +1,305 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <sys/epoll.h> +#include <sys/eventfd.h> +#include <limits.h> + +VLIB_REGISTER_LOG_CLASS (vlib_file_log, static) = { + .class_name = "vlib", + .subclass_name = "file", +}; + +#define log_debug(fmt, ...) \ + vlib_log_debug (vlib_file_log.class, fmt, __VA_ARGS__) +#define log_warn(fmt, ...) \ + vlib_log_warn (vlib_file_log.class, fmt, __VA_ARGS__) +#define log_err(fmt, ...) vlib_log_err (vlib_file_log.class, fmt, __VA_ARGS__) + +clib_file_main_t file_main; + +static void +vlib_file_update (clib_file_t *f, clib_file_update_type_t update_type) +{ + vlib_main_t *vm = vlib_get_main_by_index (f->polling_thread_index); + int op = -1, add_del = 0; + + struct epoll_event e = { + .events = EPOLLIN, + .data.ptr = f, + }; + + if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) + e.events |= EPOLLOUT; + if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED) + e.events |= EPOLLET; + + switch (update_type) + { + case UNIX_FILE_UPDATE_ADD: + op = EPOLL_CTL_ADD; + add_del = 1; + break; + + case UNIX_FILE_UPDATE_MODIFY: + op = EPOLL_CTL_MOD; + break; + + case UNIX_FILE_UPDATE_DELETE: + op = EPOLL_CTL_DEL; + add_del = -1; + break; + + default: + log_err ("%s: unknown update_type %d", __func__, update_type); + return; + } + + if (epoll_ctl (vm->epoll_fd, op, (int) f->file_descriptor, &e) < 0) + { + log_err ("%s: epoll_ctl() failed, errno %d", __func__, errno); + return; + } + + vm->n_epoll_fds += add_del; +} + +static clib_error_t * +wake_read_fn (struct clib_file *f) +{ + u64 val, __clib_unused rv; + rv = read ((int) f->file_descriptor, &val, sizeof (u64)); + return 0; +} + +void +vlib_file_poll_init (vlib_main_t *vm) +{ + vm->epoll_fd = epoll_create (1); + + if (vm->epoll_fd < 0) + clib_panic ("failed to initialize epoll for thread %u", vm->thread_index); + + vm->wakeup_fd = eventfd (0, EFD_NONBLOCK); + + if (vm->wakeup_fd < 0) + clib_panic ("failed to initialize wakeup event for thread %u", + vm->thread_index); + + if (!file_main.file_update) + file_main.file_update = vlib_file_update; + + clib_file_add (&file_main, &(clib_file_t){ + .polling_thread_index = vm->thread_index, + .file_descriptor = vm->wakeup_fd, + .description = format (0, "wakeup thread %u", + vm->thread_index), + .read_function = wake_read_fn, + }); +} + +void +vlib_file_poll (vlib_main_t *vm) +{ + vlib_node_main_t *nm = &vm->node_main; + unix_main_t *um = &unix_main; + struct epoll_event *e, epoll_events[16]; + int n_fds_ready; + int is_main = (vm->thread_index == 0); + int timeout_ms = 0, max_timeout_ms = 10; + u32 ticks; + + /* + * If we've been asked for a fixed-sleep between main loop polls, + * do so right away. 
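[Editor's note] vlib_file_poll_init above gives every thread its own epoll instance plus a non-blocking eventfd registered as an ordinary clib_file; another thread can then interrupt the owner's epoll_wait simply by writing to that eventfd, and wake_read_fn drains the counter on the owning thread. A self-contained sketch of the same Linux pattern using raw epoll/eventfd calls rather than the vlib wrappers:

#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <unistd.h>
#include <stdint.h>

struct thread_poll {
  int epoll_fd;
  int wakeup_fd;
};

static int thread_poll_init (struct thread_poll *tp)
{
  tp->epoll_fd = epoll_create1 (0);
  tp->wakeup_fd = eventfd (0, EFD_NONBLOCK);
  if (tp->epoll_fd < 0 || tp->wakeup_fd < 0)
    return -1;

  struct epoll_event e = { .events = EPOLLIN, .data.fd = tp->wakeup_fd };
  return epoll_ctl (tp->epoll_fd, EPOLL_CTL_ADD, tp->wakeup_fd, &e);
}

/* called from another thread to interrupt an epoll_wait sleep */
static void thread_poll_wakeup (struct thread_poll *tp)
{
  uint64_t v = 1;
  (void) write (tp->wakeup_fd, &v, sizeof (v));
}

/* called by the owning thread once epoll_wait reports the wakeup fd */
static void thread_poll_drain (struct thread_poll *tp)
{
  uint64_t v;
  (void) read (tp->wakeup_fd, &v, sizeof (v)); /* resets the counter */
}
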
+ */ + if (PREDICT_FALSE (is_main && um->poll_sleep_usec)) + { + struct timespec ts, tsrem; + ts.tv_sec = 0; + ts.tv_nsec = 1000L * um->poll_sleep_usec; + + while (nanosleep (&ts, &tsrem) < 0) + ts = tsrem; + + goto epoll; + } + + /* we are busy, skip some loops before polling again */ + if (vlib_last_vectors_per_main_loop (vm) >= 2) + goto skip_loops; + + /* at least one node is polling */ + if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING]) + goto skip_loops; + + /* pending APIs in the queue */ + if (is_main && vm->api_queue_nonempty) + goto skip_loops; + + if (is_main == 0) + { + if (*vlib_worker_threads->wait_at_barrier) + goto epoll; + + if (vlib_get_first_main ()->time_last_barrier_release + 0.5 >= + vlib_time_now (vm)) + goto skip_loops; + } + + /* check for pending interrupts */ + for (int nt = 0; nt < VLIB_N_NODE_TYPE; nt++) + if (nm->node_interrupts[nt] && + clib_interrupt_is_any_pending (nm->node_interrupts[nt])) + goto epoll; + + /* at this point we know that thread is going to sleep, so let's annonce + * to other threads that they need to wakeup us if they need our attention */ + __atomic_store_n (&vm->thread_sleeps, 1, __ATOMIC_RELAXED); + + ticks = vlib_tw_timer_first_expires_in_ticks (vm); + + if (ticks != TW_SLOTS_PER_RING) + { + timeout_ms = (int) (ticks / ((u32) VLIB_TW_TICKS_PER_SECOND / 1000)); + timeout_ms = clib_min (timeout_ms, max_timeout_ms); + } + else + timeout_ms = max_timeout_ms; + + goto epoll; + +skip_loops: + /* Don't come back for a respectable number of dispatch cycles */ + vm->file_poll_skip_loops = 1024; + +epoll: + n_fds_ready = epoll_wait (vm->epoll_fd, epoll_events, + ARRAY_LEN (epoll_events), timeout_ms); + + __atomic_store_n (&vm->thread_sleeps, 0, __ATOMIC_RELAXED); + __atomic_store_n (&vm->wakeup_pending, 0, __ATOMIC_RELAXED); + + if (n_fds_ready < 0) + { + if (unix_error_is_fatal (errno)) + vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait")); + + /* non fatal error (e.g. EINTR). */ + return; + } + + vm->epoll_waits += 1; + vm->epoll_files_ready += n_fds_ready; + + for (e = epoll_events; e < epoll_events + n_fds_ready; e++) + { + clib_file_t *f = e->data.ptr; + clib_error_t *err; + + if (PREDICT_FALSE (!f->active)) + { + foreach_int (flag, EPOLLIN, EPOLLOUT, EPOLLERR) + if (e->events & flag) + { + const char *str[] = { + [EPOLLIN] = "EPOLLIN", + [EPOLLOUT] = "EPOLLOUT", + [EPOLLERR] = "EPOLLERR", + }; + log_debug ("epoll event %s dropped due to inactive file", + str[flag]); + } + continue; + } + else if (PREDICT_TRUE (!(e->events & EPOLLERR))) + { + if (e->events & EPOLLIN) + { + f->read_events++; + err = f->read_function (f); + if (err) + { + log_err ("file read error: %U", format_clib_error, err); + clib_error_free (err); + } + } + if (e->events & EPOLLOUT) + { + f->write_events++; + err = f->write_function (f); + if (err) + { + log_err ("file write error: %U", format_clib_error, err); + clib_error_free (err); + } + } + } + else + { + if (f->error_function) + { + f->error_events++; + err = f->error_function (f); + if (err) + { + log_err ("file error: %U", format_clib_error, err); + clib_error_free (err); + } + } + else if (f->dont_close == 0) + close ((int) f->file_descriptor); + } + } + + /* maximum epoll events received, there may be more ... 
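[Editor's note] Before blocking, vlib_file_poll above turns the timer wheel's next expiry (returned in ticks by vlib_tw_timer_first_expires_in_ticks) into an epoll_wait timeout in milliseconds and clamps it to max_timeout_ms (10 ms), so sleeping on file descriptors never postpones a due timer by much. The tick duration is not defined in this section; the replacement of the old "dt * 1e5" scaling elsewhere in this diff suggests a 10 us tick, which is what the sketch below assumes:

#include <stdint.h>

#define TICKS_PER_SECOND 100000u          /* assumed 10 us tick */
#define NO_TIMER_PENDING ((uint32_t) ~0u) /* illustrative sentinel; the real
                                             code compares against
                                             TW_SLOTS_PER_RING */

static int epoll_timeout_ms (uint32_t ticks_to_next_expiry, int max_ms)
{
  if (ticks_to_next_expiry == NO_TIMER_PENDING)
    return max_ms;

  uint32_t ticks_per_ms = TICKS_PER_SECOND / 1000; /* 100 */
  int ms = (int) (ticks_to_next_expiry / ticks_per_ms);
  return ms < max_ms ? ms : max_ms; /* 0 means poll without blocking */
}
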
*/ + if (n_fds_ready == ARRAY_LEN (epoll_events)) + { + timeout_ms = 0; + goto epoll; + } + + /* removing fd from epoll instance doesn't remove event from epoll queue + * so we need to be sure epoll queue is empty before freeing */ + clib_file_free_deleted (&file_main, vm->thread_index); +} + +static clib_error_t * +show_files (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + clib_error_t *error = 0; + clib_file_main_t *fm = &file_main; + char path[PATH_MAX]; + u8 *s = 0; + + vlib_cli_output (vm, "%3s %6s %12s %12s %12s %-32s %s", "FD", "Thread", + "Read", "Write", "Error", "File Name", "Description"); + + pool_foreach_pointer (f, fm->file_pool) + { + ssize_t rv; + s = format (s, "/proc/self/fd/%d%c", f->file_descriptor, 0); + rv = readlink ((char *) s, path, PATH_MAX - 1); + + path[rv < 0 ? 0 : rv] = 0; + + vlib_cli_output (vm, "%3d %6d %12d %12d %12d %-32s %v", + f->file_descriptor, f->polling_thread_index, + f->read_events, f->write_events, f->error_events, path, + f->description); + vec_reset_length (s); + } + vec_free (s); + + return error; +} + +VLIB_CLI_COMMAND (cli_show_files, static) = { + .path = "show files", + .short_help = "Show files in use", + .function = show_files, +}; diff --git a/src/vlib/file.h b/src/vlib/file.h new file mode 100644 index 00000000000..82bbb22f650 --- /dev/null +++ b/src/vlib/file.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __vlib_file_h__ +#define __vlib_file_h__ + +#include <vppinfra/file.h> + +extern clib_file_main_t file_main; + +void vlib_file_poll_init (vlib_main_t *vm); +void vlib_file_poll (vlib_main_t *vm); +#endif /* __vlib_file_h__ */ diff --git a/src/vlib/format.c b/src/vlib/format.c index 98010620a5d..8ed2535fe8a 100644 --- a/src/vlib/format.c +++ b/src/vlib/format.c @@ -213,7 +213,7 @@ unformat_vlib_tmpfile (unformat_input_t * input, va_list * args) u8 * format_vlib_thread_name (u8 * s, va_list * args) { - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); if (thread_index == 0) return format (s, "main"); @@ -226,7 +226,7 @@ format_vlib_thread_name (u8 * s, va_list * args) u8 * format_vlib_thread_name_and_index (u8 * s, va_list * args) { - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); return format (s, "%U (%u)", format_vlib_thread_name, thread_index, thread_index); diff --git a/src/vlib/freebsd/pci.c b/src/vlib/freebsd/pci.c index a4e9eb2dda6..92c27c24373 100644 --- a/src/vlib/freebsd/pci.c +++ b/src/vlib/freebsd/pci.c @@ -375,6 +375,4 @@ freebsd_pci_init (vlib_main_t *vm) return 0; } -VLIB_INIT_FUNCTION (freebsd_pci_init) = { - .runs_after = VLIB_INITS ("unix_input_init"), -}; +VLIB_INIT_FUNCTION (freebsd_pci_init); diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h index 3c0fdb78364..2558e611750 100644 --- a/src/vlib/global_funcs.h +++ b/src/vlib/global_funcs.h @@ -26,7 +26,7 @@ vlib_get_n_threads () } always_inline vlib_main_t * -vlib_get_main_by_index (u32 thread_index) +vlib_get_main_by_index (clib_thread_index_t thread_index) { vlib_main_t *vm; vm = vlib_global_main.vlib_mains[thread_index]; diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index 29ca3d97523..bf9c6f27cd5 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -42,6 +42,7 @@ #include <vppinfra/unix.h> #include <vlib/vlib.h> +#include <vlib/file.h> #include <vlib/pci/pci.h> #include <vlib/unix/unix.h> #include <vlib/linux/vfio.h> @@ -1578,15 +1579,4 
@@ linux_pci_init (vlib_main_t * vm) return 0; } -VLIB_INIT_FUNCTION (linux_pci_init) = -{ - .runs_after = VLIB_INITS("unix_input_init"), -}; - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ +VLIB_INIT_FUNCTION (linux_pci_init); diff --git a/src/vlib/linux/vmbus.c b/src/vlib/linux/vmbus.c index 9dc9d554ebd..27a5e271fd0 100644 --- a/src/vlib/linux/vmbus.c +++ b/src/vlib/linux/vmbus.c @@ -455,15 +455,4 @@ linux_vmbus_init (vlib_main_t * vm) return 0; } -VLIB_INIT_FUNCTION (linux_vmbus_init) = -{ - .runs_before = VLIB_INITS("unix_input_init"), -}; - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ +VLIB_INIT_FUNCTION (linux_vmbus_init); diff --git a/src/vlib/log.c b/src/vlib/log.c index 60fb9fb5178..be6b759c61a 100644 --- a/src/vlib/log.c +++ b/src/vlib/log.c @@ -108,6 +108,17 @@ log_level_is_enabled (vlib_log_level_t level, vlib_log_level_t configured) return 1; } +static void +log_size_validate (vlib_log_main_t *lm) +{ + if (vec_len (lm->entries) < lm->size) + { + CLIB_SPINLOCK_LOCK (lm->lock); + vec_validate (lm->entries, lm->size); + CLIB_SPINLOCK_UNLOCK (lm->lock); + } +} + void vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...) { @@ -122,14 +133,10 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...) int syslog_enabled = log_level_is_enabled (level, sc->syslog_level); u8 *s = 0; - /* make sure we are running on the main thread to avoid use in dataplane - code, for dataplane logging consider use of event-logger */ - ASSERT (vlib_get_thread_index () == 0); - if ((log_enabled || syslog_enabled) == 0) return; - vec_validate (lm->entries, lm->size); + log_size_validate (lm); if ((delta > lm->unthrottle_time) || (sc->is_throttling == 0 && (delta > 1))) @@ -197,13 +204,19 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...) if (log_enabled) { + CLIB_SPINLOCK_LOCK (lm->lock); e = vec_elt_at_index (lm->entries, lm->next); - vec_free (e->string); + lm->next = (lm->next + 1) % lm->size; + if (lm->size > lm->count) + lm->count++; e->level = level; e->class = class; - e->string = s; e->timestamp = t; - s = 0; + e->thread_index = vm->thread_index; + CLIB_SWAP (e->string, s); + CLIB_SPINLOCK_UNLOCK (lm->lock); + + vec_free (s); if (lm->add_to_elog) { @@ -235,10 +248,6 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...) 
ed->string_index = elog_string (&vlib_global_main.elog_main, "%v%c", e->string, 0); } - - lm->next = (lm->next + 1) % lm->size; - if (lm->size > lm->count) - lm->count++; } vec_free (s); @@ -356,7 +365,7 @@ vlib_log_init (vlib_main_t *vm) gettimeofday (&lm->time_zero_timeval, 0); lm->time_zero = vlib_time_now (vm); - vec_validate (lm->entries, lm->size); + log_size_validate (lm); while (r) { @@ -384,23 +393,30 @@ show_log (vlib_main_t * vm, { clib_error_t *error = 0; vlib_log_main_t *lm = &log_main; - vlib_log_entry_t *e; + vlib_log_entry_t *e, *entries; int i = last_log_entry (); - int count = lm->count; + int count; f64 time_offset; time_offset = (f64) lm->time_zero_timeval.tv_sec + (((f64) lm->time_zero_timeval.tv_usec) * 1e-6) - lm->time_zero; + CLIB_SPINLOCK_LOCK (lm->lock); + count = lm->count; + entries = vec_dup (lm->entries); + CLIB_SPINLOCK_UNLOCK (lm->lock); + while (count--) { - e = vec_elt_at_index (lm->entries, i); + e = vec_elt_at_index (entries, i); vlib_cli_output (vm, "%U %-10U %-14U %v", format_time_float, NULL, e->timestamp + time_offset, format_vlib_log_level, e->level, format_vlib_log_class, e->class, e->string); i = (i + 1) % lm->size; } + vec_free (entries); + return error; } @@ -464,8 +480,10 @@ clear_log (vlib_main_t * vm, vlib_log_main_t *lm = &log_main; vlib_log_entry_t *e; int i = last_log_entry (); - int count = lm->count; + int count; + CLIB_SPINLOCK_LOCK (lm->lock); + count = lm->count; while (count--) { e = vec_elt_at_index (lm->entries, i); @@ -475,6 +493,8 @@ clear_log (vlib_main_t * vm, lm->count = 0; lm->next = 0; + CLIB_SPINLOCK_UNLOCK (lm->lock); + vlib_log_info (log_log.class, "log cleared"); return error; } @@ -667,7 +687,7 @@ set_log_size (vlib_main_t * vm, if (unformat (line_input, "%d", &size)) { lm->size = size; - vec_validate (lm->entries, lm->size); + log_size_validate (lm); } else return clib_error_return (0, "unknown input `%U'", @@ -809,7 +829,7 @@ log_config (vlib_main_t * vm, unformat_input_t * input) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "size %d", &lm->size)) - vec_validate (lm->entries, lm->size); + log_size_validate (lm); else if (unformat (input, "unthrottle-time %d", &lm->unthrottle_time)) ; else if (unformat (input, "default-log-level %U", diff --git a/src/vlib/log.h b/src/vlib/log.h index 45e2b59946c..5bacd7f45cb 100644 --- a/src/vlib/log.h +++ b/src/vlib/log.h @@ -41,7 +41,8 @@ typedef enum typedef struct { - vlib_log_level_t level; + clib_thread_index_t thread_index; + u8 level; /* vlib_log_level_t */ vlib_log_class_t class; f64 timestamp; u8 *string; @@ -95,6 +96,7 @@ typedef struct vlib_log_entry_t *entries; vlib_log_class_data_t *classes; int size, next, count; + u8 lock; int default_rate_limit; int default_log_level; diff --git a/src/vlib/main.c b/src/vlib/main.c index a2f833711ab..ffa4a696422 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -41,11 +41,10 @@ #include <vppinfra/format.h> #include <vlib/vlib.h> #include <vlib/threads.h> +#include <vlib/file.h> #include <vlib/stats/stats.h> #include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> -#include <vlib/unix/unix.h> - #define VLIB_FRAME_MAGIC (0xabadc0ed) always_inline u32 * @@ -258,11 +257,6 @@ vlib_next_frame_change_ownership (vlib_main_t * vm, node = vec_elt (nm->nodes, node_runtime->node_index); - /* Only internal & input nodes are allowed to call other nodes. 
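[Editor's note] The vlib/log.c hunks above make logging callable from worker threads: the main-thread-only assertion is gone, writers update the entry ring (lm->next, lm->count) under CLIB_SPINLOCK, each entry records the logging thread, and readers such as show_log snapshot the entries with vec_dup inside the lock and format them outside it. A compact sketch of the same scheme with a fixed-size ring and a pthread mutex standing in for the clib spinlock:

#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define LOG_RING_SIZE 128

typedef struct {
  uint32_t thread_index;
  char text[120];
} log_entry_t;

typedef struct {
  pthread_mutex_t lock;          /* stand-in for CLIB_SPINLOCK */
  uint32_t next, count;          /* write cursor and fill level */
  log_entry_t entries[LOG_RING_SIZE];
} log_ring_t;

static log_ring_t ring = { .lock = PTHREAD_MUTEX_INITIALIZER };

static void log_ring_add (uint32_t thread_index, const char *msg)
{
  pthread_mutex_lock (&ring.lock);
  log_entry_t *e = &ring.entries[ring.next];
  ring.next = (ring.next + 1) % LOG_RING_SIZE;
  if (ring.count < LOG_RING_SIZE)
    ring.count++;
  e->thread_index = thread_index;
  snprintf (e->text, sizeof (e->text), "%s", msg);
  pthread_mutex_unlock (&ring.lock);
}

/* readers copy the ring under the lock and format outside it,
 * mirroring what show_log now does with vec_dup */
static uint32_t log_ring_snapshot (log_entry_t out[LOG_RING_SIZE])
{
  pthread_mutex_lock (&ring.lock);
  uint32_t n = ring.count;
  memcpy (out, ring.entries, sizeof (ring.entries));
  pthread_mutex_unlock (&ring.lock);
  return n;
}
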
*/ - ASSERT (node->type == VLIB_NODE_TYPE_INTERNAL - || node->type == VLIB_NODE_TYPE_INPUT - || node->type == VLIB_NODE_TYPE_PROCESS); - ASSERT (vec_len (node->next_nodes) == node_runtime->n_next_nodes); next_frame = @@ -871,16 +865,16 @@ add_trajectory_trace (vlib_buffer_t * b, u32 node_index) } static_always_inline u64 -dispatch_node (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_node_type_t type, - vlib_node_state_t dispatch_state, - vlib_frame_t * frame, u64 last_time_stamp) +dispatch_node (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_node_type_t type, vlib_frame_t *frame, + vlib_node_dispatch_reason_t dispatch_reason, + u64 last_time_stamp) { uword n, v; u64 t; vlib_node_main_t *nm = &vm->node_main; vlib_next_frame_t *nf; + vlib_node_type_atts_t attr = node_type_attrs[type]; if (CLIB_DEBUG > 0) { @@ -888,15 +882,14 @@ dispatch_node (vlib_main_t * vm, ASSERT (n->type == type); } - /* Only non-internal nodes may be disabled. */ - if (type != VLIB_NODE_TYPE_INTERNAL && node->state != dispatch_state) + if (attr.can_be_disabled == 0 && node->state == VLIB_NODE_STATE_DISABLED) { - ASSERT (type != VLIB_NODE_TYPE_INTERNAL); + ASSERT (0); /* disabled node should not be dispatched */ return last_time_stamp; } - if ((type == VLIB_NODE_TYPE_PRE_INPUT || type == VLIB_NODE_TYPE_INPUT) - && dispatch_state != VLIB_NODE_STATE_INTERRUPT) + if (attr.decrement_main_loop_per_calls_if_polling && + node->state == VLIB_NODE_STATE_POLLING) { u32 c = node->input_main_loops_per_call; /* Only call node when count reaches zero. */ @@ -938,18 +931,13 @@ dispatch_node (vlib_main_t * vm, vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); add_trajectory_trace (b, node->node_index); } - if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0)) - n = node->function (vm, node, frame); - else - n = vm->dispatch_wrapper_fn (vm, node, frame); } + + node->dispatch_reason = dispatch_reason; + if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0)) + n = node->function (vm, node, frame); else - { - if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0)) - n = node->function (vm, node, frame); - else - n = vm->dispatch_wrapper_fn (vm, node, frame); - } + n = vm->dispatch_wrapper_fn (vm, node, frame); t = clib_cpu_time_now (); @@ -968,11 +956,12 @@ dispatch_node (vlib_main_t * vm, /* When in adaptive mode and vector rate crosses threshold switch to polling mode and vice versa. 
*/ - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE)) + if (PREDICT_FALSE (attr.supports_adaptive_mode && + node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE)) { ELOG_TYPE_DECLARE (e) = { - .function = (char *) __FUNCTION__, + .function = (char *) __func__, .format = "%s vector length %d, switching to %s", .format_args = "T4i4t4", .n_enum_strings = 2, @@ -985,8 +974,8 @@ dispatch_node (vlib_main_t * vm, u32 node_name, vector_length, is_polling; } *ed; - if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT - && v >= nm->polling_threshold_vector_length) && + if ((node->state == VLIB_NODE_STATE_INTERRUPT && + v >= nm->polling_threshold_vector_length) && !(node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) { @@ -1012,8 +1001,8 @@ dispatch_node (vlib_main_t * vm, ed->is_polling = 1; } } - else if (dispatch_state == VLIB_NODE_STATE_POLLING - && v <= nm->interrupt_threshold_vector_length) + else if (node->state == VLIB_NODE_STATE_POLLING && + v <= nm->interrupt_threshold_vector_length) { vlib_node_t *n = vlib_get_node (vm, node->node_index); if (node->flags & @@ -1103,10 +1092,9 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, n->flags |= (nf->flags & VLIB_FRAME_TRACE) ? VLIB_NODE_FLAG_TRACE : 0; nf->flags &= ~VLIB_FRAME_TRACE; - last_time_stamp = dispatch_node (vm, n, - VLIB_NODE_TYPE_INTERNAL, - VLIB_NODE_STATE_POLLING, - f, last_time_stamp); + last_time_stamp = + dispatch_node (vm, n, VLIB_NODE_TYPE_INTERNAL, f, + VLIB_NODE_DISPATCH_REASON_PENDING_FRAME, last_time_stamp); /* Internal node vector-rate accounting, for summary stats */ vm->internal_node_vectors += f->n_vectors; vm->internal_node_calls++; @@ -1260,6 +1248,18 @@ vlib_process_resume (vlib_main_t * vm, vlib_process_t * p) return r; } +static void +process_timer_start (vlib_main_t *vm, vlib_process_t *p, u32 runtime_index) +{ + vlib_tw_event_t e = { .type = VLIB_TW_EVENT_T_PROCESS_NODE, + .index = runtime_index }; + + if (p->resume_clock_interval == 0) + return; + + p->stop_timer_handle = vlib_tw_timer_start (vm, e, p->resume_clock_interval); +} + static u64 dispatch_process (vlib_main_t * vm, vlib_process_t * p, vlib_frame_t * f, u64 last_time_stamp) @@ -1309,15 +1309,7 @@ dispatch_process (vlib_main_t * vm, p->n_suspends += 1; p->suspended_process_frame_index = pf - nm->suspended_process_frames; - if (p->resume_clock_interval) - { - TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) nm->timing_wheel; - p->stop_timer_handle = - TW (tw_timer_start) (tw, - vlib_timing_wheel_data_set_suspended_process ( - node->runtime_index) /* [sic] pool idex */, - 0 /* timer_id */, p->resume_clock_interval); - } + process_timer_start (vm, p, node->runtime_index); } else p->state = VLIB_PROCESS_STATE_NOT_STARTED; @@ -1419,15 +1411,7 @@ dispatch_suspended_process (vlib_main_t *vm, vlib_process_restore_t *r, /* Suspend it again. 
*/ n_vectors = 0; p->n_suspends += 1; - if (p->resume_clock_interval) - { - p->stop_timer_handle = - TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel, - vlib_timing_wheel_data_set_suspended_process - (node->runtime_index) /* [sic] pool idex */ , - 0 /* timer_id */ , - p->resume_clock_interval); - } + process_timer_start (vm, p, node->runtime_index); } else { @@ -1452,6 +1436,44 @@ dispatch_suspended_process (vlib_main_t *vm, vlib_process_restore_t *r, return t; } +static __clib_warn_unused_result u32 * +process_expired_timers (u32 *v) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_node_main_t *nm = &vm->node_main; + u32 *handle; + + v = vlib_tw_timer_expire_timers (vm, v); + + vec_foreach (handle, v) + { + vlib_tw_event_t e = { .as_u32 = *handle }; + vlib_process_restore_t restore = {}; + + if (e.type == VLIB_TW_EVENT_T_TIMED_EVENT) + { + restore.reason = VLIB_PROCESS_RESTORE_REASON_TIMED_EVENT; + restore.timed_event_data_pool_index = e.index; + vec_add1 (nm->process_restore_current, restore); + } + else if (e.type == VLIB_TW_EVENT_T_PROCESS_NODE) + { + vlib_process_t *p = vec_elt (nm->processes, e.index); + p->stop_timer_handle = ~0; + restore.reason = VLIB_PROCESS_RESTORE_REASON_CLOCK; + restore.runtime_index = e.index; + vec_add1 (nm->process_restore_current, restore); + } + else if (e.type == VLIB_TW_EVENT_T_SCHED_NODE) + { + vec_add1 (nm->sched_node_pending, e.index); + } + else + ASSERT (0); + } + return v; +} + static_always_inline void vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) { @@ -1462,6 +1484,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) f64 now; vlib_frame_queue_main_t *fqm; u32 frame_queue_check_counter = 0; + u32 *expired_timers = 0; /* Initialize pending node vector. */ if (is_main) @@ -1485,7 +1508,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; - vm->cpu_id = clib_get_current_cpu_id (); vm->numa_node = clib_get_current_numa_node (); os_set_numa_index (vm->numa_node); @@ -1548,58 +1570,64 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm, cpu_time_now); - /* Process pre-input nodes. 
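[Editor's note] process_expired_timers above views each expired timer handle as a vlib_tw_event_t, a small type-plus-index pair that also reads back as a single u32 (e.as_u32), and routes it to the process-restore vector, the timed-event pool, or the sched-node pending vector. The actual field widths of vlib_tw_event_t are not shown in this section, so the sketch below simply assumes a 2/30-bit split to illustrate the packing:

#include <stdint.h>

typedef enum {
  TW_EVENT_PROCESS_NODE,
  TW_EVENT_TIMED_EVENT,
  TW_EVENT_SCHED_NODE,
} tw_event_type_t;

/* type and index packed into one u32 so the value can travel through
 * the timer wheel as an opaque handle (bit widths are assumptions) */
typedef union {
  struct {
    uint32_t type : 2;
    uint32_t index : 30;
  };
  uint32_t as_u32;
} tw_event_t;

static void count_expired (const uint32_t *handles, int n, int counts[3])
{
  for (int i = 0; i < n; i++)
    {
      tw_event_t e = { .as_u32 = handles[i] };
      counts[e.type]++; /* route by type; the real code appends to
                           process_restore_current / sched_node_pending */
    }
}
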
*/ cpu_time_now = clib_cpu_time_now (); - vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) - cpu_time_now = dispatch_node (vm, n, - VLIB_NODE_TYPE_PRE_INPUT, - VLIB_NODE_STATE_POLLING, - /* frame */ 0, - cpu_time_now); - - if (clib_interrupt_is_any_pending (nm->pre_input_node_interrupts)) - { - int int_num = -1; - while ((int_num = clib_interrupt_get_next_and_clear ( - nm->pre_input_node_interrupts, int_num)) != -1) + if (vm->file_poll_skip_loops) + vm->file_poll_skip_loops--; + else + vlib_file_poll (vm); + + for (vlib_node_type_t nt = 0; nt < VLIB_N_NODE_TYPE; nt++) + { + if (node_type_attrs[nt].can_be_polled) + vec_foreach (n, nm->nodes_by_type[nt]) + if (n->state == VLIB_NODE_STATE_POLLING) + cpu_time_now = dispatch_node ( + vm, n, nt, + /* frame */ 0, VLIB_NODE_DISPATCH_REASON_POLL, cpu_time_now); + + if (node_type_attrs[nt].may_receive_interrupts && + nm->node_interrupts[nt] && + clib_interrupt_is_any_pending (nm->node_interrupts[nt])) { - vlib_node_runtime_t *n; - n = vec_elt_at_index ( - nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], int_num); - cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_PRE_INPUT, - VLIB_NODE_STATE_INTERRUPT, - /* frame */ 0, cpu_time_now); + int int_num = -1; + + while ((int_num = clib_interrupt_get_next_and_clear ( + nm->node_interrupts[nt], int_num)) != -1) + { + vlib_node_runtime_t *n; + n = vec_elt_at_index (nm->nodes_by_type[nt], int_num); + cpu_time_now = dispatch_node ( + vm, n, nt, + /* frame */ 0, VLIB_NODE_DISPATCH_REASON_INTERRUPT, + cpu_time_now); + } } } - /* Next process input nodes. */ - vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]) - cpu_time_now = dispatch_node (vm, n, - VLIB_NODE_TYPE_INPUT, - VLIB_NODE_STATE_POLLING, - /* frame */ 0, - cpu_time_now); - - if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0)) - vm->queue_signal_callback (vm); - - if (clib_interrupt_is_any_pending (nm->input_node_interrupts)) + /* Process sched nodes. */ + if (vec_len (nm->sched_node_pending)) { - int int_num = -1; - - while ((int_num = clib_interrupt_get_next_and_clear ( - nm->input_node_interrupts, int_num)) != -1) + vec_foreach_index (i, nm->sched_node_pending) { - vlib_node_runtime_t *n; - n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - int_num); - cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, - VLIB_NODE_STATE_INTERRUPT, - /* frame */ 0, cpu_time_now); + vlib_node_t *n = vlib_get_node (vm, nm->sched_node_pending[i]); + if (n->type == VLIB_NODE_TYPE_SCHED) + { + vlib_node_runtime_t *nr = + vlib_node_get_runtime (vm, n->index); + nr->stop_timer_handle_plus_1 = 0; + cpu_time_now = dispatch_node ( + vm, nr, VLIB_NODE_TYPE_SCHED, + /* frame */ 0, VLIB_NODE_DISPATCH_REASON_SCHED, + cpu_time_now); + } } + vec_reset_length (nm->sched_node_pending); } + if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0)) + vm->queue_signal_callback (vm); + /* Input nodes may have added work to the pending vector. Process pending vector until there is nothing left. All pending vectors will be processed from input -> output. 
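[Editor's note] The main-loop rewrite above collapses the separate pre-input and input passes into one loop over all node types, consulting the node_type_attrs table (defined later in this diff, in node.h) to decide which types can be polled and which can receive interrupts. A reduced sketch of that table-driven shape, with the per-node dispatch bodies elided:

typedef enum { NT_PRE_INPUT, NT_INPUT, NT_INTERNAL, NT_PROCESS, NT_SCHED,
               N_NODE_TYPE } node_type_t;

typedef struct {
  unsigned char can_be_polled : 1;
  unsigned char may_receive_interrupts : 1;
} node_type_attrs_t;

static const node_type_attrs_t attrs[N_NODE_TYPE] = {
  [NT_PRE_INPUT] = { .can_be_polled = 1, .may_receive_interrupts = 1 },
  [NT_INPUT]     = { .can_be_polled = 1, .may_receive_interrupts = 1 },
  [NT_SCHED]     = { .may_receive_interrupts = 1 },
};

void dispatch_all (void)
{
  for (node_type_t nt = 0; nt < N_NODE_TYPE; nt++)
    {
      if (attrs[nt].can_be_polled)
        {
          /* walk this type's nodes that are in polling state ... */
        }
      if (attrs[nt].may_receive_interrupts)
        {
          /* drain this type's pending-interrupt bitmap ... */
        }
    }
}
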
*/ @@ -1632,8 +1660,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (PREDICT_FALSE (vm->elog_trace_graph_dispatch)) ed = ELOG_DATA (&vlib_global_main.elog_main, es); - TW (tw_timer_expire_timers) - ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm)); + expired_timers = process_expired_timers (expired_timers); ASSERT (nm->process_restore_current != 0); @@ -1664,7 +1691,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) p->stop_timer_handle = ~0; void *data; data = vlib_process_signal_event_helper ( - nm, n, p, te->event_type_index, te->n_data_elts, + vm, nm, n, p, te->event_type_index, te->n_data_elts, te->n_data_elt_bytes); if (te->n_data_bytes < sizeof (te->inline_event_data)) clib_memcpy_fast (data, te->inline_event_data, @@ -1691,6 +1718,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) nm->process_restore_next); } } + else + expired_timers = process_expired_timers (expired_timers); + vlib_increment_main_loop_counter (vm); /* Record time stamp in case there are no enabled nodes and above calls do not update time stamp. */ @@ -1725,18 +1755,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) } } -static void -vlib_main_loop (vlib_main_t * vm) -{ - vlib_main_or_worker_loop (vm, /* is_main */ 1); -} - -void -vlib_worker_loop (vlib_main_t * vm) -{ - vlib_main_or_worker_loop (vm, /* is_main */ 0); -} - vlib_global_main_t vlib_global_main; void @@ -1862,34 +1880,6 @@ vl_api_get_elog_trace_api_messages (void) return 0; } -static void -process_expired_timer_cb (u32 *expired_timer_handles) -{ - vlib_main_t *vm = vlib_get_main (); - vlib_node_main_t *nm = &vm->node_main; - u32 *handle; - - vec_foreach (handle, expired_timer_handles) - { - u32 index = vlib_timing_wheel_data_get_index (*handle); - vlib_process_restore_t restore = {}; - - if (vlib_timing_wheel_data_is_timed_event (*handle)) - { - restore.reason = VLIB_PROCESS_RESTORE_REASON_TIMED_EVENT; - restore.timed_event_data_pool_index = index; - } - else - { - vlib_process_t *p = vec_elt (nm->processes, index); - p->stop_timer_handle = ~0; - restore.reason = VLIB_PROCESS_RESTORE_REASON_CLOCK; - restore.runtime_index = index; - } - vec_add1 (nm->process_restore_current, restore); - } -} - /* Main function. */ int vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) @@ -1905,12 +1895,13 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) vgm->configured_elog_ring_size != vgm->elog_main.event_ring_size) elog_resize (&vgm->elog_main, vgm->configured_elog_ring_size); vl_api_set_elog_main (vlib_get_elog_main ()); - (void) vl_api_set_elog_trace_api_messages (1); /* Default name. 
*/ if (!vgm->name) vgm->name = "VLIB"; + vm->numa_node = clib_get_current_numa_node (); + if ((error = vlib_physmem_init (vm))) { clib_error_report (error); @@ -1982,26 +1973,20 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) goto done; } + vlib_tw_init (vm); + vlib_file_poll_init (vm); + /* See unix/main.c; most likely already set up */ if (vgm->init_functions_called == 0) vgm->init_functions_called = hash_create (0, /* value bytes */ 0); if ((error = vlib_call_all_init_functions (vm))) goto done; - nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)), - CLIB_CACHE_LINE_BYTES); - vec_validate (nm->process_restore_current, 10); vec_validate (nm->process_restore_next, 10); vec_set_len (nm->process_restore_current, 0); vec_set_len (nm->process_restore_next, 0); - /* Create the process timing wheel */ - TW (tw_timer_wheel_init) - ((TWT (tw_timer_wheel) *) nm->timing_wheel, - process_expired_timer_cb /* callback */, 10e-6 /* timer period 10us */, - ~0 /* max expirations per call */); - vec_validate (vm->pending_rpc_requests, 0); vec_set_len (vm->pending_rpc_requests, 0); vec_validate (vm->processing_rpc_requests, 0); @@ -2052,7 +2037,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) goto done; } - vlib_main_loop (vm); + vlib_main_or_worker_loop (vm, /* is_main */ 1); done: /* Stop worker threads, barrier will not be released */ @@ -2072,6 +2057,40 @@ done: return vm->main_loop_exit_status; } +static void +vlib_worker_thread_fn (void *arg) +{ + vlib_global_main_t *vgm = vlib_get_global_main (); + vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_main_t *vm = vlib_get_main (); + clib_error_t *e; + + ASSERT (vm->thread_index == vlib_get_thread_index ()); + vm->numa_node = clib_get_current_numa_node (); + + vlib_worker_thread_init (w); + clib_time_init (&vm->clib_time); + clib_mem_set_heap (w->thread_mheap); + vlib_tw_init (vm); + vlib_file_poll_init (vm); + + vm->worker_init_functions_called = hash_create (0, 0); + + e = vlib_call_init_exit_functions_no_sort ( + vm, &vgm->worker_init_function_registrations, 1 /* call_once */, + 0 /* is_global */); + if (e) + clib_error_report (e); + + vlib_main_or_worker_loop (vm, /* is_main */ 0); +} + +VLIB_REGISTER_THREAD (worker_thread_reg, static) = { + .name = "workers", + .short_name = "wk", + .function = vlib_worker_thread_fn, +}; + vlib_main_t * vlib_get_main_not_inline (void) { diff --git a/src/vlib/main.h b/src/vlib/main.h index 94b8c4fa954..1700369738d 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -204,10 +204,19 @@ typedef struct vlib_main_t clib_random_buffer_t random_buffer; /* thread, cpu and numa_node indices */ - u32 thread_index; - u32 cpu_id; + clib_thread_index_t thread_index; u32 numa_node; + /* epoll and eventfd */ + int epoll_fd; + int wakeup_fd; + int n_epoll_fds; + u32 file_poll_skip_loops; + u64 epoll_files_ready; + u64 epoll_waits; + u8 wakeup_pending; + u8 thread_sleeps; + /* control-plane API queue signal pending, length indication */ volatile u32 queue_signal_pending; volatile u32 api_queue_nonempty; @@ -258,6 +267,10 @@ typedef struct vlib_main_t u32 buffer_alloc_success_seed; f64 buffer_alloc_success_rate; + /* Timing wheel for scheduling time-based node dispatch. */ + void *timing_wheel; + u32 n_tw_timers; + #ifdef CLIB_SANITIZE_ADDR /* address sanitizer stack save */ void *asan_stack_save; @@ -319,18 +332,13 @@ typedef struct vlib_global_main_t /* Global main structure. 
*/ extern vlib_global_main_t vlib_global_main; -void vlib_worker_loop (vlib_main_t * vm); - always_inline f64 vlib_time_now (vlib_main_t * vm) { -#if CLIB_DEBUG > 0 - extern __thread uword __os_thread_index; -#endif /* * Make sure folks don't pass &vlib_global_main from a worker thread. */ - ASSERT (vm->thread_index == __os_thread_index); + ASSERT (vm->thread_index == os_get_thread_index ()); return clib_time_now (&vm->clib_time) + vm->time_offset; } diff --git a/src/vlib/node.c b/src/vlib/node.c index c0572f3cf83..edeb6dc70a7 100644 --- a/src/vlib/node.c +++ b/src/vlib/node.c @@ -518,7 +518,7 @@ vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt, vlib_node_runtime_t *rt; u32 i; - if (n->type == VLIB_NODE_TYPE_PROCESS) + if (node_type_attrs[n->type].is_process) { vlib_process_t *p; uword log2_n_stack_bytes; @@ -559,15 +559,14 @@ vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt, { vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1, /* align */ CLIB_CACHE_LINE_BYTES); - if (n->type == VLIB_NODE_TYPE_INPUT) - clib_interrupt_resize (&nm->input_node_interrupts, - vec_len (nm->nodes_by_type[n->type])); - else if (n->type == VLIB_NODE_TYPE_PRE_INPUT) - clib_interrupt_resize (&nm->pre_input_node_interrupts, - vec_len (nm->nodes_by_type[n->type])); + n->runtime_index = rt - nm->nodes_by_type[n->type]; } + if (node_type_attrs[n->type].may_receive_interrupts) + clib_interrupt_resize (&nm->node_interrupts[n->type], + vec_len (nm->nodes_by_type[n->type])); + if (n->type == VLIB_NODE_TYPE_INPUT) nm->input_node_counts_by_state[n->state] += 1; diff --git a/src/vlib/node.h b/src/vlib/node.h index bb6d8f818a8..52edab9e488 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -83,9 +83,46 @@ typedef enum /* "Process" nodes which can be suspended and later resumed. */ VLIB_NODE_TYPE_PROCESS, + /* Nodes to by called by per-thread timing wheel. 
*/ + VLIB_NODE_TYPE_SCHED, + VLIB_N_NODE_TYPE, } vlib_node_type_t; +typedef struct +{ + u8 can_be_disabled : 1; + u8 may_receive_interrupts : 1; + u8 decrement_main_loop_per_calls_if_polling : 1; + u8 supports_adaptive_mode : 1; + u8 can_be_polled : 1; + u8 is_process : 1; +} vlib_node_type_atts_t; + +static const vlib_node_type_atts_t node_type_attrs[VLIB_N_NODE_TYPE] ={ + [VLIB_NODE_TYPE_PRE_INPUT] = { + .can_be_disabled = 1, + .may_receive_interrupts = 1, + .decrement_main_loop_per_calls_if_polling = 1, + .can_be_polled = 1, + }, + [VLIB_NODE_TYPE_INPUT] = { + .can_be_disabled = 1, + .may_receive_interrupts = 1, + .decrement_main_loop_per_calls_if_polling = 1, + .supports_adaptive_mode = 1, + .can_be_polled = 1, + }, + [VLIB_NODE_TYPE_PROCESS] = { + .can_be_disabled = 1, + .is_process = 1, + }, + [VLIB_NODE_TYPE_SCHED] = { + .can_be_disabled = 1, + .may_receive_interrupts = 1, + }, +}; + typedef struct _vlib_node_fn_registration { vlib_node_function_t *function; @@ -245,7 +282,26 @@ typedef enum foreach_vlib_node_state #undef _ VLIB_N_NODE_STATE, -} vlib_node_state_t; +} __clib_packed vlib_node_state_t; + +typedef enum +{ + VLIB_NODE_DISPATCH_REASON_UNKNOWN = 0, + VLIB_NODE_DISPATCH_REASON_PENDING_FRAME, + VLIB_NODE_DISPATCH_REASON_POLL, + VLIB_NODE_DISPATCH_REASON_INTERRUPT, + VLIB_NODE_DISPATCH_REASON_SCHED, + VLIB_NODE_DISPATCH_N_REASON, +} __clib_packed vlib_node_dispatch_reason_t; + +#define vlib_node_dispatch_reason_enum_strings \ + { \ + [VLIB_NODE_DISPATCH_REASON_UNKNOWN] = "unknown", \ + [VLIB_NODE_DISPATCH_REASON_PENDING_FRAME] = "pending-frame", \ + [VLIB_NODE_DISPATCH_REASON_POLL] = "poll", \ + [VLIB_NODE_DISPATCH_REASON_INTERRUPT] = "interrupt", \ + [VLIB_NODE_DISPATCH_REASON_SCHED] = "scheduled", \ + } typedef struct vlib_node_t { @@ -498,7 +554,10 @@ typedef struct vlib_node_runtime_t u16 flags; /**< Copy of main node flags. */ - u16 state; /**< Input node state. */ + vlib_node_state_t state; /**< Input node state. */ + + vlib_node_dispatch_reason_t + dispatch_reason; /**< Reason for running this node. */ u16 n_next_nodes; @@ -507,6 +566,9 @@ typedef struct vlib_node_runtime_t last time this node ran. Set to zero before first run of this node. */ + u32 stop_timer_handle_plus_1; /**< Timing wheel stop handle for + SCHED node incremented by 1, + 0 = no timer running. */ CLIB_ALIGN_MARK (runtime_data_pad, 8); @@ -679,30 +741,6 @@ typedef struct } vlib_signal_timed_event_data_t; -always_inline uword -vlib_timing_wheel_data_is_timed_event (u32 d) -{ - return d & 1; -} - -always_inline u32 -vlib_timing_wheel_data_set_suspended_process (u32 i) -{ - return 0 + 2 * i; -} - -always_inline u32 -vlib_timing_wheel_data_set_timed_event (u32 i) -{ - return 1 + 2 * i; -} - -always_inline uword -vlib_timing_wheel_data_get_index (u32 d) -{ - return d / 2; -} - typedef struct { clib_march_variant_type_t index; @@ -727,8 +765,7 @@ typedef struct vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE]; /* Node runtime indices for input nodes with pending interrupts. */ - void *input_node_interrupts; - void *pre_input_node_interrupts; + void *node_interrupts[VLIB_N_NODE_TYPE]; /* Input nodes are switched from/to interrupt to/from polling mode when average vector length goes above/below polling/interrupt @@ -742,14 +779,14 @@ typedef struct /* Vector of internal node's frames waiting to be called. */ vlib_pending_frame_t *pending_frames; - /* Timing wheel for scheduling time-based node dispatch. 
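[Editor's note] vlib_node_runtime_t above gains stop_timer_handle_plus_1, the timing-wheel handle stored with an offset of one so that a zero-initialized runtime simply means no timer armed; the vlib_node_schedule / vlib_node_unschedule helpers later in this diff add and subtract the one. A condensed sketch of that plus-one-handle idiom, with stub timer functions standing in for vlib_tw_timer_start/stop:

#include <stdint.h>

typedef struct {
  uint32_t stop_timer_handle_plus_1; /* 0 = not scheduled */
} node_runtime_t;

/* stubs standing in for the real timer-wheel API */
static uint32_t tw_timer_start (uint32_t event, uint64_t ticks)
{
  (void) event; (void) ticks;
  return 0; /* a real wheel returns a pool index, which may legitimately be 0 */
}
static void tw_timer_stop (uint32_t handle) { (void) handle; }

static int node_is_scheduled (const node_runtime_t *rt)
{
  return rt->stop_timer_handle_plus_1 != 0;
}

static void node_schedule (node_runtime_t *rt, uint32_t event, uint64_t ticks)
{
  rt->stop_timer_handle_plus_1 = 1 + tw_timer_start (event, ticks);
}

static void node_unschedule (node_runtime_t *rt)
{
  tw_timer_stop (rt->stop_timer_handle_plus_1 - 1);
  rt->stop_timer_handle_plus_1 = 0;
}
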
*/ - void *timing_wheel; - vlib_signal_timed_event_data_t *signal_timed_event_data_pool; /* Vector of process nodes waiting for restore */ vlib_process_restore_t *process_restore_current; + /* Vector of sched nodes waiting to be calleed */ + u32 *sched_node_pending; + /* Vector of process nodes waiting for restore in next greaph scheduler run */ vlib_process_restore_t *process_restore_next; diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 16e904e8433..375b17bd7ae 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -734,6 +734,9 @@ show_node (vlib_main_t * vm, unformat_input_t * input, case VLIB_NODE_TYPE_PROCESS: type_str = "process"; break; + case VLIB_NODE_TYPE_SCHED: + type_str = "sched"; + break; default: type_str = "unknown"; } diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index ffa17ba7bb1..17677ee7aec 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -47,7 +47,6 @@ #include <vppinfra/clib.h> #include <vppinfra/fifo.h> -#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> #include <vppinfra/interrupt.h> #ifdef CLIB_SANITIZE_ADDR @@ -249,24 +248,58 @@ vlib_node_set_interrupt_pending (vlib_main_t *vm, u32 node_index) { vlib_node_main_t *nm = &vm->node_main; vlib_node_t *n = vec_elt (nm->nodes, node_index); - void *interrupts = 0; + void *interrupts = nm->node_interrupts[n->type]; - if (n->type == VLIB_NODE_TYPE_INPUT) - interrupts = nm->input_node_interrupts; - else if (n->type == VLIB_NODE_TYPE_PRE_INPUT) - interrupts = nm->pre_input_node_interrupts; - else - { - ASSERT (0); - return; - } + ASSERT (interrupts); if (vm != vlib_get_main ()) - clib_interrupt_set_atomic (interrupts, n->runtime_index); + { + clib_interrupt_set_atomic (interrupts, n->runtime_index); + vlib_thread_wakeup (vm->thread_index); + } else clib_interrupt_set (interrupts, n->runtime_index); } +always_inline int +vlib_node_is_scheduled (vlib_main_t *vm, u32 node_index) +{ + vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index); + return rt->stop_timer_handle_plus_1 ? 
1 : 0; +} + +always_inline void +vlib_node_schedule (vlib_main_t *vm, u32 node_index, f64 dt) +{ + u64 ticks; + + vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index); + vlib_tw_event_t e = { + .type = VLIB_TW_EVENT_T_SCHED_NODE, + .index = node_index, + }; + + ASSERT (vm == vlib_get_main ()); + ASSERT (vlib_node_is_scheduled (vm, node_index) == 0); + + dt = flt_round_nearest (dt * VLIB_TW_TICKS_PER_SECOND); + ticks = clib_max ((u64) dt, 1); + + rt->stop_timer_handle_plus_1 = 1 + vlib_tw_timer_start (vm, e, ticks); +} + +always_inline void +vlib_node_unschedule (vlib_main_t *vm, u32 node_index) +{ + vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index); + + ASSERT (vm == vlib_get_main ()); + ASSERT (vlib_node_is_scheduled (vm, node_index) == 1); + + vlib_tw_timer_stop (vm, rt->stop_timer_handle_plus_1 - 1); + rt->stop_timer_handle_plus_1 = 0; +} + always_inline vlib_process_t * vlib_get_process_from_node (vlib_main_t * vm, vlib_node_t * node) { @@ -570,14 +603,14 @@ vlib_get_current_process_node_index (vlib_main_t * vm) return process->node_runtime.node_index; } -/** Returns TRUE if a process suspend time is less than 10us +/** Returns TRUE if a process suspend time is less than vlib timer wheel tick @param dt - remaining poll time in seconds - @returns 1 if dt < 10e-6, 0 otherwise + @returns 1 if dt < 1/VLIB_TW_TICKS_PER_SECOND, 0 otherwise */ always_inline uword vlib_process_suspend_time_is_zero (f64 dt) { - return dt < 10e-6; + return dt < (1 / VLIB_TW_TICKS_PER_SECOND); } /** Suspend a vlib cooperative multi-tasking thread for a period of time @@ -601,7 +634,7 @@ vlib_process_suspend (vlib_main_t * vm, f64 dt) if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND) { /* expiration time in 10us ticks */ - p->resume_clock_interval = dt * 1e5; + p->resume_clock_interval = dt * VLIB_TW_TICKS_PER_SECOND; vlib_process_start_switch_stack (vm, 0); clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND); } @@ -912,7 +945,7 @@ vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt) r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND); if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND) { - p->resume_clock_interval = dt * 1e5; + p->resume_clock_interval = dt * VLIB_TW_TICKS_PER_SECOND; vlib_process_start_switch_stack (vm, 0); clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND); } @@ -963,10 +996,8 @@ vlib_process_delete_one_time_event (vlib_main_t * vm, uword node_index, } always_inline void * -vlib_process_signal_event_helper (vlib_node_main_t * nm, - vlib_node_t * n, - vlib_process_t * p, - uword t, +vlib_process_signal_event_helper (vlib_main_t *vm, vlib_node_main_t *nm, + vlib_node_t *n, vlib_process_t *p, uword t, uword n_data_elts, uword n_data_elt_bytes) { uword add_to_pending = 0, delete_from_wheel = 0; @@ -1016,8 +1047,7 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, break; } - if (TW (tw_timer_handle_is_free) ((TWT (tw_timer_wheel) *) nm->timing_wheel, - p->stop_timer_handle)) + if (vlib_tw_timer_handle_is_free (vm, p->stop_timer_handle)) delete_from_wheel = 0; /* Never add current process to pending vector since current process is @@ -1036,8 +1066,7 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, if (delete_from_wheel) { - TW (tw_timer_stop) - ((TWT (tw_timer_wheel) *) nm->timing_wheel, p->stop_timer_handle); + vlib_tw_timer_stop (vm, p->stop_timer_handle); p->stop_timer_handle = ~0; } @@ -1069,7 +1098,7 @@ vlib_process_signal_event_data (vlib_main_t * vm, else t = h[0]; - return 
vlib_process_signal_event_helper (nm, n, p, t, n_data_elts, + return vlib_process_signal_event_helper (vm, nm, n, p, t, n_data_elts, n_data_elt_bytes); } @@ -1097,7 +1126,7 @@ vlib_process_signal_event_at_time (vlib_main_t * vm, t = h[0]; if (vlib_process_suspend_time_is_zero (dt)) - return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts, + return vlib_process_signal_event_helper (vm, nm, n, p, t, n_data_elts, n_data_elt_bytes); else { @@ -1118,11 +1147,12 @@ vlib_process_signal_event_at_time (vlib_main_t * vm, te->event_type_index = t; p->stop_timer_handle = - TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel, - vlib_timing_wheel_data_set_timed_event - (te - nm->signal_timed_event_data_pool), - 0 /* timer_id */ , - (vlib_time_now (vm) + dt) * 1e5); + vlib_tw_timer_start (vm, + (vlib_tw_event_t){ + .type = VLIB_TW_EVENT_T_TIMED_EVENT, + .index = te - nm->signal_timed_event_data_pool, + }, + dt * VLIB_TW_TICKS_PER_SECOND); /* Inline data big enough to hold event? */ if (te->n_data_bytes < sizeof (te->inline_event_data)) @@ -1146,8 +1176,8 @@ vlib_process_signal_one_time_event_data (vlib_main_t * vm, vlib_node_main_t *nm = &vm->node_main; vlib_node_t *n = vlib_get_node (vm, node_index); vlib_process_t *p = vec_elt (nm->processes, n->runtime_index); - return vlib_process_signal_event_helper (nm, n, p, type_index, n_data_elts, - n_data_elt_bytes); + return vlib_process_signal_event_helper (vm, nm, n, p, type_index, + n_data_elts, n_data_elt_bytes); } always_inline void diff --git a/src/vlib/punt_node.c b/src/vlib/punt_node.c index 4b81a61715a..800d520733d 100644 --- a/src/vlib/punt_node.c +++ b/src/vlib/punt_node.c @@ -68,14 +68,10 @@ format_punt_trace (u8 * s, va_list * args) } always_inline u32 -punt_replicate (vlib_main_t * vm, - vlib_node_runtime_t * node, - u32 thread_index, - vlib_buffer_t * b0, - u32 bi0, - vlib_punt_reason_t pr0, - u32 * next_index, - u32 * n_left_to_next, u32 ** to_next, u32 * n_dispatched) +punt_replicate (vlib_main_t *vm, vlib_node_runtime_t *node, + clib_thread_index_t thread_index, vlib_buffer_t *b0, u32 bi0, + vlib_punt_reason_t pr0, u32 *next_index, u32 *n_left_to_next, + u32 **to_next, u32 *n_dispatched) { /* multiple clients => replicate a copy to each */ u16 n_clones0, n_cloned0, clone0; @@ -134,13 +130,10 @@ punt_replicate (vlib_main_t * vm, } always_inline u32 -punt_dispatch_one (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_combined_counter_main_t * cm, - u32 thread_index, - u32 bi0, - u32 * next_index, - u32 * n_left_to_next, u32 ** to_next, u32 * n_dispatched) +punt_dispatch_one (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_combined_counter_main_t *cm, + clib_thread_index_t thread_index, u32 bi0, u32 *next_index, + u32 *n_left_to_next, u32 **to_next, u32 *n_dispatched) { vlib_punt_reason_t pr0; vlib_buffer_t *b0; diff --git a/src/vlib/stats/init.c b/src/vlib/stats/init.c index 8b382daf333..50f71b3eb11 100644 --- a/src/vlib/stats/init.c +++ b/src/vlib/stats/init.c @@ -3,6 +3,7 @@ */ #include <vlib/vlib.h> +#include <vlib/file.h> #include <vlib/unix/unix.h> #include <vlib/stats/stats.h> @@ -253,6 +254,4 @@ statseg_init (vlib_main_t *vm) return stats_segment_socket_init (); } -VLIB_INIT_FUNCTION (statseg_init) = { - .runs_after = VLIB_INITS ("unix_input_init", "linux_epoll_input_init"), -}; +VLIB_INIT_FUNCTION (statseg_init); diff --git a/src/vlib/threads.c b/src/vlib/threads.c index fa8d949d549..a1839e787c3 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -183,6 +183,7 @@ vlib_thread_init (vlib_main_t * vm) 
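For illustration, a minimal consumer of the new VLIB_NODE_TYPE_SCHED node type and the vlib_node_schedule () / vlib_node_is_scheduled () helpers added in node_funcs.h above might look like the sketch below. The node name, the 10 ms period and the re-arm-from-dispatch pattern are assumptions for the example only and are not taken from this change set; the point at which the dispatcher clears stop_timer_handle_plus_1 is not shown in these hunks, hence the vlib_node_is_scheduled () guard.

/*
 * Hypothetical example (not part of this change set): a SCHED-type node
 * that re-arms itself so it is dispatched roughly every 10 ms on the
 * thread that owns it. All names below are made up.
 */
#include <vlib/vlib.h>

static uword
example_sched_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt,
                       vlib_frame_t *frame)
{
  /* periodic housekeeping work would go here */

  /* re-arm for another 10 ms; guard against the previous timer handle
     not yet having been cleared by the dispatcher */
  if (!vlib_node_is_scheduled (vm, rt->node_index))
    vlib_node_schedule (vm, rt->node_index, 10e-3);

  return 0;
}

VLIB_REGISTER_NODE (example_sched_node) = {
  .function = example_sched_node_fn,
  .type = VLIB_NODE_TYPE_SCHED,
  .name = "example-sched",
};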
u32 first_index = 1; u32 i; uword *avail_cpu; + uword n_cpus; u32 stats_num_worker_threads_dir_index; stats_num_worker_threads_dir_index = @@ -190,12 +191,24 @@ vlib_thread_init (vlib_main_t * vm) ASSERT (stats_num_worker_threads_dir_index != ~0); /* get bitmaps of active cpu cores and sockets */ - tm->cpu_core_bitmap = os_get_online_cpu_core_bitmap (); tm->cpu_socket_bitmap = os_get_online_cpu_node_bitmap (); + if (!tm->cpu_translate) + tm->cpu_core_bitmap = os_get_online_cpu_core_bitmap (); + else + { + /* get bitmap of cpu core affinity */ + if ((tm->cpu_core_bitmap = os_get_cpu_affinity_bitmap ()) == 0) + return clib_error_return (0, "could not fetch cpu affinity bmp"); + } avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); /* skip cores */ + n_cpus = clib_bitmap_count_set_bits (avail_cpu); + if (tm->skip_cores >= n_cpus) + return clib_error_return ( + 0, "skip-core value greater or equal to available cpus"); + for (i = 0; i < tm->skip_cores; i++) { uword c = clib_bitmap_first_set (avail_cpu); @@ -213,8 +226,20 @@ vlib_thread_init (vlib_main_t * vm) if (tm->main_lcore != ~0) { if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0) - return clib_error_return (0, "cpu %u is not available to be used" - " for the main thread", tm->main_lcore); + { + if (tm->cpu_translate) + return clib_error_return ( + 0, + "cpu %u (relative cpu %u) is not available to be used" + " for the main thread in relative mode", + tm->main_lcore, + os_translate_cpu_from_affinity_bitmap (tm->main_lcore)); + else + return clib_error_return (0, + "cpu %u is not available to be used" + " for the main thread", + tm->main_lcore); + } avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0); } @@ -297,11 +322,23 @@ vlib_thread_init (vlib_main_t * vm) uword c; clib_bitmap_foreach (c, tr->coremask) { if (clib_bitmap_get(avail_cpu, c) == 0) - return clib_error_return (0, "cpu %u is not available to be used" - " for the '%s' thread",c, tr->name); + { + if (tm->cpu_translate) + return clib_error_return ( + 0, + "cpu %u (relative cpu %u) is not available to be used" + " for the '%s' thread in relative mode", + c, os_translate_cpu_from_affinity_bitmap (c), tr->name); + else + return clib_error_return ( + 0, + "cpu %u is not available to be used" + " for the '%s' thread", + c, tr->name); + } - avail_cpu = clib_bitmap_set(avail_cpu, c, 0); - } + avail_cpu = clib_bitmap_set (avail_cpu, c, 0); + } } else { @@ -313,7 +350,7 @@ vlib_thread_init (vlib_main_t * vm) uword c = clib_bitmap_first_set (avail_cpu); /* Use CPU 0 as a last resort */ - if (c == ~0 && avail_c0) + if (c == ~0 && avail_c0 && !tm->cpu_translate) { c = 0; avail_c0 = 0; @@ -323,7 +360,7 @@ vlib_thread_init (vlib_main_t * vm) return clib_error_return (0, "no available cpus to be used for" " the '%s' thread #%u", - tr->name, tr->count); + tr->name, j); avail_cpu = clib_bitmap_set (avail_cpu, 0, avail_c0); avail_cpu = clib_bitmap_set (avail_cpu, c, 0); @@ -705,52 +742,29 @@ start_workers (vlib_main_t * vm) vec_add1 (nm_clone->nodes, n); n++; } - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], - CLIB_CACHE_LINE_BYTES); - vec_foreach (rt, - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy initial runtime_data from node */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } - 
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - CLIB_CACHE_LINE_BYTES); - clib_interrupt_init ( - &nm_clone->input_node_interrupts, - vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); - clib_interrupt_init ( - &nm_clone->pre_input_node_interrupts, - vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])); - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy initial runtime_data from node */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } - - nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], - CLIB_CACHE_LINE_BYTES); - vec_foreach (rt, - nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy initial runtime_data from node */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } + foreach_int (nt, VLIB_NODE_TYPE_INTERNAL, + VLIB_NODE_TYPE_PRE_INPUT, VLIB_NODE_TYPE_INPUT, + VLIB_NODE_TYPE_SCHED) + { + u32 n_nodes = vec_len (nm_clone->nodes_by_type[nt]); + nm_clone->nodes_by_type[nt] = vec_dup_aligned ( + nm->nodes_by_type[nt], CLIB_CACHE_LINE_BYTES); + + if (node_type_attrs[nt].may_receive_interrupts) + clib_interrupt_init (&nm_clone->node_interrupts[nt], + n_nodes); + + vec_foreach (rt, nm_clone->nodes_by_type[nt]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + /* copy initial runtime_data from node */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); + } + } nm_clone->processes = vec_dup_aligned (nm->processes, CLIB_CACHE_LINE_BYTES); @@ -1016,101 +1030,53 @@ vlib_worker_thread_node_refork (void) vec_free (old_nodes_clone); + /* re-clone nodes */ - /* re-clone internal nodes */ - old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]; - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], - CLIB_CACHE_LINE_BYTES); - - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy runtime_data, will be overwritten later for existing rt */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy_fast (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } - - for (j = 0; j < vec_len (old_rt); j++) + foreach_int (nt, VLIB_NODE_TYPE_INTERNAL, VLIB_NODE_TYPE_PRE_INPUT, + VLIB_NODE_TYPE_INPUT, VLIB_NODE_TYPE_SCHED) { - rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); - rt->state = old_rt[j].state; - rt->flags = old_rt[j].flags; - clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); - } + old_rt = nm_clone->nodes_by_type[nt]; + u32 n_nodes = vec_len (nm->nodes_by_type[nt]); - vec_free (old_rt); - - /* re-clone input nodes */ - old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]; - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - CLIB_CACHE_LINE_BYTES); - clib_interrupt_resize ( - &nm_clone->input_node_interrupts, - vec_len 
(nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); - clib_interrupt_resize ( - &nm_clone->pre_input_node_interrupts, - vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])); - - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy runtime_data, will be overwritten later for existing rt */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy_fast (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } + nm_clone->nodes_by_type[nt] = + vec_dup_aligned (nm->nodes_by_type[nt], CLIB_CACHE_LINE_BYTES); - for (j = 0; j < vec_len (old_rt); j++) - { - rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); - rt->state = old_rt[j].state; - rt->flags = old_rt[j].flags; - clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); - } - - for (j = vec_len (old_rt); - j < vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]); j++) - { - rt = &nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT][j]; - nm_clone->input_node_counts_by_state[rt->state] += 1; - } + if (nm_clone->node_interrupts[nt]) + clib_interrupt_resize (&nm_clone->node_interrupts[nt], n_nodes); - vec_free (old_rt); + vec_foreach (rt, nm_clone->nodes_by_type[nt]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + /* copy runtime_data, will be overwritten later for existing rt */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy_fast ( + rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, n->runtime_data_bytes)); + } - /* re-clone pre-input nodes */ - old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]; - nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], - CLIB_CACHE_LINE_BYTES); + for (j = 0; j < vec_len (old_rt); j++) + { + rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); + rt->state = old_rt[j].state; + rt->flags = old_rt[j].flags; + clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, + VLIB_NODE_RUNTIME_DATA_SIZE); + } - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy runtime_data, will be overwritten later for existing rt */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy_fast (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } + if (nt == VLIB_NODE_TYPE_INPUT) + { + for (j = vec_len (old_rt); + j < vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]); + j++) + { + rt = &nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT][j]; + nm_clone->input_node_counts_by_state[rt->state] += 1; + } + } - for (j = 0; j < vec_len (old_rt); j++) - { - rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); - rt->state = old_rt[j].state; - rt->flags = old_rt[j].flags; - clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); + vec_free (old_rt); } - vec_free (old_rt); - vec_free (nm_clone->processes); nm_clone->processes = vec_dup_aligned (nm->processes, CLIB_CACHE_LINE_BYTES); @@ -1177,6 +1143,8 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) ; else if (unformat (input, "skip-cores %u", &tm->skip_cores)) ; + else if (unformat (input, "relative")) + tm->cpu_translate = 1; else if (unformat (input, "numa-heap-size %U", unformat_memory_size, &tm->numa_heap_size)) ; @@ -1235,6 +1203,11 @@ cpu_config (vlib_main_t * vm, 
unformat_input_t * input) if (use_corelist && tm->main_lcore == ~0) return clib_error_return (0, "main-core must be specified when using " "corelist-* or coremask-* attribute"); + + if (tm->skip_cores != 0 && tm->main_lcore == ~0) + return clib_error_return ( + 0, "main-core must be specified when using skip-cores attribute"); + if (tm->sched_priority != ~0) { if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR) @@ -1267,6 +1240,36 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) tr = tr->next; } + /* for relative mode, update requested main-core and corelists */ + if (tm->cpu_translate) + { + + if (tm->main_lcore == ~0) + clib_error ("main-core must be specified in relative mode"); + int cpu_translate_main_core = + os_translate_cpu_to_affinity_bitmap (tm->main_lcore); + if (cpu_translate_main_core == -1) + clib_error ("cpu %u is not available to be used" + " for the main thread in relative mode", + tm->main_lcore); + tm->main_lcore = cpu_translate_main_core; + + tr = tm->next; + uword *translated_cpu_bmp; + while (tr && tr->coremask) + { + translated_cpu_bmp = + os_translate_cpu_bmp_to_affinity_bitmap (tr->coremask); + + if (!translated_cpu_bmp) + clib_error ("could not translate corelist associated to %s", + tr->name); + clib_bitmap_free (tr->coremask); + tr->coremask = translated_cpu_bmp; + tr = tr->next; + } + } + return 0; } @@ -1304,7 +1307,7 @@ vlib_worker_thread_initial_barrier_sync_and_release (vlib_main_t * vm) { if ((now = vlib_time_now (vm)) > deadline) { - fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); + fformat (stderr, "%s: worker thread deadlock\n", __func__); os_panic (); } CLIB_PAUSE (); @@ -1404,12 +1407,16 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name) deadline = now + BARRIER_SYNC_TIMEOUT; - *vlib_worker_threads->wait_at_barrier = 1; + __atomic_store_n (vlib_worker_threads->wait_at_barrier, 1, __ATOMIC_RELEASE); + + for (clib_thread_index_t ti = 1; ti < vlib_get_n_threads (); ti++) + vlib_thread_wakeup (ti); + while (*vlib_worker_threads->workers_at_barrier != count) { if ((now = vlib_time_now (vm)) > deadline) { - fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); + fformat (stderr, "%s: worker thread deadlock\n", __func__); os_panic (); } } @@ -1477,15 +1484,14 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) * time offset. See vlib_time_now(...) 
*/ vm->time_last_barrier_release = vlib_time_now (vm); - CLIB_MEMORY_STORE_BARRIER (); - *vlib_worker_threads->wait_at_barrier = 0; + __atomic_store_n (vlib_worker_threads->wait_at_barrier, 0, __ATOMIC_RELEASE); while (*vlib_worker_threads->workers_at_barrier > 0) { if ((now = vlib_time_now (vm)) > deadline) { - fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); + fformat (stderr, "%s: worker thread deadlock\n", __func__); os_panic (); } } @@ -1502,7 +1508,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) if ((now = vlib_time_now (vm)) > deadline) { fformat (stderr, "%s: worker thread refork deadlock\n", - __FUNCTION__); + __func__); os_panic (); } } @@ -1546,7 +1552,7 @@ vlib_workers_sync (void) if (!(*vlib_worker_threads->wait_at_barrier) && !clib_atomic_swap_rel_n (&vlib_worker_threads->wait_before_barrier, 1)) { - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); vlib_rpc_call_main_thread (vlib_worker_sync_rpc, (u8 *) &thread_index, sizeof (thread_index)); vlib_worker_flush_pending_rpc_requests (vlib_get_main ()); @@ -1630,37 +1636,6 @@ vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm) clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock); } -void -vlib_worker_thread_fn (void *arg) -{ - vlib_global_main_t *vgm = vlib_get_global_main (); - vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; - vlib_main_t *vm = vlib_get_main (); - clib_error_t *e; - - ASSERT (vm->thread_index == vlib_get_thread_index ()); - - vlib_worker_thread_init (w); - clib_time_init (&vm->clib_time); - clib_mem_set_heap (w->thread_mheap); - - vm->worker_init_functions_called = hash_create (0, 0); - - e = vlib_call_init_exit_functions_no_sort ( - vm, &vgm->worker_init_function_registrations, 1 /* call_once */, - 0 /* is_global */); - if (e) - clib_error_report (e); - - vlib_worker_loop (vm); -} - -VLIB_REGISTER_THREAD (worker_thread_reg, static) = { - .name = "workers", - .short_name = "wk", - .function = vlib_worker_thread_fn, -}; - extern clib_march_fn_registration *vlib_frame_queue_dequeue_with_aux_fn_march_fn_registrations; extern clib_march_fn_registration diff --git a/src/vlib/threads.h b/src/vlib/threads.h index c671aa78c39..46a1476ee4b 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -166,7 +166,10 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts); #define BARRIER_SYNC_TIMEOUT (1.0) #endif -#define vlib_worker_thread_barrier_sync(X) {vlib_worker_thread_barrier_sync_int(X, __FUNCTION__);} +#define vlib_worker_thread_barrier_sync(X) \ + { \ + vlib_worker_thread_barrier_sync_int (X, __func__); \ + } void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name); @@ -183,7 +186,7 @@ void vlib_worker_wait_one_loop (void); */ void vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm); -static_always_inline uword +static_always_inline clib_thread_index_t vlib_get_thread_index (void) { return __os_thread_index; @@ -195,7 +198,7 @@ vlib_smp_unsafe_warning (void) if (CLIB_DEBUG > 0) { if (vlib_get_thread_index ()) - fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__); + fformat (stderr, "%s: SMP unsafe warning...\n", __func__); } } @@ -257,6 +260,9 @@ typedef struct int use_pthreads; + /* Translate requested cpu configuration to vpp affinity mask */ + int cpu_translate; + /* Number of vlib_main / vnet_main clones */ u32 n_vlib_mains; @@ -337,7 +343,7 @@ vlib_get_worker_thread_index (u32 worker_index) } always_inline u32 -vlib_get_worker_index (u32 
thread_index) +vlib_get_worker_index (clib_thread_index_t thread_index) { return thread_index - 1; } @@ -355,7 +361,7 @@ vlib_worker_thread_barrier_check (void) { vlib_global_main_t *vgm = vlib_get_global_main (); vlib_main_t *vm = vlib_get_main (); - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; f64 t = vlib_time_now (vm); if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) @@ -372,7 +378,7 @@ vlib_worker_thread_barrier_check (void) struct { - u32 thread_index; + clib_thread_index_t thread_index; } __clib_packed *ed; ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); @@ -419,7 +425,7 @@ vlib_worker_thread_barrier_check (void) struct { - u32 thread_index; + clib_thread_index_t thread_index; } __clib_packed *ed; ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, @@ -444,7 +450,7 @@ vlib_worker_thread_barrier_check (void) struct { - u32 thread_index; + clib_thread_index_t thread_index; u32 duration; } __clib_packed *ed; @@ -498,13 +504,16 @@ void vlib_workers_sync (void); * Release barrier after workers sync */ void vlib_workers_continue (void); +static_always_inline void +vlib_thread_wakeup (clib_thread_index_t thread_index) +{ + vlib_main_t *vm = vlib_get_main_by_index (thread_index); + ssize_t __clib_unused rv; + u64 val = 1; -#endif /* included_vlib_threads_h */ + if (__atomic_load_n (&vm->thread_sleeps, __ATOMIC_RELAXED)) + if (__atomic_exchange_n (&vm->wakeup_pending, 1, __ATOMIC_RELAXED) == 0) + rv = write (vm->wakeup_fd, &val, sizeof (u64)); +} -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ +#endif /* included_vlib_threads_h */ diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index 2872a025d66..ccc34fc8d8d 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -14,6 +14,8 @@ */ #define _GNU_SOURCE +#include <vppinfra/bitmap.h> +#include <vppinfra/unix.h> #include <vppinfra/format.h> #include <vlib/vlib.h> @@ -46,16 +48,20 @@ show_threads_fn (vlib_main_t * vm, const vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_worker_thread_t *w; int i; + u8 *line = NULL; - vlib_cli_output (vm, "%-7s%-20s%-12s%-8s%-25s%-7s%-7s%-7s%-10s", - "ID", "Name", "Type", "LWP", "Sched Policy (Priority)", - "lcore", "Core", "Socket", "State"); + line = format (line, "%-7s%-20s%-12s%-8s%-25s%-7s%-7s%-7s%-10s", "ID", + "Name", "Type", "LWP", "Sched Policy (Priority)", "lcore", + "Core", "Socket", "State"); + if (tm->cpu_translate) + line = format (line, "%-15s", "Relative Core"); + vlib_cli_output (vm, "%v", line); + vec_free (line); #if !defined(__powerpc64__) for (i = 0; i < vec_len (vlib_worker_threads); i++) { w = vlib_worker_threads + i; - u8 *line = NULL; line = format (line, "%-7d%-20s%-12s%-8d", i, @@ -69,7 +75,13 @@ show_threads_fn (vlib_main_t * vm, { int core_id = w->core_id; int numa_id = w->numa_id; - line = format (line, "%-7u%-7u%-7u%", cpu_id, core_id, numa_id); + line = format (line, "%-7u%-7u%-17u%", cpu_id, core_id, numa_id); + if (tm->cpu_translate) + { + int cpu_translate_core_id = + os_translate_cpu_from_affinity_bitmap (cpu_id); + line = format (line, "%-7u", cpu_translate_core_id); + } } else { diff --git a/src/vlib/time.h b/src/vlib/time.h index 61873bb2ef3..fa8cdb2ec8d 100644 --- a/src/vlib/time.h +++ b/src/vlib/time.h @@ -7,14 +7,13 @@ #define included_vlib_time_h #include <vlib/vlib.h> -#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> +#include <vlib/tw_funcs.h> static inline f64 vlib_time_get_next_timer 
(vlib_main_t *vm) { - vlib_node_main_t *nm = &vm->node_main; - TWT (tw_timer_wheel) *wheel = nm->timing_wheel; - return TW (tw_timer_first_expires_in_ticks) (wheel) * wheel->timer_interval; + TWT (tw_timer_wheel) *wheel = vm->timing_wheel; + return vlib_tw_timer_first_expires_in_ticks (vm) * wheel->timer_interval; } static inline void diff --git a/src/vlib/tw_funcs.h b/src/vlib/tw_funcs.h new file mode 100644 index 00000000000..c64aaa3db45 --- /dev/null +++ b/src/vlib/tw_funcs.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __vlib_tw_funcs_h__ +#define __vlib_tw_funcs_h__ + +#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> +#define VLIB_TW_TICKS_PER_SECOND 1e5 /* 10 us */ + +typedef enum +{ + VLIB_TW_EVENT_T_PROCESS_NODE = 1, + VLIB_TW_EVENT_T_TIMED_EVENT = 2, + VLIB_TW_EVENT_T_SCHED_NODE = 3, +} vlib_tw_event_type_t; + +typedef union +{ + struct + { + u32 type : 2; /* vlib_tw_event_type_t */ + u32 index : 30; + }; + u32 as_u32; +} vlib_tw_event_t; + +static_always_inline u32 +vlib_tw_timer_start (vlib_main_t *vm, vlib_tw_event_t e, u64 interval) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + vm->n_tw_timers++; + return TW (tw_timer_start) (tw, e.as_u32, 0 /* timer_id */, interval); +} + +static_always_inline void +vlib_tw_timer_stop (vlib_main_t *vm, u32 handle) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + ASSERT (vm->n_tw_timers > 0); + vm->n_tw_timers--; + TW (tw_timer_stop) (tw, handle); +} + +static_always_inline int +vlib_tw_timer_handle_is_free (vlib_main_t *vm, u32 handle) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + return TW (tw_timer_handle_is_free) (tw, handle); +} + +static_always_inline u32 +vlib_tw_timer_first_expires_in_ticks (vlib_main_t *vm) +{ + return TW (tw_timer_first_expires_in_ticks) ( + (TWT (tw_timer_wheel) *) vm->timing_wheel); +} + +static_always_inline void +vlib_tw_init (vlib_main_t *vm) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + tw = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)), + CLIB_CACHE_LINE_BYTES); + /* Create the process timing wheel */ + TW (tw_timer_wheel_init) + (tw, 0 /* callback */, 1 / VLIB_TW_TICKS_PER_SECOND, + ~0 /* max expirations per call */); + vm->timing_wheel = tw; + vm->n_tw_timers = 0; +} + +static_always_inline u32 * +vlib_tw_timer_expire_timers (vlib_main_t *vm, u32 *v) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + + vec_reset_length (v); + + if (vm->n_tw_timers > 0) + { + v = TW (tw_timer_expire_timers_vec) (tw, vlib_time_now (vm), v); + ASSERT (vec_len (v) <= vm->n_tw_timers); + vm->n_tw_timers -= vec_len (v); + } + + return v; +} + +#endif /* __vlib_tw_funcs_h__ */ diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 051c5730aed..8f0f00c8b9f 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -46,6 +46,7 @@ /*? 
%%syscfg:group_label Command line session %% ?*/ #include <vlib/vlib.h> +#include <vlib/file.h> #include <vlib/unix/unix.h> #include <ctype.h> @@ -1102,7 +1103,7 @@ unix_vlib_cli_output (uword cli_file_index, u8 * buffer, uword buffer_bytes) clib_file_t *uf; cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + uf = clib_file_get (fm, cf->clib_file_index); if (cf->no_pager || um->cli_pager_buffer_limit == 0 || cf->height == 0) { @@ -1244,7 +1245,7 @@ unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf) { unix_main_t *um = &unix_main; clib_file_main_t *fm = &file_main; - clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + clib_file_t *uf = clib_file_get (fm, cf->clib_file_index); unix_cli_banner_t *banner; int i, len; @@ -2460,7 +2461,7 @@ static int unix_cli_line_edit (unix_cli_main_t * cm, unix_main_t * um, clib_file_main_t * fm, unix_cli_file_t * cf) { - clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + clib_file_t *uf = clib_file_get (fm, cf->clib_file_index); int i; for (i = 0; i < vec_len (cf->input_vector); i++) @@ -2628,7 +2629,7 @@ more: /* Re-fetch pointer since pool may have moved. */ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + uf = clib_file_get (fm, cf->clib_file_index); done: /* reset vector; we'll re-use it later */ @@ -2707,7 +2708,7 @@ unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index) } cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + uf = clib_file_get (fm, cf->clib_file_index); /* Quit/EOF on stdin means quit program. */ if (uf->file_descriptor == STDIN_FILENO) @@ -3015,7 +3016,7 @@ unix_cli_listen_read_ready (clib_file_t * uf) cf->height = UNIX_CLI_DEFAULT_TERMINAL_HEIGHT; /* Send the telnet options */ - uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + uf = clib_file_get (fm, cf->clib_file_index); unix_vlib_cli_output_raw (cf, uf, charmode_option, ARRAY_LEN (charmode_option)); @@ -3050,7 +3051,7 @@ unix_cli_resize_interrupt (int signum) unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool, cm->stdin_cli_file_index); - clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + clib_file_t *uf = clib_file_get (fm, cf->clib_file_index); struct winsize ws; (void) signum; @@ -3548,45 +3549,6 @@ VLIB_CLI_COMMAND (cli_unix_show_errors, static) = { .function = unix_show_errors, }; -/** CLI command to show various unix error statistics. */ -static clib_error_t * -unix_show_files (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - clib_error_t *error = 0; - clib_file_main_t *fm = &file_main; - clib_file_t *f; - char path[PATH_MAX]; - u8 *s = 0; - - vlib_cli_output (vm, "%3s %6s %12s %12s %12s %-32s %s", "FD", "Thread", - "Read", "Write", "Error", "File Name", "Description"); - - pool_foreach (f, fm->file_pool) - { - int rv; - s = format (s, "/proc/self/fd/%d%c", f->file_descriptor, 0); - rv = readlink((char *) s, path, PATH_MAX - 1); - - path[rv < 0 ? 
0 : rv] = 0; - - vlib_cli_output (vm, "%3d %6d %12d %12d %12d %-32s %v", - f->file_descriptor, f->polling_thread_index, - f->read_events, f->write_events, f->error_events, - path, f->description); - vec_reset_length (s); - } - vec_free (s); - - return error; -} - -VLIB_CLI_COMMAND (cli_unix_show_files, static) = { - .path = "show unix files", - .short_help = "Show Unix files in use", - .function = unix_show_files, -}; - /** CLI command to show session command history. */ static clib_error_t * unix_cli_show_history (vlib_main_t * vm, @@ -3713,7 +3675,7 @@ unix_cli_show_cli_sessions (vlib_main_t * vm, { int j = 0; - uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + uf = clib_file_get (fm, cf->clib_file_index); table_format_cell (t, i, j++, "%u", cf->process_node_index); table_format_cell (t, i, j++, "%u", uf->file_descriptor); table_format_cell (t, i, j++, "%v", cf->name); diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c deleted file mode 100644 index e96cd902466..00000000000 --- a/src/vlib/unix/input.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * input.c: Unix file input - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <signal.h> -#include <unistd.h> -#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> - -/* FIXME autoconf */ -#define HAVE_LINUX_EPOLL - -#ifdef HAVE_LINUX_EPOLL - -#include <sys/epoll.h> - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - int epoll_fd; - struct epoll_event *epoll_events; - int n_epoll_fds; - - /* Statistics. 
*/ - u64 epoll_files_ready; - u64 epoll_waits; -} linux_epoll_main_t; - -static linux_epoll_main_t *linux_epoll_mains = 0; - -static void -linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) -{ - clib_file_main_t *fm = &file_main; - linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, - f->polling_thread_index); - struct epoll_event e = { 0 }; - int op, add_del = 0; - - e.events = EPOLLIN; - if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) - e.events |= EPOLLOUT; - if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED) - e.events |= EPOLLET; - e.data.u32 = f - fm->file_pool; - - op = -1; - - switch (update_type) - { - case UNIX_FILE_UPDATE_ADD: - op = EPOLL_CTL_ADD; - add_del = 1; - break; - - case UNIX_FILE_UPDATE_MODIFY: - op = EPOLL_CTL_MOD; - break; - - case UNIX_FILE_UPDATE_DELETE: - op = EPOLL_CTL_DEL; - add_del = -1; - break; - - default: - clib_warning ("unknown update_type %d", update_type); - return; - } - - /* worker threads open epoll fd only if needed */ - if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1) - { - em->epoll_fd = epoll_create (1); - if (em->epoll_fd < 0) - { - clib_unix_warning ("epoll_create"); - return; - } - em->n_epoll_fds = 0; - } - - if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0) - { - clib_unix_warning ("epoll_ctl"); - return; - } - - em->n_epoll_fds += add_del; - - if (em->n_epoll_fds == 0) - { - close (em->epoll_fd); - em->epoll_fd = -1; - } -} - -static_always_inline uword -linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, u32 thread_index) -{ - unix_main_t *um = &unix_main; - clib_file_main_t *fm = &file_main; - linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index); - struct epoll_event *e; - int n_fds_ready; - int is_main = (thread_index == 0); - - { - vlib_node_main_t *nm = &vm->node_main; - u32 ticks_until_expiration; - f64 timeout; - f64 now; - int timeout_ms = 0, max_timeout_ms = 10; - f64 vector_rate = vlib_last_vectors_per_main_loop (vm); - - if (is_main == 0) - now = vlib_time_now (vm); - - /* - * If we've been asked for a fixed-sleep between main loop polls, - * do so right away. - */ - if (PREDICT_FALSE (is_main && um->poll_sleep_usec)) - { - struct timespec ts, tsrem; - timeout = 0; - timeout_ms = 0; - node->input_main_loops_per_call = 0; - ts.tv_sec = 0; - ts.tv_nsec = 1000 * um->poll_sleep_usec; - - while (nanosleep (&ts, &tsrem) < 0) - { - ts = tsrem; - } - } - /* If we're not working very hard, decide how long to sleep */ - else if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0 - && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) - { - ticks_until_expiration = TW (tw_timer_first_expires_in_ticks) - ((TWT (tw_timer_wheel) *) nm->timing_wheel); - - /* Nothing on the fast wheel, sleep 10ms */ - if (ticks_until_expiration == TW_SLOTS_PER_RING) - { - timeout = 10e-3; - timeout_ms = max_timeout_ms; - } - else - { - timeout = (f64) ticks_until_expiration *1e-5; - if (timeout < 1e-3) - timeout_ms = 0; - else - { - timeout_ms = timeout * 1e3; - /* Must be between 1 and 10 ms. 
*/ - timeout_ms = clib_max (1, timeout_ms); - timeout_ms = clib_min (max_timeout_ms, timeout_ms); - } - } - node->input_main_loops_per_call = 0; - } - else if (is_main == 0 && vector_rate < 2 && - (vlib_get_first_main ()->time_last_barrier_release + 0.5 < now) && - nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) - { - timeout = 10e-3; - timeout_ms = max_timeout_ms; - node->input_main_loops_per_call = 0; - } - else /* busy */ - { - /* Don't come back for a respectable number of dispatch cycles */ - node->input_main_loops_per_call = 1024; - } - - /* Allow any signal to wakeup our sleep. */ - if (is_main || em->epoll_fd != -1) - { - static sigset_t unblock_all_signals; - n_fds_ready = epoll_pwait (em->epoll_fd, - em->epoll_events, - vec_len (em->epoll_events), - timeout_ms, &unblock_all_signals); - - /* This kludge is necessary to run over absurdly old kernels */ - if (n_fds_ready < 0 && errno == ENOSYS) - { - n_fds_ready = epoll_wait (em->epoll_fd, - em->epoll_events, - vec_len (em->epoll_events), timeout_ms); - } - - } - else - { - /* - * Worker thread, no epoll fd's, sleep for 100us at a time - * and check for a barrier sync request - */ - if (timeout_ms) - { - struct timespec ts, tsrem; - f64 limit = now + (f64) timeout_ms * 1e-3; - - while (vlib_time_now (vm) < limit) - { - /* Sleep for 100us at a time */ - ts.tv_sec = 0; - ts.tv_nsec = 1000 * 100; - - while (nanosleep (&ts, &tsrem) < 0) - ts = tsrem; - if (*vlib_worker_threads->wait_at_barrier || - clib_interrupt_is_any_pending ( - nm->input_node_interrupts) || - clib_interrupt_is_any_pending ( - nm->pre_input_node_interrupts)) - goto done; - } - } - goto done; - } - } - - if (n_fds_ready < 0) - { - if (unix_error_is_fatal (errno)) - vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait")); - - /* non fatal error (e.g. EINTR). */ - goto done; - } - - em->epoll_waits += 1; - em->epoll_files_ready += n_fds_ready; - - for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++) - { - u32 i = e->data.u32; - clib_file_t *f; - clib_error_t *errors[4]; - int n_errors = 0; - - /* - * Under rare scenarios, epoll may still post us events for the - * deleted file descriptor. We just deal with it and throw away the - * events for the corresponding file descriptor. 
- */ - f = fm->file_pool + i; - if (PREDICT_FALSE (pool_is_free (fm->file_pool, f))) - { - if (e->events & EPOLLIN) - { - errors[n_errors] = - clib_error_return (0, "epoll event EPOLLIN dropped due " - "to free index %u", i); - n_errors++; - } - if (e->events & EPOLLOUT) - { - errors[n_errors] = - clib_error_return (0, "epoll event EPOLLOUT dropped due " - "to free index %u", i); - n_errors++; - } - if (e->events & EPOLLERR) - { - errors[n_errors] = - clib_error_return (0, "epoll event EPOLLERR dropped due " - "to free index %u", i); - n_errors++; - } - } - else if (PREDICT_TRUE (!(e->events & EPOLLERR))) - { - if (e->events & EPOLLIN) - { - f->read_events++; - errors[n_errors] = f->read_function (f); - /* Make sure f is valid if the file pool moves */ - if (pool_is_free_index (fm->file_pool, i)) - continue; - f = pool_elt_at_index (fm->file_pool, i); - n_errors += errors[n_errors] != 0; - } - if (e->events & EPOLLOUT) - { - f->write_events++; - errors[n_errors] = f->write_function (f); - n_errors += errors[n_errors] != 0; - } - } - else - { - if (f->error_function) - { - f->error_events++; - errors[n_errors] = f->error_function (f); - n_errors += errors[n_errors] != 0; - } - else - close (f->file_descriptor); - } - - ASSERT (n_errors < ARRAY_LEN (errors)); - for (i = 0; i < n_errors; i++) - { - unix_save_error (um, errors[i]); - } - } - -done: - if (PREDICT_FALSE (vm->cpu_id != clib_get_current_cpu_id ())) - { - vm->cpu_id = clib_get_current_cpu_id (); - vm->numa_node = clib_get_current_numa_node (); - } - - return 0; -} - -static uword -linux_epoll_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 thread_index = vlib_get_thread_index (); - - if (thread_index == 0) - return linux_epoll_input_inline (vm, node, frame, 0); - else - return linux_epoll_input_inline (vm, node, frame, thread_index); -} - -VLIB_REGISTER_NODE (linux_epoll_input_node,static) = { - .function = linux_epoll_input, - .type = VLIB_NODE_TYPE_PRE_INPUT, - .name = "unix-epoll-input", -}; - -clib_error_t * -linux_epoll_input_init (vlib_main_t * vm) -{ - linux_epoll_main_t *em; - clib_file_main_t *fm = &file_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - - vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - - vec_foreach (em, linux_epoll_mains) - { - /* Allocate some events. 
*/ - vec_resize (em->epoll_events, VLIB_FRAME_SIZE); - - if (linux_epoll_mains == em) - { - em->epoll_fd = epoll_create (1); - if (em->epoll_fd < 0) - return clib_error_return_unix (0, "epoll_create"); - } - else - em->epoll_fd = -1; - } - - fm->file_update = linux_epoll_file_update; - - return 0; -} - -VLIB_INIT_FUNCTION (linux_epoll_input_init); - -#endif /* HAVE_LINUX_EPOLL */ - -static clib_error_t * -unix_input_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (unix_input_init) = -{ - .runs_before = VLIB_INITS ("linux_epoll_input_init"), -}; - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index 11d0cb1160c..cd1f1e1c99a 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -54,6 +54,10 @@ #include <sys/resource.h> #include <unistd.h> +#ifdef HAVE_LIBIBERTY +#include <libiberty/demangle.h> +#endif + /** Default CLI pager limit is not configured in startup.conf */ #define UNIX_CLI_DEFAULT_PAGER_LIMIT 100000 @@ -64,7 +68,6 @@ char *vlib_default_runtime_dir __attribute__ ((weak)); char *vlib_default_runtime_dir = "vlib"; unix_main_t unix_main; -clib_file_main_t file_main; static clib_error_t * unix_main_init (vlib_main_t * vm) @@ -74,10 +77,7 @@ unix_main_init (vlib_main_t * vm) return 0; } -VLIB_INIT_FUNCTION (unix_main_init) = -{ - .runs_before = VLIB_INITS ("unix_input_init"), -}; +VLIB_INIT_FUNCTION (unix_main_init); static int unsetup_signal_handlers (int sig) @@ -226,8 +226,20 @@ unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc) { if (color) syslog_msg = format (syslog_msg, ANSI_FG_YELLOW); - syslog_msg = - format (syslog_msg, " %s + 0x%x", sf->name, sf->offset); +#if HAVE_LIBIBERTY + if (strncmp (sf->name, "_Z", 2) == 0) + { + char *demangled = cplus_demangle (sf->name, DMGL_AUTO); + syslog_msg = format (syslog_msg, " %s", + demangled ? 
demangled : sf->name); + if (demangled) + free (demangled); + } + else +#endif + syslog_msg = format (syslog_msg, " %s", sf->name); + + syslog_msg = format (syslog_msg, " + 0x%x", sf->offset); if (color) syslog_msg = format (syslog_msg, ANSI_FG_DEFAULT); } @@ -374,6 +386,7 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) clib_error_t *error = 0; gid_t gid; int pidfd = -1; + int use_current_dir = 0; /* Defaults */ um->cli_pager_buffer_limit = UNIX_CLI_DEFAULT_PAGER_LIMIT; @@ -397,6 +410,8 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config)) ; + else if (unformat (input, "use-current-dir")) + use_current_dir = 1; else if (unformat (input, "runtime-dir %s", &um->runtime_dir)) ; else if (unformat (input, "cli-line-mode")) @@ -486,6 +501,13 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) format_unformat_error, input); } + if (use_current_dir) + { + char cwd[PATH_MAX]; + if (getcwd (cwd, PATH_MAX)) + um->runtime_dir = format (um->runtime_dir, "%s", cwd); + } + if (um->runtime_dir == 0) { uid_t uid = geteuid (); diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c index 1f3b4e9a8f1..396e442d4fa 100644 --- a/src/vlib/unix/mc_socket.c +++ b/src/vlib/unix/mc_socket.c @@ -827,8 +827,7 @@ static void * catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes, u8 * set_output_vector) { - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - c->clib_file_index); + clib_file_t *uf = clib_file_get (&file_main, c->clib_file_index); u8 *result = 0; if (set_output_vector) diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c index 77e4633e14a..c784c5b44ad 100644 --- a/src/vlib/unix/plugin.c +++ b/src/vlib/unix/plugin.c @@ -748,11 +748,11 @@ config_one_plugin (vlib_main_t * vm, char *name, unformat_input_t * input) } vec_add2 (pm->configs, pc, 1); - hash_set_mem (pm->config_index_by_name, name, pc - pm->configs); pc->is_enabled = is_enable; pc->is_disabled = is_disable; pc->skip_version_check = skip_version_check; - pc->name = name; + pc->name = vec_dup (name); + hash_set_mem (pm->config_index_by_name, pc->name, pc - pm->configs); done: return error; @@ -816,6 +816,7 @@ done: unformat_vlib_cli_sub_input, &sub_input)) { error = config_one_plugin (vm, (char *) s, &sub_input); + vec_free (s); unformat_free (&sub_input); if (error) goto done2; diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h index 4b5f98a2e66..d0b7a4c7005 100644 --- a/src/vlib/unix/unix.h +++ b/src/vlib/unix/unix.h @@ -121,7 +121,6 @@ typedef enum /* Global main structure. 
*/ extern unix_main_t unix_main; -extern clib_file_main_t file_main; always_inline void unix_save_error (unix_main_t * um, clib_error_t * error) diff --git a/src/vlib/vlib.h b/src/vlib/vlib.h index 36f8a361abc..1e2b25eba3b 100644 --- a/src/vlib/vlib.h +++ b/src/vlib/vlib.h @@ -71,6 +71,7 @@ typedef u32 vlib_log_class_t; #include <vlib/threads.h> #include <vlib/physmem_funcs.h> #include <vlib/buffer_funcs.h> +#include <vlib/tw_funcs.h> #include <vlib/error_funcs.h> #include <vlib/format_funcs.h> #include <vlib/node_funcs.h> diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 74957a6f0f6..2729b88dd26 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -25,6 +25,7 @@ #include <svm/svm.h> #include <svm/queue.h> #include <vlib/vlib.h> +#include <vlib/file.h> #include <vlib/unix/unix.h> #include <vlibapi/api_common.h> diff --git a/src/vlibmemory/socket_api.c b/src/vlibmemory/socket_api.c index 26be8d09522..83b63592d44 100644 --- a/src/vlibmemory/socket_api.c +++ b/src/vlibmemory/socket_api.c @@ -227,7 +227,7 @@ socket_cleanup_pending_remove_registration_cb (u32 *preg_index) clib_file_main_t *fm = &file_main; u32 pending_remove_file_index = vl_api_registration_file_index (rp); - clib_file_t *zf = fm->file_pool + pending_remove_file_index; + clib_file_t *zf = clib_file_get (fm, pending_remove_file_index); clib_file_del (fm, zf); vl_socket_free_registration_index (rp - socket_main.registration_pool); diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index b6227d45a2a..5c9c5cc0dc5 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -522,6 +522,7 @@ list(APPEND VNET_API_FILES bfd/bfd.api) list(APPEND VNET_SOURCES crypto/cli.c + crypto/config.c crypto/crypto.c crypto/format.c crypto/main.c @@ -589,6 +590,7 @@ list(APPEND VNET_HEADERS ipsec/ipsec_tun.h ipsec/ipsec_types_api.h ipsec/ipsec_punt.h + ipsec/ipsec_funcs.h ipsec/esp.h ipsec/ah.h ) @@ -749,29 +751,6 @@ list(APPEND VNET_HEADERS list(APPEND VNET_API_FILES mpls/mpls.api) ############################################################################## -# Tunnel protocol: vxlan-gpe -############################################################################## - -list(APPEND VNET_SOURCES - vxlan-gpe/vxlan_gpe.c - vxlan-gpe/encap.c - vxlan-gpe/decap.c - vxlan-gpe/vxlan_gpe_api.c -) - -list (APPEND VNET_MULTIARCH_SOURCES - vxlan-gpe/decap.c -) - -list(APPEND VNET_HEADERS - vxlan-gpe/vxlan_gpe.h - vxlan-gpe/vxlan_gpe_packet.h - vxlan-gpe/vxlan_gpe_error.def -) - -list(APPEND VNET_API_FILES vxlan-gpe/vxlan_gpe.api) - -############################################################################## # ipv6 segment routing ############################################################################## diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c index 5413eca6212..8bf9b8225ad 100644 --- a/src/vnet/adj/adj_l2.c +++ b/src/vnet/adj/adj_l2.c @@ -53,7 +53,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm, { u32 * from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, * to_next, next_index; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index(); ethernet_main_t * em = ðernet_main; n_left_from = frame->n_vectors; diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c index 1b4fa6c15b9..9c59d70bf17 100644 --- a/src/vnet/adj/adj_nsh.c +++ b/src/vnet/adj/adj_nsh.c @@ -55,7 +55,7 @@ adj_nsh_rewrite_inline (vlib_main_t * vm, { u32 * from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, * to_next, next_index; - u32 thread_index = 
vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index(); n_left_from = frame->n_vectors; next_index = node->cached_next_index; diff --git a/src/vnet/bier/bier_lookup.c b/src/vnet/bier/bier_lookup.c index f7a21a1c744..50e07d1a2fc 100644 --- a/src/vnet/bier/bier_lookup.c +++ b/src/vnet/bier/bier_lookup.c @@ -83,7 +83,7 @@ bier_lookup (vlib_main_t * vm, { u32 n_left_from, next_index, * from, * to_next; bier_lookup_main_t *blm = &bier_lookup_main; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); bier_bit_mask_bucket_t buckets_copy[BIER_HDR_BUCKETS_4096]; from = vlib_frame_vector_args (from_frame); @@ -347,7 +347,7 @@ clib_error_t * bier_lookup_module_init (vlib_main_t * vm) { bier_lookup_main_t *blm = &bier_lookup_main; - u32 thread_index; + clib_thread_index_t thread_index; vec_validate (blm->blm_clones, vlib_num_workers()); vec_validate (blm->blm_fmasks, vlib_num_workers()); diff --git a/src/vnet/bier/bier_output.c b/src/vnet/bier/bier_output.c index 5c19103d6a3..99fccf09f27 100644 --- a/src/vnet/bier/bier_output.c +++ b/src/vnet/bier/bier_output.c @@ -68,7 +68,7 @@ bier_output (vlib_main_t * vm, { vlib_combined_counter_main_t *cm = &bier_fmask_counters; u32 n_left_from, next_index, * from, * to_next; - u32 thread_index; + clib_thread_index_t thread_index; thread_index = vm->thread_index; from = vlib_frame_vector_args (from_frame); diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index cdc935ff10f..33a313b8052 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -609,7 +609,7 @@ bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args) vnet_interface_main_t *im = &vnm->interface_main; vnet_hw_interface_t *bif_hw, *mif_hw; vnet_sw_interface_t *sw; - u32 thread_index; + clib_thread_index_t thread_index; u32 mif_if_index; bif = bond_get_bond_if_by_sw_if_index (args->group); diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c index a0b93fccde1..5081ddfed57 100644 --- a/src/vnet/bonding/device.c +++ b/src/vnet/bonding/device.c @@ -186,19 +186,19 @@ bond_lb_broadcast (vlib_main_t *vm, bond_if_t *bif, vlib_buffer_t *b0, vlib_buffer_t *c0; int port; u32 sw_if_index; - u16 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data, thread_index); for (port = 1; port < n_members; port++) { - sw_if_index = *vec_elt_at_index (bif->active_members, port); - c0 = vlib_buffer_copy (vm, b0); - if (PREDICT_TRUE (c0 != 0)) - { - vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index; - bond_tx_add_to_queue (ptd, port, vlib_get_buffer_index (vm, c0)); - } + sw_if_index = *vec_elt_at_index (ptd->active_members, port); + c0 = vlib_buffer_copy (vm, b0); + if (PREDICT_TRUE (c0 != 0)) + { + vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index; + bond_tx_add_to_queue (ptd, port, vlib_get_buffer_index (vm, c0)); + } } return 0; @@ -351,8 +351,8 @@ bond_hash_to_port (u32 * h, u32 n_left, u32 n_members, } static_always_inline void -bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif, - u32 * bi, vlib_buffer_t ** b, u32 * data, u32 n_left, +bond_update_sw_if_index (bond_per_thread_data_t *ptd, bond_if_t *bif, u32 *bi, + vlib_buffer_t **b, u32 *data, u32 n_left, int single_sw_if_index) { u32 sw_if_index = data[0]; @@ -381,13 +381,13 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif, else { vnet_buffer (b[0])->sw_if_index[VLIB_TX] = - 
*vec_elt_at_index (bif->active_members, h[0]); + *vec_elt_at_index (ptd->active_members, h[0]); vnet_buffer (b[1])->sw_if_index[VLIB_TX] = - *vec_elt_at_index (bif->active_members, h[1]); + *vec_elt_at_index (ptd->active_members, h[1]); vnet_buffer (b[2])->sw_if_index[VLIB_TX] = - *vec_elt_at_index (bif->active_members, h[2]); + *vec_elt_at_index (ptd->active_members, h[2]); vnet_buffer (b[3])->sw_if_index[VLIB_TX] = - *vec_elt_at_index (bif->active_members, h[3]); + *vec_elt_at_index (ptd->active_members, h[3]); bond_tx_add_to_queue (ptd, h[0], bi[0]); bond_tx_add_to_queue (ptd, h[1], bi[1]); @@ -410,7 +410,7 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif, else { vnet_buffer (b[0])->sw_if_index[VLIB_TX] = - *vec_elt_at_index (bif->active_members, h[0]); + *vec_elt_at_index (ptd->active_members, h[0]); bond_tx_add_to_queue (ptd, h[0], bi[0]); } @@ -422,8 +422,9 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif, } static_always_inline void -bond_tx_trace (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, - vlib_buffer_t ** b, u32 n_left, u32 * h) +bond_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + bond_per_thread_data_t *ptd, vlib_buffer_t **b, u32 n_left, + u32 *h) { uword n_trace = vlib_get_trace_count (vm, node); @@ -441,15 +442,12 @@ bond_tx_trace (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, t0->ethernet = *eth; t0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX]; if (!h) - { - t0->bond_sw_if_index = - *vec_elt_at_index (bif->active_members, 0); - } + t0->bond_sw_if_index = *vec_elt_at_index (ptd->active_members, 0); else { - t0->bond_sw_if_index = - *vec_elt_at_index (bif->active_members, h[0]); - h++; + t0->bond_sw_if_index = + *vec_elt_at_index (ptd->active_members, h[0]); + h++; } } b++; @@ -463,7 +461,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, { vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; bond_main_t *bm = &bond_main; - u16 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; bond_if_t *bif = pool_elt_at_index (bm->interfaces, rund->dev_instance); uword n_members; vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; @@ -473,7 +471,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, vnet_main_t *vnm = vnet_get_main (); bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data, thread_index); - u32 p, sw_if_index; + u32 p, sw_if_index, n_numa_members; if (PREDICT_FALSE (bif->admin_up == 0)) { @@ -487,9 +485,10 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, return frame->n_vectors; } - n_members = vec_len (bif->active_members); - if (PREDICT_FALSE (n_members == 0)) + clib_spinlock_lock_if_init (&bif->lockp); + if (PREDICT_FALSE (vec_len (bif->active_members) == 0)) { + clib_spinlock_unlock_if_init (&bif->lockp); vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_DROP, @@ -500,14 +499,25 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, return frame->n_vectors; } + /* + * Take a snapshot of the active members as members may be freed + * asynchronously + */ + vec_validate (ptd->active_members, vec_len (bif->active_members) - 1); + vec_copy (ptd->active_members, bif->active_members); + n_numa_members = bif->n_numa_members; + clib_spinlock_unlock_if_init (&bif->lockp); + + n_members = vec_len (ptd->active_members); + vlib_get_buffers (vm, from, bufs, 
n_left); /* active-backup mode, ship everything to first sw if index */ if ((bif->lb == BOND_LB_AB) || PREDICT_FALSE (n_members == 1)) { - sw_if_index = *vec_elt_at_index (bif->active_members, 0); + sw_if_index = *vec_elt_at_index (ptd->active_members, 0); - bond_tx_trace (vm, node, bif, bufs, frame->n_vectors, 0); + bond_tx_trace (vm, node, ptd, bufs, frame->n_vectors, 0); bond_update_sw_if_index (ptd, bif, from, bufs, &sw_if_index, n_left, /* single_sw_if_index */ 1); goto done; @@ -515,10 +525,10 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, if (bif->lb == BOND_LB_BC) { - sw_if_index = *vec_elt_at_index (bif->active_members, 0); + sw_if_index = *vec_elt_at_index (ptd->active_members, 0); bond_tx_no_hash (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_BC); - bond_tx_trace (vm, node, bif, bufs, frame->n_vectors, 0); + bond_tx_trace (vm, node, ptd, bufs, frame->n_vectors, 0); bond_update_sw_if_index (ptd, bif, from, bufs, &sw_if_index, n_left, /* single_sw_if_index */ 1); goto done; @@ -527,7 +537,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, /* if have at least one member on local numa node, only members on local numa node will transmit pkts when bif->local_numa_only is enabled */ if (bif->n_numa_members >= 1) - n_members = bif->n_numa_members; + n_members = n_numa_members; if (bif->lb == BOND_LB_RR) bond_tx_no_hash (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_RR); @@ -541,7 +551,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, else bond_hash_to_port (h, frame->n_vectors, n_members, 0); - bond_tx_trace (vm, node, bif, bufs, frame->n_vectors, h); + bond_tx_trace (vm, node, ptd, bufs, frame->n_vectors, h); bond_update_sw_if_index (ptd, bif, from, bufs, hashes, frame->n_vectors, /* single_sw_if_index */ 0); @@ -552,7 +562,7 @@ done: vlib_frame_t *f; u32 *to_next; - sw_if_index = *vec_elt_at_index (bif->active_members, p); + sw_if_index = *vec_elt_at_index (ptd->active_members, p); if (PREDICT_TRUE (ptd->per_port_queue[p].n_buffers)) { f = vnet_get_frame_to_sw_interface (vnm, sw_if_index); @@ -564,6 +574,7 @@ done: ptd->per_port_queue[p].n_buffers = 0; } } + vec_reset_length (ptd->active_members); return frame->n_vectors; } diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c index 66de1e4dd80..347aa56dbc0 100644 --- a/src/vnet/bonding/node.c +++ b/src/vnet/bonding/node.c @@ -197,7 +197,7 @@ VLIB_NODE_FN (bond_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u16 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 *from, n_left; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u32 sw_if_indices[VLIB_FRAME_SIZE], *sw_if_index; diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h index c6602ef01b9..c6efa5b2e72 100644 --- a/src/vnet/bonding/node.h +++ b/src/vnet/bonding/node.h @@ -165,6 +165,7 @@ typedef struct { bond_per_port_queue_t *per_port_queue; void **data; + u32 *active_members; } bond_per_thread_data_t; typedef struct diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 247af56f403..276cb1115f1 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -340,7 +340,7 @@ typedef struct u32 __pad[3]; u32 sad_index; u32 protect_index; - u16 thread_index; + clib_thread_index_t thread_index; } ipsec; /* MAP */ @@ -502,7 +502,7 @@ typedef struct */ struct { - u32 thread_index; + clib_thread_index_t thread_index; u32 pool_index; u32 id; } reass; diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 
77c1c81f9c4..998615e5d33 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -232,7 +232,7 @@ static inline void make_working_copy vnet_classify_bucket_t working_bucket __attribute__ ((aligned (8))); void *oldheap; vnet_classify_entry_t *working_copy; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); int working_copy_length, required_length; if (thread_index >= vec_len (t->working_copies)) @@ -427,7 +427,7 @@ vnet_classify_add_del (vnet_classify_table_t *t, vnet_classify_entry_t *add_v, u32 hash, new_hash; u32 limit; u32 old_log2_pages, new_log2_pages; - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); u8 *key_minus_skip; int resplit_once = 0; int mark_bucket_linear; diff --git a/src/vnet/crypto/config.c b/src/vnet/crypto/config.c new file mode 100644 index 00000000000..09f39b38b4e --- /dev/null +++ b/src/vnet/crypto/config.c @@ -0,0 +1,105 @@ +/* + * config.c: crypto engines configuration + * + * Copyright (c) 2025 Cisco and/or its affiliates. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include <vlib/vlib.h> +#include <vnet/crypto/crypto.h> + +static clib_error_t * +config_one_crypto (vlib_main_t *vm, char *name, unformat_input_t *input) +{ + vnet_crypto_main_t *cm = &crypto_main; + vnet_crypto_config_t *pc; + clib_error_t *error = 0; + uword *p; + int is_enable = 0; + int is_disable = 0; + + if (cm->config_index_by_name == 0) + cm->config_index_by_name = hash_create_string (0, sizeof (uword)); + + p = hash_get_mem (cm->config_index_by_name, name); + if (p) + { + error = clib_error_return (0, "crypto '%s' already configured", name); + goto done; + } + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "enable")) + is_enable = 1; + else if (unformat (input, "disable")) + is_disable = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + goto done; + } + } + + if (is_enable && is_disable) + { + error = clib_error_return (0, + "please specify either enable or disable" + " for crypto '%s'", + name); + goto done; + } + + vec_add2 (cm->configs, pc, 1); + pc->is_enabled = is_enable; + pc->is_disabled = is_disable; + pc->name = vec_dup (name); + hash_set_mem (cm->config_index_by_name, pc->name, pc - cm->configs); + +done: + return error; +} + +static clib_error_t * +crypto_engines_config (vlib_main_t *vm, unformat_input_t *input) +{ + vnet_crypto_main_t *cm = &crypto_main; + clib_error_t *error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + unformat_input_t sub_input; + u8 *s = 0; + if (unformat (input, "default %U", unformat_vlib_cli_sub_input, + &sub_input)) + { + cm->default_disabled = unformat (&sub_input, "disable") ? 
1 : 0; + unformat_free (&sub_input); + } + else if (unformat (input, "%s %U", &s, unformat_vlib_cli_sub_input, + &sub_input)) + { + error = config_one_crypto (vm, (char *) s, &sub_input); + vec_free (s); + unformat_free (&sub_input); + if (error) + goto done; + } + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + { + vec_free (s); + goto done; + } + } + } + +done: + return error; +} + +VLIB_EARLY_CONFIG_FUNCTION (crypto_engines_config, "crypto-engines"); diff --git a/src/vnet/crypto/crypto.c b/src/vnet/crypto/crypto.c index 35e7768375d..765dc499078 100644 --- a/src/vnet/crypto/crypto.c +++ b/src/vnet/crypto/crypto.c @@ -18,6 +18,8 @@ VLIB_REGISTER_LOG_CLASS (crypto_main_log, static) = { #define log_debug(f, ...) \ vlib_log (VLIB_LOG_LEVEL_DEBUG, crypto_main_log.class, f, ##__VA_ARGS__) +#define log_notice(f, ...) \ + vlib_log (VLIB_LOG_LEVEL_NOTICE, crypto_main_log.class, f, ##__VA_ARGS__) #define log_err(f, ...) \ vlib_log (VLIB_LOG_LEVEL_ERR, crypto_main_log.class, f, ##__VA_ARGS__) @@ -381,17 +383,44 @@ vnet_crypto_register_key_handler (vlib_main_t *vm, u32 engine_index, return; } +static vnet_crypto_key_t * +vnet_crypoto_key_alloc (u32 length) +{ + vnet_crypto_main_t *cm = &crypto_main; + u8 expected = 0; + vnet_crypto_key_t *k, **kp; + u32 alloc_sz = sizeof (vnet_crypto_key_t) + round_pow2 (length, 16); + + while (!__atomic_compare_exchange_n (&cm->keys_lock, &expected, 1, 0, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + { + while (__atomic_load_n (&cm->keys_lock, __ATOMIC_RELAXED)) + CLIB_PAUSE (); + expected = 0; + } + + pool_get (cm->keys, kp); + + __atomic_store_n (&cm->keys_lock, 0, __ATOMIC_RELEASE); + + k = clib_mem_alloc_aligned (alloc_sz, alignof (vnet_crypto_key_t)); + kp[0] = k; + *k = (vnet_crypto_key_t){ + .index = kp - cm->keys, + .length = length, + }; + + return k; +} + u32 vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data, u16 length) { - u32 index; vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t *engine; - vnet_crypto_key_t *key, **kp; + vnet_crypto_key_t *key; vnet_crypto_alg_data_t *ad = cm->algs + alg; - u32 alloc_sz = sizeof (vnet_crypto_key_t) + round_pow2 (length, 16); - u8 need_barrier_sync = 0; ASSERT (alg != 0); @@ -407,29 +436,14 @@ vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data, return ~0; } - need_barrier_sync = pool_get_will_expand (cm->keys); - /* If the cm->keys will expand, stop the parade. 
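The crypto_engines_config handler above registers a "crypto-engines" early-config section with a "default" sub-block plus per-engine enable/disable directives. A hypothetical startup.conf stanza this parser accepts; the engine names are examples and only the keywords come from the code:

crypto-engines {
  default { disable }
  openssl { enable }
  ipsecmb { enable }
}

With default { disable }, engines that are not explicitly enabled are skipped at load time (see the vnet_crypto_load_engines change further down in this patch).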
*/ - if (need_barrier_sync) - vlib_worker_thread_barrier_sync (vm); + key = vnet_crypoto_key_alloc (length); + key->alg = alg; - pool_get (cm->keys, kp); - - if (need_barrier_sync) - vlib_worker_thread_barrier_release (vm); - - key = clib_mem_alloc_aligned (alloc_sz, _Alignof (vnet_crypto_key_t)); - kp[0] = key; - index = kp - cm->keys; - *key = (vnet_crypto_key_t){ - .index = index, - .alg = alg, - .length = length, - }; clib_memcpy (key->data, data, length); vec_foreach (engine, cm->engines) if (engine->key_op_handler) - engine->key_op_handler (VNET_CRYPTO_KEY_OP_ADD, index); - return index; + engine->key_op_handler (VNET_CRYPTO_KEY_OP_ADD, key->index); + return key->index; } void @@ -478,10 +492,9 @@ vnet_crypto_key_add_linked (vlib_main_t * vm, vnet_crypto_key_index_t index_crypto, vnet_crypto_key_index_t index_integ) { - u32 index, need_barrier_sync; vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t *engine; - vnet_crypto_key_t *key_crypto, *key_integ, *key, **kp; + vnet_crypto_key_t *key_crypto, *key_integ, *key; vnet_crypto_alg_t linked_alg; key_crypto = cm->keys[index_crypto]; @@ -491,33 +504,17 @@ vnet_crypto_key_add_linked (vlib_main_t * vm, if (linked_alg == ~0) return ~0; - need_barrier_sync = pool_get_will_expand (cm->keys); - /* If the cm->keys will expand, stop the parade. */ - if (need_barrier_sync) - vlib_worker_thread_barrier_sync (vm); - - pool_get (cm->keys, kp); - - if (need_barrier_sync) - vlib_worker_thread_barrier_release (vm); - - key = clib_mem_alloc_aligned (sizeof (vnet_crypto_key_t), - _Alignof (vnet_crypto_key_t)); - kp[0] = key; - index = kp - cm->keys; - *key = (vnet_crypto_key_t){ - .index = index, - .is_link = 1, - .index_crypto = index_crypto, - .index_integ = index_integ, - .alg = linked_alg, - }; + key = vnet_crypoto_key_alloc (0); + key->is_link = 1; + key->index_crypto = index_crypto; + key->index_integ = index_integ; + key->alg = linked_alg; vec_foreach (engine, cm->engines) if (engine->key_op_handler) - engine->key_op_handler (VNET_CRYPTO_KEY_OP_ADD, index); + engine->key_op_handler (VNET_CRYPTO_KEY_OP_ADD, key->index); - return index; + return key->index; } u32 @@ -569,11 +566,14 @@ static void vnet_crypto_load_engines (vlib_main_t *vm) { vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_crypto_main_t *cm = &crypto_main; + vnet_crypto_config_t *pc; u8 *path; char *p; u32 path_len; struct dirent *entry; DIR *dp; + uword *config_index; path = os_get_exec_path (); log_debug ("exec path is %s", path); @@ -628,6 +628,31 @@ vnet_crypto_load_engines (vlib_main_t *vm) continue; } + /* follow crypto-engines config section directive */ + config_index = hash_get_mem (cm->config_index_by_name, r->name); + if (config_index) + { + pc = vec_elt_at_index (cm->configs, config_index[0]); + if (pc->is_disabled) + { + log_notice ("crypto disabled: %s", r->name); + dlclose (handle); + continue; + } + if (cm->default_disabled && pc->is_enabled == 0) + { + log_notice ("crypto disabled (default): %s", r->name); + dlclose (handle); + continue; + } + } + else if (cm->default_disabled) + { + log_notice ("crypto disabled (default): %s", r->name); + dlclose (handle); + continue; + } + if (r->per_thread_data_sz) { u64 sz = diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h index a4b6ab97620..a56f4c42c0b 100644 --- a/src/vnet/crypto/crypto.h +++ b/src/vnet/crypto/crypto.h @@ -326,7 +326,7 @@ typedef struct vnet_crypto_async_frame_elt_t elts[VNET_CRYPTO_FRAME_SIZE]; u32 buffer_indices[VNET_CRYPTO_FRAME_SIZE]; u16 
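vnet_crypoto_key_alloc above serializes pool_get on cm->keys with a one-byte test-and-test-and-set lock instead of the old worker-barrier sync. A standalone sketch of that locking pattern built from the same GCC atomic builtins (the CLIB_PAUSE () relax hint is omitted here); the function names are illustrative:

#include <stdint.h>

static void
ttas_lock (uint8_t *lock)
{
  uint8_t expected = 0;
  /* attempt to flip 0 -> 1 with acquire semantics */
  while (!__atomic_compare_exchange_n (lock, &expected, 1, 0,
				       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
    {
      /* spin on plain loads so the cache line is not hammered by CAS;
       * the code above additionally calls CLIB_PAUSE () in this loop */
      while (__atomic_load_n (lock, __ATOMIC_RELAXED))
	;
      expected = 0;
    }
}

static void
ttas_unlock (uint8_t *lock)
{
  __atomic_store_n (lock, 0, __ATOMIC_RELEASE);
}

Note that only the pool slot reservation is held under the lock; the key memory itself is allocated with clib_mem_alloc_aligned after the lock is released.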
next_node_index[VNET_CRYPTO_FRAME_SIZE]; - u32 enqueue_thread_index; + clib_thread_index_t enqueue_thread_index; } vnet_crypto_async_frame_t; typedef struct @@ -353,9 +353,9 @@ typedef void (vnet_crypto_key_fn_t) (vnet_crypto_key_op_t kop, /** async crypto function handlers **/ typedef int (vnet_crypto_frame_enq_fn_t) (vlib_main_t *vm, vnet_crypto_async_frame_t *frame); -typedef vnet_crypto_async_frame_t * - (vnet_crypto_frame_dequeue_t) (vlib_main_t * vm, u32 * nb_elts_processed, - u32 * enqueue_thread_idx); +typedef vnet_crypto_async_frame_t *( + vnet_crypto_frame_dequeue_t) (vlib_main_t *vm, u32 *nb_elts_processed, + clib_thread_index_t *enqueue_thread_idx); u32 vnet_crypto_register_engine (vlib_main_t * vm, char *name, int prio, @@ -420,16 +420,28 @@ typedef struct typedef struct { - vnet_crypto_alg_data_t algs[VNET_CRYPTO_N_ALGS]; + char *name; + u8 is_disabled; + u8 is_enabled; +} vnet_crypto_config_t; + +typedef struct +{ + vnet_crypto_key_t **keys; + u8 keys_lock; + u32 crypto_node_index; vnet_crypto_thread_t *threads; vnet_crypto_frame_dequeue_t **dequeue_handlers; - vnet_crypto_op_data_t opt_data[VNET_CRYPTO_N_OP_IDS]; vnet_crypto_engine_t *engines; - vnet_crypto_key_t **keys; + /* configs and hash by name */ + vnet_crypto_config_t *configs; + uword *config_index_by_name; uword *engine_index_by_name; uword *alg_index_by_name; vnet_crypto_async_next_node_t *next_nodes; - u32 crypto_node_index; + vnet_crypto_alg_data_t algs[VNET_CRYPTO_N_ALGS]; + vnet_crypto_op_data_t opt_data[VNET_CRYPTO_N_OP_IDS]; + u8 default_disabled; } vnet_crypto_main_t; extern vnet_crypto_main_t crypto_main; diff --git a/src/vnet/crypto/node.c b/src/vnet/crypto/node.c index 7d023f3ff9d..c0d258ae963 100644 --- a/src/vnet/crypto/node.c +++ b/src/vnet/crypto/node.c @@ -78,7 +78,7 @@ crypto_dequeue_frame (vlib_main_t * vm, vlib_node_runtime_t * node, { vnet_crypto_main_t *cm = &crypto_main; u32 n_elts = 0; - u32 enqueue_thread_idx = ~0; + clib_thread_index_t enqueue_thread_idx = CLIB_INVALID_THREAD_INDEX; vnet_crypto_async_frame_t *cf = (hdl) (vm, &n_elts, &enqueue_thread_idx); *n_total += n_elts; diff --git a/src/vnet/dev/bus/pci.c b/src/vnet/dev/bus/pci.c index 4bb8660f4b4..a8d374f9510 100644 --- a/src/vnet/dev/bus/pci.c +++ b/src/vnet/dev/bus/pci.c @@ -6,7 +6,7 @@ #include <vnet/dev/dev.h> #include <vnet/dev/bus/pci.h> #include <vnet/dev/log.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> VLIB_REGISTER_LOG_CLASS (dev_log, static) = { .class_name = "dev", @@ -318,7 +318,8 @@ vnet_dev_pci_msix_add_handler (vlib_main_t *vm, vnet_dev_t *dev, void vnet_dev_pci_msix_set_polling_thread (vlib_main_t *vm, vnet_dev_t *dev, - u16 line, u16 thread_index) + u16 line, + clib_thread_index_t thread_index) { vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); u32 index; diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h index f3f7563317e..ad2e793907f 100644 --- a/src/vnet/dev/dev.h +++ b/src/vnet/dev/dev.h @@ -711,7 +711,7 @@ void vnet_dev_poll_port_remove (vlib_main_t *, vnet_dev_port_t *, typedef struct { - u16 thread_index; + clib_thread_index_t thread_index; u8 completed; u8 in_order; vnet_dev_port_t *port; diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c index 944c3ef32fa..7a6f39a6be6 100644 --- a/src/vnet/dev/runtime.c +++ b/src/vnet/dev/runtime.c @@ -55,7 +55,7 @@ static uword vnet_dev_rt_mgmt_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - u16 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vnet_dev_rt_op_t *op, *ops = 
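Throughout this patch, bare u32/u16 thread indices are replaced by clib_thread_index_t, with CLIB_INVALID_THREAD_INDEX as the "no thread" sentinel (used just above in crypto/node.c). A sketch of the definitions this relies on, under the assumption that the vppinfra typedef is a 32-bit alias with an all-ones sentinel; the real definitions live in vppinfra and are not shown in this diff:

typedef u32 clib_thread_index_t;			      /* assumption */
#define CLIB_INVALID_THREAD_INDEX ((clib_thread_index_t) ~0) /* assumption */

static inline int
thread_index_is_valid (clib_thread_index_t ti, u32 n_vlib_mains)
{
  return ti != CLIB_INVALID_THREAD_INDEX && ti < n_vlib_mains;
}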
__atomic_load_n (&rt_ops, __ATOMIC_ACQUIRE); u32 n_pending = 0; uword rv = 0; diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index cadf1f857a6..5c904dffc13 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -81,7 +81,7 @@ vnet_get_aggregate_rx_packets (void) } static inline void -vnet_device_increment_rx_packets (u32 thread_index, u64 count) +vnet_device_increment_rx_packets (clib_thread_index_t thread_index, u64 count) { vnet_device_main_t *vdm = &vnet_device_main; vnet_device_per_worker_data_t *pwd; diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index 027e1ed4e74..730c1ff17d8 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -262,7 +262,7 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node, int checksum_offload_enabled, int packed) { vnet_main_t *vnm = vnet_get_main (); - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; uword n_trace = vlib_get_trace_count (vm, node); u32 next_index; const int hdr_sz = vif->virtio_net_hdr_sz; diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index 682ec32ceff..3ac209aa571 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -29,7 +29,7 @@ #include <vlib/vlib.h> #include <vlib/pci/pci.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <vnet/ethernet/ethernet.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c index 5a519d344c1..9f5cb6be059 100644 --- a/src/vnet/dpo/interface_rx_dpo.c +++ b/src/vnet/dpo/interface_rx_dpo.c @@ -242,7 +242,7 @@ interface_rx_dpo_inline (vlib_main_t * vm, u8 is_l2) { u32 n_left_from, next_index, * from, * to_next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vnet_interface_main_t *im; im = &vnet_get_main ()->interface_main; diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index 8f2a0de6ea8..f6f9392a42b 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -1030,6 +1030,7 @@ load_balance_module_init (void) * This should never be used, but just in case, stack it on a drop. 
*/ lbi = load_balance_create(1, DPO_PROTO_IP4, 0); + ASSERT(0 == lbi); load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4)); load_balance_logger = @@ -1038,6 +1039,12 @@ load_balance_module_init (void) load_balance_map_module_init(); } +void +load_balance_pool_alloc (uword size) +{ + pool_alloc_aligned(load_balance_pool, size, CLIB_CACHE_LINE_BYTES); +} + static clib_error_t * load_balance_show (vlib_main_t * vm, unformat_input_t * input, diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h index eee073f5892..76aa7982401 100644 --- a/src/vnet/dpo/load_balance.h +++ b/src/vnet/dpo/load_balance.h @@ -260,5 +260,6 @@ load_balance_get_bucket_i (const load_balance_t *lb, } extern void load_balance_module_init(void); +extern void load_balance_pool_alloc (uword size); #endif diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 9ce94eebe5c..265f3d93023 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -290,7 +290,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, int table_from_interface) { u32 n_left_from, next_index, * from, * to_next; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; from = vlib_frame_vector_args (from_frame); @@ -647,7 +647,7 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, { vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, next_index, * from, * to_next; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -989,7 +989,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm, int table_from_interface) { u32 n_left_from, next_index, * from, * to_next; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; from = vlib_frame_vector_args (from_frame); diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index 0474fd82984..07254f5cf6a 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -742,7 +742,7 @@ replicate_inline (vlib_main_t * vm, vlib_combined_counter_main_t * cm = &replicate_main.repm_counters; replicate_main_t * rm = &replicate_main; u32 n_left_from, * from, * to_next, next_index; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index f1bb6b81070..39073d761ea 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -507,7 +507,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, u32 n_left_from, *from; u32 next_index = 0; u32 n_bytes; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; l2_input_config_t *config; diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index 03cbdde1c2b..2d7f0913994 100644 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -1218,7 +1218,7 @@ ethernet_input_inline (vlib_main_t * vm, vlib_node_runtime_t *error_node; u32 n_left_from, next_index, *to_next; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; - u32 thread_index = 
vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 cached_sw_if_index = ~0; u32 cached_is_l2 = 0; /* shut up gcc */ vnet_hw_interface_t *hi = NULL; /* used for main interface only */ diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c index 3d81e99cff2..140a3fd95d3 100644 --- a/src/vnet/ethernet/p2p_ethernet_input.c +++ b/src/vnet/ethernet/p2p_ethernet_input.c @@ -63,7 +63,7 @@ VLIB_NODE_FN (p2p_ethernet_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 n_trace = vlib_get_trace_count (vm, node); u32 n_left_from, *from, *to_next; u32 next_index; diff --git a/src/vnet/feature/feature.api b/src/vnet/feature/feature.api index 7b52a6630cb..e3054b53b9d 100644 --- a/src/vnet/feature/feature.api +++ b/src/vnet/feature/feature.api @@ -38,6 +38,25 @@ autoreply define feature_enable_disable { string feature_name[64]; }; +autoendian define feature_is_enabled +{ + u32 client_index; + u32 context; + + string arc_name[64]; + string feature_name[64]; + vl_api_interface_index_t sw_if_index; +}; + +autoendian define feature_is_enabled_reply +{ + u32 client_index; + u32 context; + i32 retval; + + bool is_enabled; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/feature/feature_api.c b/src/vnet/feature/feature_api.c index a8c74277788..152e44bd288 100644 --- a/src/vnet/feature/feature_api.c +++ b/src/vnet/feature/feature_api.c @@ -74,6 +74,30 @@ vl_api_feature_enable_disable_t_handler (vl_api_feature_enable_disable_t * mp) REPLY_MACRO (VL_API_FEATURE_ENABLE_DISABLE_REPLY); } +static void +vl_api_feature_is_enabled_t_handler (vl_api_feature_is_enabled_t *mp) +{ + vl_api_feature_is_enabled_reply_t *rmp = NULL; + i32 rv = 0; + bool is_enabled = false; + + VALIDATE_SW_IF_INDEX_END (mp); + + u8 *arc_name = format (0, "%s%c", mp->arc_name, 0); + u8 *feature_name = format (0, "%s%c", mp->feature_name, 0); + + is_enabled = vnet_feature_is_enabled ( + (const char *) arc_name, (const char *) feature_name, mp->sw_if_index); + + vec_free (feature_name); + vec_free (arc_name); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO2_END (VL_API_FEATURE_IS_ENABLED_REPLY, + ({ rmp->is_enabled = is_enabled; })); +} + #include <vnet/feature/feature.api.c> static clib_error_t * diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index adf880b8bbb..c86941fce9a 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -1772,6 +1772,12 @@ fib_entry_module_init (void) fib_entry_track_module_init(); } +void +fib_entry_pool_alloc (uword size) +{ + pool_alloc(fib_entry_pool, size); +} + fib_route_path_t * fib_entry_encode (fib_node_index_t fib_entry_index) { diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 7331f803ec4..2c88d1e5f6a 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -480,6 +480,7 @@ extern void fib_entry_set_flow_hash_config(fib_node_index_t fib_entry_index, flow_hash_config_t hash_config); extern void fib_entry_module_init(void); +extern void fib_entry_pool_alloc(uword size); extern u32 fib_entry_get_stats_index(fib_node_index_t fib_entry_index); diff --git a/src/vnet/gso/node.c b/src/vnet/gso/node.c index c1d4459476e..c4f4b74cd92 100644 --- a/src/vnet/gso/node.c +++ b/src/vnet/gso/node.c @@ -471,7 +471,7 @@ drop_one_buffer_and_count (vlib_main_t * vm, vnet_main_t * vnm, vlib_node_runtime_t * node, u32 * pbi0, u32 sw_if_index, u32 
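The new feature_is_enabled message above is a thin wrapper over vnet_feature_is_enabled (), taking an arc name, a feature name and an interface index. A minimal sketch of calling the same function directly from C, with the signature the handler uses; the arc and feature names here are examples only:

#include <vnet/feature/feature.h>

static int
my_feature_enabled (u32 sw_if_index)
{
  /* non-zero when "my-feature" is enabled on the ip4-unicast arc */
  return vnet_feature_is_enabled ("ip4-unicast", "my-feature", sw_if_index);
}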
drop_error_code) { - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vlib_simple_counter_main_t *cm; cm = @@ -498,7 +498,7 @@ vnet_gso_node_inline (vlib_main_t * vm, u32 *from = vlib_frame_vector_args (frame); u32 n_left_from = frame->n_vectors; u32 *from_end = from + n_left_from; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vnet_interface_main_t *im = &vnm->interface_main; vnet_interface_per_thread_data_t *ptd = vec_elt_at_index (im->per_thread_data, thread_index); diff --git a/src/vnet/interface.h b/src/vnet/interface.h index eb557fdef8b..81d7729700b 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -615,7 +615,7 @@ typedef struct u32 dev_instance; /* index of thread pollling this queue */ - u32 thread_index; + clib_thread_index_t thread_index; /* file index of queue interrupt line */ u32 file_index; diff --git a/src/vnet/interface/rx_queue.c b/src/vnet/interface/rx_queue.c index b1fc82f38e9..84068a05ea5 100644 --- a/src/vnet/interface/rx_queue.c +++ b/src/vnet/interface/rx_queue.c @@ -16,7 +16,7 @@ #include <vnet/vnet.h> #include <vnet/devices/devices.h> #include <vnet/interface/rx_queue_funcs.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = { .class_name = "interface", @@ -27,7 +27,7 @@ VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = { #define log_err(fmt, ...) vlib_log_err (if_rxq_log.class, fmt, __VA_ARGS__) static u32 -next_thread_index (vnet_main_t *vnm, u32 thread_index) +next_thread_index (vnet_main_t *vnm, clib_thread_index_t thread_index) { vnet_device_main_t *vdm = &vnet_device_main; if (vdm->first_worker_thread_index == 0) @@ -62,7 +62,7 @@ vnet_hw_if_get_rx_queue_index_by_id (vnet_main_t *vnm, u32 hw_if_index, u32 vnet_hw_if_register_rx_queue (vnet_main_t *vnm, u32 hw_if_index, u32 queue_id, - u32 thread_index) + clib_thread_index_t thread_index) { vnet_interface_main_t *im = &vnm->interface_main; vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); @@ -225,7 +225,7 @@ vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm, u32 queue_index) void vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index, - u32 thread_index) + clib_thread_index_t thread_index) { vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); diff --git a/src/vnet/interface/rx_queue_funcs.h b/src/vnet/interface/rx_queue_funcs.h index 906d7118296..4295463f4b9 100644 --- a/src/vnet/interface/rx_queue_funcs.h +++ b/src/vnet/interface/rx_queue_funcs.h @@ -20,7 +20,8 @@ u32 vnet_hw_if_get_rx_queue_index_by_id (vnet_main_t *vnm, u32 hw_if_index, u32 queue_id); u32 vnet_hw_if_register_rx_queue (vnet_main_t *vnm, u32 hw_if_index, - u32 queue_id, u32 thread_idnex); + u32 queue_id, + clib_thread_index_t thread_index); void vnet_hw_if_unregister_rx_queue (vnet_main_t *vnm, u32 queue_index); void vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index); void vnet_hw_if_set_rx_queue_file_index (vnet_main_t *vnm, u32 queue_index, @@ -32,7 +33,7 @@ int vnet_hw_if_set_rx_queue_mode (vnet_main_t *vnm, u32 queue_index, vnet_hw_if_rx_mode vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm, u32 queue_index); void vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index, - u32 thread_index); + clib_thread_index_t thread_index); vnet_hw_if_rxq_poll_vector_t * vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm, 
vlib_node_runtime_t *node); diff --git a/src/vnet/interface/tx_queue.c b/src/vnet/interface/tx_queue.c index 8a6cd9da304..6c7c938c576 100644 --- a/src/vnet/interface/tx_queue.c +++ b/src/vnet/interface/tx_queue.c @@ -107,7 +107,7 @@ vnet_hw_if_unregister_all_tx_queues (vnet_main_t *vnm, u32 hw_if_index) void vnet_hw_if_tx_queue_assign_thread (vnet_main_t *vnm, u32 queue_index, - u32 thread_index) + clib_thread_index_t thread_index) { vnet_hw_if_tx_queue_t *txq = vnet_hw_if_get_tx_queue (vnm, queue_index); vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, txq->hw_if_index); @@ -122,7 +122,7 @@ vnet_hw_if_tx_queue_assign_thread (vnet_main_t *vnm, u32 queue_index, void vnet_hw_if_tx_queue_unassign_thread (vnet_main_t *vnm, u32 queue_index, - u32 thread_index) + clib_thread_index_t thread_index) { vnet_hw_if_tx_queue_t *txq = vnet_hw_if_get_tx_queue (vnm, queue_index); vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, txq->hw_if_index); diff --git a/src/vnet/interface/tx_queue_funcs.h b/src/vnet/interface/tx_queue_funcs.h index 8fcf7c336a8..14792e0f023 100644 --- a/src/vnet/interface/tx_queue_funcs.h +++ b/src/vnet/interface/tx_queue_funcs.h @@ -13,9 +13,9 @@ u32 vnet_hw_if_register_tx_queue (vnet_main_t *vnm, u32 hw_if_index, void vnet_hw_if_unregister_tx_queue (vnet_main_t *vnm, u32 queue_index); void vnet_hw_if_unregister_all_tx_queues (vnet_main_t *vnm, u32 hw_if_index); void vnet_hw_if_tx_queue_assign_thread (vnet_main_t *vnm, u32 queue_index, - u32 thread_index); + clib_thread_index_t thread_index); void vnet_hw_if_tx_queue_unassign_thread (vnet_main_t *vnm, u32 queue_index, - u32 thread_index); + clib_thread_index_t thread_index); /* inline functions */ diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 65f3a02c86b..d835a36f46d 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -1330,7 +1330,7 @@ vl_api_sw_interface_set_tx_placement_t_handler ( size = mp->array_size; for (u32 i = 0; i < size; i++) { - u32 thread_index = mp->threads[i]; + clib_thread_index_t thread_index = mp->threads[i]; bitmap = clib_bitmap_set (bitmap, thread_index, 1); } diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index cc214c10f6b..4d3c98f6aa5 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -1694,7 +1694,7 @@ VLIB_CLI_COMMAND (show_interface_rx_placement, static) = { }; clib_error_t * set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id, - u32 thread_index, u8 is_main) + clib_thread_index_t thread_index, u8 is_main) { vnet_main_t *vnm = vnet_get_main (); vnet_device_main_t *vdm = &vnet_device_main; @@ -1731,7 +1731,7 @@ set_interface_rx_placement (vlib_main_t *vm, unformat_input_t *input, vnet_main_t *vnm = vnet_get_main (); u32 hw_if_index = (u32) ~ 0; u32 queue_id = (u32) 0; - u32 thread_index = (u32) ~ 0; + clib_thread_index_t thread_index = CLIB_INVALID_THREAD_INDEX; u8 is_main = 0; if (!unformat_user (input, unformat_line_input, line_input)) @@ -1831,11 +1831,12 @@ set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap) vlib_thread_main_t *vtm = vlib_get_thread_main (); vnet_hw_if_tx_queue_t *txq; u32 queue_index; - u32 thread_index; + clib_thread_index_t thread_index; /* highest set bit in bitmap should not exceed last worker thread index */ thread_index = clib_bitmap_last_set (bitmap); - if ((thread_index != ~0) && (thread_index >= vtm->n_vlib_mains)) + if ((thread_index != CLIB_INVALID_THREAD_INDEX) && + (thread_index >= vtm->n_vlib_mains)) return VNET_API_ERROR_INVALID_VALUE; queue_index = 
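Both the tx-placement API handler and the CLI above collect the requested thread indices into a clib bitmap and reject the request when the highest set bit reaches past the last vlib main. A small sketch of that build-and-validate step using only the bitmap calls visible in those hunks; the helper name is illustrative:

#include <vppinfra/bitmap.h>

static uword *
tx_threads_to_bitmap (clib_thread_index_t *threads, u32 n_threads,
		      u32 n_vlib_mains)
{
  uword *bitmap = 0;
  clib_thread_index_t last;

  for (u32 i = 0; i < n_threads; i++)
    bitmap = clib_bitmap_set (bitmap, threads[i], 1);

  /* highest set bit must not exceed the last worker thread index */
  last = clib_bitmap_last_set (bitmap);
  if (last != CLIB_INVALID_THREAD_INDEX && last >= n_vlib_mains)
    {
      clib_bitmap_free (bitmap);
      return 0;
    }
  return bitmap;
}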
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h index 511df4920e4..54e789679b0 100644 --- a/src/vnet/interface_funcs.h +++ b/src/vnet/interface_funcs.h @@ -426,7 +426,8 @@ clib_error_t *set_hw_interface_change_rx_mode (vnet_main_t * vnm, /* Set rx-placement on the interface */ clib_error_t *set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id, - u32 thread_index, u8 is_main); + clib_thread_index_t thread_index, + u8 is_main); /* Set tx-queue placement on the interface */ int set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap); diff --git a/src/vnet/interface_test.c b/src/vnet/interface_test.c index 2d0c0ee81d1..f2889fd45fc 100644 --- a/src/vnet/interface_test.c +++ b/src/vnet/interface_test.c @@ -911,7 +911,7 @@ vl_api_sw_interface_tx_placement_details_t_handler ( for (u32 i = 0; i < size; i++) { - u32 thread_index = ntohl (mp->threads[i]); + clib_thread_index_t thread_index = ntohl (mp->threads[i]); bitmap = clib_bitmap_set (bitmap, thread_index, 1); } diff --git a/src/vnet/ip-neighbor/ip4_neighbor.c b/src/vnet/ip-neighbor/ip4_neighbor.c index 61b9e768fe5..1d8d39ddcb8 100644 --- a/src/vnet/ip-neighbor/ip4_neighbor.c +++ b/src/vnet/ip-neighbor/ip4_neighbor.c @@ -56,7 +56,7 @@ VLIB_REGISTER_LOG_CLASS (ip4_neighbor_log, static) = { vlib_log_debug (ip4_neighbor_log.class, fmt, __VA_ARGS__) void -ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index, +ip4_neighbor_probe_dst (u32 sw_if_index, clib_thread_index_t thread_index, const ip4_address_t *dst) { ip4_address_t src; @@ -74,7 +74,8 @@ ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index, void ip4_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index, - u32 thread_index, const ip4_address_t *addr) + clib_thread_index_t thread_index, + const ip4_address_t *addr) { vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index); ip4_main_t *i4m = &ip4_main; @@ -142,7 +143,7 @@ ip4_arp_inline (vlib_main_t * vm, vnet_main_t *vnm = vnet_get_main (); u32 *from, *to_next_drop; uword n_left_from, n_left_to_next_drop, next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u64 seed; if (node->flags & VLIB_NODE_FLAG_TRACE) diff --git a/src/vnet/ip-neighbor/ip4_neighbor.h b/src/vnet/ip-neighbor/ip4_neighbor.h index 7941ebdbced..3327c525d68 100644 --- a/src/vnet/ip-neighbor/ip4_neighbor.h +++ b/src/vnet/ip-neighbor/ip4_neighbor.h @@ -20,10 +20,12 @@ #include <vnet/ethernet/arp_packet.h> #include <vnet/ip-neighbor/ip_neighbor_types.h> -extern void ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index, +extern void ip4_neighbor_probe_dst (u32 sw_if_index, + clib_thread_index_t thread_index, const ip4_address_t *dst); extern void ip4_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, - u32 sw_if_index, u32 thread_index, + u32 sw_if_index, + clib_thread_index_t thread_index, const ip4_address_t *addr); always_inline vlib_buffer_t * diff --git a/src/vnet/ip-neighbor/ip6_neighbor.c b/src/vnet/ip-neighbor/ip6_neighbor.c index ca8aed3d4ca..79a4a30ff53 100644 --- a/src/vnet/ip-neighbor/ip6_neighbor.c +++ b/src/vnet/ip-neighbor/ip6_neighbor.c @@ -32,7 +32,7 @@ VLIB_REGISTER_LOG_CLASS (ip6_neighbor_log, static) = { #define log_debug(fmt, ...) 
\ vlib_log_debug (ip6_neighbor_log.class, fmt, __VA_ARGS__) void -ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index, +ip6_neighbor_probe_dst (u32 sw_if_index, clib_thread_index_t thread_index, const ip6_address_t *dst) { ip6_address_t src; @@ -45,7 +45,8 @@ ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index, void ip6_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index, - u32 thread_index, const ip6_address_t *addr) + clib_thread_index_t thread_index, + const ip6_address_t *addr) { vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index); ip6_main_t *i6m = &ip6_main; @@ -129,7 +130,7 @@ ip6_discover_neighbor_inline (vlib_main_t * vm, u32 *from, *to_next_drop; uword n_left_from, n_left_to_next_drop; u64 seed; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; if (node->flags & VLIB_NODE_FLAG_TRACE) ip6_forward_next_trace (vm, node, frame, VLIB_TX); diff --git a/src/vnet/ip-neighbor/ip6_neighbor.h b/src/vnet/ip-neighbor/ip6_neighbor.h index c6e718dc2ff..31dc1eab033 100644 --- a/src/vnet/ip-neighbor/ip6_neighbor.h +++ b/src/vnet/ip-neighbor/ip6_neighbor.h @@ -31,15 +31,17 @@ extern vlib_packet_template_t ip6_neighbor_packet_template; extern void ip6_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, - u32 sw_if_index, u32 thread_index, + u32 sw_if_index, + clib_thread_index_t thread_index, const ip6_address_t *addr); -extern void ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index, +extern void ip6_neighbor_probe_dst (u32 sw_if_index, + clib_thread_index_t thread_index, const ip6_address_t *dst); always_inline vlib_buffer_t * ip6_neighbor_probe (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index, - u32 thread_index, const ip6_address_t *src, + clib_thread_index_t thread_index, const ip6_address_t *src, const ip6_address_t *dst) { icmp6_neighbor_solicitation_header_t *h0; diff --git a/src/vnet/ip-neighbor/ip_neighbor.c b/src/vnet/ip-neighbor/ip_neighbor.c index 614b78489cd..73fa0b30317 100644 --- a/src/vnet/ip-neighbor/ip_neighbor.c +++ b/src/vnet/ip-neighbor/ip_neighbor.c @@ -1092,7 +1092,7 @@ ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft) } void -ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index, +ip_neighbor_probe_dst (u32 sw_if_index, clib_thread_index_t thread_index, ip_address_family_t af, const ip46_address_t *dst) { if (!vnet_sw_interface_is_admin_up (vnet_get_main (), sw_if_index)) diff --git a/src/vnet/ip-neighbor/ip_neighbor.h b/src/vnet/ip-neighbor/ip_neighbor.h index cc888ba2054..813c2bb1e2d 100644 --- a/src/vnet/ip-neighbor/ip_neighbor.h +++ b/src/vnet/ip-neighbor/ip_neighbor.h @@ -56,7 +56,8 @@ extern void ip_neighbor_learn (const ip_neighbor_learn_t * l); extern void ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai); extern void ip_neighbor_probe (const ip_adjacency_t * adj); -extern void ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index, +extern void ip_neighbor_probe_dst (u32 sw_if_index, + clib_thread_index_t thread_index, ip_address_family_t af, const ip46_address_t *ip); diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c index fa4a0e12276..acbe06bfc1d 100644 --- a/src/vnet/ip/icmp4.c +++ b/src/vnet/ip/icmp4.c @@ -251,7 +251,7 @@ ip4_icmp_error (vlib_main_t * vm, u32 *from, *to_next; uword n_left_from, n_left_to_next; ip4_icmp_error_next_t next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git 
a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c index f93ebce4bf1..b37554c3d78 100644 --- a/src/vnet/ip/icmp6.c +++ b/src/vnet/ip/icmp6.c @@ -292,7 +292,7 @@ ip6_icmp_error (vlib_main_t * vm, u32 *from, *to_next; uword n_left_from, n_left_to_next; ip6_icmp_error_next_t next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 81d6cd1a0bd..cabefd81230 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -117,7 +117,7 @@ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm, { vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left, *from; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; u16 nexts[VLIB_FRAME_SIZE], *next; @@ -2113,7 +2113,7 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_node_get_runtime (vm, ip4_input_node.index); n_left_from = frame->n_vectors; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vlib_get_buffers (vm, from, bufs, n_left_from); clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from); diff --git a/src/vnet/ip/ip4_forward.h b/src/vnet/ip/ip4_forward.h index 54150d4dab4..8122d57e653 100644 --- a/src/vnet/ip/ip4_forward.h +++ b/src/vnet/ip/ip4_forward.h @@ -59,7 +59,7 @@ ip4_lookup_inline (vlib_main_t * vm, ip4_main_t *im = &ip4_main; vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; u32 n_left, *from; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; vlib_buffer_t **b = bufs; u16 nexts[VLIB_FRAME_SIZE], *next; diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c index af2b89ab2ec..28b9fb8b279 100644 --- a/src/vnet/ip/ip4_input.c +++ b/src/vnet/ip/ip4_input.c @@ -96,7 +96,7 @@ ip4_input_check_sw_if_index (vlib_main_t * vm, { ip4_main_t *im = &ip4_main; ip_lookup_main_t *lm = &im->lookup_main; - u32 thread_index; + clib_thread_index_t thread_index; if (*last_sw_if_index == sw_if_index) { (*cnt)++; @@ -125,7 +125,7 @@ ip4_input_inline (vlib_main_t * vm, { vnet_main_t *vnm = vnet_get_main (); u32 n_left_from, *from; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); vlib_simple_counter_main_t *cm; diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 00855f7db43..df70dc9edca 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -190,7 +190,7 @@ ip4_mtrie_8_init (ip4_mtrie_8_t *m) { ip4_mtrie_8_ply_t *root; - pool_get (ip4_ply_pool, root); + pool_get_aligned (ip4_ply_pool, root, CLIB_CACHE_LINE_BYTES); m->root_ply = root - ip4_ply_pool; ply_8_init (root, IP4_MTRIE_LEAF_EMPTY, 0, 0); @@ -853,13 +853,19 @@ ip4_mtrie_module_init (vlib_main_t * vm) clib_error_t *error = NULL; /* Burn one ply so index 0 is taken */ - pool_get (ip4_ply_pool, p); + pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES); return (error); } VLIB_INIT_FUNCTION (ip4_mtrie_module_init); +void +ip4_mtrie_pool_alloc (uword size) +{ + pool_alloc_aligned (ip4_ply_pool, size, CLIB_CACHE_LINE_BYTES); +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h index 
16c524745be..2631f07eb2b 100644 --- a/src/vnet/ip/ip4_mtrie.h +++ b/src/vnet/ip/ip4_mtrie.h @@ -179,6 +179,11 @@ format_function_t format_ip4_mtrie_8; extern ip4_mtrie_8_ply_t *ip4_ply_pool; /** + * @brief Pre-allocate the pool of plys + */ +extern void ip4_mtrie_pool_alloc (uword size); + +/** * Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index) */ always_inline u32 diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h index d356fd5411c..3c14a59f174 100644 --- a/src/vnet/ip/ip4_to_ip6.h +++ b/src/vnet/ip/ip4_to_ip6.h @@ -37,6 +37,20 @@ static u8 icmp_to_icmp6_updater_pointer_table[] = #define frag_id_4to6(id) (id) +always_inline u64 +icmp_type_is_error_message (u8 icmp_type) +{ + int bmp = 0; + bmp |= 1 << ICMP4_destination_unreachable; + bmp |= 1 << ICMP4_time_exceeded; + bmp |= 1 << ICMP4_parameter_problem; + bmp |= 1 << ICMP4_source_quench; + bmp |= 1 << ICMP4_redirect; + bmp |= 1 << ICMP4_alternate_host_address; + + return (1ULL << icmp_type) & bmp; +} + /** * @brief Get TCP/UDP port number or ICMP id from IPv4 packet. * @@ -70,9 +84,14 @@ ip4_get_port (ip4_header_t *ip, u8 sender) * - outer ICMP header length (2*sizeof (icmp46_header_t)) * - inner IP header length * - first 8 bytes of payload of original packet in case of ICMP error + * + * Also make sure we only attempt to parse payload as IP packet if it's + * an ICMP error. */ else if (clib_net_to_host_u16 (ip->length) >= - 2 * sizeof (ip4_header_t) + 2 * sizeof (icmp46_header_t) + 8) + 2 * sizeof (ip4_header_t) + 2 * sizeof (icmp46_header_t) + + 8 && + icmp_type_is_error_message (icmp->type)) { ip = (ip4_header_t *) (icmp + 2); if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 31adc90ecab..3c1f40beff5 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -750,7 +750,7 @@ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm, { vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left, *from; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; ip6_main_t *im = &ip6_main; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; u16 nexts[VLIB_FRAME_SIZE], *next; @@ -1781,7 +1781,7 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { diff --git a/src/vnet/ip/ip6_forward.h b/src/vnet/ip/ip6_forward.h index 8e5dd256ceb..71b6cc9ae04 100644 --- a/src/vnet/ip/ip6_forward.h +++ b/src/vnet/ip/ip6_forward.h @@ -60,7 +60,7 @@ ip6_lookup_inline (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c index ae59b765d2e..a79a17ca64a 100644 --- a/src/vnet/ip/ip6_input.c +++ b/src/vnet/ip/ip6_input.c @@ -73,7 +73,7 @@ VLIB_NODE_FN (ip6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_input_node.index); vlib_simple_counter_main_t *cm; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); 
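icmp_type_is_error_message () above packs the six ICMPv4 error types into a small bitmap so the length guard in ip4_get_port () only parses the inner packet for genuine ICMP errors. The same check written out long-hand as a standalone function, using the standard ICMPv4 type numbers that the ICMP4_* enum names stand for:

#include <stdbool.h>
#include <stdint.h>

static bool
icmp4_type_is_error (uint8_t type)
{
  switch (type)
    {
    case 3:			/* destination unreachable */
    case 4:			/* source quench */
    case 5:			/* redirect */
    case 6:			/* alternate host address */
    case 11:			/* time exceeded */
    case 12:			/* parameter problem */
      return true;
    default:
      return false;
    }
}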
n_left_from = frame->n_vectors; diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h index ebabcd0b797..931d2da0fa3 100644 --- a/src/vnet/ip/ip6_to_ip4.h +++ b/src/vnet/ip/ip6_to_ip4.h @@ -168,7 +168,19 @@ ip6_get_port (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, if (dst_port) *dst_port = ((u16 *) (icmp))[2]; } - else if (clib_net_to_host_u16 (ip6->payload_length) >= 64) + /* + * if there is enough data and ICMP type indicates ICMP error, then parse + * inner packet + * + * ICMP6 errors are: + * 1 - destination_unreachable + * 2 - packet_too_big + * 3 - time_exceeded + * 4 - parameter_problem + */ + else if (clib_net_to_host_u16 (ip6->payload_length) >= 64 && + icmp->type >= ICMP6_destination_unreachable && + icmp->type <= ICMP6_parameter_problem) { u16 ip6_pay_len; ip6_header_t *inner_ip6; diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c index c2490f196ef..cfc3644a1bf 100644 --- a/src/vnet/ip/ip_init.c +++ b/src/vnet/ip/ip_init.c @@ -38,6 +38,9 @@ */ #include <vnet/ip/ip.h> +#include <vnet/ip/ip4_mtrie.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/dpo/load_balance.h> ip_main_t ip_main; @@ -112,6 +115,39 @@ VLIB_INIT_FUNCTION (ip_main_init) = { "flow_classify_init"), }; +static clib_error_t * +ip_config_init (vlib_main_t *vm, unformat_input_t *input) +{ + uword lbsz = 0, fibentrysz = 0, mtriesz = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "load-balance-pool-size %U", unformat_memory_size, + &lbsz)) + ; + else if (unformat (input, "fib-entry-pool-size %U", unformat_memory_size, + &fibentrysz)) + ; + else if (unformat (input, "ip4-mtrie-pool-size %U", unformat_memory_size, + &mtriesz)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (lbsz) + load_balance_pool_alloc (lbsz); + if (fibentrysz) + fib_entry_pool_alloc (fibentrysz); + if (mtriesz) + ip4_mtrie_pool_alloc (mtriesz); + + return 0; +} + +VLIB_CONFIG_FUNCTION (ip_config_init, "l3fib"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 3c46549634a..220a71ad5df 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -27,13 +27,12 @@ #include <vnet/udp/udp.h> #include <vnet/tcp/tcp.h> #include <vnet/ip/punt.h> -#include <vlib/unix/unix.h> +#include <vlib/file.h> #include <stdio.h> #include <unistd.h> #include <sys/socket.h> #include <sys/uio.h> -#include <stdlib.h> punt_main_t punt_main; diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c index 6400e49c626..9898a663154 100644 --- a/src/vnet/ip/punt_node.c +++ b/src/vnet/ip/punt_node.c @@ -247,7 +247,7 @@ punt_socket_inline2 (vlib_main_t *vm, vlib_node_runtime_t *node, ip_address_family_t af, ip_protocol_t protocol) { u32 *buffers = vlib_frame_vector_args (frame); - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; uword n_packets = frame->n_vectors; punt_main_t *pm = &punt_main; int i; diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c index bab7d479dcf..808acb03ab8 100644 --- a/src/vnet/ip/reass/ip4_full_reass.c +++ b/src/vnet/ip/reass/ip4_full_reass.c @@ -91,7 +91,7 @@ typedef union struct { u32 reass_index; - u32 memory_owner_thread_index; + clib_thread_index_t memory_owner_thread_index; }; u64 as_u64; } ip4_full_reass_val_t; @@ -147,10 +147,10 @@ typedef struct // number of fragments in this reassembly u32 fragments_n; // thread owning memory for this context (whose pool contains this 
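ip_config_init above adds an "l3fib" startup section whose values are parsed with unformat_memory_size and handed straight to the new load_balance_pool_alloc (), fib_entry_pool_alloc () and ip4_mtrie_pool_alloc () helpers, pre-sizing those pools before routes are installed. A hypothetical startup.conf stanza it accepts; the sizes shown are arbitrary examples:

l3fib {
  load-balance-pool-size 1m
  fib-entry-pool-size 2m
  ip4-mtrie-pool-size 512k
}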
ctx) - u32 memory_owner_thread_index; + clib_thread_index_t memory_owner_thread_index; // thread which received fragment with offset 0 and which sends out the // completed reassembly - u32 sendout_thread_index; + clib_thread_index_t sendout_thread_index; } ip4_full_reass_t; typedef struct @@ -246,8 +246,8 @@ typedef struct ip4_full_reass_range_trace_t trace_range; u32 size_diff; u32 op_id; - u32 thread_id; - u32 thread_id_to; + clib_thread_index_t thread_id; + clib_thread_index_t thread_id_to; u32 fragment_first; u32 fragment_last; u32 total_data_len; @@ -345,10 +345,10 @@ format_ip4_full_reass_trace (u8 * s, va_list * args) } static void -ip4_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, - ip4_full_reass_t * reass, u32 bi, +ip4_full_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_full_reass_t *reass, u32 bi, ip4_full_reass_trace_operation_e action, - u32 size_diff, u32 thread_id_to) + u32 size_diff, clib_thread_index_t thread_id_to) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); vnet_buffer_opaque_t *vnb = vnet_buffer (b); @@ -917,11 +917,12 @@ ip4_full_reass_remove_range_from_chain (vlib_main_t * vm, } always_inline ip4_full_reass_rc_t -ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, - ip4_full_reass_main_t * rm, - ip4_full_reass_per_thread_t * rt, - ip4_full_reass_t * reass, u32 * bi0, u32 * next0, - u32 * error0, bool is_custom, u32 * handoff_thread_idx) +ip4_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_full_reass_main_t *rm, + ip4_full_reass_per_thread_t *rt, + ip4_full_reass_t *reass, u32 *bi0, u32 *next0, + u32 *error0, bool is_custom, + clib_thread_index_t *handoff_thread_idx) { vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0); vnet_buffer_opaque_t *fvnb = vnet_buffer (fb); @@ -1256,7 +1257,7 @@ ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } else if (reass) { - u32 handoff_thread_idx; + clib_thread_index_t handoff_thread_idx; u32 counter = ~0; switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0, &error0, CUSTOM == type, diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c index 50b4b22eb60..6d14526e6a7 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.c +++ b/src/vnet/ip/reass/ip4_sv_reass.c @@ -65,7 +65,7 @@ typedef union struct { u32 reass_index; - u32 thread_index; + clib_thread_index_t thread_index; }; u64 as_u64; } ip4_sv_reass_val_t; @@ -1684,7 +1684,7 @@ static char *ip4_sv_reass_handoff_error_strings[] = { typedef struct { - u32 thread_index; + clib_thread_index_t thread_index; } ip4_sv_reass_handoff_trace_t; static u8 * diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c index 69b27c5aa8e..b2934d99721 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.c +++ b/src/vnet/ip/reass/ip6_sv_reass.c @@ -67,7 +67,7 @@ typedef union struct { u32 reass_index; - u32 thread_index; + clib_thread_index_t thread_index; }; u64 as_u64; } ip6_sv_reass_val_t; @@ -1399,7 +1399,7 @@ static char *ip6_sv_reassembly_handoff_error_strings[] = { typedef struct { - u32 thread_index; + clib_thread_index_t thread_index; } ip6_sv_reassembly_handoff_trace_t; static u8 * diff --git a/src/vnet/ipfix-export/flow_report.c b/src/vnet/ipfix-export/flow_report.c index 4eb93520ed8..7d94b4aa6e2 100644 --- a/src/vnet/ipfix-export/flow_report.c +++ b/src/vnet/ipfix-export/flow_report.c @@ -312,7 +312,7 @@ vnet_flow_rewrite_generic_callback (ipfix_exporter_t *exp, flow_report_t *fr, vlib_buffer_t * vnet_ipfix_exp_get_buffer (vlib_main_t *vm, 
ipfix_exporter_t *exp, - flow_report_t *fr, u32 thread_index) + flow_report_t *fr, clib_thread_index_t thread_index) { u32 bi0; vlib_buffer_t *b0; @@ -343,7 +343,8 @@ vnet_ipfix_exp_get_buffer (vlib_main_t *vm, ipfix_exporter_t *exp, void vnet_ipfix_exp_send_buffer (vlib_main_t *vm, ipfix_exporter_t *exp, flow_report_t *fr, flow_report_stream_t *stream, - u32 thread_index, vlib_buffer_t *b0) + clib_thread_index_t thread_index, + vlib_buffer_t *b0) { flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; diff --git a/src/vnet/ipfix-export/flow_report.h b/src/vnet/ipfix-export/flow_report.h index cd0cafb6158..1a94ce9b9a6 100644 --- a/src/vnet/ipfix-export/flow_report.h +++ b/src/vnet/ipfix-export/flow_report.h @@ -255,7 +255,8 @@ vnet_ipfix_exporter_lookup (const ip_address_t *ipfix_collector); */ vlib_buffer_t *vnet_ipfix_exp_get_buffer (vlib_main_t *vm, ipfix_exporter_t *exp, - flow_report_t *fr, u32 thread_index); + flow_report_t *fr, + clib_thread_index_t thread_index); /* * Send the provided buffer. At this stage the buffer should be populated @@ -265,7 +266,8 @@ vlib_buffer_t *vnet_ipfix_exp_get_buffer (vlib_main_t *vm, void vnet_ipfix_exp_send_buffer (vlib_main_t *vm, ipfix_exporter_t *exp, flow_report_t *fr, flow_report_stream_t *stream, - u32 thread_index, vlib_buffer_t *b0); + clib_thread_index_t thread_index, + vlib_buffer_t *b0); #endif /* __included_vnet_flow_report_h__ */ diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c index a289cc885df..ae9317f446f 100644 --- a/src/vnet/ipip/node.c +++ b/src/vnet/ipip/node.c @@ -66,7 +66,7 @@ ipip_input (vlib_main_t * vm, vlib_node_runtime_t * node, ipip_main_t *gm = &ipip_main; u32 n_left_from, next_index, *from, *to_next, n_left_to_next; u32 tunnel_sw_if_index = ~0; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 len; vnet_interface_main_t *im = &gm->vnet_main->interface_main; diff --git a/src/vnet/ipsec/ah.h b/src/vnet/ipsec/ah.h index 450c9cfd6dc..08842702e76 100644 --- a/src/vnet/ipsec/ah.h +++ b/src/vnet/ipsec/ah.h @@ -74,8 +74,8 @@ ah_decrypt_err_to_sa_err (u32 err) always_inline void ah_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, - u32 thread_index, u32 err, u16 index, u16 *nexts, - u16 drop_next, u32 sa_index) + clib_thread_index_t thread_index, u32 err, + u16 index, u16 *nexts, u16 drop_next, u32 sa_index) { ipsec_set_next_index (b, node, thread_index, err, ah_encrypt_err_to_sa_err (err), index, nexts, @@ -84,8 +84,8 @@ ah_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, always_inline void ah_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, - u32 thread_index, u32 err, u16 index, u16 *nexts, - u16 drop_next, u32 sa_index) + clib_thread_index_t thread_index, u32 err, + u16 index, u16 *nexts, u16 drop_next, u32 sa_index) { ipsec_set_next_index (b, node, thread_index, err, ah_decrypt_err_to_sa_err (err), index, nexts, diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c index ec4db0fed57..e95cc9e851e 100644 --- a/src/vnet/ipsec/ah_decrypt.c +++ b/src/vnet/ipsec/ah_decrypt.c @@ -118,7 +118,7 @@ ah_decrypt_inline (vlib_main_t * vm, int is_ip6) { u32 n_left, *from; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u16 buffer_data_size = vlib_buffer_get_default_data_size (vm); ah_decrypt_packet_data_t pkt_data[VLIB_FRAME_SIZE], *pd = pkt_data; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; @@ -127,7 +127,7 @@ ah_decrypt_inline (vlib_main_t * vm, 
ipsec_per_thread_data_t *ptd = vec_elt_at_index (im->ptd, thread_index); from = vlib_frame_vector_args (from_frame); n_left = from_frame->n_vectors; - ipsec_sa_t *sa0 = 0; + ipsec_sa_inb_rt_t *irt = 0; bool anti_replay_result; u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0; @@ -149,25 +149,25 @@ ah_decrypt_inline (vlib_main_t * vm, current_sa_index, current_sa_pkts, current_sa_bytes); current_sa_index = vnet_buffer (b[0])->ipsec.sad_index; - sa0 = ipsec_sa_get (current_sa_index); + irt = ipsec_sa_get_inb_rt_by_index (current_sa_index); current_sa_bytes = current_sa_pkts = 0; vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index, current_sa_index); } - if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) + if (PREDICT_FALSE ((u16) ~0 == irt->thread_index)) { /* this is the first packet to use this SA, claim the SA * for this thread. this could happen simultaneously on * another thread */ - clib_atomic_cmp_and_swap (&sa0->thread_index, ~0, + clib_atomic_cmp_and_swap (&irt->thread_index, ~0, ipsec_sa_assign_thread (thread_index)); } - if (PREDICT_TRUE (thread_index != sa0->thread_index)) + if (PREDICT_TRUE (thread_index != irt->thread_index)) { - vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index; + vnet_buffer (b[0])->ipsec.thread_index = irt->thread_index; next[0] = AH_DECRYPT_NEXT_HANDOFF; goto next; } @@ -202,16 +202,8 @@ ah_decrypt_inline (vlib_main_t * vm, pd->seq = clib_host_to_net_u32 (ah0->seq_no); /* anti-replay check */ - if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0))) - { - anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( - sa0, pd->seq, ~0, false, &pd->seq_hi, true); - } - else - { - anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( - sa0, pd->seq, ~0, false, &pd->seq_hi, false); - } + anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( + irt, pd->seq, ~0, false, &pd->seq_hi); if (anti_replay_result) { ah_decrypt_set_next_index (b[0], node, vm->thread_index, @@ -223,13 +215,14 @@ ah_decrypt_inline (vlib_main_t * vm, current_sa_bytes += b[0]->current_length; current_sa_pkts += 1; - pd->icv_size = sa0->integ_icv_size; + pd->icv_size = irt->integ_icv_size; pd->nexthdr_cached = ah0->nexthdr; - if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE)) + if (PREDICT_TRUE (irt->integ_icv_size)) { - if (PREDICT_FALSE (ipsec_sa_is_set_USE_ESN (sa0) && - pd->current_data + b[0]->current_length - + sizeof (u32) > buffer_data_size)) + if (PREDICT_FALSE (irt->use_esn && pd->current_data + + b[0]->current_length + + sizeof (u32) > + buffer_data_size)) { ah_decrypt_set_next_index ( b[0], node, vm->thread_index, AH_DECRYPT_ERROR_NO_TAIL_SPACE, @@ -239,16 +232,16 @@ ah_decrypt_inline (vlib_main_t * vm, vnet_crypto_op_t *op; vec_add2_aligned (ptd->integ_ops, op, 1, CLIB_CACHE_LINE_BYTES); - vnet_crypto_op_init (op, sa0->integ_op_id); + vnet_crypto_op_init (op, irt->integ_op_id); op->src = (u8 *) ih4; op->len = b[0]->current_length; op->digest = (u8 *) ih4 - pd->icv_size; op->flags = VNET_CRYPTO_OP_FLAG_HMAC_CHECK; op->digest_len = pd->icv_size; - op->key_index = sa0->integ_key_index; + op->key_index = irt->integ_key_index; op->user_data = b - bufs; - if (ipsec_sa_is_set_USE_ESN (sa0)) + if (irt->use_esn) { u32 seq_hi = clib_host_to_net_u32 (pd->seq_hi); @@ -311,37 +304,21 @@ ah_decrypt_inline (vlib_main_t * vm, if (next[0] < AH_DECRYPT_N_NEXT) goto trace; - sa0 = ipsec_sa_get (pd->sa_index); + irt = ipsec_sa_get_inb_rt_by_index (pd->sa_index); - if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE)) + if 
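Editor's note: both the AH and ESP paths claim an SA for the first worker that sees traffic and hand off packets arriving on other workers. A standalone sketch of that claim-or-handoff pattern, assuming C11 atomics in place of VPP's clib_atomic_cmp_and_swap and using (u16)~0 as the "unowned" marker as in the diff.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define SA_UNOWNED ((uint16_t) ~0)

typedef struct
{
  _Atomic uint16_t thread_index; /* owning worker, SA_UNOWNED until claimed */
} sa_rt_t;

/* Returns the thread that must process this packet: the caller if it wins
 * (or already owns) the SA, otherwise the current owner (handoff target). */
static uint16_t
sa_claim_or_handoff (sa_rt_t *rt, uint16_t this_thread)
{
  uint16_t owner = atomic_load (&rt->thread_index);
  if (owner == SA_UNOWNED)
    {
      uint16_t expected = SA_UNOWNED;
      /* only one racing thread succeeds; the rest observe the winner */
      atomic_compare_exchange_strong (&rt->thread_index, &expected,
                                      this_thread);
      owner = atomic_load (&rt->thread_index);
    }
  return owner;
}

int
main (void)
{
  sa_rt_t rt = { .thread_index = SA_UNOWNED };
  printf ("packet on thread 3 -> process on %u\n", sa_claim_or_handoff (&rt, 3));
  printf ("packet on thread 5 -> process on %u\n", sa_claim_or_handoff (&rt, 5));
  return 0;
}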
(PREDICT_TRUE (irt->integ_icv_size)) { /* redo the anti-reply check. see esp_decrypt for details */ - if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0))) + if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, + true, NULL)) { - if (ipsec_sa_anti_replay_and_sn_advance ( - sa0, pd->seq, pd->seq_hi, true, NULL, true)) - { - ah_decrypt_set_next_index ( - b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0, - next, AH_DECRYPT_NEXT_DROP, pd->sa_index); - goto trace; - } - n_lost = ipsec_sa_anti_replay_advance ( - sa0, thread_index, pd->seq, pd->seq_hi, true); - } - else - { - if (ipsec_sa_anti_replay_and_sn_advance ( - sa0, pd->seq, pd->seq_hi, true, NULL, false)) - { - ah_decrypt_set_next_index ( - b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0, - next, AH_DECRYPT_NEXT_DROP, pd->sa_index); - goto trace; - } - n_lost = ipsec_sa_anti_replay_advance ( - sa0, thread_index, pd->seq, pd->seq_hi, false); + ah_decrypt_set_next_index (b[0], node, vm->thread_index, + AH_DECRYPT_ERROR_REPLAY, 0, next, + AH_DECRYPT_NEXT_DROP, pd->sa_index); + goto trace; } + n_lost = ipsec_sa_anti_replay_advance (irt, thread_index, pd->seq, + pd->seq_hi); vlib_prefetch_simple_counter ( &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index, pd->sa_index); @@ -354,7 +331,7 @@ ah_decrypt_inline (vlib_main_t * vm, b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); - if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0))) + if (PREDICT_TRUE (irt->is_tunnel)) { /* tunnel mode */ if (PREDICT_TRUE (pd->nexthdr_cached == IP_PROTOCOL_IP_IN_IP)) next[0] = AH_DECRYPT_NEXT_IP4_INPUT; @@ -424,10 +401,10 @@ ah_decrypt_inline (vlib_main_t * vm, trace: if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) { - sa0 = ipsec_sa_get (vnet_buffer (b[0])->ipsec.sad_index); + ipsec_sa_t *sa = ipsec_sa_get (vnet_buffer (b[0])->ipsec.sad_index); ah_decrypt_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr)); - tr->integ_alg = sa0->integ_alg; + tr->integ_alg = sa->integ_alg; tr->seq_num = pd->seq; } diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c index 86694660878..1b32b8d2c7c 100644 --- a/src/vnet/ipsec/ah_encrypt.c +++ b/src/vnet/ipsec/ah_encrypt.c @@ -43,8 +43,7 @@ typedef struct { u32 sa_index; u32 spi; - u32 seq_lo; - u32 seq_hi; + u64 seq; ipsec_integ_alg_t integ_alg; } ah_encrypt_trace_t; @@ -56,9 +55,9 @@ format_ah_encrypt_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ah_encrypt_trace_t *t = va_arg (*args, ah_encrypt_trace_t *); - s = format (s, "ah: sa-index %d spi %u (0x%08x) seq %u:%u integrity %U", - t->sa_index, t->spi, t->spi, t->seq_hi, t->seq_lo, - format_ipsec_integ_alg, t->integ_alg); + s = format (s, "ah: sa-index %d spi %u (0x%08x) seq %lu integrity %U", + t->sa_index, t->spi, t->spi, t->seq, format_ipsec_integ_alg, + t->integ_alg); return s; } @@ -128,7 +127,7 @@ ah_encrypt_inline (vlib_main_t * vm, vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; u16 nexts[VLIB_FRAME_SIZE], *next = nexts; ipsec_per_thread_data_t *ptd = vec_elt_at_index (im->ptd, thread_index); - ipsec_sa_t *sa0 = 0; + ipsec_sa_outb_rt_t *ort = 0; ip4_and_ah_header_t *ih0, *oh0 = 0; ip6_and_ah_header_t *ih6_0, *oh6_0 = 0; u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0; @@ -158,7 +157,7 @@ ah_encrypt_inline (vlib_main_t * vm, current_sa_index, current_sa_pkts, current_sa_bytes); current_sa_index = vnet_buffer (b[0])->ipsec.sad_index; - sa0 = ipsec_sa_get (current_sa_index); + ort = 
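Editor's note: the huge/non-huge anti-replay branches above collapse into one check call plus one advance call on the inbound runtime. A minimal sliding-window sketch of that check/advance split, assuming a fixed 64-packet window and leaving out ESN handling and the lost-packet count that the real ipsec_sa_anti_replay_advance returns.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct
{
  uint64_t window;  /* bit i set => (top_seq - i) already seen */
  uint32_t top_seq; /* highest sequence number accepted so far */
} ar_state_t;

/* pure check: true means "drop as replay or too old" */
static bool
ar_is_replay (const ar_state_t *ar, uint32_t seq)
{
  if (seq > ar->top_seq)
    return false;                  /* ahead of the window: new */
  uint32_t diff = ar->top_seq - seq;
  if (diff >= 64)
    return true;                   /* behind the window: too old */
  return (ar->window >> diff) & 1; /* inside the window: already seen? */
}

/* advance only after the ICV verified, so forged packets cannot move it */
static void
ar_advance (ar_state_t *ar, uint32_t seq)
{
  if (seq > ar->top_seq)
    {
      uint32_t shift = seq - ar->top_seq;
      ar->window = shift >= 64 ? 0 : ar->window << shift;
      ar->window |= 1;
      ar->top_seq = seq;
    }
  else
    ar->window |= 1ULL << (ar->top_seq - seq);
}

int
main (void)
{
  ar_state_t ar = { 0 };
  uint32_t seqs[] = { 1, 3, 2, 2, 70, 5 };
  for (unsigned i = 0; i < sizeof (seqs) / sizeof (seqs[0]); i++)
    {
      bool replay = ar_is_replay (&ar, seqs[i]);
      printf ("seq %u: %s\n", seqs[i], replay ? "drop" : "accept");
      if (!replay)
        ar_advance (&ar, seqs[i]);
    }
  return 0;
}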
ipsec_sa_get_outb_rt_by_index (current_sa_index); current_sa_bytes = current_sa_pkts = 0; vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index, @@ -168,23 +167,23 @@ ah_encrypt_inline (vlib_main_t * vm, pd->sa_index = current_sa_index; next[0] = AH_ENCRYPT_NEXT_DROP; - if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) + if (PREDICT_FALSE ((u16) ~0 == ort->thread_index)) { /* this is the first packet to use this SA, claim the SA * for this thread. this could happen simultaneously on * another thread */ - clib_atomic_cmp_and_swap (&sa0->thread_index, ~0, + clib_atomic_cmp_and_swap (&ort->thread_index, ~0, ipsec_sa_assign_thread (thread_index)); } - if (PREDICT_TRUE (thread_index != sa0->thread_index)) + if (PREDICT_TRUE (thread_index != ort->thread_index)) { - vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index; + vnet_buffer (b[0])->ipsec.thread_index = ort->thread_index; next[0] = AH_ENCRYPT_NEXT_HANDOFF; goto next; } - if (PREDICT_FALSE (esp_seq_advance (sa0))) + if (PREDICT_FALSE (esp_seq_advance (ort))) { ah_encrypt_set_next_index (b[0], node, vm->thread_index, AH_ENCRYPT_ERROR_SEQ_CYCLED, 0, next, @@ -199,7 +198,7 @@ ah_encrypt_inline (vlib_main_t * vm, ssize_t adv; ih0 = vlib_buffer_get_current (b[0]); - if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0))) + if (PREDICT_TRUE (ort->is_tunnel)) { if (is_ip6) adv = -sizeof (ip6_and_ah_header_t); @@ -211,11 +210,11 @@ ah_encrypt_inline (vlib_main_t * vm, adv = -sizeof (ah_header_t); } - icv_size = sa0->integ_icv_size; + icv_size = ort->integ_icv_size; const u8 padding_len = ah_calc_icv_padding_len (icv_size, is_ip6); adv -= padding_len; /* transport mode save the eth header before it is overwritten */ - if (PREDICT_FALSE (!ipsec_sa_is_set_IS_TUNNEL (sa0))) + if (PREDICT_FALSE (!ort->is_tunnel)) { const u32 l2_len = vnet_buffer (b[0])->ip.save_rewrite_length; u8 *l2_hdr_in = (u8 *) vlib_buffer_get_current (b[0]) - l2_len; @@ -238,16 +237,16 @@ ah_encrypt_inline (vlib_main_t * vm, oh6_0->ip6.ip_version_traffic_class_and_flow_label = ih6_0->ip6.ip_version_traffic_class_and_flow_label; - if (PREDICT_FALSE (ipsec_sa_is_set_IS_TUNNEL (sa0))) + if (PREDICT_FALSE (ort->is_tunnel)) { - ip6_set_dscp_network_order (&oh6_0->ip6, sa0->tunnel.t_dscp); - tunnel_encap_fixup_6o6 (sa0->tunnel_flags, &ih6_0->ip6, + ip6_set_dscp_network_order (&oh6_0->ip6, ort->t_dscp); + tunnel_encap_fixup_6o6 (ort->tunnel_flags, &ih6_0->ip6, &oh6_0->ip6); } pd->ip_version_traffic_class_and_flow_label = oh6_0->ip6.ip_version_traffic_class_and_flow_label; - if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0))) + if (PREDICT_TRUE (ort->is_tunnel)) { next_hdr_type = IP_PROTOCOL_IPV6; } @@ -260,8 +259,8 @@ ah_encrypt_inline (vlib_main_t * vm, clib_memcpy_fast (&oh6_0->ip6, &ip6_hdr_template, 8); oh6_0->ah.reserved = 0; oh6_0->ah.nexthdr = next_hdr_type; - oh6_0->ah.spi = clib_net_to_host_u32 (sa0->spi); - oh6_0->ah.seq_no = clib_net_to_host_u32 (sa0->seq); + oh6_0->ah.spi = ort->spi_be; + oh6_0->ah.seq_no = clib_net_to_host_u32 (ort->seq64); oh6_0->ip6.payload_length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b[0]) - sizeof (ip6_header_t)); @@ -274,18 +273,18 @@ ah_encrypt_inline (vlib_main_t * vm, oh0 = vlib_buffer_get_current (b[0]); pd->ttl = ih0->ip4.ttl; - if (PREDICT_FALSE (ipsec_sa_is_set_IS_TUNNEL (sa0))) + if (PREDICT_FALSE (ort->is_tunnel)) { - if (sa0->tunnel.t_dscp) - pd->tos = sa0->tunnel.t_dscp << 2; + if (ort->t_dscp) + pd->tos = ort->t_dscp << 2; else { pd->tos = ih0->ip4.tos; - if (!(sa0->tunnel_flags & + if (!(ort->tunnel_flags & 
TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP)) pd->tos &= 0x3; - if (!(sa0->tunnel_flags & + if (!(ort->tunnel_flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN)) pd->tos &= 0xfc; } @@ -298,7 +297,7 @@ ah_encrypt_inline (vlib_main_t * vm, pd->current_data = b[0]->current_data; clib_memset (oh0, 0, sizeof (ip4_and_ah_header_t)); - if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0))) + if (PREDICT_TRUE (ort->is_tunnel)) { next_hdr_type = IP_PROTOCOL_IP_IN_IP; } @@ -314,57 +313,51 @@ ah_encrypt_inline (vlib_main_t * vm, oh0->ip4.length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b[0])); - oh0->ah.spi = clib_net_to_host_u32 (sa0->spi); - oh0->ah.seq_no = clib_net_to_host_u32 (sa0->seq); + oh0->ah.spi = ort->spi_be; + oh0->ah.seq_no = clib_net_to_host_u32 (ort->seq64); oh0->ah.nexthdr = next_hdr_type; oh0->ah.hdrlen = (sizeof (ah_header_t) + icv_size + padding_len) / 4 - 2; } - if (PREDICT_TRUE (!is_ip6 && ipsec_sa_is_set_IS_TUNNEL (sa0) && - !ipsec_sa_is_set_IS_TUNNEL_V6 (sa0))) + if (PREDICT_TRUE (!is_ip6 && ort->is_tunnel && !ort->is_tunnel_v6)) { - clib_memcpy_fast (&oh0->ip4.address_pair, - &sa0->ip4_hdr.address_pair, + clib_memcpy_fast (&oh0->ip4.address_pair, &ort->ip4_hdr.address_pair, sizeof (ip4_address_pair_t)); - next[0] = sa0->dpo.dpoi_next_node; - vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = sa0->dpo.dpoi_index; + next[0] = ort->dpo.dpoi_next_node; + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = ort->dpo.dpoi_index; } - else if (is_ip6 && ipsec_sa_is_set_IS_TUNNEL (sa0) && - ipsec_sa_is_set_IS_TUNNEL_V6 (sa0)) + else if (is_ip6 && ort->is_tunnel && ort->is_tunnel_v6) { - clib_memcpy_fast (&oh6_0->ip6.src_address, - &sa0->ip6_hdr.src_address, + clib_memcpy_fast (&oh6_0->ip6.src_address, &ort->ip6_hdr.src_address, sizeof (ip6_address_t) * 2); - next[0] = sa0->dpo.dpoi_next_node; - vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = sa0->dpo.dpoi_index; + next[0] = ort->dpo.dpoi_next_node; + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = ort->dpo.dpoi_index; } - if (PREDICT_TRUE (sa0->integ_op_id)) + if (PREDICT_TRUE (ort->integ_op_id)) { vnet_crypto_op_t *op; vec_add2_aligned (ptd->integ_ops, op, 1, CLIB_CACHE_LINE_BYTES); - vnet_crypto_op_init (op, sa0->integ_op_id); + vnet_crypto_op_init (op, ort->integ_op_id); op->src = vlib_buffer_get_current (b[0]); op->len = b[0]->current_length; op->digest = vlib_buffer_get_current (b[0]) + ip_hdr_size + sizeof (ah_header_t); clib_memset (op->digest, 0, icv_size); op->digest_len = icv_size; - op->key_index = sa0->integ_key_index; + op->key_index = ort->integ_key_index; op->user_data = b - bufs; - if (ipsec_sa_is_set_USE_ESN (sa0)) + if (ort->use_esn) { - u32 seq_hi = clib_host_to_net_u32 (sa0->seq_hi); - - op->len += sizeof (seq_hi); - clib_memcpy (op->src + b[0]->current_length, &seq_hi, - sizeof (seq_hi)); + *(u32u *) (op->src + b[0]->current_length) = + clib_host_to_net_u32 (ort->seq64 >> 32); + op->len += sizeof (u32); } } - if (!ipsec_sa_is_set_IS_TUNNEL (sa0)) + if (!ort->is_tunnel) { next[0] = AH_ENCRYPT_NEXT_INTERFACE_OUTPUT; vlib_buffer_advance (b[0], -sizeof (ethernet_header_t)); @@ -373,13 +366,14 @@ ah_encrypt_inline (vlib_main_t * vm, next: if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) { - sa0 = ipsec_sa_get (pd->sa_index); + ipsec_sa_t *sa = ipsec_sa_get (pd->sa_index); + ipsec_sa_outb_rt_t *ort = + ipsec_sa_get_outb_rt_by_index (pd->sa_index); ah_encrypt_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr)); - tr->spi = sa0->spi; - tr->seq_lo = sa0->seq; - tr->seq_hi = sa0->seq_hi; - tr->integ_alg = sa0->integ_alg; + 
tr->spi = sa->spi; + tr->seq = ort->seq64; + tr->integ_alg = sa->integ_alg; tr->sa_index = pd->sa_index; } diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h index 1c3ce776ad2..c855843f35d 100644 --- a/src/vnet/ipsec/esp.h +++ b/src/vnet/ipsec/esp.h @@ -79,46 +79,28 @@ typedef struct esp_aead_t_ u32 data[3]; } __clib_packed esp_aead_t; -#define ESP_SEQ_MAX (4294967295UL) - u8 *format_esp_header (u8 * s, va_list * args); /* TODO seq increment should be atomic to be accessed by multiple workers */ always_inline int -esp_seq_advance (ipsec_sa_t * sa) +esp_seq_advance (ipsec_sa_outb_rt_t *ort) { - if (PREDICT_TRUE (ipsec_sa_is_set_USE_ESN (sa))) - { - if (PREDICT_FALSE (sa->seq == ESP_SEQ_MAX)) - { - if (PREDICT_FALSE (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && - sa->seq_hi == ESP_SEQ_MAX)) - return 1; - sa->seq_hi++; - } - sa->seq++; - } - else - { - if (PREDICT_FALSE (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && - sa->seq == ESP_SEQ_MAX)) - return 1; - sa->seq++; - } - + u64 max = ort->use_esn ? CLIB_U64_MAX : CLIB_U32_MAX; + if (ort->seq64 == max) + return 1; + ort->seq64++; return 0; } always_inline u16 -esp_aad_fill (u8 *data, const esp_header_t *esp, const ipsec_sa_t *sa, - u32 seq_hi) +esp_aad_fill (u8 *data, const esp_header_t *esp, int use_esn, u32 seq_hi) { esp_aead_t *aad; aad = (esp_aead_t *) data; aad->data[0] = esp->spi; - if (ipsec_sa_is_set_USE_ESN (sa)) + if (use_esn) { /* SPI, seq-hi, seq-low */ aad->data[1] = (u32) clib_host_to_net_u32 (seq_hi); @@ -187,8 +169,8 @@ esp_decrypt_err_to_sa_err (u32 err) always_inline void esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, - u32 thread_index, u32 err, u16 index, u16 *nexts, - u16 drop_next, u32 sa_index) + clib_thread_index_t thread_index, u32 err, + u16 index, u16 *nexts, u16 drop_next, u32 sa_index) { ipsec_set_next_index (b, node, thread_index, err, esp_encrypt_err_to_sa_err (err), index, nexts, @@ -197,8 +179,8 @@ esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, always_inline void esp_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, - u32 thread_index, u32 err, u16 index, u16 *nexts, - u16 drop_next, u32 sa_index) + clib_thread_index_t thread_index, u32 err, + u16 index, u16 *nexts, u16 drop_next, u32 sa_index) { ipsec_set_next_index (b, node, thread_index, err, esp_decrypt_err_to_sa_err (err), index, nexts, @@ -218,7 +200,8 @@ typedef struct { u8 icv_sz; u8 iv_sz; - ipsec_sa_flags_t flags; + u8 udp_sz; + u8 is_transport; u32 sa_index; }; u64 sa_data; diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index 6384bb927a8..7f7cd57488d 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -60,8 +60,7 @@ typedef enum typedef struct { u32 seq; - u32 sa_seq; - u32 sa_seq_hi; + u64 sa_seq64; u32 pkt_seq_hi; ipsec_crypto_alg_t crypto_alg; ipsec_integ_alg_t integ_alg; @@ -81,10 +80,10 @@ format_esp_decrypt_trace (u8 * s, va_list * args) esp_decrypt_trace_t *t = va_arg (*args, esp_decrypt_trace_t *); s = format (s, - "esp: crypto %U integrity %U pkt-seq %d sa-seq %u sa-seq-hi %u " + "esp: crypto %U integrity %U pkt-seq %d sa-seq %lu " "pkt-seq-hi %u", format_ipsec_crypto_alg, t->crypto_alg, format_ipsec_integ_alg, - t->integ_alg, t->seq, t->sa_seq, t->sa_seq_hi, t->pkt_seq_hi); + t->integ_alg, t->seq, t->sa_seq64, t->pkt_seq_hi); return s; } @@ -251,11 +250,12 @@ esp_move_icv (vlib_main_t * vm, vlib_buffer_t * first, } static_always_inline u16 -esp_insert_esn (vlib_main_t *vm, ipsec_sa_t *sa, esp_decrypt_packet_data_t *pd, - 
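Editor's note: esp_seq_advance now works on a single 64-bit counter instead of a split seq/seq_hi pair, which reduces the ESN/non-ESN overflow handling to one comparison. A sketch of the simplified logic and of how the on-wire fields fall out of it, using plain stdint types in place of the outbound runtime struct.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

typedef struct
{
  uint64_t seq64; /* low 32 bits go on the wire, high 32 bits are the ESN */
  int use_esn;
} sa_outb_t;

/* returns 1 when the counter would cycle, i.e. the SA must be retired */
static int
esp_seq_advance (sa_outb_t *sa)
{
  uint64_t max = sa->use_esn ? UINT64_MAX : UINT32_MAX;
  if (sa->seq64 == max)
    return 1;
  sa->seq64++;
  return 0;
}

int
main (void)
{
  sa_outb_t sa = { .seq64 = 0x00000001fffffffeULL, .use_esn = 1 };
  esp_seq_advance (&sa);
  printf ("esp->seq (wire) = 0x%08x\n", htonl ((uint32_t) sa.seq64));
  printf ("seq-hi (in ICV) = 0x%08x\n", (uint32_t) (sa.seq64 >> 32));
  return 0;
}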
esp_decrypt_packet_data2_t *pd2, u32 *data_len, u8 **digest, - u16 *len, vlib_buffer_t *b, u8 *payload) +esp_insert_esn (vlib_main_t *vm, ipsec_sa_inb_rt_t *irt, + esp_decrypt_packet_data_t *pd, esp_decrypt_packet_data2_t *pd2, + u32 *data_len, u8 **digest, u16 *len, vlib_buffer_t *b, + u8 *payload) { - if (!ipsec_sa_is_set_USE_ESN (sa)) + if (!irt->use_esn) return 0; /* shift ICV by 4 bytes to insert ESN */ u32 seq_hi = clib_host_to_net_u32 (pd->seq_hi); @@ -288,17 +288,17 @@ esp_insert_esn (vlib_main_t *vm, ipsec_sa_t *sa, esp_decrypt_packet_data_t *pd, } static_always_inline u8 * -esp_move_icv_esn (vlib_main_t * vm, vlib_buffer_t * first, - esp_decrypt_packet_data_t * pd, - esp_decrypt_packet_data2_t * pd2, u16 icv_sz, - ipsec_sa_t * sa, u8 * extra_esn, u32 * len) +esp_move_icv_esn (vlib_main_t *vm, vlib_buffer_t *first, + esp_decrypt_packet_data_t *pd, + esp_decrypt_packet_data2_t *pd2, u16 icv_sz, + ipsec_sa_inb_rt_t *irt, u8 *extra_esn, u32 *len) { u16 dif = 0; u8 *digest = esp_move_icv (vm, first, pd, pd2, icv_sz, &dif); if (dif) *len -= dif; - if (ipsec_sa_is_set_USE_ESN (sa)) + if (irt->use_esn) { u32 seq_hi = clib_host_to_net_u32 (pd->seq_hi); u16 space_left = vlib_buffer_space_left_at_end (vm, pd2->lb); @@ -326,9 +326,9 @@ esp_move_icv_esn (vlib_main_t * vm, vlib_buffer_t * first, static_always_inline int esp_decrypt_chain_integ (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, const esp_decrypt_packet_data_t *pd, - esp_decrypt_packet_data2_t *pd2, ipsec_sa_t *sa0, - vlib_buffer_t *b, u8 icv_sz, u8 *start_src, - u32 start_len, u8 **digest, u16 *n_ch, + esp_decrypt_packet_data2_t *pd2, + ipsec_sa_inb_rt_t *irt, vlib_buffer_t *b, u8 icv_sz, + u8 *start_src, u32 start_len, u8 **digest, u16 *n_ch, u32 *integ_total_len) { vnet_crypto_op_chunk_t *ch; @@ -350,7 +350,7 @@ esp_decrypt_chain_integ (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, ch->len = cb->current_length; else ch->len = cb->current_length - icv_sz; - if (ipsec_sa_is_set_USE_ESN (sa0)) + if (irt->use_esn) { u32 seq_hi = clib_host_to_net_u32 (pd->seq_hi); u8 tmp[ESP_MAX_ICV_SIZE]; @@ -422,11 +422,11 @@ esp_decrypt_chain_integ (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, } static_always_inline u32 -esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, - esp_decrypt_packet_data_t * pd, - esp_decrypt_packet_data2_t * pd2, - ipsec_sa_t * sa0, vlib_buffer_t * b, u8 icv_sz, - u8 * start, u32 start_len, u8 ** tag, u16 * n_ch) +esp_decrypt_chain_crypto (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, + esp_decrypt_packet_data_t *pd, + esp_decrypt_packet_data2_t *pd2, + ipsec_sa_inb_rt_t *irt, vlib_buffer_t *b, u8 icv_sz, + u8 *start, u32 start_len, u8 **tag, u16 *n_ch) { vnet_crypto_op_chunk_t *ch; vlib_buffer_t *cb = b; @@ -445,7 +445,7 @@ esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, ch->src = ch->dst = vlib_buffer_get_current (cb); if (pd2->lb == cb) { - if (ipsec_sa_is_set_IS_AEAD (sa0)) + if (irt->is_aead) { if (pd2->lb->current_length < icv_sz) { @@ -496,8 +496,9 @@ esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, static_always_inline esp_decrypt_error_t esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, - ipsec_sa_t *sa0, u8 *payload, u16 len, u8 icv_sz, - u8 iv_sz, esp_decrypt_packet_data_t *pd, + ipsec_sa_inb_rt_t *irt, u8 *payload, u16 len, + u8 icv_sz, u8 iv_sz, + esp_decrypt_packet_data_t *pd, esp_decrypt_packet_data2_t *pd2, vlib_buffer_t *b, u32 index) { @@ -506,10 +507,10 @@ esp_decrypt_prepare_sync_op (vlib_main_t 
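Editor's note: esp_insert_esn now keys only on irt->use_esn, but the mechanics stay the same: the received ICV is shifted by four bytes so the big-endian seq-hi can sit right after the payload, since the ESN high word is covered by the ICV but never transmitted. A buffer-only sketch of that splice, with sizes invented for the demo.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* layout before: [ data | icv ]; after: [ data | seq_hi | icv ] */
static void
insert_esn (uint8_t *payload, size_t data_len, size_t icv_sz, uint32_t seq_hi)
{
  uint8_t *icv = payload + data_len;
  memmove (icv + sizeof (uint32_t), icv, icv_sz); /* make room for seq-hi */
  uint32_t be = htonl (seq_hi);
  memcpy (icv, &be, sizeof (be));
}

int
main (void)
{
  /* 8 bytes of "data", 4 bytes of "ICV", plus 4 spare bytes for the shift */
  uint8_t buf[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 0xaa, 0xbb, 0xcc, 0xdd };
  insert_esn (buf, 8, 4, 0x00000002);
  for (unsigned i = 0; i < sizeof (buf); i++)
    printf ("%02x ", buf[i]);
  printf ("\n");
  return 0;
}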
*vm, ipsec_per_thread_data_t *ptd, vnet_crypto_op_t _op, *op = &_op; const u8 esp_sz = sizeof (esp_header_t); - if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE)) + if (PREDICT_TRUE (irt->integ_op_id != VNET_CRYPTO_OP_NONE)) { - vnet_crypto_op_init (op, sa0->integ_op_id); - op->key_index = sa0->integ_key_index; + vnet_crypto_op_init (op, irt->integ_op_id); + op->key_index = irt->integ_key_index; op->src = payload; op->flags = VNET_CRYPTO_OP_FLAG_HMAC_CHECK; op->user_data = index; @@ -531,9 +532,8 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, if (pd2->lb->current_length < icv_sz) { u8 extra_esn = 0; - op->digest = - esp_move_icv_esn (vm, b, pd, pd2, icv_sz, sa0, - &extra_esn, &op->len); + op->digest = esp_move_icv_esn (vm, b, pd, pd2, icv_sz, irt, + &extra_esn, &op->len); if (extra_esn) { @@ -558,7 +558,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; op->chunk_index = vec_len (ptd->chunks); - if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, sa0, b, icv_sz, + if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, irt, b, icv_sz, payload, pd->current_length, &op->digest, &op->n_chunks, 0) < 0) return ESP_DECRYPT_ERROR_NO_BUFFERS; @@ -566,7 +566,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, else { integ_ops = &ptd->integ_ops; - esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b, + esp_insert_esn (vm, irt, pd, pd2, &op->len, &op->digest, &len, b, payload); } out: @@ -576,27 +576,28 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, payload += esp_sz; len -= esp_sz; - if (sa0->crypto_dec_op_id != VNET_CRYPTO_OP_NONE) + if (irt->cipher_op_id != VNET_CRYPTO_OP_NONE) { - vnet_crypto_op_init (op, sa0->crypto_dec_op_id); - op->key_index = sa0->crypto_key_index; + vnet_crypto_op_init (op, irt->cipher_op_id); + op->key_index = irt->cipher_key_index; op->iv = payload; - if (ipsec_sa_is_set_IS_CTR (sa0)) + if (irt->is_ctr) { /* construct nonce in a scratch space in front of the IP header */ esp_ctr_nonce_t *nonce = (esp_ctr_nonce_t *) (payload - esp_sz - pd->hdr_sz - sizeof (*nonce)); - if (ipsec_sa_is_set_IS_AEAD (sa0)) + if (irt->is_aead) { /* constuct aad in a scratch space in front of the nonce */ esp_header_t *esp0 = (esp_header_t *) (payload - esp_sz); op->aad = (u8 *) nonce - sizeof (esp_aead_t); - op->aad_len = esp_aad_fill (op->aad, esp0, sa0, pd->seq_hi); + op->aad_len = + esp_aad_fill (op->aad, esp0, irt->use_esn, pd->seq_hi); op->tag = payload + len; op->tag_len = 16; - if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0))) + if (PREDICT_FALSE (irt->is_null_gmac)) { /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */ payload -= iv_sz; @@ -607,7 +608,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, { nonce->ctr = clib_host_to_net_u32 (1); } - nonce->salt = sa0->salt; + nonce->salt = irt->salt; ASSERT (sizeof (u64) == iv_sz); nonce->iv = *(u64 *) op->iv; op->iv = (u8 *) nonce; @@ -621,9 +622,9 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, /* buffer is chained */ op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; op->chunk_index = vec_len (ptd->chunks); - esp_decrypt_chain_crypto (vm, ptd, pd, pd2, sa0, b, icv_sz, - payload, len - pd->iv_sz + pd->icv_sz, - &op->tag, &op->n_chunks); + esp_decrypt_chain_crypto (vm, ptd, pd, pd2, irt, b, icv_sz, payload, + len - pd->iv_sz + pd->icv_sz, &op->tag, + &op->n_chunks); crypto_ops = 
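Editor's note: esp_aad_fill now takes a plain use_esn flag instead of the whole SA. The AAD it builds for the AEAD path is either SPI plus the 32-bit sequence number, or SPI plus both ESN halves with the high half first (the 64-bit ESN in network byte order). A sketch of the two layouts with made-up SPI and sequence values.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* returns the AAD length written: 8 bytes without ESN, 12 with */
static uint16_t
esp_aad_fill (uint8_t *aad, uint32_t spi_be, uint32_t seq_lo_be,
              uint32_t seq_hi, int use_esn)
{
  memcpy (aad, &spi_be, 4); /* SPI, already network order on the wire */
  if (use_esn)
    {
      uint32_t hi_be = htonl (seq_hi);
      memcpy (aad + 4, &hi_be, 4); /* high half first */
      memcpy (aad + 8, &seq_lo_be, 4);
      return 12;
    }
  memcpy (aad + 4, &seq_lo_be, 4);
  return 8;
}

int
main (void)
{
  uint8_t aad[12];
  uint16_t n = esp_aad_fill (aad, htonl (0x1000), htonl (42), 1, 1);
  printf ("aad_len = %u:", n);
  for (unsigned i = 0; i < n; i++)
    printf (" %02x", aad[i]);
  printf ("\n");
  return 0;
}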
&ptd->chained_crypto_ops; } else @@ -639,8 +640,9 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, static_always_inline esp_decrypt_error_t esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, - vnet_crypto_async_frame_t *f, ipsec_sa_t *sa0, - u8 *payload, u16 len, u8 icv_sz, u8 iv_sz, + vnet_crypto_async_frame_t *f, + ipsec_sa_inb_rt_t *irt, u8 *payload, u16 len, + u8 icv_sz, u8 iv_sz, esp_decrypt_packet_data_t *pd, esp_decrypt_packet_data2_t *pd2, u32 bi, vlib_buffer_t *b, u16 async_next) @@ -649,17 +651,17 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, esp_decrypt_packet_data_t *async_pd = &(esp_post_data (b))->decrypt_data; esp_decrypt_packet_data2_t *async_pd2 = esp_post_data2 (b); u8 *tag = payload + len, *iv = payload + esp_sz, *aad = 0; - const u32 key_index = sa0->crypto_key_index; + const u32 key_index = irt->cipher_key_index; u32 crypto_len, integ_len = 0; i16 crypto_start_offset, integ_start_offset = 0; u8 flags = 0; - if (!ipsec_sa_is_set_IS_AEAD (sa0)) + if (!irt->is_aead) { /* linked algs */ integ_start_offset = payload - b->data; integ_len = len; - if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE)) + if (PREDICT_TRUE (irt->integ_op_id != VNET_CRYPTO_OP_NONE)) flags |= VNET_CRYPTO_OP_FLAG_HMAC_CHECK; if (pd->is_chain) @@ -674,8 +676,8 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, if (pd2->lb->current_length < icv_sz) { u8 extra_esn = 0; - tag = esp_move_icv_esn (vm, b, pd, pd2, icv_sz, sa0, - &extra_esn, &integ_len); + tag = esp_move_icv_esn (vm, b, pd, pd2, icv_sz, irt, &extra_esn, + &integ_len); if (extra_esn) { @@ -698,7 +700,7 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, tag = vlib_buffer_get_tail (pd2->lb) - icv_sz; flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; - if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, sa0, b, icv_sz, + if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, irt, b, icv_sz, payload, pd->current_length, &tag, 0, &integ_len) < 0) { @@ -707,7 +709,7 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, } } else - esp_insert_esn (vm, sa0, pd, pd2, &integ_len, &tag, &len, b, payload); + esp_insert_esn (vm, irt, pd, pd2, &integ_len, &tag, &len, b, payload); } out: @@ -716,19 +718,19 @@ out: len -= esp_sz; iv = payload; - if (ipsec_sa_is_set_IS_CTR (sa0)) + if (irt->is_ctr) { /* construct nonce in a scratch space in front of the IP header */ esp_ctr_nonce_t *nonce = (esp_ctr_nonce_t *) (payload - esp_sz - pd->hdr_sz - sizeof (*nonce)); - if (ipsec_sa_is_set_IS_AEAD (sa0)) + if (irt->is_aead) { /* constuct aad in a scratch space in front of the nonce */ esp_header_t *esp0 = (esp_header_t *) (payload - esp_sz); aad = (u8 *) nonce - sizeof (esp_aead_t); - esp_aad_fill (aad, esp0, sa0, pd->seq_hi); + esp_aad_fill (aad, esp0, irt->use_esn, pd->seq_hi); tag = payload + len; - if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0))) + if (PREDICT_FALSE (irt->is_null_gmac)) { /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */ payload -= iv_sz; @@ -739,7 +741,7 @@ out: { nonce->ctr = clib_host_to_net_u32 (1); } - nonce->salt = sa0->salt; + nonce->salt = irt->salt; ASSERT (sizeof (u64) == iv_sz); nonce->iv = *(u64 *) iv; iv = (u8 *) nonce; @@ -753,10 +755,9 @@ out: /* buffer is chained */ flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; - crypto_len = esp_decrypt_chain_crypto (vm, ptd, pd, pd2, sa0, b, icv_sz, - payload, - len - pd->iv_sz + pd->icv_sz, - &tag, 0); + 
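Editor's note: both decrypt paths build the CTR/GCM nonce in scratch space in front of the packet: the 4-byte salt now comes from the runtime (irt->salt), the 8-byte on-wire IV is copied next to it, and a 32-bit counter of 1 is appended for plain CTR. A layout sketch with invented salt and IV values; the field order mirrors what the diff manipulates but is an assumption here.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* assumed layout of the 16-byte nonce scratch area: salt | iv | counter */
typedef struct
{
  uint32_t salt; /* negotiated with the key, never on the wire */
  uint64_t iv;   /* copied from the packet's ESP IV field */
  uint32_t ctr;  /* block counter, starts at 1 for AES-CTR */
} __attribute__ ((packed)) ctr_nonce_t;

int
main (void)
{
  uint8_t wire_iv[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  ctr_nonce_t nonce;
  nonce.salt = 0xdeadbeef;
  memcpy (&nonce.iv, wire_iv, sizeof (nonce.iv));
  nonce.ctr = htonl (1);
  printf ("nonce is %zu bytes\n", sizeof (nonce));
  return 0;
}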
crypto_len = + esp_decrypt_chain_crypto (vm, ptd, pd, pd2, irt, b, icv_sz, payload, + len - pd->iv_sz + pd->icv_sz, &tag, 0); } *async_pd = *pd; @@ -779,10 +780,9 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b, u16 *next, int is_ip6, int is_tun, int is_async) { - ipsec_sa_t *sa0 = ipsec_sa_get (pd->sa_index); + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt_by_index (pd->sa_index); vlib_buffer_t *lb = b; const u8 esp_sz = sizeof (esp_header_t); - const u8 tun_flags = IPSEC_SA_FLAG_IS_TUNNEL | IPSEC_SA_FLAG_IS_TUNNEL_V6; u8 pad_length = 0, next_header = 0; u16 icv_sz; u64 n_lost; @@ -809,32 +809,16 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node, * a sequence s, s+1, s+2, s+3, ... s+n and nothing will prevent any * implementation, sequential or batching, from decrypting these. */ - if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0))) + if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, true, + NULL)) { - if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true, - NULL, true)) - { - esp_decrypt_set_next_index (b, node, vm->thread_index, - ESP_DECRYPT_ERROR_REPLAY, 0, next, - ESP_DECRYPT_NEXT_DROP, pd->sa_index); - return; - } - n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq, - pd->seq_hi, true); - } - else - { - if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true, - NULL, false)) - { - esp_decrypt_set_next_index (b, node, vm->thread_index, - ESP_DECRYPT_ERROR_REPLAY, 0, next, - ESP_DECRYPT_NEXT_DROP, pd->sa_index); - return; - } - n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq, - pd->seq_hi, false); + esp_decrypt_set_next_index (b, node, vm->thread_index, + ESP_DECRYPT_ERROR_REPLAY, 0, next, + ESP_DECRYPT_NEXT_DROP, pd->sa_index); + return; } + n_lost = + ipsec_sa_anti_replay_advance (irt, vm->thread_index, pd->seq, pd->seq_hi); vlib_prefetch_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], vm->thread_index, pd->sa_index); @@ -899,10 +883,9 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node, b->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); - if ((pd->flags & tun_flags) == 0 && !is_tun) /* transport mode */ + if (pd->is_transport && !is_tun) /* transport mode */ { - u8 udp_sz = (is_ip6 == 0 && pd->flags & IPSEC_SA_FLAG_UDP_ENCAP) ? - sizeof (udp_header_t) : 0; + u8 udp_sz = is_ip6 ? 
0 : pd->udp_sz; u16 ip_hdr_sz = pd->hdr_sz - udp_sz; u8 *old_ip = b->data + pd->current_data - ip_hdr_sz - udp_sz; u8 *ip = old_ip + adv + udp_sz; @@ -1012,7 +995,7 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node, if (is_tun) { - if (ipsec_sa_is_set_IS_PROTECT (sa0)) + if (irt->is_protect) { /* * There are two encap possibilities @@ -1085,7 +1068,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { ipsec_main_t *im = &ipsec_main; const u16 *next_by_next_header = im->next_header_registrations; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u16 len; ipsec_per_thread_data_t *ptd = vec_elt_at_index (im->ptd, thread_index); u32 *from = vlib_frame_vector_args (from_frame); @@ -1101,21 +1084,18 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, esp_decrypt_packet_data_t cpd = { }; u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0; const u8 esp_sz = sizeof (esp_header_t); - ipsec_sa_t *sa0 = 0; + ipsec_sa_inb_rt_t *irt = 0; bool anti_replay_result; - int is_async = im->async_mode; + int is_async = 0; vnet_crypto_op_id_t async_op = ~0; vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_N_OP_IDS]; esp_decrypt_error_t err; vlib_get_buffers (vm, from, b, n_left); - if (!is_async) - { - vec_reset_length (ptd->crypto_ops); - vec_reset_length (ptd->integ_ops); - vec_reset_length (ptd->chained_crypto_ops); - vec_reset_length (ptd->chained_integ_ops); - } + vec_reset_length (ptd->crypto_ops); + vec_reset_length (ptd->integ_ops); + vec_reset_length (ptd->chained_crypto_ops); + vec_reset_length (ptd->chained_integ_ops); vec_reset_length (ptd->async_frames); vec_reset_length (ptd->chunks); clib_memset (sync_nexts, -1, sizeof (sync_nexts)); @@ -1157,29 +1137,28 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, current_sa_index = vnet_buffer (b[0])->ipsec.sad_index; vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index, current_sa_index); - sa0 = ipsec_sa_get (current_sa_index); + irt = ipsec_sa_get_inb_rt_by_index (current_sa_index); - /* fetch the second cacheline ASAP */ - clib_prefetch_load (sa0->cacheline1); - cpd.icv_sz = sa0->integ_icv_size; - cpd.iv_sz = sa0->crypto_iv_size; - cpd.flags = sa0->flags; + cpd.icv_sz = irt->integ_icv_size; + cpd.iv_sz = irt->cipher_iv_size; + cpd.udp_sz = irt->udp_sz; + cpd.is_transport = irt->is_transport; cpd.sa_index = current_sa_index; - is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0); + is_async = irt->is_async; } - if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) + if (PREDICT_FALSE ((u16) ~0 == irt->thread_index)) { /* this is the first packet to use this SA, claim the SA * for this thread. 
this could happen simultaneously on * another thread */ - clib_atomic_cmp_and_swap (&sa0->thread_index, ~0, + clib_atomic_cmp_and_swap (&irt->thread_index, ~0, ipsec_sa_assign_thread (thread_index)); } - if (PREDICT_FALSE (thread_index != sa0->thread_index)) + if (PREDICT_FALSE (thread_index != irt->thread_index)) { - vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index; + vnet_buffer (b[0])->ipsec.thread_index = irt->thread_index; err = ESP_DECRYPT_ERROR_HANDOFF; esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop, noop_nexts, ESP_DECRYPT_NEXT_HANDOFF, @@ -1209,16 +1188,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, pd->current_length = b[0]->current_length; /* anti-reply check */ - if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0))) - { - anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( - sa0, pd->seq, ~0, false, &pd->seq_hi, true); - } - else - { - anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( - sa0, pd->seq, ~0, false, &pd->seq_hi, false); - } + anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( + irt, pd->seq, ~0, false, &pd->seq_hi); if (anti_replay_result) { @@ -1244,7 +1215,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (is_async) { - async_op = sa0->crypto_async_dec_op_id; + async_op = irt->async_op_id; /* get a frame for this op if we don't yet have one or it's full */ @@ -1267,7 +1238,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } err = esp_decrypt_prepare_async_frame ( - vm, ptd, async_frames[async_op], sa0, payload, len, cpd.icv_sz, + vm, ptd, async_frames[async_op], irt, payload, len, cpd.icv_sz, cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next_node); if (ESP_DECRYPT_ERROR_RX_PKTS != err) { @@ -1278,7 +1249,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } else { - err = esp_decrypt_prepare_sync_op (vm, ptd, sa0, payload, len, + err = esp_decrypt_prepare_sync_op (vm, ptd, irt, payload, len, cpd.icv_sz, cpd.iv_sz, pd, pd2, b[0], n_sync); if (err != ESP_DECRYPT_ERROR_RX_PKTS) @@ -1391,12 +1362,13 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { esp_decrypt_trace_t *tr; tr = vlib_add_trace (vm, node, b[0], sizeof (*tr)); - sa0 = ipsec_sa_get (current_sa_index); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; + ipsec_sa_t *sa = ipsec_sa_get (current_sa_index); + ipsec_sa_inb_rt_t *irt = + ipsec_sa_get_inb_rt_by_index (current_sa_index); + tr->crypto_alg = sa->crypto_alg; + tr->integ_alg = sa->integ_alg; tr->seq = pd->seq; - tr->sa_seq = sa0->seq; - tr->sa_seq_hi = sa0->seq_hi; + tr->sa_seq64 = irt->seq64; tr->pkt_seq_hi = pd->seq_hi; } @@ -1456,18 +1428,19 @@ esp_decrypt_post_inline (vlib_main_t * vm, /*trace: */ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) { - ipsec_sa_t *sa0 = ipsec_sa_get (pd->sa_index); + ipsec_sa_t *sa; + ipsec_sa_inb_rt_t *irt; esp_decrypt_trace_t *tr; esp_decrypt_packet_data_t *async_pd = &(esp_post_data (b[0]))->decrypt_data; tr = vlib_add_trace (vm, node, b[0], sizeof (*tr)); - sa0 = ipsec_sa_get (async_pd->sa_index); + sa = ipsec_sa_get (async_pd->sa_index); + irt = ipsec_sa_get_inb_rt_by_index (async_pd->sa_index); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; + tr->crypto_alg = sa->crypto_alg; + tr->integ_alg = sa->integ_alg; tr->seq = pd->seq; - tr->sa_seq = sa0->seq; - tr->sa_seq_hi = sa0->seq_hi; + tr->sa_seq64 = irt->seq64; } n_left--; diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index 
4338cb01e5d..1f2cc243074 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -49,8 +49,7 @@ typedef struct { u32 sa_index; u32 spi; - u32 seq; - u32 sa_seq_hi; + u64 seq; u8 udp_encap; ipsec_crypto_alg_t crypto_alg; ipsec_integ_alg_t integ_alg; @@ -71,13 +70,11 @@ format_esp_encrypt_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); - s = - format (s, - "esp: sa-index %d spi %u (0x%08x) seq %u sa-seq-hi %u crypto %U integrity %U%s", - t->sa_index, t->spi, t->spi, t->seq, t->sa_seq_hi, - format_ipsec_crypto_alg, - t->crypto_alg, format_ipsec_integ_alg, t->integ_alg, - t->udp_encap ? " udp-encap-enabled" : ""); + s = format ( + s, "esp: sa-index %d spi %u (0x%08x) seq %lu crypto %U integrity %U%s", + t->sa_index, t->spi, t->spi, t->seq, format_ipsec_crypto_alg, + t->crypto_alg, format_ipsec_integ_alg, t->integ_alg, + t->udp_encap ? " udp-encap-enabled" : ""); return s; } @@ -162,9 +159,9 @@ esp_update_ip4_hdr (ip4_header_t * ip4, u16 len, int is_transport, int is_udp) } static_always_inline void -esp_fill_udp_hdr (ipsec_sa_t * sa, udp_header_t * udp, u16 len) +esp_fill_udp_hdr (ipsec_sa_outb_rt_t *ort, udp_header_t *udp, u16 len) { - clib_memcpy_fast (udp, &sa->udp_hdr, sizeof (udp_header_t)); + clib_memcpy_fast (udp, &ort->udp_hdr, sizeof (udp_header_t)); udp->length = clib_net_to_host_u16 (len); } @@ -223,12 +220,12 @@ esp_get_ip6_hdr_len (ip6_header_t * ip6, ip6_ext_header_t ** ext_hdr) * message. You can refer to NIST SP800-38a and NIST SP800-38d for more * details. */ static_always_inline void * -esp_generate_iv (ipsec_sa_t *sa, void *payload, int iv_sz) +esp_generate_iv (ipsec_sa_outb_rt_t *ort, void *payload, int iv_sz) { ASSERT (iv_sz >= sizeof (u64)); u64 *iv = (u64 *) (payload - iv_sz); clib_memset_u8 (iv, 0, iv_sz); - *iv = clib_pcg64i_random_r (&sa->iv_prng); + *iv = clib_pcg64i_random_r (&ort->iv_prng); return iv; } @@ -294,10 +291,9 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node, } static_always_inline u32 -esp_encrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, - ipsec_sa_t * sa0, vlib_buffer_t * b, - vlib_buffer_t * lb, u8 icv_sz, u8 * start, - u32 start_len, u16 * n_ch) +esp_encrypt_chain_crypto (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, + vlib_buffer_t *b, vlib_buffer_t *lb, u8 icv_sz, + u8 *start, u32 start_len, u16 *n_ch) { vnet_crypto_op_chunk_t *ch; vlib_buffer_t *cb = b; @@ -331,10 +327,10 @@ esp_encrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, } static_always_inline u32 -esp_encrypt_chain_integ (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, - ipsec_sa_t * sa0, vlib_buffer_t * b, - vlib_buffer_t * lb, u8 icv_sz, u8 * start, - u32 start_len, u8 * digest, u16 * n_ch) +esp_encrypt_chain_integ (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, + ipsec_sa_outb_rt_t *ort, vlib_buffer_t *b, + vlib_buffer_t *lb, u8 icv_sz, u8 *start, + u32 start_len, u8 *digest, u16 *n_ch) { vnet_crypto_op_chunk_t *ch; vlib_buffer_t *cb = b; @@ -352,12 +348,11 @@ esp_encrypt_chain_integ (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, if (lb == cb) { total_len += ch->len = cb->current_length - icv_sz; - if (ipsec_sa_is_set_USE_ESN (sa0)) + if (ort->use_esn) { - u32 seq_hi = clib_net_to_host_u32 (sa0->seq_hi); - clib_memcpy_fast (digest, &seq_hi, sizeof (seq_hi)); - ch->len += sizeof (seq_hi); - total_len += sizeof (seq_hi); + *(u32u *) digest = clib_net_to_host_u32 (ort->seq64 >> 32); + ch->len 
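Editor's note: esp_generate_iv now draws from the outbound runtime's PRNG state (ort->iv_prng) and writes the per-packet IV directly in front of the payload. A sketch of that shape using splitmix64 as a stand-in generator; the real code uses clib_pcg64i_random_r() seeded per SA, which is not reproduced here.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* stand-in PRNG, not the PCG generator the diff uses */
static uint64_t
splitmix64 (uint64_t *state)
{
  uint64_t z = (*state += 0x9e3779b97f4a7c15ULL);
  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
  z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
  return z ^ (z >> 31);
}

/* writes iv_sz bytes immediately in front of payload and returns their start */
static void *
esp_generate_iv (uint64_t *prng, uint8_t *payload, int iv_sz)
{
  uint8_t *iv = payload - iv_sz;
  uint64_t r = splitmix64 (prng);
  memset (iv, 0, iv_sz);     /* iv_sz may exceed 8 for some ciphers */
  memcpy (iv, &r, sizeof (r));
  return iv;
}

int
main (void)
{
  uint8_t buf[64];
  uint64_t prng = 42; /* per-SA state; the real seed is random */
  uint8_t *payload = buf + 16;
  uint8_t *iv = esp_generate_iv (&prng, payload, 8);
  printf ("iv starts %td bytes before the payload\n", payload - iv);
  return 0;
}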
+= sizeof (u32); + total_len += sizeof (u32); } } else @@ -379,16 +374,16 @@ esp_encrypt_chain_integ (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, always_inline void esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, vnet_crypto_op_t **crypto_ops, - vnet_crypto_op_t **integ_ops, ipsec_sa_t *sa0, u32 seq_hi, - u8 *payload, u16 payload_len, u8 iv_sz, u8 icv_sz, u32 bi, - vlib_buffer_t **b, vlib_buffer_t *lb, u32 hdr_len, - esp_header_t *esp) + vnet_crypto_op_t **integ_ops, ipsec_sa_outb_rt_t *ort, + u32 seq_hi, u8 *payload, u16 payload_len, u8 iv_sz, + u8 icv_sz, u32 bi, vlib_buffer_t **b, vlib_buffer_t *lb, + u32 hdr_len, esp_header_t *esp) { - if (sa0->crypto_enc_op_id) + if (ort->cipher_op_id) { vnet_crypto_op_t *op; vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES); - vnet_crypto_op_init (op, sa0->crypto_enc_op_id); + vnet_crypto_op_init (op, ort->cipher_op_id); u8 *crypto_start = payload; /* esp_add_footer_and_icv() in esp_encrypt_inline() makes sure we always * have enough space for ESP header and footer which includes ICV */ @@ -396,24 +391,24 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, u16 crypto_len = payload_len - icv_sz; /* generate the IV in front of the payload */ - void *pkt_iv = esp_generate_iv (sa0, payload, iv_sz); + void *pkt_iv = esp_generate_iv (ort, payload, iv_sz); - op->key_index = sa0->crypto_key_index; + op->key_index = ort->cipher_key_index; op->user_data = bi; - if (ipsec_sa_is_set_IS_CTR (sa0)) + if (ort->is_ctr) { /* construct nonce in a scratch space in front of the IP header */ esp_ctr_nonce_t *nonce = (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce)); - if (ipsec_sa_is_set_IS_AEAD (sa0)) + if (ort->is_aead) { /* constuct aad in a scratch space in front of the nonce */ op->aad = (u8 *) nonce - sizeof (esp_aead_t); - op->aad_len = esp_aad_fill (op->aad, esp, sa0, seq_hi); + op->aad_len = esp_aad_fill (op->aad, esp, ort->use_esn, seq_hi); op->tag = payload + crypto_len; op->tag_len = 16; - if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0))) + if (PREDICT_FALSE (ort->is_null_gmac)) { /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */ crypto_start -= iv_sz; @@ -425,7 +420,7 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, nonce->ctr = clib_host_to_net_u32 (1); } - nonce->salt = sa0->salt; + nonce->salt = ort->salt; nonce->iv = *(u64 *) pkt_iv; op->iv = (u8 *) nonce; } @@ -445,9 +440,8 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; op->chunk_index = vec_len (ptd->chunks); op->tag = vlib_buffer_get_tail (lb) - icv_sz; - esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz, - crypto_start, crypto_len + icv_sz, - &op->n_chunks); + esp_encrypt_chain_crypto (vm, ptd, b[0], lb, icv_sz, crypto_start, + crypto_len + icv_sz, &op->n_chunks); } else { @@ -457,14 +451,14 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, } } - if (sa0->integ_op_id) + if (ort->integ_op_id) { vnet_crypto_op_t *op; vec_add2_aligned (integ_ops[0], op, 1, CLIB_CACHE_LINE_BYTES); - vnet_crypto_op_init (op, sa0->integ_op_id); + vnet_crypto_op_init (op, ort->integ_op_id); op->src = payload - iv_sz - sizeof (esp_header_t); op->digest = payload + payload_len - icv_sz; - op->key_index = sa0->integ_key_index; + op->key_index = ort->integ_key_index; op->digest_len = icv_sz; op->len = payload_len - icv_sz + iv_sz + sizeof (esp_header_t); op->user_data = bi; @@ -476,13 +470,12 @@ esp_prepare_sync_op (vlib_main_t 
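Editor's note: appending the ESN high word is now a single unaligned store through u32u instead of a memcpy of a temporary. A sketch showing the two forms produce the same bytes; the u32u typedef here is an assumption about how vppinfra defines it (GCC/clang attribute syntax), not a copy of the real definition.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* assumption: an alignment-1, alias-safe u32, roughly like this */
typedef uint32_t __attribute__ ((aligned (1), may_alias)) u32u;

int
main (void)
{
  uint8_t buf[16] = { 0 };
  uint64_t seq64 = 0x0000000500000001ULL;

  /* one unaligned store of the ESN high word ... */
  *(u32u *) (buf + 3) = htonl ((uint32_t) (seq64 >> 32));

  /* ... equivalent to the old memcpy of a temporary */
  uint32_t tmp = htonl ((uint32_t) (seq64 >> 32));
  uint8_t ref[16] = { 0 };
  memcpy (ref + 3, &tmp, sizeof (tmp));

  printf ("same bytes: %s\n", memcmp (buf, ref, 16) == 0 ? "yes" : "no");
  return 0;
}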
*vm, ipsec_per_thread_data_t *ptd, op->chunk_index = vec_len (ptd->chunks); op->digest = vlib_buffer_get_tail (lb) - icv_sz; - esp_encrypt_chain_integ (vm, ptd, sa0, b[0], lb, icv_sz, + esp_encrypt_chain_integ (vm, ptd, ort, b[0], lb, icv_sz, payload - iv_sz - sizeof (esp_header_t), - payload_len + iv_sz + - sizeof (esp_header_t), op->digest, - &op->n_chunks); + payload_len + iv_sz + sizeof (esp_header_t), + op->digest, &op->n_chunks); } - else if (ipsec_sa_is_set_USE_ESN (sa0)) + else if (ort->use_esn) { u32 tmp = clib_net_to_host_u32 (seq_hi); clib_memcpy_fast (op->digest, &tmp, sizeof (seq_hi)); @@ -494,15 +487,15 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, static_always_inline void esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, vnet_crypto_async_frame_t *async_frame, - ipsec_sa_t *sa, vlib_buffer_t *b, esp_header_t *esp, - u8 *payload, u32 payload_len, u8 iv_sz, u8 icv_sz, - u32 bi, u16 next, u32 hdr_len, u16 async_next, - vlib_buffer_t *lb) + ipsec_sa_outb_rt_t *ort, vlib_buffer_t *b, + esp_header_t *esp, u8 *payload, u32 payload_len, + u8 iv_sz, u8 icv_sz, u32 bi, u16 next, u32 hdr_len, + u16 async_next, vlib_buffer_t *lb) { esp_post_data_t *post = esp_post_data (b); u8 *tag, *iv, *aad = 0; u8 flag = 0; - const u32 key_index = sa->crypto_key_index; + const u32 key_index = ort->cipher_key_index; i16 crypto_start_offset, integ_start_offset; u16 crypto_total_len, integ_total_len; @@ -514,19 +507,19 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, tag = payload + crypto_total_len; /* generate the IV in front of the payload */ - void *pkt_iv = esp_generate_iv (sa, payload, iv_sz); + void *pkt_iv = esp_generate_iv (ort, payload, iv_sz); - if (ipsec_sa_is_set_IS_CTR (sa)) + if (ort->is_ctr) { /* construct nonce in a scratch space in front of the IP header */ esp_ctr_nonce_t *nonce = (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce)); - if (ipsec_sa_is_set_IS_AEAD (sa)) + if (ort->is_aead) { /* constuct aad in a scratch space in front of the nonce */ aad = (u8 *) nonce - sizeof (esp_aead_t); - esp_aad_fill (aad, esp, sa, sa->seq_hi); - if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa))) + esp_aad_fill (aad, esp, ort->use_esn, ort->seq64 >> 32); + if (PREDICT_FALSE (ort->is_null_gmac)) { /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */ crypto_start_offset -= iv_sz; @@ -538,7 +531,7 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, nonce->ctr = clib_host_to_net_u32 (1); } - nonce->salt = sa->salt; + nonce->salt = ort->salt; nonce->iv = *(u64 *) pkt_iv; iv = (u8 *) nonce; } @@ -558,11 +551,11 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; tag = vlib_buffer_get_tail (lb) - icv_sz; crypto_total_len = esp_encrypt_chain_crypto ( - vm, ptd, sa, b, lb, icv_sz, b->data + crypto_start_offset, + vm, ptd, b, lb, icv_sz, b->data + crypto_start_offset, crypto_total_len + icv_sz, 0); } - if (sa->integ_op_id) + if (ort->integ_op_id) { integ_start_offset -= iv_sz + sizeof (esp_header_t); integ_total_len += iv_sz + sizeof (esp_header_t); @@ -570,15 +563,14 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, if (b != lb) { integ_total_len = esp_encrypt_chain_integ ( - vm, ptd, sa, b, lb, icv_sz, + vm, ptd, ort, b, lb, icv_sz, payload - iv_sz - sizeof (esp_header_t), payload_len + iv_sz + sizeof (esp_header_t), tag, 0); } - else if (ipsec_sa_is_set_USE_ESN (sa)) + else if (ort->use_esn) { - u32 
seq_hi = clib_net_to_host_u32 (sa->seq_hi); - clib_memcpy_fast (tag, &seq_hi, sizeof (seq_hi)); - integ_total_len += sizeof (seq_hi); + *(u32u *) tag = clib_net_to_host_u32 (ort->seq64 >> 32); + integ_total_len += sizeof (u32); } } @@ -615,18 +607,17 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *from = vlib_frame_vector_args (frame); u32 n_left = frame->n_vectors; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u16 buffer_data_size = vlib_buffer_get_default_data_size (vm); u32 current_sa_index = ~0, current_sa_packets = 0; u32 current_sa_bytes = 0, spi = 0; u8 esp_align = 4, iv_sz = 0, icv_sz = 0; - ipsec_sa_t *sa0 = 0; - u8 sa_drop_no_crypto = 0; + ipsec_sa_outb_rt_t *ort = 0; vlib_buffer_t *lb; vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops; vnet_crypto_op_t **integ_ops = &ptd->integ_ops; vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_N_OP_IDS]; - int is_async = im->async_mode; + int is_async = 0; vnet_crypto_op_id_t async_op = ~0; u16 drop_next = (lt == VNET_LINK_IP6 ? ESP_ENCRYPT_NEXT_DROP6 : @@ -708,27 +699,20 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, current_sa_packets, current_sa_bytes); current_sa_packets = current_sa_bytes = 0; - sa0 = ipsec_sa_get (sa_index0); + ort = ipsec_sa_get_outb_rt_by_index (sa_index0); current_sa_index = sa_index0; - sa_drop_no_crypto = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE && - sa0->integ_alg == IPSEC_INTEG_ALG_NONE) && - !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0)); - vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index, current_sa_index); - /* fetch the second cacheline ASAP */ - clib_prefetch_load (sa0->cacheline1); - - spi = clib_net_to_host_u32 (sa0->spi); - esp_align = sa0->esp_block_align; - icv_sz = sa0->integ_icv_size; - iv_sz = sa0->crypto_iv_size; - is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0); + spi = ort->spi_be; + icv_sz = ort->integ_icv_size; + esp_align = ort->esp_block_align; + iv_sz = ort->cipher_iv_size; + is_async = ort->is_async; } - if (PREDICT_FALSE (sa_drop_no_crypto != 0)) + if (PREDICT_FALSE (ort->drop_no_crypto != 0)) { err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION; esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop, @@ -736,18 +720,18 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, goto trace; } - if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) + if (PREDICT_FALSE ((u16) ~0 == ort->thread_index)) { /* this is the first packet to use this SA, claim the SA * for this thread. 
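Editor's note: the encrypt loop now tests a single precomputed ort->drop_no_crypto bit instead of re-deriving "no cipher, no integrity, and not explicitly allowed" per packet. A sketch of computing that verdict once when the SA runtime is built; the struct and enum names are invented for the example, only the logic mirrors the diff.

#include <stdbool.h>
#include <stdio.h>

enum { CRYPTO_ALG_NONE = 0, INTEG_ALG_NONE = 0 };

typedef struct
{
  int crypto_alg;
  int integ_alg;
  bool no_algo_no_drop; /* operator explicitly allowed a NULL/NULL SA */
  bool drop_no_crypto;  /* precomputed verdict used in the data path */
} sa_cfg_t;

static void
sa_finalize (sa_cfg_t *sa)
{
  sa->drop_no_crypto = sa->crypto_alg == CRYPTO_ALG_NONE &&
                       sa->integ_alg == INTEG_ALG_NONE &&
                       !sa->no_algo_no_drop;
}

int
main (void)
{
  sa_cfg_t sa = { .crypto_alg = CRYPTO_ALG_NONE, .integ_alg = INTEG_ALG_NONE };
  sa_finalize (&sa);
  printf ("drop_no_crypto = %d\n", sa.drop_no_crypto);
  return 0;
}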
this could happen simultaneously on * another thread */ - clib_atomic_cmp_and_swap (&sa0->thread_index, ~0, + clib_atomic_cmp_and_swap (&ort->thread_index, ~0, ipsec_sa_assign_thread (thread_index)); } - if (PREDICT_FALSE (thread_index != sa0->thread_index)) + if (PREDICT_FALSE (thread_index != ort->thread_index)) { - vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index; + vnet_buffer (b[0])->ipsec.thread_index = ort->thread_index; err = ESP_ENCRYPT_ERROR_HANDOFF; esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop, noop_nexts, handoff_next, @@ -772,7 +756,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, lb = vlib_get_buffer (vm, lb->next_buffer); } - if (PREDICT_FALSE (esp_seq_advance (sa0))) + if (PREDICT_FALSE (esp_seq_advance (ort))) { err = ESP_ENCRYPT_ERROR_SEQ_CYCLED; esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop, @@ -783,7 +767,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, /* space for IV */ hdr_len = iv_sz; - if (ipsec_sa_is_set_IS_TUNNEL (sa0)) + if (ort->is_tunnel) { payload = vlib_buffer_get_current (b[0]); next_hdr_ptr = esp_add_footer_and_icv ( @@ -806,40 +790,39 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, esp = (esp_header_t *) (payload - hdr_len); /* optional UDP header */ - if (ipsec_sa_is_set_UDP_ENCAP (sa0)) + if (ort->udp_encap) { hdr_len += sizeof (udp_header_t); - esp_fill_udp_hdr (sa0, (udp_header_t *) (payload - hdr_len), + esp_fill_udp_hdr (ort, (udp_header_t *) (payload - hdr_len), payload_len_total + hdr_len); } /* IP header */ - if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa0)) + if (ort->is_tunnel_v6) { ip6_header_t *ip6; u16 len = sizeof (ip6_header_t); hdr_len += len; ip6 = (ip6_header_t *) (payload - hdr_len); - clib_memcpy_fast (ip6, &sa0->ip6_hdr, sizeof (ip6_header_t)); + clib_memcpy_fast (ip6, &ort->ip6_hdr, sizeof (ip6_header_t)); if (VNET_LINK_IP6 == lt) { *next_hdr_ptr = IP_PROTOCOL_IPV6; - tunnel_encap_fixup_6o6 (sa0->tunnel_flags, - (const ip6_header_t *) payload, - ip6); + tunnel_encap_fixup_6o6 (ort->tunnel_flags, + (const ip6_header_t *) payload, ip6); } else if (VNET_LINK_IP4 == lt) { *next_hdr_ptr = IP_PROTOCOL_IP_IN_IP; - tunnel_encap_fixup_4o6 (sa0->tunnel_flags, b[0], + tunnel_encap_fixup_4o6 (ort->tunnel_flags, b[0], (const ip4_header_t *) payload, ip6); } else if (VNET_LINK_MPLS == lt) { *next_hdr_ptr = IP_PROTOCOL_MPLS_IN_IP; tunnel_encap_fixup_mplso6 ( - sa0->tunnel_flags, b[0], + ort->tunnel_flags, b[0], (const mpls_unicast_header_t *) payload, ip6); } else @@ -855,27 +838,25 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u16 len = sizeof (ip4_header_t); hdr_len += len; ip4 = (ip4_header_t *) (payload - hdr_len); - clib_memcpy_fast (ip4, &sa0->ip4_hdr, sizeof (ip4_header_t)); + clib_memcpy_fast (ip4, &ort->ip4_hdr, sizeof (ip4_header_t)); if (VNET_LINK_IP6 == lt) { *next_hdr_ptr = IP_PROTOCOL_IPV6; - tunnel_encap_fixup_6o4_w_chksum (sa0->tunnel_flags, - (const ip6_header_t *) - payload, ip4); + tunnel_encap_fixup_6o4_w_chksum ( + ort->tunnel_flags, (const ip6_header_t *) payload, ip4); } else if (VNET_LINK_IP4 == lt) { *next_hdr_ptr = IP_PROTOCOL_IP_IN_IP; - tunnel_encap_fixup_4o4_w_chksum (sa0->tunnel_flags, - (const ip4_header_t *) - payload, ip4); + tunnel_encap_fixup_4o4_w_chksum ( + ort->tunnel_flags, (const ip4_header_t *) payload, ip4); } else if (VNET_LINK_MPLS == lt) { *next_hdr_ptr = IP_PROTOCOL_MPLS_IN_IP; tunnel_encap_fixup_mplso4_w_chksum ( - sa0->tunnel_flags, (const mpls_unicast_header_t *) payload, + 
ort->tunnel_flags, (const mpls_unicast_header_t *) payload, ip4); } else @@ -885,8 +866,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, esp_update_ip4_hdr (ip4, len, /* is_transport */ 0, 0); } - if (ipsec_sa_is_set_UDP_ENCAP (sa0) && - ipsec_sa_is_set_IS_TUNNEL_V6 (sa0)) + if (ort->udp_encap && ort->is_tunnel_v6) { i16 l3_off = b[0]->current_data - hdr_len; i16 l4_off = l3_off + sizeof (ip6_header_t); @@ -894,7 +874,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, set_ip6_udp_cksum_offload (b[0], l3_off, l4_off); } - dpo = &sa0->dpo; + dpo = &ort->dpo; if (!is_tun) { sync_next[0] = dpo->dpoi_next_node; @@ -953,7 +933,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, esp = (esp_header_t *) (payload - hdr_len); /* optional UDP header */ - if (ipsec_sa_is_set_UDP_ENCAP (sa0)) + if (ort->udp_encap) { hdr_len += sizeof (udp_header_t); udp = (udp_header_t *) (payload - hdr_len); @@ -1010,7 +990,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (udp) { udp_len = len - ip_len; - esp_fill_udp_hdr (sa0, udp, udp_len); + esp_fill_udp_hdr (ort, udp, udp_len); } if (udp && (VNET_LINK_IP6 == lt)) @@ -1036,11 +1016,11 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } esp->spi = spi; - esp->seq = clib_net_to_host_u32 (sa0->seq); + esp->seq = clib_net_to_host_u32 (ort->seq64); if (is_async) { - async_op = sa0->crypto_async_enc_op_id; + async_op = ort->async_op_id; /* get a frame for this op if we don't yet have one or it's full */ @@ -1063,15 +1043,15 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vec_add1 (ptd->async_frames, async_frames[async_op]); } - esp_prepare_async_frame (vm, ptd, async_frames[async_op], sa0, b[0], + esp_prepare_async_frame (vm, ptd, async_frames[async_op], ort, b[0], esp, payload, payload_len, iv_sz, icv_sz, from[b - bufs], sync_next[0], hdr_len, async_next_node, lb); } else - esp_prepare_sync_op (vm, ptd, crypto_ops, integ_ops, sa0, sa0->seq_hi, - payload, payload_len, iv_sz, icv_sz, n_sync, b, - lb, hdr_len, esp); + esp_prepare_sync_op (vm, ptd, crypto_ops, integ_ops, ort, + ort->seq64 >> 32, payload, payload_len, iv_sz, + icv_sz, n_sync, b, lb, hdr_len, esp); vlib_buffer_advance (b[0], 0LL - hdr_len); @@ -1087,13 +1067,13 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, clib_memset_u8 (tr, 0xff, sizeof (*tr)); else { + ipsec_sa_t *sa = ipsec_sa_get (sa_index0); tr->sa_index = sa_index0; - tr->spi = sa0->spi; - tr->seq = sa0->seq; - tr->sa_seq_hi = sa0->seq_hi; - tr->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa0); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; + tr->spi = sa->spi; + tr->seq = ort->seq64; + tr->udp_encap = ort->udp_encap; + tr->crypto_alg = sa->crypto_alg; + tr->integ_alg = sa->integ_alg; } } diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c index a1d4d56768c..b95b65dfeea 100644 --- a/src/vnet/ipsec/ipsec.c +++ b/src/vnet/ipsec/ipsec.c @@ -312,9 +312,9 @@ clib_error_t * ipsec_rsc_in_use (ipsec_main_t * im) { /* return an error is crypto resource are in use */ - if (pool_elts (ipsec_sa_pool) > 0) + if (pool_elts (im->sa_pool) > 0) return clib_error_return (0, "%d SA entries configured", - pool_elts (ipsec_sa_pool)); + pool_elts (im->sa_pool)); if (ipsec_itf_count () > 0) return clib_error_return (0, "%d IPSec interface configured", ipsec_itf_count ()); @@ -384,7 +384,7 @@ ipsec_set_async_mode (u32 is_enabled) im->async_mode = is_enabled; /* change SA crypto op data */ - pool_foreach (sa, 
ipsec_sa_pool) + pool_foreach (sa, im->sa_pool) ipsec_sa_set_async_mode (sa, is_enabled); } diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 3409d0e4fb9..bdc4ee4d455 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -118,6 +118,9 @@ typedef struct const u8 iv_size; const u8 block_align; const u8 icv_size; + const u8 is_aead : 1; + const u8 is_ctr : 1; + const u8 is_null_gmac : 1; } ipsec_main_crypto_alg_t; typedef struct @@ -263,6 +266,10 @@ typedef struct u8 async_mode; u16 msg_id_base; + + ipsec_sa_t *sa_pool; + ipsec_sa_inb_rt_t **inb_sa_runtimes; + ipsec_sa_outb_rt_t **outb_sa_runtimes; } ipsec_main_t; typedef enum ipsec_format_flags_t_ @@ -354,8 +361,9 @@ ipsec_spinlock_unlock (i32 *lock) */ always_inline void ipsec_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, - u32 thread_index, u32 err, u32 ipsec_sa_err, u16 index, - u16 *nexts, u16 drop_next, u32 sa_index) + clib_thread_index_t thread_index, u32 err, + u32 ipsec_sa_err, u16 index, u16 *nexts, u16 drop_next, + u32 sa_index) { nexts[index] = drop_next; b->error = node->errors[err]; @@ -396,6 +404,8 @@ extern clib_error_t *ipsec_register_next_header (vlib_main_t *vm, u8 next_header, const char *next_node); +#include <vnet/ipsec/ipsec_funcs.h> + #endif /* __IPSEC_H__ */ /* diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 21216b1a614..e5b38992de2 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -40,6 +40,26 @@ #define REPLY_MSG_ID_BASE ipsec_main.msg_id_base #include <vlibapi/api_helper_macros.h> +static inline u64 +ipsec_sa_get_inb_seq (ipsec_sa_t *sa) +{ + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); + u64 seq = irt->seq64; + if (!ipsec_sa_is_set_USE_ESN (sa)) + seq = (u32) seq; + return seq; +} + +static inline u64 +ipsec_sa_get_outb_seq (ipsec_sa_t *sa) +{ + ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa); + u64 seq; + + seq = ort->seq64; + return seq; +} + static void vl_api_ipsec_spd_add_del_t_handler (vl_api_ipsec_spd_add_del_t * mp) { @@ -950,6 +970,8 @@ ipsec_sa_dump_match_sa (index_t itpi, void *arg) static walk_rc_t send_ipsec_sa_details (ipsec_sa_t * sa, void *arg) { + ipsec_main_t *im = &ipsec_main; + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); ipsec_dump_walk_ctx_t *ctx = arg; vl_api_ipsec_sa_details_t *mp; @@ -975,7 +997,7 @@ send_ipsec_sa_details (ipsec_sa_t * sa, void *arg) if (ipsec_sa_is_set_IS_PROTECT (sa)) { ipsec_sa_dump_match_ctx_t ctx = { - .sai = sa - ipsec_sa_pool, + .sai = sa - im->sa_pool, .sw_if_index = ~0, }; ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx); @@ -992,22 +1014,16 @@ send_ipsec_sa_details (ipsec_sa_t * sa, void *arg) } if (ipsec_sa_is_set_UDP_ENCAP (sa)) { - mp->entry.udp_src_port = sa->udp_hdr.src_port; - mp->entry.udp_dst_port = sa->udp_hdr.dst_port; + mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port); + mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port); } - mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); - mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq)); - if (ipsec_sa_is_set_USE_ESN (sa)) - { - mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - } - if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) - { - mp->replay_window = - clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); - } + mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa)); + mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa)); 
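The API hunks above report sequence numbers out of the new per-SA runtime state, where a single 64-bit counter (seq64) replaces the old seq/seq_hi pair: the low 32 bits are the on-wire ESP sequence number and the high 32 bits are the ESN high word, and the dump helpers mask off the high word when ESN is not enabled. A minimal standalone sketch of that convention follows (plain C, not VPP code; the helper names here are illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Low 32 bits of seq64: the value written into the ESP header
 * (esp->seq) after byte swapping. */
static uint32_t
esp_seq_lo (uint64_t seq64)
{
  return (uint32_t) seq64;
}

/* High 32 bits of seq64: the ESN "seq-hi" word, e.g. what the encrypt
 * path above passes as ort->seq64 >> 32. */
static uint32_t
esp_seq_hi (uint64_t seq64)
{
  return (uint32_t) (seq64 >> 32);
}

/* What an SA dump would report: the full 64-bit counter with ESN,
 * only the low word without it (mirrors the ipsec_sa_get_inb_seq
 * helper in the diff). */
static uint64_t
api_reported_seq (uint64_t seq64, int use_esn)
{
  return use_esn ? seq64 : (uint64_t) (uint32_t) seq64;
}

int
main (void)
{
  uint64_t seq64 = 0x0000000100000005ULL; /* seq-hi 1, seq-lo 5 */
  printf ("lo %u hi %u esn %llu no-esn %llu\n", esp_seq_lo (seq64),
          esp_seq_hi (seq64), (unsigned long long) api_reported_seq (seq64, 1),
          (unsigned long long) api_reported_seq (seq64, 0));
  return 0;
}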
+ + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt) + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt)); mp->stat_index = clib_host_to_net_u32 (sa->stat_index); @@ -1036,6 +1052,8 @@ vl_api_ipsec_sa_dump_t_handler (vl_api_ipsec_sa_dump_t * mp) static walk_rc_t send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg) { + ipsec_main_t *im = &ipsec_main; + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); ipsec_dump_walk_ctx_t *ctx = arg; vl_api_ipsec_sa_v2_details_t *mp; @@ -1061,7 +1079,7 @@ send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg) if (ipsec_sa_is_set_IS_PROTECT (sa)) { ipsec_sa_dump_match_ctx_t ctx = { - .sai = sa - ipsec_sa_pool, + .sai = sa - im->sa_pool, .sw_if_index = ~0, }; ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx); @@ -1078,26 +1096,20 @@ send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg) } if (ipsec_sa_is_set_UDP_ENCAP (sa)) { - mp->entry.udp_src_port = sa->udp_hdr.src_port; - mp->entry.udp_dst_port = sa->udp_hdr.dst_port; + mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port); + mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port); } mp->entry.tunnel_flags = tunnel_encap_decap_flags_encode (sa->tunnel.t_encap_decap_flags); mp->entry.dscp = ip_dscp_encode (sa->tunnel.t_dscp); - mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); - mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq)); - if (ipsec_sa_is_set_USE_ESN (sa)) - { - mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - } - if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) - { - mp->replay_window = - clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); - } + mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa)); + mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa)); + + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt) + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt)); mp->stat_index = clib_host_to_net_u32 (sa->stat_index); @@ -1126,6 +1138,8 @@ vl_api_ipsec_sa_v2_dump_t_handler (vl_api_ipsec_sa_v2_dump_t *mp) static walk_rc_t send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg) { + ipsec_main_t *im = &ipsec_main; + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); ipsec_dump_walk_ctx_t *ctx = arg; vl_api_ipsec_sa_v3_details_t *mp; @@ -1150,7 +1164,7 @@ send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg) if (ipsec_sa_is_set_IS_PROTECT (sa)) { ipsec_sa_dump_match_ctx_t ctx = { - .sai = sa - ipsec_sa_pool, + .sai = sa - im->sa_pool, .sw_if_index = ~0, }; ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx); @@ -1165,22 +1179,16 @@ send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg) if (ipsec_sa_is_set_UDP_ENCAP (sa)) { - mp->entry.udp_src_port = sa->udp_hdr.src_port; - mp->entry.udp_dst_port = sa->udp_hdr.dst_port; + mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port); + mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port); } - mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); - mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq)); - if (ipsec_sa_is_set_USE_ESN (sa)) - { - mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - } - if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) - { - mp->replay_window = - clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); - } + mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa)); + 
mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa)); + + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt) + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt)); mp->stat_index = clib_host_to_net_u32 (sa->stat_index); @@ -1209,8 +1217,12 @@ vl_api_ipsec_sa_v3_dump_t_handler (vl_api_ipsec_sa_v3_dump_t *mp) static walk_rc_t send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg) { + ipsec_main_t *im = &ipsec_main; + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); + ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa); ipsec_dump_walk_ctx_t *ctx = arg; vl_api_ipsec_sa_v4_details_t *mp; + clib_thread_index_t thread_index = 0; mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); @@ -1233,7 +1245,7 @@ send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg) if (ipsec_sa_is_set_IS_PROTECT (sa)) { ipsec_sa_dump_match_ctx_t ctx = { - .sai = sa - ipsec_sa_pool, + .sai = sa - im->sa_pool, .sw_if_index = ~0, }; ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx); @@ -1248,24 +1260,23 @@ send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg) if (ipsec_sa_is_set_UDP_ENCAP (sa)) { - mp->entry.udp_src_port = sa->udp_hdr.src_port; - mp->entry.udp_dst_port = sa->udp_hdr.dst_port; + mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port); + mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port); } - mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); - mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq)); - if (ipsec_sa_is_set_USE_ESN (sa)) - { - mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - } - if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) - { - mp->replay_window = - clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); - } + mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa)); + mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa)); + + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt) + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt)); + + if (ort) + thread_index = ort->thread_index; + else if (irt) + thread_index = irt->thread_index; - mp->thread_index = clib_host_to_net_u32 (sa->thread_index); + mp->thread_index = clib_host_to_net_u32 (thread_index); mp->stat_index = clib_host_to_net_u32 (sa->stat_index); vl_api_send_msg (ctx->reg, (u8 *) mp); @@ -1293,8 +1304,12 @@ vl_api_ipsec_sa_v4_dump_t_handler (vl_api_ipsec_sa_v4_dump_t *mp) static walk_rc_t send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg) { + ipsec_main_t *im = &ipsec_main; + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); + ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa); ipsec_dump_walk_ctx_t *ctx = arg; vl_api_ipsec_sa_v5_details_t *mp; + clib_thread_index_t thread_index = 0; mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); @@ -1317,7 +1332,7 @@ send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg) if (ipsec_sa_is_set_IS_PROTECT (sa)) { ipsec_sa_dump_match_ctx_t ctx = { - .sai = sa - ipsec_sa_pool, + .sai = sa - im->sa_pool, .sw_if_index = ~0, }; ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx); @@ -1332,27 +1347,27 @@ send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg) if (ipsec_sa_is_set_UDP_ENCAP (sa)) { - mp->entry.udp_src_port = sa->udp_hdr.src_port; - mp->entry.udp_dst_port = sa->udp_hdr.dst_port; + mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port); + mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port); } 
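In the dump handlers above, the anti-replay window is now read from the inbound runtime (irt), whose replay_window is a flexible array of machine words sized to a power-of-two anti_replay_window_size and indexed by the low bits of seq64. The sketch below is a simplified stand-in for ipsec_sa_anti_replay_get_64b_window, assuming the window size is a power of two and at least 64 bits; it is plain C, not VPP code, and the names are illustrative:

#include <stdint.h>
#include <stdio.h>

/* one bit per sequence number, stored in a ring of 64-bit words */
static int
window_get_bit (const uint64_t *bmp, uint32_t i)
{
  return (bmp[i >> 6] >> (i & 63)) & 1;
}

/* Return the 64 most recent window bits; bit 63 is the top of the
 * window (Tl = seq64), bit 0 is 63 sequence numbers behind it. */
static uint64_t
replay_window_top_64 (const uint64_t *bmp, uint32_t window_size,
                      uint64_t seq64)
{
  uint32_t tl = (uint32_t) seq64 & (window_size - 1); /* ring slot of Tl */
  uint64_t w = 0;

  for (int k = 0; k < 64; k++)
    {
      /* walk backwards from Tl, wrapping around the ring */
      uint32_t i = (tl + window_size - k) & (window_size - 1);
      w |= (uint64_t) window_get_bit (bmp, i) << (63 - k);
    }
  return w;
}

int
main (void)
{
  uint64_t win[4] = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }; /* 256-bit window, all seen */
  printf ("top64 = 0x%016llx\n",
          (unsigned long long) replay_window_top_64 (win, 256, 1234));
  return 0;
}

Bit 63 of the result corresponds to the top of the window (Tl) and bit 0 to the oldest of the last 64 slots, which is the orientation the "window: Bl <- ... Tl" line printed by format_ipsec_sa assumes.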
- mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); - mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq)); - if (ipsec_sa_is_set_USE_ESN (sa)) - { - mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); - } - if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) + mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa)); + mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa)); + + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt) { mp->replay_window = - clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); - + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt)); mp->entry.anti_replay_window_size = - clib_host_to_net_u32 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa)); + clib_host_to_net_u32 (irt->anti_replay_window_size); } - mp->thread_index = clib_host_to_net_u32 (sa->thread_index); + if (ort) + thread_index = ort->thread_index; + else if (irt) + thread_index = irt->thread_index; + + mp->thread_index = clib_host_to_net_u32 (thread_index); mp->stat_index = clib_host_to_net_u32 (sa->stat_index); vl_api_send_msg (ctx->reg, (u8 *) mp); @@ -1427,11 +1442,11 @@ vl_api_ipsec_select_backend_t_handler (vl_api_ipsec_select_backend_t * mp) vl_api_ipsec_select_backend_reply_t *rmp; ipsec_protocol_t protocol; int rv = 0; - if (pool_elts (ipsec_sa_pool) > 0) - { - rv = VNET_API_ERROR_INSTANCE_IN_USE; - goto done; - } + if (pool_elts (im->sa_pool) > 0) + { + rv = VNET_API_ERROR_INSTANCE_IN_USE; + goto done; + } rv = ipsec_proto_decode (mp->protocol, &protocol); diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c index 07d9df8f204..77a29d263eb 100644 --- a/src/vnet/ipsec/ipsec_cli.c +++ b/src/vnet/ipsec/ipsec_cli.c @@ -473,7 +473,7 @@ ipsec_sa_show_all (vlib_main_t * vm, ipsec_main_t * im, u8 detail) { u32 sai; - pool_foreach_index (sai, ipsec_sa_pool) + pool_foreach_index (sai, im->sa_pool) { vlib_cli_output (vm, "%U", format_ipsec_sa, sai, (detail ? 
IPSEC_FORMAT_DETAIL : IPSEC_FORMAT_BRIEF)); @@ -583,6 +583,7 @@ static clib_error_t * clear_ipsec_sa_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { + ipsec_main_t *im = &ipsec_main; u32 sai = ~0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -595,14 +596,14 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm, if (~0 == sai) { - pool_foreach_index (sai, ipsec_sa_pool) + pool_foreach_index (sai, im->sa_pool) { ipsec_sa_clear (sai); } } else { - if (pool_is_free_index (ipsec_sa_pool, sai)) + if (pool_is_free_index (im->sa_pool, sai)) return clib_error_return (0, "unknown SA index: %d", sai); else ipsec_sa_clear (sai); diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c index e421a0d96b4..e27892185e7 100644 --- a/src/vnet/ipsec/ipsec_format.c +++ b/src/vnet/ipsec/ipsec_format.c @@ -441,19 +441,24 @@ format_ipsec_sa_flags (u8 * s, va_list * args) u8 * format_ipsec_sa (u8 * s, va_list * args) { + ipsec_main_t *im = &ipsec_main; u32 sai = va_arg (*args, u32); ipsec_format_flags_t flags = va_arg (*args, ipsec_format_flags_t); vlib_counter_t counts; counter_t errors; ipsec_sa_t *sa; + ipsec_sa_inb_rt_t *irt; + ipsec_sa_outb_rt_t *ort; - if (pool_is_free_index (ipsec_sa_pool, sai)) + if (pool_is_free_index (im->sa_pool, sai)) { s = format (s, "No such SA index: %d", sai); goto done; } sa = ipsec_sa_get (sai); + irt = ipsec_sa_get_inb_rt (sa); + ort = ipsec_sa_get_outb_rt (sa); s = format (s, "[%d] sa %u (0x%x) spi %u (0x%08x) protocol:%s flags:[%U]", sai, sa->id, sa->id, sa->spi, sa->spi, @@ -464,12 +469,20 @@ format_ipsec_sa (u8 * s, va_list * args) s = format (s, "\n locks %d", sa->node.fn_locks); s = format (s, "\n salt 0x%x", clib_net_to_host_u32 (sa->salt)); - s = format (s, "\n thread-index:%d", sa->thread_index); - s = format (s, "\n seq %u seq-hi %u", sa->seq, sa->seq_hi); - s = format (s, "\n window-size: %llu", - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa)); - s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window, - ipsec_sa_anti_replay_get_64b_window (sa)); + if (irt) + s = format (s, "\n inbound thread-index:%d", irt->thread_index); + if (ort) + s = format (s, "\n outbound thread-index:%d", ort->thread_index); + if (irt) + s = format (s, "\n inbound seq %lu", irt->seq64); + if (ort) + s = format (s, "\n outbound seq %lu", ort->seq64); + if (irt) + { + s = format (s, "\n window-size: %llu", irt->anti_replay_window_size); + s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window, + ipsec_sa_anti_replay_get_64b_window (irt)); + } s = format (s, "\n crypto alg %U", format_ipsec_crypto_alg, sa->crypto_alg); if (sa->crypto_alg && (flags & IPSEC_FORMAT_INSECURE)) @@ -482,9 +495,8 @@ format_ipsec_sa (u8 * s, va_list * args) s = format (s, " key %U", format_ipsec_key, &sa->integ_key); else s = format (s, " key [redacted]"); - s = format (s, "\n UDP:[src:%d dst:%d]", - clib_host_to_net_u16 (sa->udp_hdr.src_port), - clib_host_to_net_u16 (sa->udp_hdr.dst_port)); + s = + format (s, "\n UDP:[src:%d dst:%d]", sa->udp_src_port, sa->udp_dst_port); vlib_get_combined_counter (&ipsec_sa_counters, sai, &counts); s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld]", counts.packets, diff --git a/src/vnet/ipsec/ipsec_funcs.h b/src/vnet/ipsec/ipsec_funcs.h new file mode 100644 index 00000000000..29788b3d765 --- /dev/null +++ b/src/vnet/ipsec/ipsec_funcs.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef __IPSEC_FUNCS_H__ +#define __IPSEC_FUNCS_H__ + +#include <vlib/vlib.h> +#include <vnet/ipsec/ipsec.h> + +always_inline ipsec_sa_t * +ipsec_sa_get (u32 sa_index) +{ + return (pool_elt_at_index (ipsec_main.sa_pool, sa_index)); +} + +static_always_inline ipsec_sa_outb_rt_t * +ipsec_sa_get_outb_rt_by_index (u32 sa_index) +{ + return ipsec_main.outb_sa_runtimes[sa_index]; +} + +static_always_inline ipsec_sa_inb_rt_t * +ipsec_sa_get_inb_rt_by_index (u32 sa_index) +{ + return ipsec_main.inb_sa_runtimes[sa_index]; +} + +static_always_inline ipsec_sa_outb_rt_t * +ipsec_sa_get_outb_rt (ipsec_sa_t *sa) +{ + return ipsec_sa_get_outb_rt_by_index (sa - ipsec_main.sa_pool); +} + +static_always_inline ipsec_sa_inb_rt_t * +ipsec_sa_get_inb_rt (ipsec_sa_t *sa) +{ + return ipsec_sa_get_inb_rt_by_index (sa - ipsec_main.sa_pool); +} + +#endif /* __IPSEC_FUNCS_H__ */ diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c index 6a25f6c583c..d649c705bb0 100644 --- a/src/vnet/ipsec/ipsec_input.c +++ b/src/vnet/ipsec/ipsec_input.c @@ -211,6 +211,39 @@ ipsec_input_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, return 0; } +always_inline uword +ip6_addr_match_range (ip6_address_t *a, ip6_address_t *la, ip6_address_t *ua) +{ + if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) && + (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0)) + return 1; + + return 0; +} + +always_inline ipsec_policy_t * +ipsec6_input_policy_match (ipsec_spd_t *spd, ip6_address_t *sa, + ip6_address_t *da, + ipsec_spd_policy_type_t policy_type) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_policy_t *p; + u32 *i; + + vec_foreach (i, spd->policies[policy_type]) + { + p = pool_elt_at_index (im->policies, *i); + + if (!ip6_addr_match_range (sa, &p->raddr.start.ip6, &p->raddr.stop.ip6)) + continue; + + if (!ip6_addr_match_range (da, &p->laddr.start.ip6, &p->laddr.stop.ip6)) + continue; + return p; + } + return 0; +} + always_inline ipsec_policy_t * ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi) { @@ -263,16 +296,6 @@ ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi) return 0; } -always_inline uword -ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la, - ip6_address_t * ua) -{ - if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) && - (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0)) - return 1; - return 0; -} - always_inline void ipsec_collect_ah_trace (vlib_buffer_t **b, vlib_node_runtime_t *node, vlib_main_t *vm, ip4_header_t *ip0, ah_header_t *ah0, @@ -295,10 +318,11 @@ ipsec_collect_ah_trace (vlib_buffer_t **b, vlib_node_runtime_t *node, always_inline void ipsec_ah_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, - ah_header_t *ah0, u32 thread_index, ipsec_spd_t *spd0, - vlib_buffer_t **b, vlib_node_runtime_t *node, - u64 *ipsec_bypassed, u64 *ipsec_dropped, - u64 *ipsec_matched, u64 *ipsec_unprocessed, u16 *next) + ah_header_t *ah0, clib_thread_index_t thread_index, + ipsec_spd_t *spd0, vlib_buffer_t **b, + vlib_node_runtime_t *node, u64 *ipsec_bypassed, + u64 *ipsec_dropped, u64 *ipsec_matched, + u64 *ipsec_unprocessed, u16 *next) { ipsec_policy_t *p0 = NULL; @@ -429,7 +453,7 @@ ipsec_ah_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, always_inline void ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, udp_header_t *udp0, esp_header_t *esp0, - u32 thread_index, ipsec_spd_t *spd0, + clib_thread_index_t thread_index, ipsec_spd_t *spd0, vlib_buffer_t 
**b, vlib_node_runtime_t *node, u64 *ipsec_bypassed, u64 *ipsec_dropped, u64 *ipsec_matched, u64 *ipsec_unprocessed, @@ -514,7 +538,7 @@ udp_or_esp: has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) - (clib_address_t) ip0); - if (PREDICT_TRUE ((p0 != NULL) & (has_space0))) + if (PREDICT_TRUE ((p0 != NULL) && (has_space0))) { *ipsec_matched += 1; @@ -740,8 +764,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, spd0, b, node, &ipsec_bypassed, &ipsec_dropped, &ipsec_matched, &ipsec_unprocessed, next); - if (ipsec_bypassed > 0) - goto ipsec_bypassed; } } else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) @@ -751,8 +773,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, spd0, b, node, &ipsec_bypassed, &ipsec_dropped, &ipsec_matched, &ipsec_unprocessed, next); - if (ipsec_bypassed > 0) - goto ipsec_bypassed; } else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH) { @@ -764,7 +784,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, } else { - ipsec_bypassed: ipsec_unprocessed += 1; } n_left_from -= 1; @@ -813,6 +832,142 @@ VLIB_REGISTER_NODE (ipsec4_input_node) = { extern vlib_node_registration_t ipsec6_input_node; +always_inline void +ipsec6_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, + ip6_header_t *ip0, esp_header_t *esp0, + clib_thread_index_t thread_index, ipsec_spd_t *spd0, + vlib_buffer_t **b, vlib_node_runtime_t *node, + u64 *ipsec_bypassed, u64 *ipsec_dropped, + u64 *ipsec_matched, u64 *ipsec_unprocessed, + u32 *next) + +{ + ipsec_policy_t *p0 = NULL; + u32 pi0 = ~0; + u8 has_space0 = 0; + ipsec_policy_t *policies[1]; + ipsec_fp_5tuple_t tuples[1]; + bool ip_v6 = true; + + if (im->fp_spd_ipv6_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx)) + ipsec_fp_in_5tuple_from_ip6_range ( + &tuples[0], &ip0->src_address, &ip0->dst_address, + clib_net_to_host_u32 (esp0->spi), IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT); + + if (esp0->spi != 0) + { + if (im->fp_spd_ipv6_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx)) + { + ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, + 1); + p0 = policies[0]; + } + else /* linear search if fast path is not enabled */ + { + p0 = ipsec6_input_protect_policy_match ( + spd0, &ip0->src_address, &ip0->dst_address, + clib_net_to_host_u32 (esp0->spi)); + } + has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) - + (clib_address_t) ip0); + + if (PREDICT_TRUE ((p0 != NULL) && (has_space0))) + { + *ipsec_matched += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter ( + &ipsec_spd_policy_counters, thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length)); + + vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index; + next[0] = im->esp6_decrypt_next_index; + vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0)); + goto trace0; + } + } + + if (im->fp_spd_ipv6_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx)) + { + tuples->action = IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS; + ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, 1); + p0 = policies[0]; + } + else + { + p0 = + ipsec6_input_policy_match (spd0, &ip0->src_address, &ip0->dst_address, + IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS); + } + + if (PREDICT_TRUE ((p0 != NULL))) + { + *ipsec_bypassed += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter ( + &ipsec_spd_policy_counters, thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length)); + goto trace0; + } + else + { + p0 = 
NULL; + pi0 = ~0; + } + + if (im->fp_spd_ipv6_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx)) + { + tuples->action = IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD; + ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, 1); + p0 = policies[0]; + } + else + { + p0 = + ipsec6_input_policy_match (spd0, &ip0->src_address, &ip0->dst_address, + IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD); + } + + if (PREDICT_TRUE ((p0 != NULL))) + { + *ipsec_dropped += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter ( + &ipsec_spd_policy_counters, thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length)); + next[0] = IPSEC_INPUT_NEXT_DROP; + goto trace0; + } + else + { + p0 = 0; + pi0 = ~0; + } + + /* Drop by default if no match on PROTECT, BYPASS or DISCARD */ + *ipsec_unprocessed += 1; + next[0] = IPSEC_INPUT_NEXT_DROP; + +trace0: + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr)); + + tr->proto = ip0->protocol; + tr->sa_id = p0 ? p0->sa_id : ~0; + tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0; + tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0; + tr->spd = spd0->id; + tr->policy_index = pi0; + } +} VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -822,9 +977,6 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, ipsec_main_t *im = &ipsec_main; u32 ipsec_unprocessed = 0; u32 ipsec_matched = 0; - ipsec_policy_t *policies[1]; - ipsec_fp_5tuple_t tuples[1]; - bool ip_v6 = true; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -843,12 +995,13 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, u32 bi0, next0, pi0 = ~0; vlib_buffer_t *b0; ip6_header_t *ip0; - esp_header_t *esp0; + esp_header_t *esp0 = NULL; ip4_ipsec_config_t *c0; ipsec_spd_t *spd0; ipsec_policy_t *p0 = 0; - ah_header_t *ah0; u32 header_size = sizeof (ip0[0]); + u64 ipsec_unprocessed = 0, ipsec_matched = 0; + u64 ipsec_dropped = 0, ipsec_bypassed = 0; bi0 = to_next[0] = from[0]; from += 1; @@ -864,113 +1017,76 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, spd0 = pool_elt_at_index (im->spds, c0->spd_index); ip0 = vlib_buffer_get_current (b0); + + if (ip0->protocol == IP_PROTOCOL_UDP) + { + udp_header_t *udp0 = (udp_header_t *) ((u8 *) ip0 + header_size); + + /* RFC5996 Section 2.23: "Port 4500 is reserved for + * UDP-encapsulated ESP and IKE." 
+ * RFC5996 Section 3.1: "IKE messages use UDP ports 500 and/or + 4500" + */ + if ((clib_host_to_net_u16 (500) == udp0->dst_port) || + (clib_host_to_net_u16 (4500) == udp0->dst_port)) + esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t)); + } + else if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP) esp0 = (esp_header_t *) ((u8 *) ip0 + header_size); - ah0 = (ah_header_t *) ((u8 *) ip0 + header_size); - if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) - { -#if 0 - clib_warning - ("packet received from %U to %U spi %u size %u spd_id %u", - format_ip6_address, &ip0->src_address, format_ip6_address, - &ip0->dst_address, clib_net_to_host_u32 (esp0->spi), - clib_net_to_host_u16 (ip0->payload_length) + header_size, - spd0->id); -#endif - if (im->fp_spd_ipv6_in_is_enabled && - PREDICT_TRUE (INDEX_INVALID != - spd0->fp_spd.ip6_in_lookup_hash_idx)) - { - ipsec_fp_in_5tuple_from_ip6_range ( - &tuples[0], &ip0->src_address, &ip0->dst_address, - clib_net_to_host_u32 (esp0->spi), - IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT); - ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, - policies, 1); - p0 = policies[0]; - } - else - p0 = ipsec6_input_protect_policy_match ( - spd0, &ip0->src_address, &ip0->dst_address, - clib_net_to_host_u32 (esp0->spi)); - - if (PREDICT_TRUE (p0 != 0)) - { - ipsec_matched += 1; - - pi0 = p0 - im->policies; - vlib_increment_combined_counter - (&ipsec_spd_policy_counters, - thread_index, pi0, 1, - clib_net_to_host_u16 (ip0->payload_length) + - header_size); - - vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; - next0 = im->esp6_decrypt_next_index; - vlib_buffer_advance (b0, header_size); - /* TODO Add policy matching for bypass and discard policy - * type */ - goto trace0; - } - else - { - pi0 = ~0; - ipsec_unprocessed += 1; - next0 = IPSEC_INPUT_NEXT_DROP; - } - } + if (esp0 != NULL) + { + ipsec6_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, + &b0, node, &ipsec_bypassed, + &ipsec_dropped, &ipsec_matched, + &ipsec_unprocessed, &next0); + } else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH) { - p0 = ipsec6_input_protect_policy_match (spd0, - &ip0->src_address, - &ip0->dst_address, - clib_net_to_host_u32 - (ah0->spi)); - - if (PREDICT_TRUE (p0 != 0)) - { - ipsec_matched += 1; - pi0 = p0 - im->policies; - vlib_increment_combined_counter - (&ipsec_spd_policy_counters, - thread_index, pi0, 1, - clib_net_to_host_u16 (ip0->payload_length) + - header_size); - - vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; - next0 = im->ah6_decrypt_next_index; - goto trace0; - } - else - { - pi0 = ~0; - ipsec_unprocessed += 1; - next0 = IPSEC_INPUT_NEXT_DROP; - } + ah_header_t *ah0 = (ah_header_t *) ((u8 *) ip0 + header_size); + + p0 = ipsec6_input_protect_policy_match ( + spd0, &ip0->src_address, &ip0->dst_address, + clib_net_to_host_u32 (ah0->spi)); + + if (PREDICT_TRUE (p0 != 0)) + { + ipsec_matched += 1; + pi0 = p0 - im->policies; + vlib_increment_combined_counter ( + &ipsec_spd_policy_counters, thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length) + header_size); + + vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; + next0 = im->ah6_decrypt_next_index; + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ipsec_input_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + + if (p0) + { + tr->sa_id = p0->sa_id; + tr->policy_type = p0->type; + } + + tr->proto = ip0->protocol; + tr->spi = clib_net_to_host_u32 (ah0->spi); + tr->spd = spd0->id; + tr->policy_index = pi0; + } + } + else + { 
+ pi0 = ~0; + ipsec_unprocessed += 1; + next0 = IPSEC_INPUT_NEXT_DROP; + } } else { - ipsec_unprocessed += 1; - } - - trace0: - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - ipsec_input_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - - if (p0) - { - tr->sa_id = p0->sa_id; - tr->policy_type = p0->type; - } - - tr->proto = ip0->protocol; - tr->spi = clib_net_to_host_u32 (esp0->spi); - tr->seq = clib_net_to_host_u32 (esp0->seq); - tr->spd = spd0->id; - tr->policy_index = pi0; + ipsec_unprocessed += 1; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c index d37d89d5e3e..2347a00f052 100644 --- a/src/vnet/ipsec/ipsec_sa.c +++ b/src/vnet/ipsec/ipsec_sa.c @@ -33,8 +33,6 @@ vlib_combined_counter_main_t ipsec_sa_counters = { /* Per-SA error counters */ vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS]; -ipsec_sa_t *ipsec_sa_pool; - static clib_error_t * ipsec_call_add_del_callbacks (ipsec_main_t * im, ipsec_sa_t * sa, u32 sa_index, int is_add) @@ -77,39 +75,71 @@ static void ipsec_sa_stack (ipsec_sa_t * sa) { ipsec_main_t *im = &ipsec_main; + ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa); dpo_id_t tmp = DPO_INVALID; tunnel_contribute_forwarding (&sa->tunnel, &tmp); if (IPSEC_PROTOCOL_AH == sa->protocol) dpo_stack_from_node ((ipsec_sa_is_set_IS_TUNNEL_V6 (sa) ? - im->ah6_encrypt_node_index : - im->ah4_encrypt_node_index), &sa->dpo, &tmp); + im->ah6_encrypt_node_index : + im->ah4_encrypt_node_index), + &ort->dpo, &tmp); else dpo_stack_from_node ((ipsec_sa_is_set_IS_TUNNEL_V6 (sa) ? - im->esp6_encrypt_node_index : - im->esp4_encrypt_node_index), &sa->dpo, &tmp); + im->esp6_encrypt_node_index : + im->esp4_encrypt_node_index), + &ort->dpo, &tmp); dpo_reset (&tmp); } void ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled) { + u32 cipher_key_index, integ_key_index; + vnet_crypto_op_id_t inb_cipher_op_id, outb_cipher_op_id, integ_op_id; + u32 is_async; if (is_enabled) { - sa->crypto_key_index = sa->crypto_async_key_index; - sa->crypto_enc_op_id = sa->crypto_async_enc_op_id; - sa->crypto_dec_op_id = sa->crypto_async_dec_op_id; - sa->integ_key_index = ~0; - sa->integ_op_id = ~0; + if (sa->linked_key_index != ~0) + cipher_key_index = sa->linked_key_index; + else + cipher_key_index = sa->crypto_sync_key_index; + + outb_cipher_op_id = sa->crypto_async_enc_op_id; + inb_cipher_op_id = sa->crypto_async_dec_op_id; + integ_key_index = ~0; + integ_op_id = ~0; + is_async = 1; } else { - sa->crypto_key_index = sa->crypto_sync_key_index; - sa->crypto_enc_op_id = sa->crypto_sync_enc_op_id; - sa->crypto_dec_op_id = sa->crypto_sync_dec_op_id; - sa->integ_key_index = sa->integ_sync_key_index; - sa->integ_op_id = sa->integ_sync_op_id; + cipher_key_index = sa->crypto_sync_key_index; + outb_cipher_op_id = sa->crypto_sync_enc_op_id; + inb_cipher_op_id = sa->crypto_sync_dec_op_id; + integ_key_index = sa->integ_sync_key_index; + integ_op_id = sa->integ_sync_op_id; + is_async = 0; + } + + if (ipsec_sa_get_inb_rt (sa)) + { + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); + irt->cipher_key_index = cipher_key_index; + irt->integ_key_index = integ_key_index; + irt->cipher_op_id = inb_cipher_op_id; + irt->integ_op_id = integ_op_id; + irt->is_async = is_async; + } + + if (ipsec_sa_get_outb_rt (sa)) + { + ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa); + ort->cipher_key_index = cipher_key_index; + ort->integ_key_index = 
integ_key_index; + ort->cipher_op_id = outb_cipher_op_id; + ort->integ_op_id = integ_op_id; + ort->is_async = is_async; } } @@ -117,32 +147,11 @@ void ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg) { ipsec_main_t *im = &ipsec_main; + ipsec_main_crypto_alg_t *alg = im->crypto_algs + crypto_alg; sa->crypto_alg = crypto_alg; - sa->crypto_iv_size = im->crypto_algs[crypto_alg].iv_size; - sa->esp_block_align = clib_max (4, im->crypto_algs[crypto_alg].block_align); - sa->crypto_sync_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id; - sa->crypto_sync_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id; - sa->crypto_calg = im->crypto_algs[crypto_alg].alg; - ASSERT (sa->crypto_iv_size <= ESP_MAX_IV_SIZE); - ASSERT (sa->esp_block_align <= ESP_MAX_BLOCK_SIZE); - if (IPSEC_CRYPTO_ALG_IS_GCM (crypto_alg) || - IPSEC_CRYPTO_ALG_CTR_AEAD_OTHERS (crypto_alg)) - { - sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size; - ipsec_sa_set_IS_CTR (sa); - ipsec_sa_set_IS_AEAD (sa); - } - else if (IPSEC_CRYPTO_ALG_IS_CTR (crypto_alg)) - { - ipsec_sa_set_IS_CTR (sa); - } - else if (IPSEC_CRYPTO_ALG_IS_NULL_GMAC (crypto_alg)) - { - sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size; - ipsec_sa_set_IS_CTR (sa); - ipsec_sa_set_IS_AEAD (sa); - ipsec_sa_set_IS_NULL_GMAC (sa); - } + sa->crypto_sync_enc_op_id = alg->enc_op_id; + sa->crypto_sync_dec_op_id = alg->dec_op_id; + sa->crypto_calg = alg->alg; } void @@ -150,14 +159,12 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg) { ipsec_main_t *im = &ipsec_main; sa->integ_alg = integ_alg; - sa->integ_icv_size = im->integ_algs[integ_alg].icv_size; sa->integ_sync_op_id = im->integ_algs[integ_alg].op_id; sa->integ_calg = im->integ_algs[integ_alg].alg; - ASSERT (sa->integ_icv_size <= ESP_MAX_ICV_SIZE); } -void -ipsec_sa_set_async_op_ids (ipsec_sa_t * sa) +static void +ipsec_sa_set_async_op_ids (ipsec_sa_t *sa) { if (ipsec_sa_is_set_USE_ESN (sa)) { @@ -191,12 +198,89 @@ ipsec_sa_set_async_op_ids (ipsec_sa_t * sa) #undef _ } +static void +ipsec_sa_init_runtime (ipsec_sa_t *sa) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_main_crypto_alg_t *alg = im->crypto_algs + sa->crypto_alg; + u8 integ_icv_size; + + if (alg->is_aead) + integ_icv_size = im->crypto_algs[sa->crypto_alg].icv_size; + else + integ_icv_size = im->integ_algs[sa->integ_alg].icv_size; + ASSERT (integ_icv_size <= ESP_MAX_ICV_SIZE); + + if (ipsec_sa_get_inb_rt (sa)) + { + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); + irt->use_anti_replay = ipsec_sa_is_set_USE_ANTI_REPLAY (sa); + irt->use_esn = ipsec_sa_is_set_USE_ESN (sa); + irt->is_tunnel = ipsec_sa_is_set_IS_TUNNEL (sa); + irt->is_transport = + !(ipsec_sa_is_set_IS_TUNNEL (sa) || ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); + irt->udp_sz = ipsec_sa_is_set_UDP_ENCAP (sa) ? 
sizeof (udp_header_t) : 0; + irt->is_ctr = alg->is_ctr; + irt->is_aead = alg->is_aead; + irt->is_null_gmac = alg->is_null_gmac; + irt->cipher_iv_size = im->crypto_algs[sa->crypto_alg].iv_size; + irt->integ_icv_size = integ_icv_size; + irt->salt = sa->salt; + irt->async_op_id = sa->crypto_async_dec_op_id; + ASSERT (irt->cipher_iv_size <= ESP_MAX_IV_SIZE); + } + + if (ipsec_sa_get_outb_rt (sa)) + { + ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa); + ort->use_anti_replay = ipsec_sa_is_set_USE_ANTI_REPLAY (sa); + ort->use_esn = ipsec_sa_is_set_USE_ESN (sa); + ort->is_ctr = alg->is_ctr; + ort->is_aead = alg->is_aead; + ort->is_null_gmac = alg->is_null_gmac; + ort->is_tunnel = ipsec_sa_is_set_IS_TUNNEL (sa); + ort->is_tunnel_v6 = ipsec_sa_is_set_IS_TUNNEL_V6 (sa); + ort->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa); + ort->esp_block_align = + clib_max (4, im->crypto_algs[sa->crypto_alg].block_align); + ort->cipher_iv_size = im->crypto_algs[sa->crypto_alg].iv_size; + ort->integ_icv_size = integ_icv_size; + ort->salt = sa->salt; + ort->spi_be = clib_host_to_net_u32 (sa->spi); + ort->tunnel_flags = sa->tunnel.t_encap_decap_flags; + ort->async_op_id = sa->crypto_async_enc_op_id; + ort->t_dscp = sa->tunnel.t_dscp; + + ASSERT (ort->cipher_iv_size <= ESP_MAX_IV_SIZE); + ASSERT (ort->esp_block_align <= ESP_MAX_BLOCK_SIZE); + } + ipsec_sa_update_runtime (sa); +} + +void +ipsec_sa_update_runtime (ipsec_sa_t *sa) +{ + if (ipsec_sa_get_inb_rt (sa)) + { + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); + irt->is_protect = ipsec_sa_is_set_IS_PROTECT (sa); + } + if (ipsec_sa_get_outb_rt (sa)) + { + ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa); + ort->drop_no_crypto = sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE && + sa->integ_alg == IPSEC_INTEG_ALG_NONE && + !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa); + } +} + int ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun, bool is_tun) { ipsec_main_t *im = &ipsec_main; ipsec_sa_t *sa; + ipsec_sa_outb_rt_t *ort; u32 sa_index; uword *p; int rv; @@ -206,7 +290,8 @@ ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun, return VNET_API_ERROR_NO_SUCH_ENTRY; sa = ipsec_sa_get (p[0]); - sa_index = sa - ipsec_sa_pool; + ort = ipsec_sa_get_outb_rt (sa); + sa_index = sa - im->sa_pool; if (is_tun && ipsec_sa_is_set_IS_TUNNEL (sa) && (ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 || @@ -267,16 +352,16 @@ ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun, tunnel_copy (tun, &sa->tunnel); if (!ipsec_sa_is_set_IS_INBOUND (sa)) { - dpo_reset (&sa->dpo); + dpo_reset (&ort->dpo); - sa->tunnel_flags = sa->tunnel.t_encap_decap_flags; + ort->tunnel_flags = sa->tunnel.t_encap_decap_flags; rv = tunnel_resolve (&sa->tunnel, FIB_NODE_TYPE_IPSEC_SA, sa_index); if (rv) { hash_unset (im->sa_index_by_sa_id, sa->id); - pool_put (ipsec_sa_pool, sa); + pool_put (im->sa_pool, sa); return rv; } ipsec_sa_stack (sa); @@ -285,39 +370,42 @@ ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun, { tunnel_build_v6_hdr (&sa->tunnel, (ipsec_sa_is_set_UDP_ENCAP (sa) ? - IP_PROTOCOL_UDP : - IP_PROTOCOL_IPSEC_ESP), - &sa->ip6_hdr); + IP_PROTOCOL_UDP : + IP_PROTOCOL_IPSEC_ESP), + &ort->ip6_hdr); } else { tunnel_build_v4_hdr (&sa->tunnel, (ipsec_sa_is_set_UDP_ENCAP (sa) ? 
- IP_PROTOCOL_UDP : - IP_PROTOCOL_IPSEC_ESP), - &sa->ip4_hdr); + IP_PROTOCOL_UDP : + IP_PROTOCOL_IPSEC_ESP), + &ort->ip4_hdr); } } } if (ipsec_sa_is_set_UDP_ENCAP (sa)) { - if (dst_port != IPSEC_UDP_PORT_NONE && - dst_port != clib_net_to_host_u16 (sa->udp_hdr.dst_port)) + if (dst_port != IPSEC_UDP_PORT_NONE && dst_port != sa->udp_dst_port) { if (ipsec_sa_is_set_IS_INBOUND (sa)) { - ipsec_unregister_udp_port ( - clib_net_to_host_u16 (sa->udp_hdr.dst_port), - !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); + ipsec_unregister_udp_port (sa->udp_dst_port, + !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); ipsec_register_udp_port (dst_port, !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); } - sa->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port); + sa->udp_dst_port = dst_port; + if (ort) + ort->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port); + } + if (src_port != IPSEC_UDP_PORT_NONE && src_port != (sa->udp_src_port)) + { + sa->udp_src_port = src_port; + if (ort) + ort->udp_hdr.src_port = clib_host_to_net_u16 (src_port); } - if (src_port != IPSEC_UDP_PORT_NONE && - src_port != clib_net_to_host_u16 (sa->udp_hdr.src_port)) - sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port); } return (0); } @@ -332,9 +420,13 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, { vlib_main_t *vm = vlib_get_main (); ipsec_main_t *im = &ipsec_main; + ipsec_main_crypto_alg_t *alg = im->crypto_algs + crypto_alg; + ipsec_sa_inb_rt_t *irt; + ipsec_sa_outb_rt_t *ort; clib_error_t *err; ipsec_sa_t *sa; - u32 sa_index; + u32 sa_index, irt_sz; + clib_thread_index_t thread_index = (vlib_num_workers ()) ? ~0 : 0; u64 rand[2]; uword *p; int rv; @@ -346,13 +438,42 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, if (getrandom (rand, sizeof (rand), 0) != sizeof (rand)) return VNET_API_ERROR_INIT_FAILED; - pool_get_aligned_zero (ipsec_sa_pool, sa, CLIB_CACHE_LINE_BYTES); + pool_get_aligned_zero (im->sa_pool, sa, CLIB_CACHE_LINE_BYTES); + sa_index = sa - im->sa_pool; + sa->flags = flags; + + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64) + /* window size rounded up to next power of 2 */ + anti_replay_window_size = 1 << max_log2 (anti_replay_window_size); + else + anti_replay_window_size = 64; + + vec_validate (im->inb_sa_runtimes, sa_index); + vec_validate (im->outb_sa_runtimes, sa_index); + + irt_sz = sizeof (ipsec_sa_inb_rt_t); + irt_sz += anti_replay_window_size / 8; + irt_sz = round_pow2 (irt_sz, CLIB_CACHE_LINE_BYTES); + + irt = clib_mem_alloc_aligned (irt_sz, alignof (ipsec_sa_inb_rt_t)); + ort = clib_mem_alloc_aligned (sizeof (ipsec_sa_outb_rt_t), + alignof (ipsec_sa_outb_rt_t)); + im->inb_sa_runtimes[sa_index] = irt; + im->outb_sa_runtimes[sa_index] = ort; + + *irt = (ipsec_sa_inb_rt_t){ + .thread_index = thread_index, + .anti_replay_window_size = anti_replay_window_size, + }; - clib_pcg64i_srandom_r (&sa->iv_prng, rand[0], rand[1]); + *ort = (ipsec_sa_outb_rt_t){ + .thread_index = thread_index, + }; + + clib_pcg64i_srandom_r (&ort->iv_prng, rand[0], rand[1]); fib_node_init (&sa->node, FIB_NODE_TYPE_IPSEC_SA); fib_node_lock (&sa->node); - sa_index = sa - ipsec_sa_pool; vlib_validate_combined_counter (&ipsec_sa_counters, sa_index); vlib_zero_combined_counter (&ipsec_sa_counters, sa_index); @@ -367,9 +488,8 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, sa->spi = spi; sa->stat_index = sa_index; sa->protocol = proto; - sa->flags = flags; sa->salt = salt; - sa->thread_index = (vlib_num_workers ()) ? 
~0 : 0; + if (integ_alg != IPSEC_INTEG_ALG_NONE) { ipsec_sa_set_integ_alg (sa, integ_alg); @@ -378,9 +498,6 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, ipsec_sa_set_crypto_alg (sa, crypto_alg); ipsec_sa_set_async_op_ids (sa); - if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64) - ipsec_sa_set_ANTI_REPLAY_HUGE (sa); - clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key)); if (crypto_alg != IPSEC_CRYPTO_ALG_NONE) @@ -389,7 +506,7 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, vm, im->crypto_algs[crypto_alg].alg, (u8 *) ck->data, ck->len); if (~0 == sa->crypto_sync_key_index) { - pool_put (ipsec_sa_pool, sa); + pool_put (im->sa_pool, sa); return VNET_API_ERROR_KEY_LENGTH; } } @@ -400,17 +517,17 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, vm, im->integ_algs[integ_alg].alg, (u8 *) ik->data, ik->len); if (~0 == sa->integ_sync_key_index) { - pool_put (ipsec_sa_pool, sa); + pool_put (im->sa_pool, sa); return VNET_API_ERROR_KEY_LENGTH; } } - if (sa->crypto_async_enc_op_id && !ipsec_sa_is_set_IS_AEAD (sa)) - sa->crypto_async_key_index = + if (sa->crypto_async_enc_op_id && alg->is_aead == 0) + sa->linked_key_index = vnet_crypto_key_add_linked (vm, sa->crypto_sync_key_index, sa->integ_sync_key_index); // AES-CBC & HMAC else - sa->crypto_async_key_index = sa->crypto_sync_key_index; + sa->linked_key_index = ~0; if (im->async_mode) { @@ -429,14 +546,14 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, if (err) { clib_warning ("%v", err->what); - pool_put (ipsec_sa_pool, sa); + pool_put (im->sa_pool, sa); return VNET_API_ERROR_UNIMPLEMENTED; } err = ipsec_call_add_del_callbacks (im, sa, sa_index, 1); if (err) { - pool_put (ipsec_sa_pool, sa); + pool_put (im->sa_pool, sa); return VNET_API_ERROR_SYSCALL_ERROR_1; } @@ -446,13 +563,12 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa)) { - sa->tunnel_flags = sa->tunnel.t_encap_decap_flags; rv = tunnel_resolve (&sa->tunnel, FIB_NODE_TYPE_IPSEC_SA, sa_index); if (rv) { - pool_put (ipsec_sa_pool, sa); + pool_put (im->sa_pool, sa); return rv; } ipsec_sa_stack (sa); @@ -464,7 +580,7 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, (ipsec_sa_is_set_UDP_ENCAP (sa) ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP), - &sa->ip6_hdr); + &ort->ip6_hdr); } else { @@ -472,44 +588,38 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, (ipsec_sa_is_set_UDP_ENCAP (sa) ? 
IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP), - &sa->ip4_hdr); + &ort->ip4_hdr); } } if (ipsec_sa_is_set_UDP_ENCAP (sa)) { if (dst_port == IPSEC_UDP_PORT_NONE) - sa->udp_hdr.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipsec); - else - sa->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port); - + dst_port = UDP_DST_PORT_ipsec; if (src_port == IPSEC_UDP_PORT_NONE) - sa->udp_hdr.src_port = clib_host_to_net_u16 (UDP_DST_PORT_ipsec); - else - sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port); + src_port = UDP_DST_PORT_ipsec; + sa->udp_dst_port = dst_port; + sa->udp_src_port = src_port; + if (ort) + { + ort->udp_hdr.src_port = clib_host_to_net_u16 (src_port); + ort->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port); + } if (ipsec_sa_is_set_IS_INBOUND (sa)) - ipsec_register_udp_port (clib_host_to_net_u16 (sa->udp_hdr.dst_port), - !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); + ipsec_register_udp_port (dst_port, !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); } - /* window size rounded up to next power of 2 */ - if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) - { - anti_replay_window_size = 1 << max_log2 (anti_replay_window_size); - sa->replay_window_huge = - clib_bitmap_set_region (0, 0, 1, anti_replay_window_size); - } - else - { - sa->replay_window = ~0; - } + for (u32 i = 0; i < anti_replay_window_size / uword_bits; i++) + irt->replay_window[i] = ~0ULL; hash_set (im->sa_index_by_sa_id, sa->id, sa_index); if (sa_out_index) *sa_out_index = sa_index; + ipsec_sa_init_runtime (sa); + return (0); } @@ -519,33 +629,37 @@ ipsec_sa_del (ipsec_sa_t * sa) vlib_main_t *vm = vlib_get_main (); ipsec_main_t *im = &ipsec_main; u32 sa_index; + ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); + ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa); - sa_index = sa - ipsec_sa_pool; + sa_index = sa - im->sa_pool; hash_unset (im->sa_index_by_sa_id, sa->id); tunnel_unresolve (&sa->tunnel); /* no recovery possible when deleting an SA */ (void) ipsec_call_add_del_callbacks (im, sa, sa_index, 0); - if (ipsec_sa_is_set_IS_ASYNC (sa)) - { - if (!ipsec_sa_is_set_IS_AEAD (sa)) - vnet_crypto_key_del (vm, sa->crypto_async_key_index); - } + if (sa->linked_key_index != ~0) + vnet_crypto_key_del (vm, sa->linked_key_index); if (ipsec_sa_is_set_UDP_ENCAP (sa) && ipsec_sa_is_set_IS_INBOUND (sa)) - ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port), + ipsec_unregister_udp_port (sa->udp_dst_port, !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa)) - dpo_reset (&sa->dpo); + dpo_reset (&ort->dpo); if (sa->crypto_alg != IPSEC_CRYPTO_ALG_NONE) vnet_crypto_key_del (vm, sa->crypto_sync_key_index); if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) vnet_crypto_key_del (vm, sa->integ_sync_key_index); - if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) - clib_bitmap_free (sa->replay_window_huge); - pool_put (ipsec_sa_pool, sa); + foreach_pointer (p, irt, ort) + if (p) + clib_mem_free (p); + + im->inb_sa_runtimes[sa_index] = 0; + im->outb_sa_runtimes[sa_index] = 0; + + pool_put (im->sa_pool, sa); } int @@ -554,23 +668,33 @@ ipsec_sa_bind (u32 id, u32 worker, bool bind) ipsec_main_t *im = &ipsec_main; uword *p; ipsec_sa_t *sa; + ipsec_sa_inb_rt_t *irt; + ipsec_sa_outb_rt_t *ort; + clib_thread_index_t thread_index; p = hash_get (im->sa_index_by_sa_id, id); if (!p) return VNET_API_ERROR_INVALID_VALUE; sa = ipsec_sa_get (p[0]); + irt = ipsec_sa_get_inb_rt (sa); + ort = ipsec_sa_get_outb_rt (sa); if (!bind) { - sa->thread_index = ~0; - return 0; + thread_index = ~0; + goto done; } if (worker >= 
vlib_num_workers ()) return VNET_API_ERROR_INVALID_WORKER; - sa->thread_index = vlib_get_worker_thread_index (worker); + thread_index = vlib_get_worker_thread_index (worker); +done: + if (irt) + irt->thread_index = thread_index; + if (ort) + ort->thread_index = thread_index; return 0; } @@ -646,9 +770,10 @@ ipsec_sa_clear (index_t sai) void ipsec_sa_walk (ipsec_sa_walk_cb_t cb, void *ctx) { + ipsec_main_t *im = &ipsec_main; ipsec_sa_t *sa; - pool_foreach (sa, ipsec_sa_pool) + pool_foreach (sa, im->sa_pool) { if (WALK_CONTINUE != cb (sa, ctx)) break; diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h index 640d9288a42..330043809ae 100644 --- a/src/vnet/ipsec/ipsec_sa.h +++ b/src/vnet/ipsec/ipsec_sa.h @@ -52,24 +52,6 @@ typedef enum IPSEC_CRYPTO_N_ALG, } __clib_packed ipsec_crypto_alg_t; -#define IPSEC_CRYPTO_ALG_IS_NULL_GMAC(_alg) \ - ((_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128) || \ - (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192) || \ - (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256)) - -#define IPSEC_CRYPTO_ALG_IS_GCM(_alg) \ - (((_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) || \ - (_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) || \ - (_alg == IPSEC_CRYPTO_ALG_AES_GCM_256))) - -#define IPSEC_CRYPTO_ALG_IS_CTR(_alg) \ - (((_alg == IPSEC_CRYPTO_ALG_AES_CTR_128) || \ - (_alg == IPSEC_CRYPTO_ALG_AES_CTR_192) || \ - (_alg == IPSEC_CRYPTO_ALG_AES_CTR_256))) - -#define IPSEC_CRYPTO_ALG_CTR_AEAD_OTHERS(_alg) \ - (_alg == IPSEC_CRYPTO_ALG_CHACHA20_POLY1305) - #define foreach_ipsec_integ_alg \ _ (0, NONE, "none") \ _ (1, MD5_96, "md5-96") /* RFC2403 */ \ @@ -117,12 +99,8 @@ typedef struct ipsec_key_t_ _ (16, UDP_ENCAP, "udp-encap") \ _ (32, IS_PROTECT, "Protect") \ _ (64, IS_INBOUND, "inbound") \ - _ (128, IS_AEAD, "aead") \ - _ (256, IS_CTR, "ctr") \ _ (512, IS_ASYNC, "async") \ - _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \ - _ (2048, IS_NULL_GMAC, "null-gmac") \ - _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge") + _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") typedef enum ipsec_sad_flags_t_ { @@ -165,51 +143,82 @@ typedef enum typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u16 is_aead : 1; + u16 is_ctr : 1; + u16 is_null_gmac : 1; + u16 use_esn : 1; + u16 use_anti_replay : 1; + u16 is_protect : 1; + u16 is_tunnel : 1; + u16 is_transport : 1; + u16 is_async : 1; + u16 cipher_op_id; + u16 integ_op_id; + u8 cipher_iv_size; + u8 integ_icv_size; + u8 udp_sz; + clib_thread_index_t thread_index; + u32 salt; + u64 seq64; + u16 async_op_id; + vnet_crypto_key_index_t cipher_key_index; + vnet_crypto_key_index_t integ_key_index; + u32 anti_replay_window_size; + uword replay_window[]; +} ipsec_sa_inb_rt_t; +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u16 is_aead : 1; + u16 is_ctr : 1; + u16 is_null_gmac : 1; + u16 is_tunnel : 1; + u16 is_tunnel_v6 : 1; + u16 udp_encap : 1; + u16 use_esn : 1; + u16 use_anti_replay : 1; + u16 drop_no_crypto : 1; + u16 is_async : 1; + u16 cipher_op_id; + u16 integ_op_id; + u8 cipher_iv_size; + u8 esp_block_align; + u8 integ_icv_size; + ip_dscp_t t_dscp; + tunnel_encap_decap_flags_t tunnel_flags; + clib_thread_index_t thread_index; + u16 async_op_id; + u32 salt; + u32 spi_be; + u64 seq64; + dpo_id_t dpo; clib_pcg64i_random_t iv_prng; - + vnet_crypto_key_index_t cipher_key_index; + vnet_crypto_key_index_t integ_key_index; union { - u64 replay_window; - clib_bitmap_t *replay_window_huge; + ip4_header_t ip4_hdr; + ip6_header_t ip6_hdr; }; - dpo_id_t dpo; + udp_header_t udp_hdr; +} ipsec_sa_outb_rt_t; - vnet_crypto_key_index_t crypto_key_index; - 
vnet_crypto_key_index_t integ_key_index; +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 spi; - u32 seq; - u32 seq_hi; - u16 crypto_enc_op_id; - u16 crypto_dec_op_id; - u16 integ_op_id; ipsec_sa_flags_t flags; - u16 thread_index; - - u16 integ_icv_size : 6; - u16 crypto_iv_size : 5; - u16 esp_block_align : 5; - - CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - union - { - ip4_header_t ip4_hdr; - ip6_header_t ip6_hdr; - }; - udp_header_t udp_hdr; + u16 udp_src_port; + u16 udp_dst_port; /* Salt used in CTR modes (incl. GCM) - stored in network byte order */ u32 salt; ipsec_protocol_t protocol; - tunnel_encap_decap_flags_t tunnel_flags; - u8 __pad[2]; - - /* data accessed by dataplane code should be above this comment */ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline2); /* Elements with u64 size multiples */ tunnel_t tunnel; @@ -222,7 +231,7 @@ typedef struct vnet_crypto_alg_t crypto_calg; u32 crypto_sync_key_index; u32 integ_sync_key_index; - u32 crypto_async_key_index; + u32 linked_key_index; /* elements with u16 size */ u16 crypto_sync_enc_op_id; @@ -243,13 +252,6 @@ STATIC_ASSERT (VNET_CRYPTO_N_OP_IDS < (1 << 16), "crypto ops overflow"); STATIC_ASSERT (ESP_MAX_ICV_SIZE < (1 << 6), "integer icv overflow"); STATIC_ASSERT (ESP_MAX_IV_SIZE < (1 << 5), "esp iv overflow"); STATIC_ASSERT (ESP_MAX_BLOCK_SIZE < (1 << 5), "esp alignment overflow"); -STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline1, CLIB_CACHE_LINE_BYTES); -STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline2, 2 * CLIB_CACHE_LINE_BYTES); - -/** - * Pool of IPSec SAs - */ -extern ipsec_sa_t *ipsec_sa_pool; /* * Ensure that the IPsec data does not overlap with the IP data in @@ -291,6 +293,7 @@ extern void ipsec_mk_key (ipsec_key_t *key, const u8 *data, u8 len); extern int ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun, bool is_tun); +extern void ipsec_sa_update_runtime (ipsec_sa_t *sa); extern int ipsec_sa_add_and_lock ( u32 id, u32 spi, ipsec_protocol_t proto, ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck, ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik, @@ -323,85 +326,35 @@ extern uword unformat_ipsec_key (unformat_input_t *input, va_list *args); #define IPSEC_UDP_PORT_NONE ((u16) ~0) -/* - * Anti Replay definitions - */ - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE(_sa) \ - (u32) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \ - clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \ - BITS (_sa->replay_window)) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN(_sa, _is_huge) \ - (u32) (_is_huge ? clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \ - BITS (_sa->replay_window)) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN(_sa) \ - (u64) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \ - clib_bitmap_count_set_bits (_sa->replay_window_huge) : \ - count_set_bits (_sa->replay_window)) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN(_sa, _is_huge) \ - (u64) (_is_huge ? 
clib_bitmap_count_set_bits (_sa->replay_window_huge) : \ - count_set_bits (_sa->replay_window)) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX(_sa) \ - (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) - 1) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX_KNOWN_WIN(_sa, _is_huge) \ - (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa, _is_huge) - 1) - -/* - * sequence number less than the lower bound are outside of the window - * From RFC4303 Appendix A: - * Bl = Tl - W + 1 - */ -#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_sa) \ - (u32) (_sa->seq - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) + 1) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN(_sa, _is_huge) \ - (u32) (_sa->seq - \ - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (_sa, _is_huge) + 1) - always_inline u64 -ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_t *sa) +ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_inb_rt_t *irt) { - if (!ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) - return sa->replay_window; - u64 w; - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa); - u32 tl_win_index = sa->seq & (window_size - 1); + u32 window_size = irt->anti_replay_window_size; + u32 tl_win_index = irt->seq64 & (window_size - 1); + uword *bmp = (uword *) irt->replay_window; if (PREDICT_TRUE (tl_win_index >= 63)) - return clib_bitmap_get_multiple (sa->replay_window_huge, tl_win_index - 63, - 64); + return uword_bitmap_get_multiple (bmp, tl_win_index - 63, 64); - w = clib_bitmap_get_multiple_no_check (sa->replay_window_huge, 0, - tl_win_index + 1) + w = uword_bitmap_get_multiple_no_check (bmp, 0, tl_win_index + 1) << (63 - tl_win_index); - w |= clib_bitmap_get_multiple_no_check (sa->replay_window_huge, - window_size - 63 + tl_win_index, - 63 - tl_win_index); + w |= uword_bitmap_get_multiple_no_check ( + bmp, window_size - 63 + tl_win_index, 63 - tl_win_index); return w; } always_inline int -ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq, bool ar_huge) +ipsec_sa_anti_replay_check (const ipsec_sa_inb_rt_t *irt, u32 window_size, + u32 seq) { - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge); - /* we assume that the packet is in the window. * if the packet falls left (sa->seq - seq >= window size), * the result is wrong */ - if (ar_huge) - return clib_bitmap_get (sa->replay_window_huge, seq & (window_size - 1)); - else - return (sa->replay_window >> (window_size + seq - sa->seq - 1)) & 1; - - return 0; + return uword_bitmap_is_bit_set ((uword *) irt->replay_window, + seq & (window_size - 1)); } /* @@ -419,36 +372,37 @@ ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq, bool ar_huge) * the high sequence number is set. 
*/ always_inline int -ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, +ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, u32 hi_seq_used, bool post_decrypt, - u32 *hi_seq_req, bool ar_huge) + u32 *hi_seq_req) { ASSERT ((post_decrypt == false) == (hi_seq_req != 0)); - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge); - u32 window_lower_bound = - IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN (sa, ar_huge); + u32 window_size = irt->anti_replay_window_size; + u32 exp_lo = irt->seq64; + u32 exp_hi = irt->seq64 >> 32; + u32 window_lower_bound = exp_lo - window_size + 1; - if (!ipsec_sa_is_set_USE_ESN (sa)) + if (!irt->use_esn) { if (hi_seq_req) /* no ESN, therefore the hi-seq is always 0 */ *hi_seq_req = 0; - if (!ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) + if (!irt->use_anti_replay) return 0; - if (PREDICT_TRUE (seq > sa->seq)) + if (PREDICT_TRUE (seq > exp_lo)) return 0; /* does the packet fall out on the left of the window */ - if (sa->seq >= seq + window_size) + if (exp_lo >= seq + window_size) return 1; - return ipsec_sa_anti_replay_check (sa, seq, ar_huge); + return ipsec_sa_anti_replay_check (irt, window_size, seq); } - if (!ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) + if (!irt->use_anti_replay) { /* there's no AR configured for this SA, but in order * to know whether a packet has wrapped the hi ESN we need @@ -463,20 +417,20 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, */ if (hi_seq_req) { - if (seq >= sa->seq) + if (seq >= exp_lo) /* The packet's sequence number is larger that the SA's. * that can't be a warp - unless we lost more than * 2^32 packets ... how could we know? */ - *hi_seq_req = sa->seq_hi; + *hi_seq_req = exp_hi; else { /* The packet's SN is less than the SAs, so either the SN has * wrapped or the SN is just old. */ - if (sa->seq - seq > (1 << 30)) + if (exp_lo - seq > (1 << 30)) /* It's really really really old => it wrapped */ - *hi_seq_req = sa->seq_hi + 1; + *hi_seq_req = exp_hi + 1; else - *hi_seq_req = sa->seq_hi; + *hi_seq_req = exp_hi; } } /* @@ -486,7 +440,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, return 0; } - if (PREDICT_TRUE (window_size > 0 && sa->seq >= window_size - 1)) + if (PREDICT_TRUE (exp_lo >= window_size - 1)) { /* * the last sequence number VPP received is more than one @@ -503,7 +457,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, */ if (post_decrypt) { - if (hi_seq_used == sa->seq_hi) + if (hi_seq_used == exp_hi) /* the high sequence number used to succesfully decrypt this * packet is the same as the last-sequence number of the SA. * that means this packet did not cause a wrap. @@ -520,7 +474,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, /* pre-decrypt it might be the packet that causes a wrap, we * need to decrypt it to find out */ if (hi_seq_req) - *hi_seq_req = sa->seq_hi + 1; + *hi_seq_req = exp_hi + 1; return 0; } } @@ -531,13 +485,13 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, * end of the window. 
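The ESN handling above hinges on splitting the 64-bit seq64 counter into its low and high halves and guessing the sender's high half for an incoming 32-bit sequence number. A standalone sketch of that heuristic, mirroring the no-anti-replay path with its 2^30 "it must have wrapped" threshold (plain C types stand in for the VPP ones):

#include <stdint.h>
#include <stdio.h>

/* Guess the sender's high 32 bits for an incoming low 32-bit seq,
 * given the SA's last 64-bit extended sequence counter. */
static uint32_t
guess_hi_seq (uint64_t seq64, uint32_t seq)
{
  uint32_t exp_lo = (uint32_t) seq64;
  uint32_t exp_hi = (uint32_t) (seq64 >> 32);

  if (seq >= exp_lo)
    return exp_hi;          /* ahead of the SA: same ESN epoch */
  if (exp_lo - seq > (1u << 30))
    return exp_hi + 1;      /* far behind: assume the low half wrapped */
  return exp_hi;            /* just an old packet in the same epoch */
}

int
main (void)
{
  uint64_t seq64 = ((uint64_t) 7 << 32) | 0xfffffff0u;
  printf ("%u\n", guess_hi_seq (seq64, 0xfffffff5u)); /* 7: same epoch */
  printf ("%u\n", guess_hi_seq (seq64, 0x00000010u)); /* 8: wrapped */
  return 0;
}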
*/ if (hi_seq_req) - *hi_seq_req = sa->seq_hi; - if (seq <= sa->seq) + *hi_seq_req = exp_hi; + if (seq <= exp_lo) /* * The received seq number is within bounds of the window * check if it's a duplicate */ - return ipsec_sa_anti_replay_check (sa, seq, ar_huge); + return ipsec_sa_anti_replay_check (irt, window_size, seq); else /* * The received sequence number is greater than the window @@ -562,15 +516,15 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, /* * the sequence number is less than the lower bound. */ - if (seq <= sa->seq) + if (seq <= exp_lo) { /* * the packet is within the window upper bound. * check for duplicates. */ if (hi_seq_req) - *hi_seq_req = sa->seq_hi; - return ipsec_sa_anti_replay_check (sa, seq, ar_huge); + *hi_seq_req = exp_hi; + return ipsec_sa_anti_replay_check (irt, window_size, seq); } else { @@ -584,7 +538,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, * we've lost close to 2^32 packets. */ if (hi_seq_req) - *hi_seq_req = sa->seq_hi; + *hi_seq_req = exp_hi; return 0; } } @@ -597,8 +551,8 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, * received packet, the SA has moved on to a higher sequence number. */ if (hi_seq_req) - *hi_seq_req = sa->seq_hi - 1; - return ipsec_sa_anti_replay_check (sa, seq, ar_huge); + *hi_seq_req = exp_hi - 1; + return ipsec_sa_anti_replay_check (irt, window_size, seq); } } @@ -608,120 +562,97 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, } always_inline u32 -ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge) +ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 window_size, + u32 inc) { + uword *window = irt->replay_window; + u32 window_mask = window_size - 1; u32 n_lost = 0; u32 seen = 0; - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge); if (inc < window_size) { - if (ar_huge) - { - /* the number of packets we saw in this section of the window */ - clib_bitmap_t *window = sa->replay_window_huge; - u32 window_lower_bound = (sa->seq + 1) & (window_size - 1); - u32 window_next_lower_bound = - (window_lower_bound + inc) & (window_size - 1); + /* the number of packets we saw in this section of the window */ + u32 window_lower_bound = (irt->seq64 + 1) & window_mask; + u32 window_next_lower_bound = (window_lower_bound + inc) & window_mask; - uword i_block, i_word_start, i_word_end, full_words; - uword n_blocks = window_size >> log2_uword_bits; - uword mask; + uword i_block, i_word_start, i_word_end, full_words; + uword n_blocks = window_size >> log2_uword_bits; + uword mask; - i_block = window_lower_bound >> log2_uword_bits; + i_block = window_lower_bound >> log2_uword_bits; - i_word_start = window_lower_bound & (uword_bits - 1); - i_word_end = window_next_lower_bound & (uword_bits - 1); + i_word_start = window_lower_bound & (uword_bits - 1); + i_word_end = window_next_lower_bound & (uword_bits - 1); - /* We stay in the same word */ - if (i_word_start + inc <= uword_bits) - { - mask = pow2_mask (inc) << i_word_start; - seen += count_set_bits (window[i_block] & mask); - window[i_block] &= ~mask; - } - else + /* We stay in the same word */ + if (i_word_start + inc <= uword_bits) + { + mask = pow2_mask (inc) << i_word_start; + seen += count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; + } + else + { + full_words = + (inc + i_word_start - uword_bits - i_word_end) >> log2_uword_bits; + + /* count set bits in the first word */ + mask = (uword) ~0 << i_word_start; + seen += 
count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; + i_block = (i_block + 1) & (n_blocks - 1); + + /* count set bits in the next full words */ + /* even if the last word need to be fully counted, we treat it + * apart */ + while (full_words >= 8) { - full_words = (inc + i_word_start - uword_bits - i_word_end) >> - log2_uword_bits; - - /* count set bits in the first word */ - mask = (uword) ~0 << i_word_start; - seen += count_set_bits (window[i_block] & mask); - window[i_block] &= ~mask; - i_block = (i_block + 1) & (n_blocks - 1); - - /* count set bits in the next full words */ - /* even if the last word need to be fully counted, we treat it - * apart */ - while (full_words >= 8) - { - if (full_words >= 16) - { - /* prefect the next 8 blocks (64 bytes) */ - clib_prefetch_store ( - &window[(i_block + 8) & (n_blocks - 1)]); - } - - seen += count_set_bits (window[i_block]); - seen += - count_set_bits (window[(i_block + 1) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 2) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 3) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 4) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 5) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 6) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 7) & (n_blocks - 1)]); - window[i_block] = 0; - window[(i_block + 1) & (n_blocks - 1)] = 0; - window[(i_block + 2) & (n_blocks - 1)] = 0; - window[(i_block + 3) & (n_blocks - 1)] = 0; - window[(i_block + 4) & (n_blocks - 1)] = 0; - window[(i_block + 5) & (n_blocks - 1)] = 0; - window[(i_block + 6) & (n_blocks - 1)] = 0; - window[(i_block + 7) & (n_blocks - 1)] = 0; - - i_block = (i_block + 8) & (n_blocks - 1); - full_words -= 8; - } - while (full_words > 0) + if (full_words >= 16) { - // last word is treated after the loop - seen += count_set_bits (window[i_block]); - window[i_block] = 0; - i_block = (i_block + 1) & (n_blocks - 1); - full_words--; + /* prefect the next 8 blocks (64 bytes) */ + clib_prefetch_store ( + &window[(i_block + 8) & (n_blocks - 1)]); } - /* the last word */ - mask = pow2_mask (i_word_end); - seen += count_set_bits (window[i_block] & mask); - window[i_block] &= ~mask; + seen += count_set_bits (window[i_block]); + seen += count_set_bits (window[(i_block + 1) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 2) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 3) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 4) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 5) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 6) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 7) & (n_blocks - 1)]); + window[i_block] = 0; + window[(i_block + 1) & (n_blocks - 1)] = 0; + window[(i_block + 2) & (n_blocks - 1)] = 0; + window[(i_block + 3) & (n_blocks - 1)] = 0; + window[(i_block + 4) & (n_blocks - 1)] = 0; + window[(i_block + 5) & (n_blocks - 1)] = 0; + window[(i_block + 6) & (n_blocks - 1)] = 0; + window[(i_block + 7) & (n_blocks - 1)] = 0; + + i_block = (i_block + 8) & (n_blocks - 1); + full_words -= 8; + } + while (full_words > 0) + { + // last word is treated after the loop + seen += count_set_bits (window[i_block]); + window[i_block] = 0; + i_block = (i_block + 1) & (n_blocks - 1); + full_words--; } - clib_bitmap_set_no_check (window, - (sa->seq + inc) & (window_size - 1), 1); - } - else - { - /* - * count how many holes there are in the portion - * of the 
window that we will right shift of the end - * as a result of this increments - */ - u64 old = sa->replay_window & pow2_mask (inc); - /* the number of packets we saw in this section of the window */ - seen = count_set_bits (old); - sa->replay_window = - ((sa->replay_window) >> inc) | (1ULL << (window_size - 1)); + /* the last word */ + mask = pow2_mask (i_word_end); + seen += count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; } + uword_bitmap_set_bits_at_index (window, (irt->seq64 + inc) & window_mask, + 1); + /* * the number we missed is the size of the window section * minus the number we saw. @@ -730,24 +661,17 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge) } else { + u32 n_uwords = window_size / uword_bits; /* holes in the replay window are lost packets */ - n_lost = window_size - - IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN (sa, ar_huge); + n_lost = window_size - uword_bitmap_count_set_bits (window, n_uwords); /* any sequence numbers that now fall outside the window * are forever lost */ n_lost += inc - window_size; - if (PREDICT_FALSE (ar_huge)) - { - clib_bitmap_zero (sa->replay_window_huge); - clib_bitmap_set_no_check (sa->replay_window_huge, - (sa->seq + inc) & (window_size - 1), 1); - } - else - { - sa->replay_window = 1ULL << (window_size - 1); - } + uword_bitmap_clear (window, n_uwords); + uword_bitmap_set_bits_at_index (window, (irt->seq64 + inc) & window_mask, + 1); } return n_lost; @@ -763,66 +687,46 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge) * the branch cost. */ always_inline u64 -ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq, - u32 hi_seq, bool ar_huge) +ipsec_sa_anti_replay_advance (ipsec_sa_inb_rt_t *irt, + clib_thread_index_t thread_index, u32 seq, + u32 hi_seq) { u64 n_lost = 0; - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge); + u32 window_size = irt->anti_replay_window_size; + u32 masked_seq = seq & (window_size - 1); + u32 exp_lo = irt->seq64; + u32 exp_hi = irt->seq64 >> 32; u32 pos; - if (ipsec_sa_is_set_USE_ESN (sa)) + if (irt->use_esn) { - int wrap = hi_seq - sa->seq_hi; + int wrap = hi_seq - exp_hi; - if (wrap == 0 && seq > sa->seq) + if (wrap == 0 && seq > exp_lo) { - pos = seq - sa->seq; - n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge); - sa->seq = seq; + pos = seq - exp_lo; + n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos); + irt->seq64 = (u64) exp_hi << 32 | seq; } else if (wrap > 0) { - pos = seq + ~sa->seq + 1; - n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge); - sa->seq = seq; - sa->seq_hi = hi_seq; - } - else if (wrap < 0) - { - pos = ~seq + sa->seq + 1; - if (ar_huge) - clib_bitmap_set_no_check (sa->replay_window_huge, - seq & (window_size - 1), 1); - else - sa->replay_window |= (1ULL << (window_size - 1 - pos)); + pos = seq + ~exp_lo + 1; + n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos); + irt->seq64 = (u64) hi_seq << 32 | seq; } else - { - pos = sa->seq - seq; - if (ar_huge) - clib_bitmap_set_no_check (sa->replay_window_huge, - seq & (window_size - 1), 1); - else - sa->replay_window |= (1ULL << (window_size - 1 - pos)); - } + uword_bitmap_set_bits_at_index (irt->replay_window, masked_seq, 1); } else { - if (seq > sa->seq) + if (seq > exp_lo) { - pos = seq - sa->seq; - n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge); - sa->seq = seq; + pos = seq - exp_lo; + n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos); + 
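ipsec_sa_anti_replay_window_shift returns the number of holes (lost packets) that fall out of the window as it slides forward. The arithmetic is easiest to see in a single 64-bit window; the sketch below is a standalone version of that special case only (it is not the VPP code, and __builtin_popcountll is assumed to be available as on gcc/clang):

#include <stdint.h>
#include <stdio.h>

/* Slide a 64-bit window right by 'inc', count how many of the dropped
 * slots were never seen (those are the lost packets), and mark the new
 * top-of-window slot for the packet that caused the advance. */
static uint32_t
window_shift64 (uint64_t *window, uint32_t inc)
{
  uint32_t lost;

  if (inc < 64)
    {
      uint64_t dropped = *window & (((uint64_t) 1 << inc) - 1);
      lost = inc - (uint32_t) __builtin_popcountll (dropped);
      *window = (*window >> inc) | (1ull << 63);
    }
  else
    {
      /* everything currently tracked falls out of the window */
      lost = 64 - (uint32_t) __builtin_popcountll (*window);
      lost += inc - 64;
      *window = 1ull << 63;
    }
  return lost;
}

int
main (void)
{
  uint64_t w = ~0ull;                        /* every slot seen */
  printf ("%u\n", window_shift64 (&w, 4));   /* 0: no holes dropped */
  w = 0;                                     /* nothing seen */
  printf ("%u\n", window_shift64 (&w, 100)); /* 100: 64 in-window + 36 skipped */
  return 0;
}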
irt->seq64 = (u64) exp_hi << 32 | seq; } else - { - pos = sa->seq - seq; - if (ar_huge) - clib_bitmap_set_no_check (sa->replay_window_huge, - seq & (window_size - 1), 1); - else - sa->replay_window |= (1ULL << (window_size - 1 - pos)); - } + uword_bitmap_set_bits_at_index (irt->replay_window, masked_seq, 1); } return n_lost; @@ -840,12 +744,6 @@ ipsec_sa_assign_thread (u16 thread_id) : (unix_time_now_nsec () % vlib_num_workers ()) + 1); } -always_inline ipsec_sa_t * -ipsec_sa_get (u32 sa_index) -{ - return (pool_elt_at_index (ipsec_sa_pool, sa_index)); -} - #endif /* __IPSEC_SPD_SA_H__ */ /* diff --git a/src/vnet/ipsec/ipsec_tun.c b/src/vnet/ipsec/ipsec_tun.c index 5fb07b3ba09..28702bdec47 100644 --- a/src/vnet/ipsec/ipsec_tun.c +++ b/src/vnet/ipsec/ipsec_tun.c @@ -470,6 +470,7 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp) if (!(itp->itp_flags & IPSEC_PROTECT_ITF)) { ipsec_sa_set_IS_PROTECT (sa); + ipsec_sa_update_runtime (sa); itp->itp_flags |= IPSEC_PROTECT_ENCAPED; } } @@ -497,7 +498,11 @@ ipsec_tun_protect_config (ipsec_main_t * im, ipsec_sa_lock (itp->itp_out_sa); if (itp->itp_flags & IPSEC_PROTECT_ITF) - ipsec_sa_set_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa)); + { + ipsec_sa_t *sa = ipsec_sa_get (itp->itp_out_sa); + ipsec_sa_set_NO_ALGO_NO_DROP (sa); + ipsec_sa_update_runtime (sa); + } FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai, ({ @@ -523,12 +528,16 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp) FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa, ({ ipsec_sa_unset_IS_PROTECT (sa); + ipsec_sa_update_runtime (sa); })); ipsec_tun_protect_rx_db_remove (im, itp); ipsec_tun_protect_tx_db_remove (itp); - ipsec_sa_unset_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa)); + sa = ipsec_sa_get (itp->itp_out_sa); + ipsec_sa_unset_NO_ALGO_NO_DROP (sa); + ipsec_sa_update_runtime (sa); + ipsec_sa_unlock(itp->itp_out_sa); FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai, diff --git a/src/vnet/ipsec/ipsec_tun_in.c b/src/vnet/ipsec/ipsec_tun_in.c index 3dde084cb24..38f6baf3d2e 100644 --- a/src/vnet/ipsec/ipsec_tun_in.c +++ b/src/vnet/ipsec/ipsec_tun_in.c @@ -114,7 +114,7 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_interface_main_t *vim = &vnm->interface_main; int is_trace = node->flags & VLIB_NODE_FLAG_TRACE; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 n_left_from, *from; u16 nexts[VLIB_FRAME_SIZE], *next; diff --git a/src/vnet/ipsec/main.c b/src/vnet/ipsec/main.c index e17d1dc5cfe..0a01797e066 100644 --- a/src/vnet/ipsec/main.c +++ b/src/vnet/ipsec/main.c @@ -61,6 +61,7 @@ ipsec_main_t ipsec_main = { .alg = VNET_CRYPTO_ALG_AES_128_CTR, .iv_size = 8, .block_align = 1, + .is_ctr = 1, }, [IPSEC_CRYPTO_ALG_AES_CTR_192] = { @@ -69,6 +70,7 @@ ipsec_main_t ipsec_main = { .alg = VNET_CRYPTO_ALG_AES_192_CTR, .iv_size = 8, .block_align = 1, + .is_ctr = 1, }, [IPSEC_CRYPTO_ALG_AES_CTR_256] = { @@ -77,6 +79,7 @@ ipsec_main_t ipsec_main = { .alg = VNET_CRYPTO_ALG_AES_256_CTR, .iv_size = 8, .block_align = 1, + .is_ctr = 1, }, [IPSEC_CRYPTO_ALG_AES_GCM_128] = { @@ -86,6 +89,8 @@ ipsec_main_t ipsec_main = { .iv_size = 8, .block_align = 1, .icv_size = 16, + .is_aead = 1, + .is_ctr = 1, }, [IPSEC_CRYPTO_ALG_AES_GCM_192] = { @@ -95,6 +100,8 @@ ipsec_main_t ipsec_main = { .iv_size = 8, .block_align = 1, .icv_size = 16, + .is_aead = 1, + .is_ctr = 1, }, [IPSEC_CRYPTO_ALG_AES_GCM_256] = { @@ -104,6 +111,8 @@ ipsec_main_t ipsec_main = { .iv_size = 8, .block_align = 1, .icv_size = 16, + .is_aead 
= 1, + .is_ctr = 1, }, [IPSEC_CRYPTO_ALG_CHACHA20_POLY1305] = { @@ -112,6 +121,8 @@ ipsec_main_t ipsec_main = { .alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305, .iv_size = 8, .icv_size = 16, + .is_ctr = 1, + .is_aead = 1, }, [IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128] = { @@ -121,6 +132,9 @@ ipsec_main_t ipsec_main = { .iv_size = 8, .block_align = 1, .icv_size = 16, + .is_ctr = 1, + .is_aead = 1, + .is_null_gmac = 1, }, [IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192] = { @@ -130,6 +144,9 @@ ipsec_main_t ipsec_main = { .iv_size = 8, .block_align = 1, .icv_size = 16, + .is_ctr = 1, + .is_aead = 1, + .is_null_gmac = 1, }, [IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256] = { @@ -139,6 +156,9 @@ ipsec_main_t ipsec_main = { .iv_size = 8, .block_align = 1, .icv_size = 16, + .is_ctr = 1, + .is_aead = 1, + .is_null_gmac = 1, }, }, .integ_algs = { diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c index f8cb3cb5687..c06bf5c636b 100644 --- a/src/vnet/l2/l2_flood.c +++ b/src/vnet/l2/l2_flood.c @@ -141,7 +141,7 @@ VLIB_NODE_FN (l2flood_node) (vlib_main_t * vm, u32 n_left_from, *from, *to_next; l2flood_next_t next_index; l2flood_main_t *msm = &l2flood_main; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/vnet/l2/l2_input_node.c b/src/vnet/l2/l2_input_node.c index 76b94809eb3..58a541756da 100644 --- a/src/vnet/l2/l2_input_node.c +++ b/src/vnet/l2/l2_input_node.c @@ -215,7 +215,10 @@ classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u16 * next0) vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index; } else - feat_mask = L2INPUT_FEAT_DROP; + { + *next0 = L2INPUT_NEXT_DROP; + return; + } /* mask out features from bitmap using packet type and bd config */ u32 feature_bitmap = config->feature_bitmap & feat_mask; diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c index 0505d9a1829..79da5e53cba 100644 --- a/src/vnet/mpls/mpls_input.c +++ b/src/vnet/mpls/mpls_input.c @@ -75,7 +75,7 @@ mpls_input_inline (vlib_main_t * vm, { u32 n_left_from, next_index, * from, * to_next; mpls_main_t * mm = &mpls_main; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); vlib_simple_counter_main_t * cm; vnet_main_t * vnm = vnet_get_main(); diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index a5ac56534a5..5b0e9413ea3 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -61,7 +61,7 @@ VLIB_NODE_FN (mpls_lookup_node) (vlib_main_t * vm, vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, next_index, * from, * to_next; mpls_main_t * mm = &mpls_main; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -493,7 +493,7 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm, { vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, * from, * to_next; - u32 thread_index = vlib_get_thread_index(); + clib_thread_index_t thread_index = vlib_get_thread_index (); u32 next; from = vlib_frame_vector_args (frame); diff --git a/src/vnet/policer/police.h b/src/vnet/policer/police.h index 8f126e22175..c1b8096ed41 100644 --- a/src/vnet/policer/police.h +++ b/src/vnet/policer/police.h @@ -91,7 +91,8 @@ typedef struct u32 current_bucket; // MOD u32 extended_limit; u32 
extended_bucket; // MOD - u32 thread_index; // Tie policer to a thread, rather than lock + clib_thread_index_t + thread_index; // Tie policer to a thread, rather than lock u64 last_update_time; // MOD u8 *name; } policer_t; diff --git a/src/vnet/policer/police_inlines.h b/src/vnet/policer/police_inlines.h index 08000b9a303..7b7e19171d6 100644 --- a/src/vnet/policer/police_inlines.h +++ b/src/vnet/policer/police_inlines.h @@ -74,7 +74,7 @@ vnet_policer_police (vlib_main_t *vm, vlib_buffer_t *b, u32 policer_index, if (handoff) { - if (PREDICT_FALSE (pol->thread_index == ~0)) + if (PREDICT_FALSE (pol->thread_index == CLIB_INVALID_THREAD_INDEX)) /* * This is the first packet to use this policer. Set the * thread index in the policer to this thread and any diff --git a/src/vnet/qos/qos_store.c b/src/vnet/qos/qos_store.c index 3424a914e35..8875585f199 100644 --- a/src/vnet/qos/qos_store.c +++ b/src/vnet/qos/qos_store.c @@ -181,7 +181,7 @@ qos_store_cli (vlib_main_t * vm, enable = 1; else if (unformat (input, "disable")) enable = 0; - else if (unformat (input, "value &d", &value)) + else if (unformat (input, "value %d", &value)) ; else break; diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index eacc1adf941..1a2509e6356 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -490,7 +490,7 @@ vlib_node_registration_t appsl_rx_mqs_input_node; VLIB_NODE_FN (appsl_rx_mqs_input_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - u32 thread_index = vm->thread_index, n_msgs = 0; + clib_thread_index_t thread_index = vm->thread_index, n_msgs = 0; app_rx_mq_elt_t *elt, *next; app_main_t *am = &app_main; session_worker_t *wrk; @@ -581,7 +581,7 @@ app_rx_mqs_epoll_add (application_t *app, app_rx_mq_elt_t *mqe) { clib_file_t template = { 0 }; app_rx_mq_handle_t handle; - u32 thread_index; + clib_thread_index_t thread_index; int fd; thread_index = mqe - app->rx_mqs; @@ -603,7 +603,7 @@ app_rx_mqs_epoll_add (application_t *app, app_rx_mq_elt_t *mqe) static void app_rx_mqs_epoll_del (application_t *app, app_rx_mq_elt_t *mqe) { - u32 thread_index = mqe - app->rx_mqs; + clib_thread_index_t thread_index = mqe - app->rx_mqs; app_main_t *am = &app_main; appsl_wrk_t *aw; diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h index c68a911230f..d748eae9cd5 100644 --- a/src/vnet/session/application.h +++ b/src/vnet/session/application.h @@ -175,7 +175,7 @@ typedef struct app_rx_mq_handle_ struct { u32 app_index; - u32 thread_index; + clib_thread_index_t thread_index; }; u64 as_u64; }; @@ -368,9 +368,11 @@ int app_worker_session_fifo_tuning (app_worker_t * app_wrk, session_t * s, session_ft_action_t act, u32 len); void app_worker_add_event (app_worker_t *app_wrk, session_t *s, session_evt_type_t evt_type); -void app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index, +void app_worker_add_event_custom (app_worker_t *app_wrk, + clib_thread_index_t thread_index, session_event_t *evt); -int app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index); +int app_wrk_flush_wrk_events (app_worker_t *app_wrk, + clib_thread_index_t thread_index); void app_worker_del_all_events (app_worker_t *app_wrk); segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *, session_t *); @@ -386,10 +388,12 @@ void app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg, u32 msg_len, int fd); void app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg, u32 msg_len); -u8 
app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index); -void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index); +u8 app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, + clib_thread_index_t thread_index); +void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, + clib_thread_index_t thread_index); void app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, - u32 thread_index); + clib_thread_index_t thread_index); session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto, u8 transport_proto); void app_worker_del_detached_sm (app_worker_t * app_wrk, u32 sm_index); diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index a62f914d43a..c9aaceb330d 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -23,9 +23,11 @@ /** * unformat a vnet URI * - * transport-proto://[hostname]ip46-addr:port - * eg. tcp://ip46-addr:port - * tls://[testtsl.fd.io]ip46-addr:port + * transport-proto://[hostname]ip4-addr:port + * eg. tcp://ip4-addr:port + * https://[ip6]:port + * http://ip4:port + * tls://[testtsl.fd.io]ip4-addr:port * * u8 ip46_address[16]; * u16 port_in_host_byte_order; @@ -38,35 +40,75 @@ * */ uword -unformat_vnet_uri (unformat_input_t * input, va_list * args) +unformat_vnet_uri (unformat_input_t *input, va_list *args) { session_endpoint_cfg_t *sep = va_arg (*args, session_endpoint_cfg_t *); u32 transport_proto = 0, port; - if (unformat (input, "%U://%U/%d", unformat_transport_proto, - &transport_proto, unformat_ip4_address, &sep->ip.ip4, &port)) + if (unformat (input, "%U:", unformat_transport_proto, &transport_proto)) { sep->transport_proto = transport_proto; - sep->port = clib_host_to_net_u16 (port); + } + else if (unformat (input, "%Us:", unformat_transport_proto, + &transport_proto)) + { + sep->flags |= SESSION_ENDPT_CFG_F_SECURE; + sep->transport_proto = transport_proto; + } + + if (unformat (input, "//%U:", unformat_ip4_address, &sep->ip.ip4)) + { + sep->is_ip4 = 1; + } + /* deprecated */ + else if (unformat (input, "//%U/", unformat_ip4_address, &sep->ip.ip4)) + { + sep->is_ip4 = 1; + } + else if (unformat (input, "//%U", unformat_ip4_address, &sep->ip.ip4)) + { sep->is_ip4 = 1; + } + /* deprecated */ + else if (unformat (input, "//%U/", unformat_ip6_address, &sep->ip.ip6)) + { + sep->is_ip4 = 0; + } + else if (unformat (input, "//[%U]:", unformat_ip6_address, &sep->ip.ip6)) + { + sep->is_ip4 = 0; + } + /* deprecated */ + else if (unformat (input, "//[%U]/", unformat_ip6_address, &sep->ip.ip6)) + { + sep->is_ip4 = 0; + } + else if (unformat (input, "//[%U]", unformat_ip6_address, &sep->ip.ip6)) + { + sep->is_ip4 = 0; + } + else if (unformat (input, "//session/%lu", &sep->parent_handle)) + { + sep->ip.ip4.as_u32 = 1; /* ip need to be non zero in vnet */ return 1; } - else if (unformat (input, "%U://%U/%d", unformat_transport_proto, - &transport_proto, unformat_ip6_address, &sep->ip.ip6, - &port)) + + if (unformat (input, "%d", &port)) { - sep->transport_proto = transport_proto; sep->port = clib_host_to_net_u16 (port); - sep->is_ip4 = 0; return 1; } - else if (unformat (input, "%U://session/%lu", unformat_transport_proto, - &transport_proto, &sep->parent_handle)) + else if (sep->transport_proto == TRANSPORT_PROTO_HTTP) { - sep->transport_proto = transport_proto; - sep->ip.ip4.as_u32 = 1; /* ip need to be non zero in vnet */ + sep->port = clib_host_to_net_u16 (80); return 1; } + else if (sep->transport_proto == TRANSPORT_PROTO_TLS) + 
{ + sep->port = clib_host_to_net_u16 (443); + return 1; + } + return 0; } @@ -106,6 +148,45 @@ parse_uri (char *uri, session_endpoint_cfg_t *sep) return 0; } +/* Use before 'parse_uri()'. Removes target from URI and copies it to 'char + * **target'. char **target is resized automatically. + */ +session_error_t +parse_target (char **uri, char **target) +{ + u8 counter = 0; + + for (u32 i = 0; i < (u32) strlen (*uri); i++) + { + if ((*uri)[i] == '/') + counter++; + + if (counter == 3) + { + /* resize and make space for NULL terminator */ + if (vec_len (*target) < strlen (*uri) - i + 2) + vec_resize (*target, strlen (*uri) - i + 2); + + strncpy (*target, *uri + i, strlen (*uri) - i); + (*uri)[i + 1] = '\0'; + break; + } + } + + if (!*target) + { + vec_resize (*target, 2); + **target = '/'; + } + + vec_terminate_c_string (*target); + + if (!*target) + return SESSION_E_INVALID; + + return 0; +} + session_error_t vnet_bind_uri (vnet_listen_args_t *a) { diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index 21ed97998f2..33b61187fe3 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -281,6 +281,7 @@ typedef enum session_fd_flag_ } session_fd_flag_t; session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep); +session_error_t parse_target (char **uri, char **target); session_error_t vnet_bind_uri (vnet_listen_args_t *); session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a); session_error_t vnet_connect_uri (vnet_connect_args_t *a); diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c index 18ea77dc8a8..f22e3647b7f 100644 --- a/src/vnet/session/application_local.c +++ b/src/vnet/session/application_local.c @@ -76,13 +76,13 @@ typedef struct ct_main_ static ct_main_t ct_main; static inline ct_worker_t * -ct_worker_get (u32 thread_index) +ct_worker_get (clib_thread_index_t thread_index) { return &ct_main.wrk[thread_index]; } static ct_connection_t * -ct_connection_alloc (u32 thread_index) +ct_connection_alloc (clib_thread_index_t thread_index) { ct_worker_t *wrk = ct_worker_get (thread_index); ct_connection_t *ct; @@ -99,7 +99,7 @@ ct_connection_alloc (u32 thread_index) } static ct_connection_t * -ct_connection_get (u32 ct_index, u32 thread_index) +ct_connection_get (u32 ct_index, clib_thread_index_t thread_index) { ct_worker_t *wrk = ct_worker_get (thread_index); @@ -659,7 +659,7 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct, } static void -ct_accept_one (u32 thread_index, u32 ho_index) +ct_accept_one (clib_thread_index_t thread_index, u32 ho_index) { ct_connection_t *sct, *cct, *ho; transport_connection_t *ll_ct; @@ -768,7 +768,7 @@ ct_accept_one (u32 thread_index, u32 ho_index) static void ct_accept_rpc_wrk_handler (void *rpc_args) { - u32 thread_index, n_connects, i, n_pending; + clib_thread_index_t thread_index, n_connects, i, n_pending; const u32 max_connects = 32; ct_worker_t *wrk; u8 need_rpc = 0; @@ -805,7 +805,7 @@ ct_accept_rpc_wrk_handler (void *rpc_args) static void ct_fwrk_flush_connects (void *rpc_args) { - u32 thread_index, fwrk_index, n_workers; + clib_thread_index_t thread_index, fwrk_index, n_workers; ct_main_t *cm = &ct_main; ct_worker_t *wrk; u8 need_rpc; @@ -851,7 +851,7 @@ static void ct_program_connect_to_wrk (u32 ho_index) { ct_main_t *cm = &ct_main; - u32 thread_index; + clib_thread_index_t thread_index; /* Simple round-robin policy for spreading sessions over workers. 
We skip * thread index 0, i.e., offset the index by 1, when we have workers as it @@ -946,7 +946,7 @@ ct_session_half_open_get (u32 ct_index) } static void -ct_session_cleanup (u32 conn_index, u32 thread_index) +ct_session_cleanup (u32 conn_index, clib_thread_index_t thread_index) { ct_connection_t *ct, *peer_ct; @@ -1173,7 +1173,7 @@ ct_program_cleanup (ct_connection_t *ct) } static void -ct_session_close (u32 ct_index, u32 thread_index) +ct_session_close (u32 ct_index, clib_thread_index_t thread_index) { ct_connection_t *ct, *peer_ct; session_t *s; @@ -1204,7 +1204,7 @@ ct_session_close (u32 ct_index, u32 thread_index) } static void -ct_session_reset (u32 ct_index, u32 thread_index) +ct_session_reset (u32 ct_index, clib_thread_index_t thread_index) { ct_connection_t *ct; ct = ct_connection_get (ct_index, thread_index); @@ -1213,7 +1213,7 @@ ct_session_reset (u32 ct_index, u32 thread_index) } static transport_connection_t * -ct_session_get (u32 ct_index, u32 thread_index) +ct_session_get (u32 ct_index, clib_thread_index_t thread_index) { return (transport_connection_t *) ct_connection_get (ct_index, thread_index); @@ -1331,7 +1331,7 @@ static u8 * format_ct_session (u8 * s, va_list * args) { u32 ct_index = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); ct_connection_t *ct; diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c index ad0b18e8d75..a5b1e1f4ea4 100644 --- a/src/vnet/session/application_worker.c +++ b/src/vnet/session/application_worker.c @@ -471,7 +471,8 @@ app_worker_connect_notify (app_worker_t * app_wrk, session_t * s, session_event_t evt = { .event_type = SESSION_CTRL_EVT_CONNECTED, .as_u64[0] = s ? s->session_index : ~0, .as_u64[1] = (u64) opaque << 32 | (u32) err }; - u32 thread_index = s ? s->thread_index : vlib_get_thread_index (); + clib_thread_index_t thread_index = + s ? 
s->thread_index : vlib_get_thread_index (); app_worker_add_event_custom (app_wrk, thread_index, &evt); return 0; @@ -782,7 +783,8 @@ app_worker_add_event (app_worker_t *app_wrk, session_t *s, } void -app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index, +app_worker_add_event_custom (app_worker_t *app_wrk, + clib_thread_index_t thread_index, session_event_t *evt) { clib_fifo_add1 (app_wrk->wrk_evts[thread_index], *evt); @@ -832,13 +834,15 @@ app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg, } u8 -app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index) +app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, + clib_thread_index_t thread_index) { return app_wrk->wrk_mq_congested[thread_index] > 0; } void -app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index) +app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, + clib_thread_index_t thread_index) { ASSERT (thread_index == vlib_get_thread_index ()); if (!app_wrk->wrk_mq_congested[thread_index]) @@ -849,7 +853,8 @@ app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index) } void -app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, u32 thread_index) +app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, + clib_thread_index_t thread_index) { clib_atomic_fetch_sub_relax (&app_wrk->mq_congested, 1); ASSERT (thread_index == vlib_get_thread_index ()); diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c index 8c8b904c33d..341b70086d1 100644 --- a/src/vnet/session/segment_manager.c +++ b/src/vnet/session/segment_manager.c @@ -702,7 +702,8 @@ segment_manager_del_sessions_filter (segment_manager_t *sm, } int -segment_manager_try_alloc_fifos (fifo_segment_t *fs, u32 thread_index, +segment_manager_try_alloc_fifos (fifo_segment_t *fs, + clib_thread_index_t thread_index, u32 rx_fifo_size, u32 tx_fifo_size, svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo) { @@ -740,8 +741,8 @@ segment_manager_try_alloc_fifos (fifo_segment_t *fs, u32 thread_index, static inline int sm_lookup_segment_and_alloc_fifos (segment_manager_t *sm, segment_manager_props_t *props, - u32 thread_index, svm_fifo_t **rx_fifo, - svm_fifo_t **tx_fifo) + clib_thread_index_t thread_index, + svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo) { uword free_bytes, max_free_bytes; fifo_segment_t *cur, *fs = 0; @@ -771,7 +772,8 @@ sm_lookup_segment_and_alloc_fifos (segment_manager_t *sm, static int sm_lock_and_alloc_segment_and_fifos (segment_manager_t *sm, segment_manager_props_t *props, - u32 thread_index, svm_fifo_t **rx_fifo, + clib_thread_index_t thread_index, + svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo) { int new_fs_index, rv; @@ -814,10 +816,10 @@ done: } int -segment_manager_alloc_session_fifos (segment_manager_t * sm, - u32 thread_index, - svm_fifo_t ** rx_fifo, - svm_fifo_t ** tx_fifo) +segment_manager_alloc_session_fifos (segment_manager_t *sm, + clib_thread_index_t thread_index, + svm_fifo_t **rx_fifo, + svm_fifo_t **tx_fifo) { segment_manager_props_t *props; int rv; diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h index 86ca23bc9c7..0fb957a0912 100644 --- a/src/vnet/session/segment_manager.h +++ b/src/vnet/session/segment_manager.h @@ -146,15 +146,15 @@ u64 segment_manager_segment_handle (segment_manager_t * sm, fifo_segment_t * segment); void segment_manager_segment_reader_unlock (segment_manager_t * sm); -int segment_manager_alloc_session_fifos (segment_manager_t * sm, - u32 thread_index, - svm_fifo_t ** rx_fifo, - svm_fifo_t ** 
tx_fifo); -int segment_manager_try_alloc_fifos (fifo_segment_t * fs, - u32 thread_index, +int segment_manager_alloc_session_fifos (segment_manager_t *sm, + clib_thread_index_t thread_index, + svm_fifo_t **rx_fifo, + svm_fifo_t **tx_fifo); +int segment_manager_try_alloc_fifos (fifo_segment_t *fs, + clib_thread_index_t thread_index, u32 rx_fifo_size, u32 tx_fifo_size, - svm_fifo_t ** rx_fifo, - svm_fifo_t ** tx_fifo); + svm_fifo_t **rx_fifo, + svm_fifo_t **tx_fifo); void segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo); void segment_manager_detach_fifo (segment_manager_t *sm, svm_fifo_t **f); diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 2a6ac283fb9..7eb6181adb9 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -28,9 +28,18 @@ session_main_t session_main; +typedef enum +{ + SESSION_EVT_RPC, + SESSION_EVT_IO, + SESSION_EVT_SESSION, +} session_evt_family_t; + static inline int -session_send_evt_to_thread (void *data, void *args, u32 thread_index, - session_evt_type_t evt_type) +session_send_evt_to_thread (void *data, void *args, + clib_thread_index_t thread_index, + session_evt_type_t evt_type, + session_evt_family_t family) { session_worker_t *wrk = session_main_get_worker (thread_index); session_event_t *evt; @@ -45,30 +54,33 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, svm_msg_q_unlock (mq); return -2; } - switch (evt_type) + switch (family) { - case SESSION_CTRL_EVT_RPC: + case SESSION_EVT_RPC: + ASSERT (evt_type == SESSION_CTRL_EVT_RPC); msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->rpc_args.fp = data; evt->rpc_args.arg = args; break; - case SESSION_IO_EVT_RX: - case SESSION_IO_EVT_TX: - case SESSION_IO_EVT_TX_FLUSH: - case SESSION_IO_EVT_BUILTIN_RX: + case SESSION_EVT_IO: + ASSERT (evt_type == SESSION_IO_EVT_RX || evt_type == SESSION_IO_EVT_TX || + evt_type == SESSION_IO_EVT_TX_FLUSH || + evt_type == SESSION_IO_EVT_BUILTIN_RX); msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->session_index = *(u32 *) data; break; - case SESSION_IO_EVT_TX_MAIN: - case SESSION_CTRL_EVT_CLOSE: - case SESSION_CTRL_EVT_RESET: + case SESSION_EVT_SESSION: + ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE || + evt_type == SESSION_CTRL_EVT_HALF_CLOSE || + evt_type == SESSION_CTRL_EVT_RESET); msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->session_handle = session_handle ((session_t *) data); break; default: + ASSERT (0); clib_warning ("evt unhandled!"); svm_msg_q_unlock (mq); return -1; @@ -88,22 +100,26 @@ int session_send_io_evt_to_thread (svm_fifo_t * f, session_evt_type_t evt_type) { return session_send_evt_to_thread (&f->vpp_session_index, 0, - f->master_thread_index, evt_type); + f->master_thread_index, evt_type, + SESSION_EVT_IO); } /* Deprecated, use session_program_* functions */ int -session_send_io_evt_to_thread_custom (void *data, u32 thread_index, +session_send_io_evt_to_thread_custom (void *data, + clib_thread_index_t thread_index, session_evt_type_t evt_type) { - return session_send_evt_to_thread (data, 0, thread_index, evt_type); + return session_send_evt_to_thread (data, 0, thread_index, evt_type, + SESSION_EVT_IO); } int session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type) { return session_send_evt_to_thread ((void *) &sh.session_index, 0, 
- (u32) sh.thread_index, evt_type); + (u32) sh.thread_index, evt_type, + SESSION_EVT_IO); } int @@ -116,9 +132,9 @@ session_program_rx_io_evt (session_handle_tu_t sh) } else { - return session_send_evt_to_thread ((void *) &sh.session_index, 0, - (u32) sh.thread_index, - SESSION_IO_EVT_BUILTIN_RX); + return session_send_evt_to_thread ( + (void *) &sh.session_index, 0, (u32) sh.thread_index, + SESSION_IO_EVT_BUILTIN_RX, SESSION_EVT_IO); } } @@ -127,29 +143,29 @@ session_program_transport_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type) { return session_send_evt_to_thread ((void *) &sh.session_index, 0, - (u32) sh.thread_index, evt_type); + (u32) sh.thread_index, evt_type, + SESSION_EVT_IO); } int session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type) { /* only events supported are disconnect, shutdown and reset */ - ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE || - evt_type == SESSION_CTRL_EVT_HALF_CLOSE || - evt_type == SESSION_CTRL_EVT_RESET); - return session_send_evt_to_thread (s, 0, s->thread_index, evt_type); + return session_send_evt_to_thread (s, 0, s->thread_index, evt_type, + SESSION_EVT_SESSION); } void -session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp, - void *rpc_args) +session_send_rpc_evt_to_thread_force (clib_thread_index_t thread_index, + void *fp, void *rpc_args) { - session_send_evt_to_thread (fp, rpc_args, thread_index, - SESSION_CTRL_EVT_RPC); + session_send_evt_to_thread (fp, rpc_args, thread_index, SESSION_CTRL_EVT_RPC, + SESSION_EVT_RPC); } void -session_send_rpc_evt_to_thread (u32 thread_index, void *fp, void *rpc_args) +session_send_rpc_evt_to_thread (clib_thread_index_t thread_index, void *fp, + void *rpc_args) { if (thread_index != vlib_get_thread_index ()) session_send_rpc_evt_to_thread_force (thread_index, fp, rpc_args); @@ -212,7 +228,7 @@ sesssion_reschedule_tx (transport_connection_t * tc) static void session_program_transport_ctrl_evt (session_t * s, session_evt_type_t evt) { - u32 thread_index = vlib_get_thread_index (); + clib_thread_index_t thread_index = vlib_get_thread_index (); session_evt_elt_t *elt; session_worker_t *wrk; @@ -234,7 +250,7 @@ session_program_transport_ctrl_evt (session_t * s, session_evt_type_t evt) } session_t * -session_alloc (u32 thread_index) +session_alloc (clib_thread_index_t thread_index) { session_worker_t *wrk = &session_main.wrk[thread_index]; session_t *s; @@ -453,7 +469,7 @@ session_t * session_alloc_for_connection (transport_connection_t * tc) { session_t *s; - u32 thread_index = tc->thread_index; + clib_thread_index_t thread_index = tc->thread_index; ASSERT (thread_index == vlib_get_thread_index () || transport_protocol_is_cl (tc->proto)); @@ -480,115 +496,6 @@ session_alloc_for_half_open (transport_connection_t *tc) return s; } -/** - * Discards bytes from buffer chain - * - * It discards n_bytes_to_drop starting at first buffer after chain_b - */ -always_inline void -session_enqueue_discard_chain_bytes (vlib_main_t * vm, vlib_buffer_t * b, - vlib_buffer_t ** chain_b, - u32 n_bytes_to_drop) -{ - vlib_buffer_t *next = *chain_b; - u32 to_drop = n_bytes_to_drop; - ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT); - while (to_drop && (next->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - next = vlib_get_buffer (vm, next->next_buffer); - if (next->current_length > to_drop) - { - vlib_buffer_advance (next, to_drop); - to_drop = 0; - } - else - { - to_drop -= next->current_length; - next->current_length = 0; - } - } - *chain_b = next; - - if (to_drop == 0) - 
b->total_length_not_including_first_buffer -= n_bytes_to_drop; -} - -/** - * Enqueue buffer chain tail - */ -always_inline int -session_enqueue_chain_tail (session_t * s, vlib_buffer_t * b, - u32 offset, u8 is_in_order) -{ - vlib_buffer_t *chain_b; - u32 chain_bi, len, diff; - vlib_main_t *vm = vlib_get_main (); - u8 *data; - u32 written = 0; - int rv = 0; - - if (is_in_order && offset) - { - diff = offset - b->current_length; - if (diff > b->total_length_not_including_first_buffer) - return 0; - chain_b = b; - session_enqueue_discard_chain_bytes (vm, b, &chain_b, diff); - chain_bi = vlib_get_buffer_index (vm, chain_b); - } - else - chain_bi = b->next_buffer; - - do - { - chain_b = vlib_get_buffer (vm, chain_bi); - data = vlib_buffer_get_current (chain_b); - len = chain_b->current_length; - if (!len) - continue; - if (is_in_order) - { - rv = svm_fifo_enqueue (s->rx_fifo, len, data); - if (rv == len) - { - written += rv; - } - else if (rv < len) - { - return (rv > 0) ? (written + rv) : written; - } - else if (rv > len) - { - written += rv; - - /* written more than what was left in chain */ - if (written > b->total_length_not_including_first_buffer) - return written; - - /* drop the bytes that have already been delivered */ - session_enqueue_discard_chain_bytes (vm, b, &chain_b, rv - len); - } - } - else - { - rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data); - if (rv) - { - clib_warning ("failed to enqueue multi-buffer seg"); - return -1; - } - offset += len; - } - } - while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) - ? chain_b->next_buffer : 0)); - - if (is_in_order) - return written; - - return 0; -} - void session_fifo_tuning (session_t * s, svm_fifo_t * f, session_ft_action_t act, u32 len) @@ -734,7 +641,7 @@ session_dequeue_notify (session_t *s) */ void session_main_flush_enqueue_events (transport_proto_t transport_proto, - u32 thread_index) + clib_thread_index_t thread_index) { session_worker_t *wrk = session_main_get_worker (thread_index); session_handle_t *handles; @@ -760,154 +667,6 @@ session_main_flush_enqueue_events (transport_proto_t transport_proto, wrk->session_to_enqueue[transport_proto] = handles; } -/* - * Enqueue data for delivery to app. If requested, it queues app notification - * event for later delivery. - * - * @param tc Transport connection which is to be enqueued data - * @param b Buffer to be enqueued - * @param offset Offset at which to start enqueueing if out-of-order - * @param queue_event Flag to indicate if peer is to be notified or if event - * is to be queued. The former is useful when more data is - * enqueued and only one event is to be generated. - * @param is_in_order Flag to indicate if data is in order - * @return Number of bytes enqueued or a negative value if enqueueing failed. - */ -int -session_enqueue_stream_connection (transport_connection_t * tc, - vlib_buffer_t * b, u32 offset, - u8 queue_event, u8 is_in_order) -{ - session_t *s; - int enqueued = 0, rv, in_order_off; - - s = session_get (tc->s_index, tc->thread_index); - - if (is_in_order) - { - enqueued = svm_fifo_enqueue (s->rx_fifo, - b->current_length, - vlib_buffer_get_current (b)); - if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) - && enqueued >= 0)) - { - in_order_off = enqueued > b->current_length ? 
enqueued : 0; - rv = session_enqueue_chain_tail (s, b, in_order_off, 1); - if (rv > 0) - enqueued += rv; - } - } - else - { - rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, - b->current_length, - vlib_buffer_get_current (b)); - if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv)) - session_enqueue_chain_tail (s, b, offset + b->current_length, 0); - /* if something was enqueued, report even this as success for ooo - * segment handling */ - return rv; - } - - if (queue_event) - { - /* Queue RX event on this fifo. Eventually these will need to be - * flushed by calling @ref session_main_flush_enqueue_events () */ - if (!(s->flags & SESSION_F_RX_EVT)) - { - session_worker_t *wrk = session_main_get_worker (s->thread_index); - ASSERT (s->thread_index == vlib_get_thread_index ()); - s->flags |= SESSION_F_RX_EVT; - vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s)); - } - - session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); - } - - return enqueued; -} - -always_inline int -session_enqueue_dgram_connection_inline (session_t *s, - session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, - u8 queue_event, u32 is_cl) -{ - int rv; - - ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo) - >= b->current_length + sizeof (*hdr)); - - if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) - { - svm_fifo_seg_t segs[2] = { - { (u8 *) hdr, sizeof (*hdr) }, - { vlib_buffer_get_current (b), b->current_length } - }; - - rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, - 0 /* allow_partial */ ); - } - else - { - vlib_main_t *vm = vlib_get_main (); - svm_fifo_seg_t *segs = 0, *seg; - vlib_buffer_t *it = b; - u32 n_segs = 1; - - vec_add2 (segs, seg, 1); - seg->data = (u8 *) hdr; - seg->len = sizeof (*hdr); - while (it) - { - vec_add2 (segs, seg, 1); - seg->data = vlib_buffer_get_current (it); - seg->len = it->current_length; - n_segs++; - if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT)) - break; - it = vlib_get_buffer (vm, it->next_buffer); - } - rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs, - 0 /* allow partial */ ); - vec_free (segs); - } - - if (queue_event && rv > 0) - { - /* Queue RX event on this fifo. Eventually these will need to be - * flushed by calling @ref session_main_flush_enqueue_events () */ - if (!(s->flags & SESSION_F_RX_EVT)) - { - u32 thread_index = - is_cl ? vlib_get_thread_index () : s->thread_index; - session_worker_t *wrk = session_main_get_worker (thread_index); - ASSERT (s->thread_index == vlib_get_thread_index () || is_cl); - s->flags |= SESSION_F_RX_EVT; - vec_add1 (wrk->session_to_enqueue[proto], session_handle (s)); - } - - session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); - } - return rv > 0 ? 
rv : 0; -} - -int -session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, u8 queue_event) -{ - return session_enqueue_dgram_connection_inline (s, hdr, b, proto, - queue_event, 0 /* is_cl */); -} - -int -session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, u8 queue_event) -{ - return session_enqueue_dgram_connection_inline (s, hdr, b, proto, - queue_event, 1 /* is_cl */); -} - int session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr, vlib_buffer_t *b, u8 proto, @@ -1016,7 +775,7 @@ session_switch_pool_closed_rpc (void *arg) typedef struct _session_switch_pool_args { u32 session_index; - u32 thread_index; + clib_thread_index_t thread_index; u32 new_thread_index; u32 new_session_index; } session_switch_pool_args_t; @@ -1308,8 +1067,8 @@ session_stream_accept_notify (transport_connection_t * tc) * Accept a stream session. Optionally ping the server by callback. */ int -session_stream_accept (transport_connection_t * tc, u32 listener_index, - u32 thread_index, u8 notify) +session_stream_accept (transport_connection_t *tc, u32 listener_index, + clib_thread_index_t thread_index, u8 notify) { session_t *s; int rv; @@ -1343,8 +1102,8 @@ session_stream_accept (transport_connection_t * tc, u32 listener_index, } int -session_dgram_accept (transport_connection_t * tc, u32 listener_index, - u32 thread_index) +session_dgram_accept (transport_connection_t *tc, u32 listener_index, + clib_thread_index_t thread_index) { app_worker_t *app_wrk; session_t *s; diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index daa3bf97f56..d5402b3571e 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -151,6 +151,8 @@ typedef struct session_worker_ /** Per-app-worker bitmap of pending notifications */ uword *app_wrks_pending_ntf; + svm_fifo_seg_t *rx_segs; + int config_index; u8 dma_enabled; session_dma_transfer *dma_trans; @@ -323,6 +325,67 @@ typedef struct _session_enable_disable_args_t #define TRANSPORT_PROTO_INVALID (session_main.last_transport_proto_type + 1) #define TRANSPORT_N_PROTOS (session_main.last_transport_proto_type + 1) +/* + * Session layer functions + */ + +always_inline session_main_t * +vnet_get_session_main () +{ + return &session_main; +} + +always_inline session_worker_t * +session_main_get_worker (clib_thread_index_t thread_index) +{ + return vec_elt_at_index (session_main.wrk, thread_index); +} + +static inline session_worker_t * +session_main_get_worker_if_valid (clib_thread_index_t thread_index) +{ + if (thread_index > vec_len (session_main.wrk)) + return 0; + return session_main_get_worker (thread_index); +} + +always_inline svm_msg_q_t * +session_main_get_vpp_event_queue (clib_thread_index_t thread_index) +{ + return session_main_get_worker (thread_index)->vpp_event_queue; +} + +always_inline u8 +session_main_is_enabled () +{ + return session_main.is_enabled == 1; +} + +always_inline void +session_worker_stat_error_inc (session_worker_t *wrk, int error, int value) +{ + if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS)) + wrk->stats.errors[-error] += value; + else + SESSION_DBG ("unknown session counter"); +} + +always_inline void +session_stat_error_inc (int error, int value) +{ + session_worker_t *wrk; + wrk = session_main_get_worker (vlib_get_thread_index ()); + session_worker_stat_error_inc (wrk, error, value); +} + +#define session_cli_return_if_not_enabled() \ + do \ + { \ + if (!session_main.is_enabled) \ + return 
clib_error_return (0, "session layer is not enabled"); \ + } \ + while (0) + static inline void session_evt_add_old (session_worker_t * wrk, session_evt_elt_t * elt) { @@ -392,7 +455,7 @@ session_evt_alloc_old (session_worker_t * wrk) int session_wrk_handle_mq (session_worker_t *wrk, svm_msg_q_t *mq); -session_t *session_alloc (u32 thread_index); +session_t *session_alloc (clib_thread_index_t thread_index); void session_free (session_t * s); void session_cleanup (session_t *s); void session_program_cleanup (session_t *s); @@ -400,14 +463,14 @@ void session_cleanup_half_open (session_handle_t ho_handle); u8 session_is_valid (u32 si, u8 thread_index); always_inline session_t * -session_get (u32 si, u32 thread_index) +session_get (u32 si, clib_thread_index_t thread_index) { ASSERT (session_is_valid (si, thread_index)); return pool_elt_at_index (session_main.wrk[thread_index].sessions, si); } always_inline session_t * -session_get_if_valid (u64 si, u32 thread_index) +session_get_if_valid (u64 si, clib_thread_index_t thread_index) { if (thread_index >= vec_len (session_main.wrk)) return 0; @@ -455,7 +518,7 @@ session_get_from_handle_safe (session_handle_tu_t handle) } always_inline session_t * -session_clone_safe (u32 session_index, u32 thread_index) +session_clone_safe (u32 session_index, clib_thread_index_t thread_index) { u32 current_thread_index = vlib_get_thread_index (), new_index; session_t *old_s, *new_s; @@ -487,17 +550,18 @@ int session_enqueue_notify_cl (session_t *s); /* Deprecated, use session_program_* functions */ int session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type); /* Deprecated, use session_program_* functions */ -int session_send_io_evt_to_thread_custom (void *data, u32 thread_index, +int session_send_io_evt_to_thread_custom (void *data, + clib_thread_index_t thread_index, session_evt_type_t evt_type); int session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type); int session_program_rx_io_evt (session_handle_tu_t sh); int session_program_transport_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type); -void session_send_rpc_evt_to_thread (u32 thread_index, void *fp, - void *rpc_args); -void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp, - void *rpc_args); +void session_send_rpc_evt_to_thread (clib_thread_index_t thread_index, + void *fp, void *rpc_args); +void session_send_rpc_evt_to_thread_force (clib_thread_index_t thread_index, + void *fp, void *rpc_args); void session_add_self_custom_tx_evt (transport_connection_t * tc, u8 has_prio); void sesssion_reschedule_tx (transport_connection_t * tc); @@ -517,20 +581,6 @@ uword unformat_transport_connection (unformat_input_t * input, * Interface to transport protos */ -int session_enqueue_stream_connection (transport_connection_t * tc, - vlib_buffer_t * b, u32 offset, - u8 queue_event, u8 is_in_order); -int session_enqueue_dgram_connection (session_t * s, - session_dgram_hdr_t * hdr, - vlib_buffer_t * b, u8 proto, - u8 queue_event); -int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, - u8 queue_event); -int session_enqueue_dgram_connection_cl (session_t *s, - session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, - u8 queue_event); int session_stream_connect_notify (transport_connection_t * tc, session_error_t err); int session_dgram_connect_notify (transport_connection_t * tc, @@ -544,10 +594,10 @@ void session_half_open_migrate_notify (transport_connection_t *tc); int 
session_half_open_migrated_notify (transport_connection_t *tc); void session_transport_closed_notify (transport_connection_t * tc); void session_transport_reset_notify (transport_connection_t * tc); -int session_stream_accept (transport_connection_t * tc, u32 listener_index, - u32 thread_index, u8 notify); -int session_dgram_accept (transport_connection_t * tc, u32 listener_index, - u32 thread_index); +int session_stream_accept (transport_connection_t *tc, u32 listener_index, + clib_thread_index_t thread_index, u8 notify); +int session_dgram_accept (transport_connection_t *tc, u32 listener_index, + clib_thread_index_t thread_index); /** * Initialize session layer for given transport proto and ip version @@ -566,9 +616,279 @@ void session_register_transport (transport_proto_t transport_proto, u32 output_node); transport_proto_t session_add_transport_proto (void); void session_register_update_time_fn (session_update_time_fn fn, u8 is_add); +void session_main_flush_enqueue_events (transport_proto_t transport_proto, + clib_thread_index_t thread_index); +void session_queue_run_on_main_thread (vlib_main_t *vm); int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes); u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes); +int session_enqueue_dgram_connection_cl (session_t *s, + session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event); +void session_fifo_tuning (session_t *s, svm_fifo_t *f, session_ft_action_t act, + u32 len); + +/** + * Discards bytes from buffer chain + * + * It discards n_bytes_to_drop starting at first buffer after chain_b + */ +always_inline void +session_enqueue_discard_chain_bytes (vlib_main_t *vm, vlib_buffer_t *b, + vlib_buffer_t **chain_b, + u32 n_bytes_to_drop) +{ + vlib_buffer_t *next = *chain_b; + u32 to_drop = n_bytes_to_drop; + ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT); + while (to_drop && (next->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + next = vlib_get_buffer (vm, next->next_buffer); + if (next->current_length > to_drop) + { + vlib_buffer_advance (next, to_drop); + to_drop = 0; + } + else + { + to_drop -= next->current_length; + next->current_length = 0; + } + } + *chain_b = next; + + if (to_drop == 0) + b->total_length_not_including_first_buffer -= n_bytes_to_drop; +} + +/** + * Enqueue buffer chain tail + */ +always_inline int +session_enqueue_chain_tail (session_t *s, vlib_buffer_t *b, u32 offset, + u8 is_in_order) +{ + vlib_buffer_t *chain_b; + u32 chain_bi; + + if (is_in_order) + { + session_worker_t *wrk = session_main_get_worker (s->thread_index); + u32 diff, written = 0; + + if (offset) + { + diff = offset - b->current_length; + if (diff > b->total_length_not_including_first_buffer) + return 0; + chain_b = b; + session_enqueue_discard_chain_bytes (wrk->vm, b, &chain_b, diff); + chain_bi = vlib_get_buffer_index (wrk->vm, chain_b); + } + else + { + chain_bi = b->next_buffer; + } + + chain_b = vlib_get_buffer (wrk->vm, chain_bi); + svm_fifo_seg_t *seg; + + while (chain_b) + { + vec_add2 (wrk->rx_segs, seg, 1); + seg->data = vlib_buffer_get_current (chain_b); + seg->len = chain_b->current_length; + chain_b = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) ? 
+ vlib_get_buffer (wrk->vm, chain_b->next_buffer) : + 0; + } + + written = svm_fifo_enqueue_segments (s->rx_fifo, wrk->rx_segs, + vec_len (wrk->rx_segs), + 1 /* allow partial*/); + + vec_reset_length (wrk->rx_segs); + + return written; + } + else + { + vlib_main_t *vm = vlib_get_main (); + int rv = 0; + u8 *data; + u32 len; + + /* TODO svm_fifo_enqueue_segments with offset */ + chain_bi = b->next_buffer; + do + { + chain_b = vlib_get_buffer (vm, chain_bi); + data = vlib_buffer_get_current (chain_b); + len = chain_b->current_length; + if (!len) + continue; + + rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data); + if (rv) + { + clib_warning ("failed to enqueue multi-buffer seg"); + return -1; + } + offset += len; + } + while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) ? + chain_b->next_buffer : + 0)); + + return 0; + } +} + +/* + * Enqueue data for delivery to app. If requested, it queues app notification + * event for later delivery. + * + * @param tc Transport connection which is to be enqueued data + * @param b Buffer to be enqueued + * @param offset Offset at which to start enqueueing if out-of-order + * @param queue_event Flag to indicate if peer is to be notified or if event + * is to be queued. The former is useful when more data is + * enqueued and only one event is to be generated. + * @param is_in_order Flag to indicate if data is in order + * @return Number of bytes enqueued or a negative value if enqueueing failed. + */ +always_inline int +session_enqueue_stream_connection (transport_connection_t *tc, + vlib_buffer_t *b, u32 offset, + u8 queue_event, u8 is_in_order) +{ + session_t *s; + int enqueued = 0, rv, in_order_off; + + s = session_get (tc->s_index, tc->thread_index); + + if (is_in_order) + { + enqueued = svm_fifo_enqueue (s->rx_fifo, b->current_length, + vlib_buffer_get_current (b)); + if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && + enqueued >= 0)) + { + in_order_off = enqueued > b->current_length ? enqueued : 0; + rv = session_enqueue_chain_tail (s, b, in_order_off, 1); + if (rv > 0) + enqueued += rv; + } + } + else + { + rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, b->current_length, + vlib_buffer_get_current (b)); + if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv)) + session_enqueue_chain_tail (s, b, offset + b->current_length, 0); + /* if something was enqueued, report even this as success for ooo + * segment handling */ + return rv; + } + + if (queue_event) + { + /* Queue RX event on this fifo. 
Eventually these will need to be + * flushed by calling @ref session_main_flush_enqueue_events () */ + if (!(s->flags & SESSION_F_RX_EVT)) + { + session_worker_t *wrk = session_main_get_worker (s->thread_index); + ASSERT (s->thread_index == vlib_get_thread_index ()); + s->flags |= SESSION_F_RX_EVT; + vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s)); + } + + session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); + } + + return enqueued; +} + +always_inline int +session_enqueue_dgram_connection_inline (session_t *s, + session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event, u32 is_cl) +{ + int rv; + + ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo) >= + b->current_length + sizeof (*hdr)); + + if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) + { + svm_fifo_seg_t segs[2] = { { (u8 *) hdr, sizeof (*hdr) }, + { vlib_buffer_get_current (b), + b->current_length } }; + + rv = + svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, 0 /* allow_partial */); + } + else + { + vlib_main_t *vm = vlib_get_main (); + svm_fifo_seg_t *segs = 0, *seg; + vlib_buffer_t *it = b; + u32 n_segs = 1; + + vec_add2 (segs, seg, 1); + seg->data = (u8 *) hdr; + seg->len = sizeof (*hdr); + while (it) + { + vec_add2 (segs, seg, 1); + seg->data = vlib_buffer_get_current (it); + seg->len = it->current_length; + n_segs++; + if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + it = vlib_get_buffer (vm, it->next_buffer); + } + rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs, + 0 /* allow partial */); + vec_free (segs); + } + + if (queue_event && rv > 0) + { + /* Queue RX event on this fifo. Eventually these will need to be + * flushed by calling @ref session_main_flush_enqueue_events () */ + if (!(s->flags & SESSION_F_RX_EVT)) + { + clib_thread_index_t thread_index = + is_cl ? vlib_get_thread_index () : s->thread_index; + session_worker_t *wrk = session_main_get_worker (thread_index); + ASSERT (s->thread_index == vlib_get_thread_index () || is_cl); + s->flags |= SESSION_F_RX_EVT; + vec_add1 (wrk->session_to_enqueue[proto], session_handle (s)); + } + + session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); + } + return rv > 0 ? 
rv : 0; +} + +always_inline int +session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, u8 queue_event) +{ + return session_enqueue_dgram_connection_inline (s, hdr, b, proto, + queue_event, 0 /* is_cl */); +} + +always_inline int +session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, u8 queue_event) +{ + return session_enqueue_dgram_connection_inline (s, hdr, b, proto, + queue_event, 1 /* is_cl */); +} always_inline void session_set_state (session_t *s, session_state_t session_state) @@ -640,19 +960,19 @@ transport_rx_fifo_req_deq_ntf (transport_connection_t *tc) } always_inline clib_time_type_t -transport_time_now (u32 thread_index) +transport_time_now (clib_thread_index_t thread_index) { return session_main.wrk[thread_index].last_vlib_time; } always_inline clib_us_time_t -transport_us_time_now (u32 thread_index) +transport_us_time_now (clib_thread_index_t thread_index) { return session_main.wrk[thread_index].last_vlib_us_time; } always_inline clib_time_type_t -transport_seconds_per_loop (u32 thread_index) +transport_seconds_per_loop (clib_thread_index_t thread_index) { return session_main.wrk[thread_index].vm->seconds_per_loop; } @@ -753,69 +1073,6 @@ ho_session_free (session_t *s) transport_connection_t *listen_session_get_transport (session_t * s); -/* - * Session layer functions - */ - -always_inline session_main_t * -vnet_get_session_main () -{ - return &session_main; -} - -always_inline session_worker_t * -session_main_get_worker (u32 thread_index) -{ - return vec_elt_at_index (session_main.wrk, thread_index); -} - -static inline session_worker_t * -session_main_get_worker_if_valid (u32 thread_index) -{ - if (thread_index > vec_len (session_main.wrk)) - return 0; - return session_main_get_worker (thread_index); -} - -always_inline svm_msg_q_t * -session_main_get_vpp_event_queue (u32 thread_index) -{ - return session_main_get_worker (thread_index)->vpp_event_queue; -} - -always_inline u8 -session_main_is_enabled () -{ - return session_main.is_enabled == 1; -} - -always_inline void -session_worker_stat_error_inc (session_worker_t *wrk, int error, int value) -{ - if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS)) - wrk->stats.errors[-error] += value; - else - SESSION_DBG ("unknown session counter"); -} - -always_inline void -session_stat_error_inc (int error, int value) -{ - session_worker_t *wrk; - wrk = session_main_get_worker (vlib_get_thread_index ()); - session_worker_stat_error_inc (wrk, error, value); -} - -#define session_cli_return_if_not_enabled() \ -do { \ - if (!session_main.is_enabled) \ - return clib_error_return (0, "session layer is not enabled"); \ -} while (0) - -void session_main_flush_enqueue_events (transport_proto_t transport_proto, - u32 thread_index); -void session_queue_run_on_main_thread (vlib_main_t * vm); - /** * Add session node pending buffer with custom node * @@ -825,7 +1082,8 @@ void session_queue_run_on_main_thread (vlib_main_t * vm); * must exist */ always_inline void -session_add_pending_tx_buffer (u32 thread_index, u32 bi, u32 next_node) +session_add_pending_tx_buffer (clib_thread_index_t thread_index, u32 bi, + u32 next_node) { session_worker_t *wrk = session_main_get_worker (thread_index); vec_add1 (wrk->pending_tx_buffers, bi); diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 5ac21c4eb85..8192194ff34 100644 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -517,7 +517,7 @@ 
mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh) fifo_segment_t *eq_seg; app_worker_t *app_wrk; application_t *app; - u32 thread_index; + clib_thread_index_t thread_index; thread_index = session_thread_from_handle (new_sh); app_wrk = app_worker_get (s->app_wrk_index); diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c index aff102a6989..b223eff41b0 100644 --- a/src/vnet/session/session_cli.c +++ b/src/vnet/session/session_cli.c @@ -404,7 +404,7 @@ typedef struct session_cli_filter_ session_cli_endpt_flags_t endpt_flags; session_state_t *states; transport_proto_t transport_proto; - u32 thread_index; + clib_thread_index_t thread_index; u32 verbose; } session_cli_filter_t; @@ -521,7 +521,8 @@ session_cli_show_session_filter (vlib_main_t *vm, session_cli_filter_t *sf) } void -session_cli_show_events_thread (vlib_main_t * vm, u32 thread_index) +session_cli_show_events_thread (vlib_main_t *vm, + clib_thread_index_t thread_index) { session_worker_t *wrk; @@ -540,7 +541,7 @@ session_cli_show_events_thread (vlib_main_t * vm, u32 thread_index) } static void -session_cli_show_events (vlib_main_t * vm, u32 thread_index) +session_cli_show_events (vlib_main_t *vm, clib_thread_index_t thread_index) { session_main_t *smm = &session_main; if (!thread_index) @@ -824,7 +825,7 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { session_main_t *smm = &session_main; - u32 thread_index = 0, clear_all = 0; + clib_thread_index_t thread_index = 0, clear_all = 0; session_worker_t *wrk; u32 session_index = ~0; session_t *session; diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c index 01be281d4f7..dd3bde77058 100644 --- a/src/vnet/session/session_input.c +++ b/src/vnet/session/session_input.c @@ -4,6 +4,7 @@ #include <vnet/session/session.h> #include <vnet/session/application.h> +#include <vnet/session/application_local.h> static inline int mq_try_lock (svm_msg_q_t *mq) @@ -34,7 +35,7 @@ app_worker_del_all_events (app_worker_t *app_wrk) { session_worker_t *wrk; session_event_t *evt; - u32 thread_index; + clib_thread_index_t thread_index; session_t *s; for (thread_index = 0; thread_index < vec_len (app_wrk->wrk_evts); @@ -72,7 +73,8 @@ app_worker_del_all_events (app_worker_t *app_wrk) } always_inline int -app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, +app_worker_flush_events_inline (app_worker_t *app_wrk, + clib_thread_index_t thread_index, u8 is_builtin) { application_t *app = application_get (app_wrk->app_index); @@ -166,6 +168,13 @@ app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, if (!(s->flags & SESSION_F_APP_CLOSED)) app->cb_fns.session_disconnect_callback (s); } + else if (!session_has_transport (s)) + { + /* Special handling for cut-through sessions for builtin apps + * similar to session_mq_accepted_reply_handler */ + session_set_state (s, SESSION_STATE_READY); + ct_session_connect_notify (s, SESSION_E_NONE); + } } break; case SESSION_CTRL_EVT_CONNECTED: @@ -277,7 +286,8 @@ app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, } int -app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index) +app_wrk_flush_wrk_events (app_worker_t *app_wrk, + clib_thread_index_t thread_index) { if (app_worker_application_is_builtin (app_wrk)) return app_worker_flush_events_inline (app_wrk, thread_index, @@ -292,7 +302,7 @@ session_wrk_flush_events (session_worker_t *wrk) { app_worker_t *app_wrk; uword app_wrk_index; - u32 
thread_index; + clib_thread_index_t thread_index; thread_index = wrk->vm->thread_index; app_wrk_index = clib_bitmap_first_set (wrk->app_wrks_pending_ntf); @@ -320,7 +330,7 @@ session_wrk_flush_events (session_worker_t *wrk) VLIB_NODE_FN (session_input_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; session_worker_t *wrk; wrk = session_main_get_worker (thread_index); diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 28a1feb1ed8..42b48a00d02 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -954,10 +954,10 @@ session_lookup_half_open_connection (u64 handle, u8 proto, u8 is_ip4) * @return pointer to transport connection, if one is found, 0 otherwise */ transport_connection_t * -session_lookup_connection_wt4 (u32 fib_index, ip4_address_t * lcl, - ip4_address_t * rmt, u16 lcl_port, - u16 rmt_port, u8 proto, u32 thread_index, - u8 * result) +session_lookup_connection_wt4 (u32 fib_index, ip4_address_t *lcl, + ip4_address_t *rmt, u16 lcl_port, u16 rmt_port, + u8 proto, clib_thread_index_t thread_index, + u8 *result) { session_table_t *st; session_kv4_t kv4; @@ -1185,10 +1185,10 @@ session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt, * @return pointer to transport connection, if one is found, 0 otherwise */ transport_connection_t * -session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl, - ip6_address_t * rmt, u16 lcl_port, - u16 rmt_port, u8 proto, u32 thread_index, - u8 * result) +session_lookup_connection_wt6 (u32 fib_index, ip6_address_t *lcl, + ip6_address_t *rmt, u16 lcl_port, u16 rmt_port, + u8 proto, clib_thread_index_t thread_index, + u8 *result) { session_table_t *st; session_t *s; @@ -1380,6 +1380,71 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl, lcl_port, rmt_port, proto); } +/** + * Lookup exact match 6-tuple amongst established and half-open sessions + * + * Does not look into session rules table and does not try to find a listener. 
+ */ +transport_connection_t * +session_lookup_6tuple (u32 fib_index, ip46_address_t *lcl, ip46_address_t *rmt, + u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4) +{ + session_table_t *st; + session_t *s; + int rv; + + if (is_ip4) + { + session_kv4_t kv4; + + st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index); + if (PREDICT_FALSE (!st)) + return 0; + + /* + * Lookup session amongst established ones + */ + make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&st->v4_session_hash, &kv4); + if (rv == 0) + { + s = session_get_from_handle (kv4.value); + return transport_get_connection (proto, s->connection_index, + s->thread_index); + } + + /* + * Try half-open connections + */ + rv = clib_bihash_search_inline_16_8 (&st->v4_half_open_hash, &kv4); + if (rv == 0) + return transport_get_half_open (proto, kv4.value & 0xFFFFFFFF); + } + else + { + session_kv6_t kv6; + + st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index); + if (PREDICT_FALSE (!st)) + return 0; + + make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6); + if (rv == 0) + { + s = session_get_from_handle (kv6.value); + return transport_get_connection (proto, s->connection_index, + s->thread_index); + } + + /* Try half-open connections */ + rv = clib_bihash_search_inline_48_8 (&st->v6_half_open_hash, &kv6); + if (rv == 0) + return transport_get_half_open (proto, kv6.value & 0xFFFFFFFF); + } + return 0; +} + session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args) { diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h index 9f56af20a87..64016639190 100644 --- a/src/vnet/session/session_lookup.h +++ b/src/vnet/session/session_lookup.h @@ -43,25 +43,17 @@ session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, session_t *session_lookup_safe6 (u32 fib_index, ip6_address_t * lcl, ip6_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto); -transport_connection_t *session_lookup_connection_wt4 (u32 fib_index, - ip4_address_t * lcl, - ip4_address_t * rmt, - u16 lcl_port, - u16 rmt_port, u8 proto, - u32 thread_index, - u8 * is_filtered); +transport_connection_t *session_lookup_connection_wt4 ( + u32 fib_index, ip4_address_t *lcl, ip4_address_t *rmt, u16 lcl_port, + u16 rmt_port, u8 proto, clib_thread_index_t thread_index, u8 *is_filtered); transport_connection_t *session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto); -transport_connection_t *session_lookup_connection_wt6 (u32 fib_index, - ip6_address_t * lcl, - ip6_address_t * rmt, - u16 lcl_port, - u16 rmt_port, u8 proto, - u32 thread_index, - u8 * is_filtered); +transport_connection_t *session_lookup_connection_wt6 ( + u32 fib_index, ip6_address_t *lcl, ip6_address_t *rmt, u16 lcl_port, + u16 rmt_port, u8 proto, clib_thread_index_t thread_index, u8 *is_filtered); transport_connection_t *session_lookup_connection6 (u32 fib_index, ip6_address_t * lcl, ip6_address_t * rmt, @@ -72,6 +64,9 @@ transport_connection_t *session_lookup_connection (u32 fib_index, ip46_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4); +transport_connection_t * +session_lookup_6tuple (u32 fib_index, ip46_address_t *lcl, ip46_address_t *rmt, + u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4); session_t *session_lookup_listener4 (u32 fib_index, ip4_address_t * lcl, u16 lcl_port, u8 proto, u8 use_wildcard); 
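For illustration only (not part of the patch): a minimal C sketch of how the new session_lookup_6tuple() exact-match helper declared above can be used. It mirrors the transport_alloc_local_port() logic changed further below, where a local ip:port pair is reused only if no established or half-open session already claims the full 6-tuple; the wrapper name lcl_port_is_shareable() is hypothetical.

/* Illustrative helper, not in the patch: returns 1 if the 6-tuple is free,
 * i.e., session_lookup_6tuple () finds no established or half-open session
 * using it, so the local ip:port pair may be shared for a new connect. */
static inline int
lcl_port_is_shareable (u32 fib_index, ip46_address_t *lcl_ip, u16 lcl_port,
		       ip46_address_t *rmt_ip, u16 rmt_port, u8 proto,
		       u8 is_ip4)
{
  /* Exact 6-tuple match only: no session rules table lookup and no
   * listener fallback, per the helper's doc comment above. */
  return session_lookup_6tuple (fib_index, lcl_ip, rmt_ip, lcl_port,
				rmt_port, proto, is_ip4) == 0;
}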
session_t *session_lookup_listener6 (u32 fib_index, ip6_address_t * lcl, diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index c0ff1de39bc..fb4c6252bb6 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -64,7 +64,8 @@ session_wrk_timerfd_update (session_worker_t *wrk, u64 time_ns) } always_inline u64 -session_wrk_tfd_timeout (session_wrk_state_t state, u32 thread_index) +session_wrk_tfd_timeout (session_wrk_state_t state, + clib_thread_index_t thread_index) { if (state == SESSION_WRK_INTERRUPT) return thread_index ? 1e6 : vlib_num_workers () ? 5e8 : 1e6; @@ -282,7 +283,7 @@ session_mq_handle_connects_rpc (void *arg) static void session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt) { - u32 thread_index = wrk - session_main.wrk; + clib_thread_index_t thread_index = wrk - session_main.wrk; session_evt_elt_t *he; if (PREDICT_FALSE (thread_index > transport_cl_thread ())) @@ -778,7 +779,7 @@ session_wrk_handle_evts_main_rpc (void *args) clib_llist_index_t ei, next_ei; session_evt_elt_t *he, *elt; session_worker_t *fwrk; - u32 thread_index; + clib_thread_index_t thread_index; vlib_worker_thread_barrier_sync (vm); @@ -836,8 +837,7 @@ vlib_node_registration_t session_queue_node; typedef struct { - u32 session_index; - u32 server_thread_index; + clib_thread_index_t thread_index; } session_queue_trace_t; /* packet trace format function */ @@ -848,8 +848,7 @@ format_session_queue_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *); - s = format (s, "session index %d thread index %d", - t->session_index, t->server_thread_index); + s = format (s, "thread index %d", t->thread_index); return s; } @@ -880,25 +879,25 @@ enum }; static void -session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, - u32 next_index, vlib_buffer_t **bufs, u16 n_segs, - session_t *s, u32 n_trace) +session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *bis, + u16 *nexts, u16 n_bufs) { - vlib_buffer_t **b = bufs; + u32 n_trace = vlib_get_trace_count (vm, node), *bi = bis; + u16 *next = nexts; + vlib_buffer_t *b; - while (n_trace && n_segs) + while (n_trace && n_bufs) { - if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[0], - 1 /* follow_chain */))) + b = vlib_get_buffer (vm, bi[0]); + if (PREDICT_TRUE ( + vlib_trace_buffer (vm, node, next[0], b, 1 /* follow_chain */))) { - session_queue_trace_t *t = - vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->session_index = s->session_index; - t->server_thread_index = s->thread_index; + session_queue_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t)); + t->thread_index = vm->thread_index; n_trace--; } - b++; - n_segs--; + bi++; + n_bufs--; } vlib_set_trace_count (vm, node, n_trace); } @@ -1194,7 +1193,7 @@ session_tx_not_ready (session_t * s, u8 peek_data) } else { - if (s->session_state == SESSION_STATE_TRANSPORT_DELETED) + if (s->session_state == SESSION_STATE_TRANSPORT_DELETED || !s->tx_fifo) return 2; } return 0; @@ -1402,7 +1401,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, session_evt_elt_t * elt, int *n_tx_packets, u8 peek_data) { - u32 n_trace, n_left, pbi, next_index, max_burst; + u32 n_left, pbi, next_index, max_burst; session_tx_context_t *ctx = &wrk->ctx; session_main_t *smm = &session_main; session_event_t *e = &elt->evt; @@ -1576,10 +1575,6 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, 
ctx->transport_vft->push_header (ctx->tc, ctx->transport_pending_bufs, ctx->n_segs_per_evt); - if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)) > 0)) - session_tx_trace_frame (vm, node, next_index, ctx->transport_pending_bufs, - ctx->n_segs_per_evt, ctx->s, n_trace); - if (PREDICT_FALSE (n_bufs)) vlib_buffer_free (vm, ctx->tx_buffers, n_bufs); @@ -1851,7 +1846,7 @@ static const u32 session_evt_msg_sizes[] = { always_inline void session_update_time_subscribers (session_main_t *smm, clib_time_type_t now, - u32 thread_index) + clib_thread_index_t thread_index) { session_update_time_fn *fn; @@ -1959,7 +1954,7 @@ static uword session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u32 thread_index = vm->thread_index, __clib_unused n_evts; + clib_thread_index_t thread_index = vm->thread_index, __clib_unused n_evts; session_evt_elt_t *elt, *ctrl_he, *new_he, *old_he; session_main_t *smm = vnet_get_session_main (); session_worker_t *wrk = &smm->wrk[thread_index]; @@ -2072,7 +2067,13 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk); if (vec_len (wrk->pending_tx_buffers)) - session_flush_pending_tx_buffers (wrk, node); + { + if (PREDICT_FALSE (vlib_get_trace_count (vm, node) > 0)) + session_tx_trace_frame (vm, node, wrk->pending_tx_buffers, + wrk->pending_tx_nexts, + vec_len (wrk->pending_tx_nexts)); + session_flush_pending_tx_buffers (wrk, node); + } vlib_node_increment_counter (vm, session_queue_node.index, SESSION_QUEUE_ERROR_TX, n_tx_packets); @@ -2119,7 +2120,7 @@ session_wrk_tfd_write_ready (clib_file_t *cf) void session_wrk_enable_adaptive_mode (session_worker_t *wrk) { - u32 thread_index = wrk->vm->thread_index; + clib_thread_index_t thread_index = wrk->vm->thread_index; clib_file_t template = { 0 }; if ((wrk->timerfd = timerfd_create (CLOCK_MONOTONIC, TFD_NONBLOCK)) < 0) diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h index 935f8f189ee..47a77449ba7 100644 --- a/src/vnet/session/session_types.h +++ b/src/vnet/session/session_types.h @@ -34,7 +34,7 @@ typedef union session_handle_tu_ struct { u32 session_index; - u32 thread_index; + clib_thread_index_t thread_index; }; } __attribute__ ((__transparent_union__)) session_handle_tu_t; @@ -49,7 +49,9 @@ typedef struct _session_endpoint #undef _ } session_endpoint_t; -#define foreach_session_endpoint_cfg_flags _ (PROXY_LISTEN, "proxy listener") +#define foreach_session_endpoint_cfg_flags \ + _ (PROXY_LISTEN, "proxy listener") \ + _ (SECURE, "secure") typedef enum session_endpoint_cfg_flags_bits_ { @@ -218,7 +220,7 @@ typedef struct session_ u32 session_index; /** Index of the thread that allocated the session */ - u32 thread_index; + clib_thread_index_t thread_index; }; }; diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index e8c9490decb..edec182541e 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -106,7 +106,7 @@ format_transport_connection (u8 * s, va_list * args) { u32 transport_proto = va_arg (*args, u32); u32 conn_index = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); transport_proto_vft_t *tp_vft; transport_connection_t *tc; @@ -247,15 +247,15 @@ format_transport_state (u8 *s, va_list *args) } u32 -transport_endpoint_lookup (transport_endpoint_table_t * ht, u8 proto, - ip46_address_t * ip, u16 port) 
+transport_endpoint_lookup (transport_endpoint_table_t *ht, u8 proto, + u32 fib_index, ip46_address_t *ip, u16 port) { clib_bihash_kv_24_8_t kv; int rv; kv.key[0] = ip->as_u64[0]; kv.key[1] = ip->as_u64[1]; - kv.key[2] = (u64) port << 8 | (u64) proto; + kv.key[2] = (u64) fib_index << 32 | (u64) port << 8 | (u64) proto; rv = clib_bihash_search_inline_24_8 (ht, &kv); if (rv == 0) @@ -272,7 +272,7 @@ transport_endpoint_table_add (transport_endpoint_table_t * ht, u8 proto, kv.key[0] = te->ip.as_u64[0]; kv.key[1] = te->ip.as_u64[1]; - kv.key[2] = (u64) te->port << 8 | (u64) proto; + kv.key[2] = (u64) te->fib_index << 32 | (u64) te->port << 8 | (u64) proto; kv.value = value; clib_bihash_add_del_24_8 (ht, &kv, 1); @@ -286,7 +286,7 @@ transport_endpoint_table_del (transport_endpoint_table_t * ht, u8 proto, kv.key[0] = te->ip.as_u64[0]; kv.key[1] = te->ip.as_u64[1]; - kv.key[2] = (u64) te->port << 8 | (u64) proto; + kv.key[2] = (u64) te->fib_index << 32 | (u64) te->port << 8 | (u64) proto; clib_bihash_add_del_24_8 (ht, &kv, 0); } @@ -431,8 +431,8 @@ default_get_transport_endpoint (transport_connection_t * tc, void transport_get_endpoint (transport_proto_t tp, u32 conn_index, - u32 thread_index, transport_endpoint_t * tep, - u8 is_lcl) + clib_thread_index_t thread_index, + transport_endpoint_t *tep, u8 is_lcl) { if (tp_vfts[tp].get_transport_endpoint) tp_vfts[tp].get_transport_endpoint (conn_index, thread_index, tep, @@ -547,14 +547,15 @@ transport_program_endpoint_cleanup (u32 lepi) } int -transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port) +transport_release_local_endpoint (u8 proto, u32 fib_index, + ip46_address_t *lcl_ip, u16 port) { transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 lepi; - lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip, - port); + lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, + fib_index, lcl_ip, port); if (lepi == ENDPOINT_INVALID_INDEX) return -1; @@ -574,7 +575,8 @@ transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port) } static int -transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) +transport_endpoint_mark_used (u8 proto, u32 fib_index, ip46_address_t *ip, + u16 port) { transport_main_t *tm = &tp_main; local_endpoint_t *lep; @@ -582,14 +584,15 @@ transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) ASSERT (vlib_get_thread_index () <= transport_cl_thread ()); - tei = - transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port); + tei = transport_endpoint_lookup (&tm->local_endpoints_table, proto, + fib_index, ip, port); if (tei != ENDPOINT_INVALID_INDEX) return SESSION_E_PORTINUSE; /* Pool reallocs with worker barrier */ lep = transport_endpoint_alloc (); clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip)); + lep->ep.fib_index = fib_index; lep->ep.port = port; lep->proto = proto; lep->refcnt = 1; @@ -601,7 +604,8 @@ transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) } void -transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port) +transport_share_local_endpoint (u8 proto, u32 fib_index, + ip46_address_t *lcl_ip, u16 port) { transport_main_t *tm = &tp_main; local_endpoint_t *lep; @@ -610,8 +614,8 @@ transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port) /* Active opens should call this only from a control thread, which are also * used to allocate and free ports. So, pool has only one writer and * potentially many readers. 
Listeners are allocated with barrier */ - lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip, - port); + lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, + fib_index, lcl_ip, port); if (lepi != ENDPOINT_INVALID_INDEX) { lep = pool_elt_at_index (tm->local_endpoints, lepi); @@ -653,16 +657,17 @@ transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr, } } - if (!transport_endpoint_mark_used (proto, lcl_addr, port)) + if (!transport_endpoint_mark_used (proto, rmt->fib_index, lcl_addr, + port)) break; /* IP:port pair already in use, check if 6-tuple available */ - if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port, - rmt->port, proto, rmt->is_ip4)) + if (session_lookup_6tuple (rmt->fib_index, lcl_addr, &rmt->ip, port, + rmt->port, proto, rmt->is_ip4)) continue; /* 6-tuple is available so increment lcl endpoint refcount */ - transport_share_local_endpoint (proto, lcl_addr, port); + transport_share_local_endpoint (proto, rmt->fib_index, lcl_addr, port); break; } @@ -679,6 +684,13 @@ transport_port_alloc_max_tries () return tm->port_alloc_max_tries; } +u32 +transport_port_local_in_use () +{ + transport_main_t *tm = &tp_main; + return pool_elts (tm->local_endpoints) - vec_len (tm->lcl_endpts_freelist); +} + void transport_clear_stats () { @@ -783,17 +795,19 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, { *lcl_port = rmt_cfg->peer.port; - if (!transport_endpoint_mark_used (proto, lcl_addr, rmt_cfg->peer.port)) + if (!transport_endpoint_mark_used (proto, rmt->fib_index, lcl_addr, + rmt_cfg->peer.port)) return 0; /* IP:port pair already in use, check if 6-tuple available */ - if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, - rmt_cfg->peer.port, rmt->port, proto, - rmt->is_ip4)) + if (session_lookup_6tuple (rmt->fib_index, lcl_addr, &rmt->ip, + rmt_cfg->peer.port, rmt->port, proto, + rmt->is_ip4)) return SESSION_E_PORTINUSE; /* 6-tuple is available so increment lcl endpoint refcount */ - transport_share_local_endpoint (proto, lcl_addr, rmt_cfg->peer.port); + transport_share_local_endpoint (proto, rmt->fib_index, lcl_addr, + rmt_cfg->peer.port); return 0; } @@ -816,7 +830,7 @@ u8 * format_transport_pacer (u8 * s, va_list * args) { spacer_t *pacer = va_arg (*args, spacer_t *); - u32 thread_index = va_arg (*args, int); + clib_thread_index_t thread_index = va_arg (*args, int); clib_us_time_t now, diff; now = transport_us_time_now (thread_index); @@ -952,7 +966,8 @@ transport_connection_tx_pacer_update_bytes (transport_connection_t * tc, } void -transport_update_pacer_time (u32 thread_index, clib_time_type_t now) +transport_update_pacer_time (clib_thread_index_t thread_index, + clib_time_type_t now) { session_wrk_update_time (session_main_get_worker (thread_index), now); } diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index 289bf471af0..31ad36bdc67 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -74,10 +74,10 @@ typedef struct _transport_proto_vft u32 (*start_listen) (u32 session_index, transport_endpoint_cfg_t *lcl); u32 (*stop_listen) (u32 conn_index); int (*connect) (transport_endpoint_cfg_t * rmt); - void (*half_close) (u32 conn_index, u32 thread_index); - void (*close) (u32 conn_index, u32 thread_index); - void (*reset) (u32 conn_index, u32 thread_index); - void (*cleanup) (u32 conn_index, u32 thread_index); + void (*half_close) (u32 conn_index, clib_thread_index_t thread_index); + void (*close) (u32 conn_index, 
clib_thread_index_t thread_index); + void (*reset) (u32 conn_index, clib_thread_index_t thread_index); + void (*cleanup) (u32 conn_index, clib_thread_index_t thread_index); void (*cleanup_ho) (u32 conn_index); clib_error_t *(*enable) (vlib_main_t * vm, u8 is_en); @@ -97,7 +97,8 @@ typedef struct _transport_proto_vft /* * Connection retrieval */ - transport_connection_t *(*get_connection) (u32 conn_idx, u32 thread_idx); + transport_connection_t *(*get_connection) (u32 conn_idx, + clib_thread_index_t thread_idx); transport_connection_t *(*get_listener) (u32 conn_index); transport_connection_t *(*get_half_open) (u32 conn_index); @@ -111,13 +112,14 @@ typedef struct _transport_proto_vft /* * Properties retrieval/setting */ - void (*get_transport_endpoint) (u32 conn_index, u32 thread_index, + void (*get_transport_endpoint) (u32 conn_index, + clib_thread_index_t thread_index, transport_endpoint_t *tep, u8 is_lcl); void (*get_transport_listener_endpoint) (u32 conn_index, transport_endpoint_t *tep, u8 is_lcl); - int (*attribute) (u32 conn_index, u32 thread_index, u8 is_get, - transport_endpt_attr_t *attr); + int (*attribute) (u32 conn_index, clib_thread_index_t thread_index, + u8 is_get, transport_endpt_attr_t *attr); /* * Properties @@ -144,8 +146,8 @@ void transport_cleanup (transport_proto_t tp, u32 conn_index, u8 thread_index); void transport_cleanup_half_open (transport_proto_t tp, u32 conn_index); void transport_get_endpoint (transport_proto_t tp, u32 conn_index, - u32 thread_index, transport_endpoint_t * tep, - u8 is_lcl); + clib_thread_index_t thread_index, + transport_endpoint_t *tep, u8 is_lcl); void transport_get_listener_endpoint (transport_proto_t tp, u32 conn_index, transport_endpoint_t * tep, u8 is_lcl); int transport_connection_attribute (transport_proto_t tp, u32 conn_index, @@ -179,7 +181,8 @@ transport_custom_tx (transport_proto_t tp, void *s, } static inline int -transport_app_rx_evt (transport_proto_t tp, u32 conn_index, u32 thread_index) +transport_app_rx_evt (transport_proto_t tp, u32 conn_index, + clib_thread_index_t thread_index) { transport_connection_t *tc; if (!tp_vfts[tp].app_rx_evt) @@ -248,11 +251,12 @@ int transport_alloc_local_port (u8 proto, ip46_address_t *ip, transport_endpoint_cfg_t *rmt); int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t *rmt, ip46_address_t *lcl_addr, u16 *lcl_port); -void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, - u16 port); -int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, - u16 port); +void transport_share_local_endpoint (u8 proto, u32 fib_index, + ip46_address_t *lcl_ip, u16 port); +int transport_release_local_endpoint (u8 proto, u32 fib_index, + ip46_address_t *lcl_ip, u16 port); u16 transport_port_alloc_max_tries (); +u32 transport_port_local_in_use (); void transport_clear_stats (); void transport_enable_disable (vlib_main_t * vm, u8 is_en); void transport_init (void); @@ -367,7 +371,8 @@ transport_connection_tx_pacer_update_bytes (transport_connection_t * tc, * @param thread_index thread for which time is updated * @param now time now */ -void transport_update_pacer_time (u32 thread_index, clib_time_type_t now); +void transport_update_pacer_time (clib_thread_index_t thread_index, + clib_time_type_t now); #endif /* SRC_VNET_SESSION_TRANSPORT_H_ */ diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h index 4a2f861814f..55cb1206e6b 100644 --- a/src/vnet/session/transport_types.h +++ b/src/vnet/session/transport_types.h @@ -113,7 +113,7 
@@ typedef struct _transport_connection u32 s_index; /**< Parent session index */ u32 c_index; /**< Connection index in transport pool */ - u32 thread_index; /**< Worker-thread index */ + clib_thread_index_t thread_index; /**< Worker-thread index */ u8 flags; /**< Transport specific flags */ u8 dscp; /**< Differentiated Services Code Point */ diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c index e546e1db0e7..5e022ebd637 100644 --- a/src/vnet/srv6/sr_api.c +++ b/src/vnet/srv6/sr_api.c @@ -215,7 +215,7 @@ vl_api_sr_policy_mod_v2_t_handler (vl_api_sr_policy_mod_v2_t *mp) ntohl (mp->sl_index), ntohl (mp->sids.weight)); vec_free (segments); - REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY); + REPLY_MACRO (VL_API_SR_POLICY_MOD_V2_REPLY); } static void diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c index 47082e9c96a..8bf0996bbe1 100644 --- a/src/vnet/srv6/sr_localsid.c +++ b/src/vnet/srv6/sr_localsid.c @@ -1196,7 +1196,7 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { @@ -1500,7 +1500,7 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { @@ -1809,7 +1809,7 @@ sr_localsid_un_fn (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { @@ -2116,7 +2116,7 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; while (n_left_from > 0) { diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c index a9114628f95..92586669378 100644 --- a/src/vnet/srv6/sr_policy_rewrite.c +++ b/src/vnet/srv6/sr_policy_rewrite.c @@ -503,8 +503,9 @@ update_lb (ip6_sr_policy_t * sr_policy) }; /* Add FIB entry for BSID */ - fhc = fib_table_get_flow_hash_config (sr_policy->fib_table, - FIB_PROTOCOL_IP6); + fhc = fib_table_get_flow_hash_config ( + fib_table_find (FIB_PROTOCOL_IP6, sr_policy->fib_table), + FIB_PROTOCOL_IP6); dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, load_balance_create (0, DPO_PROTO_IP6, fhc)); diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index aea49558882..ed8c514ae8e 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -242,8 +242,8 @@ tcp_connection_cleanup (tcp_connection_t * tc) /* Cleanup local endpoint if this was an active connect */ if (!(tc->cfg_flags & TCP_CFG_F_NO_ENDPOINT)) - transport_release_local_endpoint (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip, - tc->c_lcl_port); + transport_release_local_endpoint (TRANSPORT_PROTO_TCP, tc->c_fib_index, + &tc->c_lcl_ip, tc->c_lcl_port); /* Check if connection is not yet fully established */ if (tc->state == TCP_STATE_SYN_SENT) @@ -432,7 +432,7 @@ tcp_connection_close (tcp_connection_t * tc) } static void 
-tcp_session_half_close (u32 conn_index, u32 thread_index) +tcp_session_half_close (u32 conn_index, clib_thread_index_t thread_index) { tcp_worker_ctx_t *wrk; tcp_connection_t *tc; @@ -456,7 +456,7 @@ tcp_session_half_close (u32 conn_index, u32 thread_index) } static void -tcp_session_close (u32 conn_index, u32 thread_index) +tcp_session_close (u32 conn_index, clib_thread_index_t thread_index) { tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); @@ -464,7 +464,7 @@ tcp_session_close (u32 conn_index, u32 thread_index) } static void -tcp_session_cleanup (u32 conn_index, u32 thread_index) +tcp_session_cleanup (u32 conn_index, clib_thread_index_t thread_index) { tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); @@ -487,7 +487,7 @@ tcp_session_cleanup_ho (u32 conn_index) } static void -tcp_session_reset (u32 conn_index, u32 thread_index) +tcp_session_reset (u32 conn_index, clib_thread_index_t thread_index) { tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); @@ -856,7 +856,7 @@ static u8 * format_tcp_session (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); tcp_connection_t *tc; @@ -906,7 +906,7 @@ format_tcp_half_open_session (u8 * s, va_list * args) } static transport_connection_t * -tcp_session_get_transport (u32 conn_index, u32 thread_index) +tcp_session_get_transport (u32 conn_index, clib_thread_index_t thread_index) { tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index); if (PREDICT_FALSE (!tc)) @@ -1016,8 +1016,8 @@ tcp_get_attribute (tcp_connection_t *tc, transport_endpt_attr_t *attr) } static int -tcp_session_attribute (u32 conn_index, u32 thread_index, u8 is_get, - transport_endpt_attr_t *attr) +tcp_session_attribute (u32 conn_index, clib_thread_index_t thread_index, + u8 is_get, transport_endpt_attr_t *attr) { tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index); @@ -1279,7 +1279,7 @@ tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk) static void tcp_handle_cleanups (tcp_worker_ctx_t * wrk, clib_time_type_t now) { - u32 thread_index = wrk->vm->thread_index; + clib_thread_index_t thread_index = wrk->vm->thread_index; tcp_cleanup_req_t *req; tcp_connection_t *tc; @@ -1404,7 +1404,8 @@ tcp_reschedule (tcp_connection_t * tc) static void tcp_expired_timers_dispatch (u32 * expired_timers) { - u32 thread_index = vlib_get_thread_index (), n_left, max_per_loop; + clib_thread_index_t thread_index = vlib_get_thread_index (), n_left, + max_per_loop; u32 connection_index, timer_id, n_expired, max_loops; tcp_worker_ctx_t *wrk; tcp_connection_t *tc; @@ -1467,7 +1468,7 @@ tcp_stats_collector_fn (vlib_stats_collector_data_t *d) tcp_wrk_stats_t acc = {}; tcp_worker_ctx_t *wrk; - vec_foreach (wrk, tm->wrk_ctx) + vec_foreach (wrk, tm->wrk) { #define _(name, type, str) acc.name += wrk->stats.name; foreach_tcp_wrk_stat @@ -1515,7 +1516,7 @@ tcp_main_enable (vlib_main_t * vm) int thread; /* Already initialized */ - if (tm->wrk_ctx) + if (tm->wrk) return 0; if ((error = vlib_call_init_function (vm, ip_main_init))) @@ -1537,11 +1538,11 @@ tcp_main_enable (vlib_main_t * vm) */ num_threads = 1 /* main thread */ + vtm->n_threads; - vec_validate (tm->wrk_ctx, num_threads - 1); + vec_validate (tm->wrk, num_threads - 1); n_workers = num_threads == 1 ? 
1 : vtm->n_threads; prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers; - wrk = &tm->wrk_ctx[0]; + wrk = &tm->wrk[0]; wrk->tco_next_node[0] = vlib_node_get_next (vm, session_queue_node.index, tcp4_output_node.index); wrk->tco_next_node[1] = vlib_node_get_next (vm, session_queue_node.index, @@ -1549,7 +1550,7 @@ tcp_main_enable (vlib_main_t * vm) for (thread = 0; thread < num_threads; thread++) { - wrk = &tm->wrk_ctx[thread]; + wrk = &tm->wrk[thread]; vec_validate (wrk->pending_deq_acked, 255); vec_validate (wrk->pending_disconnects, 255); @@ -1562,8 +1563,8 @@ tcp_main_enable (vlib_main_t * vm) if (thread > 0) { - wrk->tco_next_node[0] = tm->wrk_ctx[0].tco_next_node[0]; - wrk->tco_next_node[1] = tm->wrk_ctx[0].tco_next_node[1]; + wrk->tco_next_node[0] = tm->wrk[0].tco_next_node[0]; + wrk->tco_next_node[1] = tm->wrk[0].tco_next_node[1]; } /* diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 8feac807d59..67dc7407e91 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -220,7 +220,7 @@ typedef struct tcp_configuration_ typedef struct _tcp_main { /** per-worker context */ - tcp_worker_ctx_t *wrk_ctx; + tcp_worker_ctx_t *wrk; /* Pool of listeners. */ tcp_connection_t *listener_pool; @@ -299,10 +299,10 @@ vnet_get_tcp_main () } always_inline tcp_worker_ctx_t * -tcp_get_worker (u32 thread_index) +tcp_get_worker (clib_thread_index_t thread_index) { - ASSERT (thread_index < vec_len (tcp_main.wrk_ctx)); - return &tcp_main.wrk_ctx[thread_index]; + ASSERT (thread_index < vec_len (tcp_main.wrk)); + return &tcp_main.wrk[thread_index]; } tcp_connection_t *tcp_connection_alloc (u8 thread_index); @@ -314,8 +314,8 @@ void tcp_connection_cleanup (tcp_connection_t * tc); void tcp_connection_del (tcp_connection_t * tc); int tcp_half_open_connection_cleanup (tcp_connection_t * tc); -void tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, - u32 thread_index, u8 is_ip4); +void tcp_send_reset_w_pkt (tcp_connection_t *tc, vlib_buffer_t *pkt, + clib_thread_index_t thread_index, u8 is_ip4); void tcp_send_reset (tcp_connection_t * tc); void tcp_send_syn (tcp_connection_t * tc); void tcp_send_synack (tcp_connection_t * tc); diff --git a/src/vnet/tcp/tcp_bt.c b/src/vnet/tcp/tcp_bt.c index 3cb57a550de..3624cd8158a 100644 --- a/src/vnet/tcp/tcp_bt.c +++ b/src/vnet/tcp/tcp_bt.c @@ -635,6 +635,8 @@ tcp_bt_flush_samples (tcp_connection_t * tc) tcp_bt_sample_t *bts; u32 *samples = 0, *si; + ASSERT (pool_elts (bt->samples) != 0); + vec_validate (samples, pool_elts (bt->samples) - 1); vec_reset_length (samples); diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c index 55bc5764df2..c14994aa440 100644 --- a/src/vnet/tcp/tcp_cli.c +++ b/src/vnet/tcp/tcp_cli.c @@ -919,7 +919,7 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); - for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++) + for (thread = 0; thread < vec_len (tm->wrk); thread++) { wrk = tcp_get_worker (thread); vlib_cli_output (vm, "Thread %u:\n", thread); @@ -957,7 +957,7 @@ clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); - for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++) + for (thread = 0; thread < vec_len (tm->wrk); thread++) { wrk = tcp_get_worker (thread); clib_memset (&wrk->stats, 0, sizeof (wrk->stats)); diff --git a/src/vnet/tcp/tcp_cubic.c 
b/src/vnet/tcp/tcp_cubic.c index cf2b9a17d18..63abcd1312d 100644 --- a/src/vnet/tcp/tcp_cubic.c +++ b/src/vnet/tcp/tcp_cubic.c @@ -49,7 +49,7 @@ typedef struct cubic_data_ STATIC_ASSERT (sizeof (cubic_data_t) <= TCP_CC_DATA_SZ, "cubic data len"); static inline f64 -cubic_time (u32 thread_index) +cubic_time (clib_thread_index_t thread_index) { return tcp_time_now_us (thread_index); } diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h index ccd0e3fe3ee..6ab467d759b 100644 --- a/src/vnet/tcp/tcp_inlines.h +++ b/src/vnet/tcp/tcp_inlines.h @@ -56,7 +56,7 @@ tcp_buffer_hdr (vlib_buffer_t * b) } always_inline tcp_connection_t * -tcp_connection_get (u32 conn_index, u32 thread_index) +tcp_connection_get (u32 conn_index, clib_thread_index_t thread_index) { tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); if (PREDICT_FALSE (pool_is_free_index (wrk->connections, conn_index))) @@ -65,10 +65,10 @@ tcp_connection_get (u32 conn_index, u32 thread_index) } always_inline tcp_connection_t * -tcp_connection_get_if_valid (u32 conn_index, u32 thread_index) +tcp_connection_get_if_valid (u32 conn_index, clib_thread_index_t thread_index) { tcp_worker_ctx_t *wrk; - if (thread_index >= vec_len (tcp_main.wrk_ctx)) + if (thread_index >= vec_len (tcp_main.wrk)) return 0; wrk = tcp_get_worker (thread_index); if (pool_is_free_index (wrk->connections, conn_index)) @@ -215,9 +215,9 @@ tcp_is_lost_fin (tcp_connection_t * tc) * Time used to generate timestamps, not the timestamp */ always_inline u32 -tcp_time_tstamp (u32 thread_index) +tcp_time_tstamp (clib_thread_index_t thread_index) { - return tcp_main.wrk_ctx[thread_index].time_tstamp; + return tcp_main.wrk[thread_index].time_tstamp; } /** @@ -226,14 +226,13 @@ tcp_time_tstamp (u32 thread_index) always_inline u32 tcp_tstamp (tcp_connection_t * tc) { - return (tcp_main.wrk_ctx[tc->c_thread_index].time_tstamp - - tc->timestamp_delta); + return (tcp_main.wrk[tc->c_thread_index].time_tstamp - tc->timestamp_delta); } always_inline f64 -tcp_time_now_us (u32 thread_index) +tcp_time_now_us (clib_thread_index_t thread_index) { - return tcp_main.wrk_ctx[thread_index].time_us; + return tcp_main.wrk[thread_index].time_us; } always_inline void diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index cd3e4b7700c..47ae8513f62 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -217,20 +217,6 @@ static int tcp_segment_validate (tcp_worker_ctx_t * wrk, tcp_connection_t * tc0, vlib_buffer_t * b0, tcp_header_t * th0, u32 * error0) { - /* We could get a burst of RSTs interleaved with acks */ - if (PREDICT_FALSE (tc0->state == TCP_STATE_CLOSED)) - { - tcp_send_reset (tc0); - *error0 = TCP_ERROR_CONNECTION_CLOSED; - goto error; - } - - if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) - { - *error0 = TCP_ERROR_SEGMENT_INVALID; - goto error; - } - if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->rcv_opts, 0))) { *error0 = TCP_ERROR_OPTIONS; @@ -512,7 +498,7 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc) static void tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk) { - u32 thread_index = wrk->vm->thread_index; + clib_thread_index_t thread_index = wrk->vm->thread_index; u32 *pending_deq_acked; tcp_connection_t *tc; int i; @@ -1025,7 +1011,8 @@ tcp_program_disconnect (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) static void tcp_handle_disconnects (tcp_worker_ctx_t * wrk) { - u32 thread_index, *pending_disconnects, *pending_resets; + clib_thread_index_t thread_index; + u32 *pending_disconnects, 
*pending_resets; tcp_connection_t *tc; int i; @@ -1372,11 +1359,47 @@ tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node, } } +always_inline int +tcp_segment_is_exception (tcp_connection_t *tc, tcp_header_t *th) +{ + /* tcp-input allows through segments without ack, e.g., fin without ack, + * which have to be handled as exception in nodes like established. So + * flags must be checked */ + return !tc || tc->state == TCP_STATE_CLOSED || + !(th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN)); +} + +always_inline void +tcp_segment_handle_exception (tcp_connection_t *tc, tcp_header_t *th, + u32 *error) +{ + if (!tc) + { + *error = TCP_ERROR_INVALID_CONNECTION; + return; + } + + /* We could get a burst of RSTs interleaved with acks */ + if (tc->state == TCP_STATE_CLOSED) + { + tcp_send_reset (tc); + *error = TCP_ERROR_CONNECTION_CLOSED; + return; + } + + if (!(th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN))) + { + *error = TCP_ERROR_SEGMENT_INVALID; + return; + } +} + always_inline uword tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip4) { - u32 thread_index = vm->thread_index, n_left_from, *from; + clib_thread_index_t thread_index = vm->thread_index; + u32 n_left_from, *from; tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 err_counters[TCP_N_ERROR] = { 0 }; @@ -1404,15 +1427,14 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index, thread_index); + th = tcp_buffer_hdr (b[0]); - if (PREDICT_FALSE (tc == 0)) + if (PREDICT_FALSE (tcp_segment_is_exception (tc, th))) { - error = TCP_ERROR_INVALID_CONNECTION; + tcp_segment_handle_exception (tc, th, &error); goto done; } - th = tcp_buffer_hdr (b[0]); - /* TODO header prediction fast path */ /* 1-4: check SEQ, RST, SYN */ @@ -1862,8 +1884,8 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node, else new_tc->rcv_wscale = 0; - new_tc->snd_wnd = clib_net_to_host_u16 (tcp->window) - << new_tc->snd_wscale; + /* RFC7323 sec 2.2: Window field in a syn segment must not be scaled */ + new_tc->snd_wnd = clib_net_to_host_u16 (tcp->window); new_tc->snd_wl1 = seq; new_tc->snd_wl2 = ack; @@ -2005,7 +2027,7 @@ static void tcp46_rcv_process_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *from, u32 n_bufs) { - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; tcp_connection_t *tc = 0; tcp_rx_trace_t *t; vlib_buffer_t *b; @@ -2031,7 +2053,8 @@ always_inline uword tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4) { - u32 thread_index = vm->thread_index, n_left_from, *from, max_deq; + clib_thread_index_t thread_index = vm->thread_index; + u32 n_left_from, *from, max_deq; tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; @@ -2524,7 +2547,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { u32 n_left_from, *from, n_syns = 0; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; u32 tw_iss = 0; from = vlib_frame_vector_args (frame); @@ -2819,8 +2842,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); } - next[0] = next[1] = TCP_INPUT_NEXT_DROP; - tc0 = tcp_input_lookup_buffer (b[0], 
thread_index, &error0, is_ip4, is_nolookup); tc1 = tcp_input_lookup_buffer (b[1], thread_index, &error1, is_ip4, @@ -2881,7 +2902,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); } - next[0] = TCP_INPUT_NEXT_DROP; tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4, is_nolookup); if (PREDICT_TRUE (tc0 != 0)) diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 2fd20acf241..120ad6c533e 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -299,7 +299,7 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, void tcp_update_burst_snd_vars (tcp_connection_t * tc) { - tcp_main_t *tm = &tcp_main; + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); /* Compute options to be used for connection. These may be reused when * sending data or to compute the effective mss (snd_mss) */ @@ -310,8 +310,7 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc) tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len; ASSERT (tc->snd_mss > 0); - tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts, - &tc->snd_opts); + tcp_options_write (wrk->cached_opts, &tc->snd_opts); tcp_update_rcv_wnd (tc); @@ -647,8 +646,8 @@ tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4) * It extracts connection info out of original packet */ void -tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, - u32 thread_index, u8 is_ip4) +tcp_send_reset_w_pkt (tcp_connection_t *tc, vlib_buffer_t *pkt, + clib_thread_index_t thread_index, u8 is_ip4) { tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); vlib_main_t *vm = wrk->vm; @@ -875,7 +874,6 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt, { u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK; u32 advertise_wnd, data_len; - tcp_main_t *tm = &tcp_main; tcp_header_t *th; data_len = b->current_length; @@ -907,9 +905,8 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt, if (maybe_burst) { - clib_memcpy_fast ((u8 *) (th + 1), - tm->wrk_ctx[tc->c_thread_index].cached_opts, - tc->snd_opts_len); + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); + clib_memcpy_fast ((u8 *) (th + 1), wrk->cached_opts, tc->snd_opts_len); } else { diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c index 08809f70070..d409ee5f126 100644 --- a/src/vnet/tls/tls.c +++ b/src/vnet/tls/tls.c @@ -21,7 +21,7 @@ static tls_main_t tls_main; tls_engine_vft_t *tls_vfts; -void tls_disconnect (u32 ctx_handle, u32 thread_index); +void tls_disconnect (u32 ctx_handle, clib_thread_index_t thread_index); void tls_disconnect_transport (tls_ctx_t * ctx) @@ -684,7 +684,7 @@ tls_connect (transport_endpoint_cfg_t * tep) } void -tls_disconnect (u32 ctx_handle, u32 thread_index) +tls_disconnect (u32 ctx_handle, clib_thread_index_t thread_index) { tls_ctx_t *ctx; @@ -820,7 +820,7 @@ tls_stop_listen (u32 lctx_index) } transport_connection_t * -tls_connection_get (u32 ctx_index, u32 thread_index) +tls_connection_get (u32 ctx_index, clib_thread_index_t thread_index) { tls_ctx_t *ctx; ctx = tls_ctx_get_w_thread (ctx_index, thread_index); @@ -959,7 +959,7 @@ u8 * format_tls_connection (u8 * s, va_list * args) { u32 ctx_index = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); tls_ctx_t *ctx; @@ -1013,8 +1013,8 @@ format_tls_half_open (u8 * s, va_list * args) } static void 
-tls_transport_endpoint_get (u32 ctx_handle, u32 thread_index, - transport_endpoint_t * tep, u8 is_lcl) +tls_transport_endpoint_get (u32 ctx_handle, clib_thread_index_t thread_index, + transport_endpoint_t *tep, u8 is_lcl) { tls_ctx_t *ctx = tls_ctx_get_w_thread (ctx_handle, thread_index); session_t *ts; @@ -1179,7 +1179,7 @@ dtls_half_open_get (u32 ho_index) } static void -dtls_cleanup_callback (u32 ctx_index, u32 thread_index) +dtls_cleanup_callback (u32 ctx_index, clib_thread_index_t thread_index) { /* No op */ } diff --git a/src/vnet/tls/tls.h b/src/vnet/tls/tls.h index 244e2042f11..7e69432512e 100644 --- a/src/vnet/tls/tls.h +++ b/src/vnet/tls/tls.h @@ -146,10 +146,10 @@ typedef struct tls_main_ typedef struct tls_engine_vft_ { u32 (*ctx_alloc) (void); - u32 (*ctx_alloc_w_thread) (u32 thread_index); + u32 (*ctx_alloc_w_thread) (clib_thread_index_t thread_index); void (*ctx_free) (tls_ctx_t * ctx); void *(*ctx_detach) (tls_ctx_t *ctx); - u32 (*ctx_attach) (u32 thread_index, void *ctx); + u32 (*ctx_attach) (clib_thread_index_t thread_index, void *ctx); tls_ctx_t *(*ctx_get) (u32 ctx_index); tls_ctx_t *(*ctx_get_w_thread) (u32 ctx_index, u8 thread_index); int (*ctx_init_client) (tls_ctx_t * ctx); diff --git a/src/vnet/tls/tls_inlines.h b/src/vnet/tls/tls_inlines.h index 3e3f59fcf51..2f12a779102 100644 --- a/src/vnet/tls/tls_inlines.h +++ b/src/vnet/tls/tls_inlines.h @@ -23,7 +23,8 @@ tls_ctx_alloc (crypto_engine_type_t engine_type) } static inline u32 -tls_ctx_alloc_w_thread (crypto_engine_type_t engine_type, u32 thread_index) +tls_ctx_alloc_w_thread (crypto_engine_type_t engine_type, + clib_thread_index_t thread_index) { u32 ctx_index; ctx_index = tls_vfts[engine_type].ctx_alloc_w_thread (thread_index); @@ -65,7 +66,8 @@ tls_ctx_init_client (tls_ctx_t *ctx) } static inline u32 -tls_ctx_attach (crypto_engine_type_t engine_type, u32 thread_index, void *ctx) +tls_ctx_attach (crypto_engine_type_t engine_type, + clib_thread_index_t thread_index, void *ctx) { u32 ctx_index; ctx_index = tls_vfts[engine_type].ctx_attach (thread_index, ctx); diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c index 1fc055f8d50..4ed5a68fa02 100644 --- a/src/vnet/udp/udp.c +++ b/src/vnet/udp/udp.c @@ -71,7 +71,7 @@ udp_connection_unregister_port (u16 lcl_port, u8 is_ip4) } udp_connection_t * -udp_connection_alloc (u32 thread_index) +udp_connection_alloc (clib_thread_index_t thread_index) { udp_worker_t *wrk = udp_worker_get (thread_index); udp_connection_t *uc; @@ -99,8 +99,8 @@ udp_connection_free (udp_connection_t * uc) static void udp_connection_cleanup (udp_connection_t * uc) { - transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip, - uc->c_lcl_port); + transport_release_local_endpoint (TRANSPORT_PROTO_UDP, uc->c_fib_index, + &uc->c_lcl_ip, uc->c_lcl_port); udp_connection_unregister_port (uc->c_lcl_port, uc->c_is_ip4); udp_connection_free (uc); } @@ -115,7 +115,7 @@ udp_connection_delete (udp_connection_t * uc) static void udp_handle_cleanups (void *args) { - u32 thread_index = (u32) pointer_to_uword (args); + clib_thread_index_t thread_index = (u32) pointer_to_uword (args); udp_connection_t *uc; udp_worker_t *wrk; u32 *uc_index; @@ -205,6 +205,7 @@ udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl) clib_spinlock_init (&listener->rx_lock); if (!um->csum_offload) listener->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD; + listener->start_ts = transport_time_now (listener->c_thread_index); udp_connection_register_port (listener->c_lcl_port, lcl->is_ip4); return listener->c_c_index; @@ 
-303,6 +304,8 @@ udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN; } + uc->bytes_out += vlib_buffer_length_in_chain (vm, b); + uc->dgrams_out += 1; uh->checksum = udp_compute_checksum (vm, b, udp_csum_offload (uc), uc->c_is_ip4); @@ -359,7 +362,7 @@ udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs) } static transport_connection_t * -udp_session_get (u32 connection_index, u32 thread_index) +udp_session_get (u32 connection_index, clib_thread_index_t thread_index) { udp_connection_t *uc; uc = udp_connection_get (connection_index, thread_index); @@ -369,7 +372,7 @@ udp_session_get (u32 connection_index, u32 thread_index) } static void -udp_session_close (u32 connection_index, u32 thread_index) +udp_session_close (u32 connection_index, clib_thread_index_t thread_index) { udp_connection_t *uc; @@ -384,7 +387,7 @@ udp_session_close (u32 connection_index, u32 thread_index) } static void -udp_session_cleanup (u32 connection_index, u32 thread_index) +udp_session_cleanup (u32 connection_index, clib_thread_index_t thread_index) { udp_connection_t *uc; uc = udp_connection_get (connection_index, thread_index); @@ -419,7 +422,7 @@ udp_open_connection (transport_endpoint_cfg_t * rmt) udp_main_t *um = &udp_main; ip46_address_t lcl_addr; udp_connection_t *uc; - u32 thread_index; + clib_thread_index_t thread_index; u16 lcl_port; int rv; @@ -434,8 +437,8 @@ udp_open_connection (transport_endpoint_cfg_t * rmt) /* If specific source port was requested abort */ if (rmt->peer.port) { - transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, - lcl_port); + transport_release_local_endpoint ( + TRANSPORT_PROTO_UDP, rmt->fib_index, &lcl_addr, lcl_port); return SESSION_E_PORTINUSE; } @@ -443,8 +446,8 @@ udp_open_connection (transport_endpoint_cfg_t * rmt) while (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port), rmt->is_ip4)) { - transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, - lcl_port); + transport_release_local_endpoint ( + TRANSPORT_PROTO_UDP, rmt->fib_index, &lcl_addr, lcl_port); lcl_port = transport_alloc_local_port (TRANSPORT_PROTO_UDP, &lcl_addr, rmt); if ((int) lcl_port < 1) @@ -472,6 +475,7 @@ udp_open_connection (transport_endpoint_cfg_t * rmt) uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD; uc->next_node_index = rmt->next_node_index; uc->next_node_opaque = rmt->next_node_opaque; + uc->start_ts = transport_time_now (thread_index); udp_connection_register_port (uc->c_lcl_port, rmt->is_ip4); @@ -482,7 +486,7 @@ static transport_connection_t * udp_session_get_half_open (u32 conn_index) { udp_connection_t *uc; - u32 thread_index; + clib_thread_index_t thread_index; /* We don't poll main thread if we have workers */ thread_index = transport_cl_thread (); @@ -496,7 +500,7 @@ static u8 * format_udp_session (u8 * s, va_list * args) { u32 uci = va_arg (*args, u32); - u32 thread_index = va_arg (*args, u32); + clib_thread_index_t thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); udp_connection_t *uc; diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h index c6f867500e0..6ff22009a4c 100644 --- a/src/vnet/udp/udp.h +++ b/src/vnet/udp/udp.h @@ -87,6 +87,12 @@ typedef struct u32 sw_if_index; /**< connection sw_if_index */ u32 next_node_index; /**< Can be used to control next node in output */ u32 next_node_opaque; /**< Opaque to pass to next node */ + u64 bytes_in; /**< bytes received */ + u64 dgrams_in; /**< rfc4113 dgrams received */ + u64 bytes_out; /**< 
bytes sent */ + u64 dgrams_out; /**< rfc4113 dgrams sent */ + u32 errors_in; /**< rfc4113 dgrams in errors */ + clib_time_type_t start_ts; /**< time stamp when connection was created */ } udp_connection_t; #define udp_csum_offload(uc) (!((uc)->cfg_flags & UDP_CFG_F_NO_CSUM_OFFLOAD)) @@ -171,13 +177,13 @@ void udp_add_dst_port (udp_main_t * um, udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4); always_inline udp_worker_t * -udp_worker_get (u32 thread_index) +udp_worker_get (clib_thread_index_t thread_index) { return vec_elt_at_index (udp_main.wrk, thread_index); } always_inline udp_connection_t * -udp_connection_get (u32 conn_index, u32 thread_index) +udp_connection_get (u32 conn_index, clib_thread_index_t thread_index) { udp_worker_t *wrk = udp_worker_get (thread_index); @@ -205,11 +211,12 @@ udp_connection_from_transport (transport_connection_t * tc) } void udp_connection_free (udp_connection_t * uc); -udp_connection_t *udp_connection_alloc (u32 thread_index); +udp_connection_t *udp_connection_alloc (clib_thread_index_t thread_index); void udp_connection_share_port (u16 lcl_port, u8 is_ip4); always_inline udp_connection_t * -udp_connection_clone_safe (u32 connection_index, u32 thread_index) +udp_connection_clone_safe (u32 connection_index, + clib_thread_index_t thread_index) { u32 current_thread_index = vlib_get_thread_index (), new_index; udp_connection_t *old_c, *new_c; diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c index 6c8992cd0de..c910b508933 100644 --- a/src/vnet/udp/udp_cli.c +++ b/src/vnet/udp/udp_cli.c @@ -18,6 +18,7 @@ #include <vppinfra/format_table.h> #include <vnet/udp/udp.h> #include <vnet/session/session_types.h> +#include <vnet/session/session.h> u8 * format_udp_connection_id (u8 * s, va_list * args) @@ -91,17 +92,36 @@ format_udp_connection_flags (u8 * s, va_list * args) } static u8 * +format_udp_stats (u8 *s, va_list *args) +{ + udp_connection_t *uc = va_arg (*args, udp_connection_t *); + u32 indent = format_get_indent (s); + s = format (s, "in dgrams %lu bytes %lu err %lu\n", uc->dgrams_in, + uc->bytes_in, uc->errors_in); + s = format (s, "%Uout dgrams %lu bytes %lu", format_white_space, indent, + uc->dgrams_out, uc->bytes_out); + return s; +} + +static u8 * format_udp_vars (u8 * s, va_list * args) { udp_connection_t *uc = va_arg (*args, udp_connection_t *); - s = format (s, " index %u%U flags: %U\n", uc->c_c_index, + s = format (s, " index %u cfg: %U flags: %U\n", uc->c_c_index, format_udp_cfg_flags, uc, format_udp_connection_flags, uc); - s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index); - if (!(uc->flags & UDP_CONN_F_LISTEN)) - s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss); - else - s = format (s, "\n"); + s = format (s, " fib_index %u next_node %u opaque %u", uc->c_fib_index, + uc->next_node_index, uc->next_node_opaque); + + if (uc->flags & UDP_CONN_F_LISTEN) + { + s = format (s, "\n"); + return s; + } + + s = format (s, " sw_if_index %d mss %u duration %.3f\n", uc->sw_if_index, + uc->mss, transport_time_now (uc->c_thread_index) - uc->start_ts); + s = format (s, " stats: %U\n", format_udp_stats, uc); return s; } diff --git a/src/vnet/udp/udp_encap_node.c b/src/vnet/udp/udp_encap_node.c index a86614f5475..99658ef6d03 100644 --- a/src/vnet/udp/udp_encap_node.c +++ b/src/vnet/udp/udp_encap_node.c @@ -78,7 +78,7 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_combined_counter_main_t *cm = &udp_encap_counters; u32 *from = vlib_frame_vector_args (frame); u32 n_left_from, 
n_left_to_next, *to_next, next_index; - u32 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; n_left_from = frame->n_vectors; next_index = node->cached_next_index; diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c index 693824f9628..e4aaa0c7218 100644 --- a/src/vnet/udp/udp_input.c +++ b/src/vnet/udp/udp_input.c @@ -36,7 +36,7 @@ typedef struct { u32 connection; u32 disposition; - u32 thread_index; + clib_thread_index_t thread_index; } udp_input_trace_t; /* packet trace format function */ @@ -101,8 +101,8 @@ udp_trace_buffer (vlib_main_t * vm, vlib_node_runtime_t * node, } static udp_connection_t * -udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr, - u32 thread_index) +udp_connection_accept (udp_connection_t *listener, session_dgram_hdr_t *hdr, + clib_thread_index_t thread_index) { udp_connection_t *uc; @@ -129,9 +129,10 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr, } static void -udp_connection_enqueue (udp_connection_t * uc0, session_t * s0, - session_dgram_hdr_t * hdr0, u32 thread_index, - vlib_buffer_t * b, u8 queue_event, u32 * error0) +udp_connection_enqueue (udp_connection_t *uc0, session_t *s0, + session_dgram_hdr_t *hdr0, + clib_thread_index_t thread_index, vlib_buffer_t *b, + u8 queue_event, u32 *error0) { int wrote0; @@ -146,7 +147,15 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0, /* Expect cl udp enqueue to fail because fifo enqueue */ if (PREDICT_FALSE (wrote0 == 0)) - *error0 = UDP_ERROR_FIFO_FULL; + { + *error0 = UDP_ERROR_FIFO_FULL; + uc0->errors_in += 1; + } + else + { + uc0->bytes_in += wrote0; + uc0->dgrams_in += 1; + } return; } @@ -155,6 +164,7 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0, < hdr0->data_length + sizeof (session_dgram_hdr_t)) { *error0 = UDP_ERROR_FIFO_FULL; + uc0->errors_in += 1; return; } @@ -175,7 +185,15 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0, /* In some rare cases, session_enqueue_dgram_connection can fail because a * chunk cannot be allocated in the RX FIFO */ if (PREDICT_FALSE (wrote0 == 0)) - *error0 = UDP_ERROR_FIFO_NOMEM; + { + *error0 = UDP_ERROR_FIFO_NOMEM; + uc0->errors_in += 1; + } + else + { + uc0->bytes_in += wrote0; + uc0->dgrams_in += 1; + } } always_inline session_t * @@ -241,7 +259,8 @@ always_inline uword udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, u8 is_ip4) { - u32 thread_index = vm->thread_index, n_left_from, *from, *first_buffer; + clib_thread_index_t thread_index = vm->thread_index; + u32 n_left_from, *from, *first_buffer; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 err_counters[UDP_N_ERROR] = { 0 }; diff --git a/src/vnet/udp/udp_output.c b/src/vnet/udp/udp_output.c index 22b94141365..87bb150e403 100644 --- a/src/vnet/udp/udp_output.c +++ b/src/vnet/udp/udp_output.c @@ -52,7 +52,7 @@ format_udp_tx_trace (u8 *s, va_list *args) } always_inline udp_connection_t * -udp_output_get_connection (vlib_buffer_t *b, u32 thread_index) +udp_output_get_connection (vlib_buffer_t *b, clib_thread_index_t thread_index) { if (PREDICT_FALSE (vnet_buffer (b)->tcp.flags & UDP_CONN_F_LISTEN)) return udp_listener_get (vnet_buffer (b)->tcp.connection_index); diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index f1102dc321e..9013f4bf878 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -42,7 +42,7 @@ #include <linux/if_tun.h> #include <vlib/vlib.h> -#include <vlib/unix/unix.h> +#include 
<vlib/file.h> #include <vnet/ip/ip.h> #include <vnet/fib/fib_table.h> @@ -153,7 +153,7 @@ tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_interface_main_t *im = &vnm->interface_main; u32 n_bytes = 0; int i; - u16 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; for (i = 0; i < n_packets; i++) { @@ -242,7 +242,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_buffer_t *b; u32 bi; const uword buffer_size = vlib_buffer_get_default_data_size (vm); - u16 thread_index = vm->thread_index; + clib_thread_index_t thread_index = vm->thread_index; /** Make sure we have some RX buffers. */ { diff --git a/src/vnet/util/refcount.c b/src/vnet/util/refcount.c index a7b525d67be..dcb29841262 100644 --- a/src/vnet/util/refcount.c +++ b/src/vnet/util/refcount.c @@ -32,7 +32,7 @@ u64 vlib_refcount_get(vlib_refcount_t *r, u32 index) { u64 count = 0; vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 thread_index; + clib_thread_index_t thread_index; for (thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++) { vlib_refcount_lock(r->per_cpu[thread_index].counter_lock); if (index < vec_len(r->per_cpu[thread_index].counters)) diff --git a/src/vnet/util/refcount.h b/src/vnet/util/refcount.h index 4c7d7bdbdd5..63bc80d72be 100644 --- a/src/vnet/util/refcount.h +++ b/src/vnet/util/refcount.h @@ -64,8 +64,9 @@ void vlib_refcount_unlock (clib_spinlock_t counter_lock) void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size); -static_always_inline -void vlib_refcount_add(vlib_refcount_t *r, u32 thread_index, u32 counter_index, i32 v) +static_always_inline void +vlib_refcount_add (vlib_refcount_t *r, clib_thread_index_t thread_index, + u32 counter_index, i32 v) { vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[thread_index]; if (PREDICT_FALSE(counter_index >= vec_len(per_cpu->counters))) @@ -80,7 +81,7 @@ static_always_inline void vlib_refcount_init(vlib_refcount_t *r) { vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 thread_index; + clib_thread_index_t thread_index; r->per_cpu = 0; vec_validate (r->per_cpu, tm->n_vlib_mains - 1); diff --git a/src/vnet/util/throttle.h b/src/vnet/util/throttle.h index 53435c4a359..4fd1619935e 100644 --- a/src/vnet/util/throttle.h +++ b/src/vnet/util/throttle.h @@ -40,7 +40,7 @@ extern void throttle_init (throttle_t *t, u32 n_threads, u32 buckets, f64 time); always_inline u64 -throttle_seed (throttle_t * t, u32 thread_index, f64 time_now) +throttle_seed (throttle_t *t, clib_thread_index_t thread_index, f64 time_now) { if (time_now - t->last_seed_change_time[thread_index] > t->time) { @@ -53,7 +53,8 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now) } always_inline int -throttle_check (throttle_t * t, u32 thread_index, u64 hash, u64 seed) +throttle_check (throttle_t *t, clib_thread_index_t thread_index, u64 hash, + u64 seed) { ASSERT (is_pow2 (t->buckets)); diff --git a/src/vpp-api/python/vpp_papi/vpp_papi_async.py b/src/vpp-api/python/vpp_papi/vpp_papi_async.py index d9a4fabb69e..44e2a78eeea 100644 --- a/src/vpp-api/python/vpp_papi/vpp_papi_async.py +++ b/src/vpp-api/python/vpp_papi/vpp_papi_async.py @@ -451,7 +451,8 @@ class VPPApiClient: for m in r.message_table: n = m.name self.message_table[n] = m.index - self.vpp_dictionary_maxid = len(self.message_table) + # Find the maximum index of the message table + self.vpp_dictionary_maxid = max(self.message_table.values() or [0]) # self.worker_task = 
asyncio.create_task(self.message_handler(event_queue)) requests = {} diff --git a/src/vpp-api/python/vpp_papi/vpp_transport_socket.py b/src/vpp-api/python/vpp_papi/vpp_transport_socket.py index 174ab74d0b8..1ba365ad6e1 100644 --- a/src/vpp-api/python/vpp_papi/vpp_transport_socket.py +++ b/src/vpp-api/python/vpp_papi/vpp_transport_socket.py @@ -177,7 +177,8 @@ class VppTransport: return 0 def msg_table_max_index(self): - return len(self.message_table) + """Return the maximum index of the message table.""" + return max(self.message_table.values() or [0]) def write(self, buf): """Send a binary-packed message to VPP.""" diff --git a/src/vpp-api/vapi/vapi.c b/src/vpp-api/vapi/vapi.c index 9e5101bd9f9..e9fd346fbbb 100644 --- a/src/vpp-api/vapi/vapi.c +++ b/src/vpp-api/vapi/vapi.c @@ -507,6 +507,10 @@ vapi_sock_recv_internal (vapi_ctx_t ctx, u8 **vec_msg, u32 timeout) vec_validate (sock->rx_buffer, sizeof (*mbp) - 1); n = recv (sock->fd, sock->rx_buffer + current_rx_index, sizeof (*mbp) - current_rx_index, MSG_DONTWAIT); + + if (n == 0) + return VAPI_ECONNRESET; + if (n < 0) { if (errno == EAGAIN && clib_time_now (&ctx->time) >= deadline) @@ -776,15 +780,19 @@ vapi_sock_client_connect (vapi_ctx_t ctx, char *path, const char *name) { qstatus = vapi_sock_recv_internal (ctx, &msg, 0); - if (qstatus == 0) + if (qstatus == VAPI_OK) goto read_one_msg; + + if (qstatus != VAPI_EAGAIN) + return VAPI_ECON_FAIL; + ts.tv_sec = 0; ts.tv_nsec = 10000 * 1000; /* 10 ms */ while (nanosleep (&ts, &tsrem) < 0) ts = tsrem; } /* Timeout... */ - return -1; + return VAPI_ECON_FAIL; read_one_msg: if (vec_len (msg) == 0) @@ -1338,9 +1346,14 @@ vapi_sock_disconnect (vapi_ctx_t ctx) rv = VAPI_ENORESP; goto fail; } - if (vapi_sock_recv_internal (ctx, &msg, 0) < 0) + + rv = vapi_sock_recv_internal (ctx, &msg, 0); + if (rv == VAPI_EAGAIN) continue; + if (rv != VAPI_OK) + goto fail; + if (vec_len (msg) == 0) continue; diff --git a/src/vpp/CMakeLists.txt b/src/vpp/CMakeLists.txt index 84144e4d059..88766021ba9 100644 --- a/src/vpp/CMakeLists.txt +++ b/src/vpp/CMakeLists.txt @@ -20,7 +20,7 @@ add_custom_command( COMMAND mkdir ARGS -p ${CMAKE_CURRENT_BINARY_DIR}/app COMMAND scripts/generate_version_h - ARGS ${CMAKE_CURRENT_BINARY_DIR}/app/version.h + ARGS ${CMAKE_CURRENT_BINARY_DIR}/app/version.h ${VPP_PLATFORM} COMMENT "Generating VPP version.h" ) diff --git a/src/vpp/conf/80-vpp.conf b/src/vpp/conf/80-vpp.conf index 2207e2e3824..33230236eb4 100644 --- a/src/vpp/conf/80-vpp.conf +++ b/src/vpp/conf/80-vpp.conf @@ -1,8 +1,13 @@ # Number of 2MB hugepages desired vm.nr_hugepages=1024 -# Must be greater than or equal to (2 * vm.nr_hugepages). -vm.max_map_count=3096 +# The vm max_map_count must be greater than or equal to (2 * vm.nr_hugepages). + +# The system default is often an order of magnitude greater than the +# value below. If you uncomment this stanza and reboot as-is, watch +# out for seemingly "random" severe application failures; known to +# occur in Brave, Firefox, and VirtualBox to name but a few. +# vm.max_map_count=3096 # All groups allowed to access hugepages vm.hugetlb_shm_group=0 @@ -12,4 +17,6 @@ vm.hugetlb_shm_group=0 # If the existing kernel.shmmax setting (cat /proc/sys/kernel/shmmax) # is greater than the calculated TotalHugepageSize then set this parameter # to current shmmax value. 
-kernel.shmmax=2147483648 +# Linux default is 4278190079, you don't need to change it unless you +# configure more than 2039 2MB hugepages +# kernel.shmmax=2147483648 diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf index a30a15ab2b1..8e7aebd8271 100644 --- a/src/vpp/conf/startup.conf +++ b/src/vpp/conf/startup.conf @@ -231,6 +231,18 @@ cpu { # update-interval <f64-seconds>, sets the segment scrape / update interval # } +## L3 FIB +# l3fib { + ## load balance pool size preallocation (expected number of objects) + # load-balance-pool-size 1M + + ## fib entry pool size preallocation (expected number of objects) + # fib-entry-pool-size 1M + + ## ip4 mtrie pool size preallocation (expected number of mtries) + # ip4-mtrie-pool-size 1K +# } + ## L2 FIB # l2fib { ## l2fib hash table size. diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index dd4f4cc3353..2808265ffb6 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -123,6 +123,7 @@ main (int argc, char *argv[]) unformat_input_t input, sub_input; u8 *s = 0, *v = 0; int main_core = ~0; + int cpu_translate = 0; cpu_set_t cpuset; void *main_heap; @@ -282,6 +283,8 @@ main (int argc, char *argv[]) unix_main.flags |= UNIX_FLAG_INTERACTIVE; else if (!strncmp (argv[i], "nosyslog", 8)) unix_main.flags |= UNIX_FLAG_NOSYSLOG; + else if (!strncmp (argv[i], "relative", 8)) + cpu_translate = 1; } defaulted: @@ -329,6 +332,17 @@ defaulted: unformat_free (&input); + int translate_main_core = os_translate_cpu_to_affinity_bitmap (main_core); + + if (cpu_translate && main_core != ~0) + { + if (translate_main_core == -1) + clib_error ("cpu %u is not available to be used" + " for the main thread in relative mode", + main_core); + main_core = translate_main_core; + } + /* if main thread affinity is unspecified, set to current running cpu */ if (main_core == ~0) main_core = sched_getcpu (); diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index 83a8b2a7e57..08a5fa213ab 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -194,7 +194,6 @@ set(VPPINFRA_HEADERS random_isaac.h rbtree.h serialize.h - smp.h socket.h sparse_vec.h stack.h diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index d488b1a659c..975607d904e 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -544,7 +544,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b) BVT (clib_bihash_value) * v; BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8))); BVT (clib_bihash_value) * working_copy; - u32 thread_index = os_get_thread_index (); + clib_thread_index_t thread_index = os_get_thread_index (); int log2_working_copy_length; ASSERT (h->alloc_lock[0]); @@ -696,7 +696,7 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash) ( int i, limit; u64 new_hash; u32 new_log2_pages, old_log2_pages; - u32 thread_index = os_get_thread_index (); + clib_thread_index_t thread_index = os_get_thread_index (); int mark_bucket_linear; int resplit_once; diff --git a/src/vppinfra/bihash_vec8_8.h b/src/vppinfra/bihash_vec8_8.h index 822f1bcc51f..1532103e9c1 100644 --- a/src/vppinfra/bihash_vec8_8.h +++ b/src/vppinfra/bihash_vec8_8.h @@ -46,6 +46,7 @@ static inline void clib_bihash_mark_free_vec8_8 (clib_bihash_kv_vec8_8_t *v) { v->value = 0xFEEDFACE8BADF00DULL; + v->key = ~0ULL; } /** Decide if a clib_bihash_kv_vec8_8_t instance is free diff --git a/src/vppinfra/bitmap.h b/src/vppinfra/bitmap.h index 4ab7bcf7a7c..e4badb7f27d 100644 --- 
a/src/vppinfra/bitmap.h +++ b/src/vppinfra/bitmap.h @@ -381,11 +381,12 @@ clib_bitmap_set_region (uword * bitmap, uword i, uword value, uword n_bits) @param ai - the bitmap @param body - the expression to evaluate for each set bit */ -#define clib_bitmap_foreach(i,ai) \ - if (ai) \ - for (i = clib_bitmap_first_set (ai); \ - i != ~0; \ - i = clib_bitmap_next_set (ai, i + 1)) +#define clib_bitmap_foreach(i, ai) \ + if (ai) \ + for (uword __index = clib_bitmap_first_set (ai), \ + __clib_unused __dummy = (i) = __index; \ + __index != ~0; \ + __index = clib_bitmap_next_set (ai, __index + 1), (i) = __index) /** Return the lowest numbered set bit in a bitmap @param ai - pointer to the bitmap diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h index c1122f59ff6..bf73bd95a84 100644 --- a/src/vppinfra/bitops.h +++ b/src/vppinfra/bitops.h @@ -195,6 +195,13 @@ next_with_same_number_of_set_bits (uword x) return ripple | ones; } +static_always_inline void +uword_bitmap_clear (uword *bmp, uword n_uwords) +{ + while (n_uwords--) + bmp++[0] = 0; +} + #define foreach_set_bit_index(i, v) \ for (uword _tmp = (v) + 0 * (uword) (i = get_lowest_set_bit_index (v)); \ _tmp; \ @@ -273,6 +280,34 @@ uword_bitmap_find_first_set (uword *bmp) return (b - bmp) * uword_bits + get_lowest_set_bit_index (b[0]); } +always_inline uword +uword_bitmap_get_multiple (uword *bmp, uword i, uword n_bits) +{ + uword rv; + + bmp += i / uword_bits; + i %= uword_bits; + + rv = (bmp[0] >> i); + rv &= pow2_mask (n_bits); + + if (i + n_bits <= uword_bits) + return rv; + + n_bits -= uword_bits - i; + rv |= (bmp[1] & pow2_mask (n_bits)) << (uword_bits - i); + + return rv; +} + +always_inline uword +uword_bitmap_get_multiple_no_check (uword *bmp, uword i, uword n_bits) +{ + bmp += i / uword_bits; + i %= uword_bits; + return ((bmp[0] >> i) & pow2_mask (n_bits)); +} + static_always_inline u32 bit_extract_u32 (u32 v, u32 mask) { diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h index 5348738ec6a..cb90da5c1e0 100644 --- a/src/vppinfra/clib.h +++ b/src/vppinfra/clib.h @@ -39,6 +39,7 @@ #define included_clib_h #include <stddef.h> +#include <stdalign.h> #if __has_include(<vppinfra/config.h>) #include <vppinfra/config.h> diff --git a/src/vppinfra/clib_error.h b/src/vppinfra/clib_error.h index 45f18eb1fe4..5db1a5e3440 100644 --- a/src/vppinfra/clib_error.h +++ b/src/vppinfra/clib_error.h @@ -23,7 +23,7 @@ typedef struct /* Error message. */ u8 *what; - /* Where error occurred (e.g. __FUNCTION__ __LINE__) */ + /* Where error occurred (e.g. 
__func__ __LINE__) */ const u8 *where; uword flags; diff --git a/src/vppinfra/devicetree.c b/src/vppinfra/devicetree.c index df5a24f198e..309308c926e 100644 --- a/src/vppinfra/devicetree.c +++ b/src/vppinfra/devicetree.c @@ -4,6 +4,7 @@ #include <vppinfra/clib.h> #include <vppinfra/devicetree.h> +#include <vppinfra/hash.h> #ifdef __linux #include <sys/types.h> diff --git a/src/vppinfra/devicetree.h b/src/vppinfra/devicetree.h index db7d8411a11..be94c1487ee 100644 --- a/src/vppinfra/devicetree.h +++ b/src/vppinfra/devicetree.h @@ -6,7 +6,8 @@ #define CLIB_DEVICETREE_H_ #include <vppinfra/clib.h> -#include <vlib/vlib.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> #ifdef __linux #define CLIB_DT_LINUX_PREFIX "/sys/firmware/devicetree/base" diff --git a/src/vppinfra/elog.h b/src/vppinfra/elog.h index d0825bdd5b2..6a66319148d 100644 --- a/src/vppinfra/elog.h +++ b/src/vppinfra/elog.h @@ -444,21 +444,21 @@ elog_data_inline (elog_main_t * em, elog_event_type_t * type, #define ELOG_TYPE_INIT_FORMAT_AND_FUNCTION(fmt,func) \ { .format = fmt, .function = func, } -#define ELOG_TYPE_INIT(fmt) \ - ELOG_TYPE_INIT_FORMAT_AND_FUNCTION(fmt,(char *) __FUNCTION__) +#define ELOG_TYPE_INIT(fmt) \ + ELOG_TYPE_INIT_FORMAT_AND_FUNCTION (fmt, (char *) __func__) #define ELOG_TYPE_DECLARE_HELPER(f,fmt,func) \ static elog_event_type_t __ELOG_TYPE_VAR(f) = \ ELOG_TYPE_INIT_FORMAT_AND_FUNCTION (fmt, func) -#define ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f,fmt) \ - ELOG_TYPE_DECLARE_HELPER (f, fmt, (char *) __FUNCTION__) +#define ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f, fmt) \ + ELOG_TYPE_DECLARE_HELPER (f, fmt, (char *) __func__) #define ELOG_TYPE_DECLARE_FORMAT(f,fmt) \ ELOG_TYPE_DECLARE_HELPER (f, fmt, 0) -/* Shorthands with and without __FUNCTION__. - D for decimal; X for hex. F for __FUNCTION__. */ +/* Shorthands with and without __func__. + D for decimal; X for hex. F for __func__. */ #define ELOG_TYPE(f,fmt) ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f,fmt) #define ELOG_TYPE_D(f) ELOG_TYPE_DECLARE_FORMAT (f, #f " %d") #define ELOG_TYPE_X(f) ELOG_TYPE_DECLARE_FORMAT (f, #f " 0x%x") diff --git a/src/vppinfra/error_bootstrap.h b/src/vppinfra/error_bootstrap.h index ae23d1bcca8..d3eed1b83ae 100644 --- a/src/vppinfra/error_bootstrap.h +++ b/src/vppinfra/error_bootstrap.h @@ -53,7 +53,7 @@ enum }; /* Current function name. Need (char *) cast to silence gcc4 pointer signedness warning. */ -#define clib_error_function ((char *) __FUNCTION__) +#define clib_error_function ((char *) __func__) #ifndef CLIB_ASSERT_ENABLE #define CLIB_ASSERT_ENABLE (CLIB_DEBUG > 0) diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h index 71956137665..99a1e97e1aa 100644 --- a/src/vppinfra/file.h +++ b/src/vppinfra/file.h @@ -42,6 +42,7 @@ #include <vppinfra/socket.h> #include <vppinfra/pool.h> +#include <vppinfra/lock.h> #include <termios.h> @@ -53,13 +54,18 @@ typedef struct clib_file /* Unix file descriptor from open/socket. */ u32 file_descriptor; - u32 flags; + u16 flags; #define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0) #define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1) + u16 active : 1; + u16 dont_close : 1; + /* polling thread index */ u32 polling_thread_index; + u32 index; + /* Data available for function's use. */ u64 private_data; @@ -85,77 +91,116 @@ typedef enum typedef struct { /* Pool of files to poll for input/output. 
*/ - clib_file_t *file_pool; + clib_file_t **file_pool; + clib_file_t **pending_free; + + u8 lock; void (*file_update) (clib_file_t * file, clib_file_update_type_t update_type); } clib_file_main_t; +always_inline clib_file_t * +clib_file_get (clib_file_main_t *fm, u32 file_index) +{ + if (pool_is_free_index (fm->file_pool, file_index)) + return 0; + return *pool_elt_at_index (fm->file_pool, file_index); +} + always_inline uword -clib_file_add (clib_file_main_t * um, clib_file_t * template) +clib_file_add (clib_file_main_t *fm, clib_file_t *template) { - clib_file_t *f; - pool_get (um->file_pool, f); + clib_file_t *f, **fp; + u32 index; + + f = clib_mem_alloc_aligned (sizeof (clib_file_t), CLIB_CACHE_LINE_BYTES); + + CLIB_SPINLOCK_LOCK (fm->lock); + pool_get (fm->file_pool, fp); + index = fp - fm->file_pool; + fp[0] = f; + CLIB_SPINLOCK_UNLOCK (fm->lock); + f[0] = template[0]; f->read_events = 0; f->write_events = 0; f->error_events = 0; - um->file_update (f, UNIX_FILE_UPDATE_ADD); - return f - um->file_pool; + f->index = index; + fm->file_update (f, UNIX_FILE_UPDATE_ADD); + f->active = 1; + return index; +} + +always_inline void +clib_file_del (clib_file_main_t *fm, clib_file_t *f) +{ + fm->file_update (f, UNIX_FILE_UPDATE_DELETE); + if (f->dont_close == 0) + close ((int) f->file_descriptor); + + CLIB_SPINLOCK_LOCK (fm->lock); + f->active = 0; + vec_add1 (fm->pending_free, f); + pool_put_index (fm->file_pool, f->index); + CLIB_SPINLOCK_UNLOCK (fm->lock); } always_inline void -clib_file_del (clib_file_main_t * um, clib_file_t * f) +clib_file_del_by_index (clib_file_main_t *fm, uword index) { - um->file_update (f, UNIX_FILE_UPDATE_DELETE); - close (f->file_descriptor); - f->file_descriptor = ~0; - vec_free (f->description); - pool_put (um->file_pool, f); + clib_file_t *f = clib_file_get (fm, index); + clib_file_del (fm, f); } always_inline void -clib_file_del_by_index (clib_file_main_t * um, uword index) +clib_file_free_deleted (clib_file_main_t *fm, clib_thread_index_t thread_index) { - clib_file_t *uf; - uf = pool_elt_at_index (um->file_pool, index); - clib_file_del (um, uf); + u32 n_keep = 0; + + if (vec_len (fm->pending_free) == 0) + return; + + CLIB_SPINLOCK_LOCK (fm->lock); + vec_foreach_pointer (f, fm->pending_free) + { + if (f->polling_thread_index == thread_index) + { + vec_free (f->description); + clib_mem_free (f); + } + else + fm->pending_free[n_keep++] = f; + } + vec_set_len (fm->pending_free, n_keep); + CLIB_SPINLOCK_UNLOCK (fm->lock); } always_inline void -clib_file_set_polling_thread (clib_file_main_t * um, uword index, - u32 thread_index) +clib_file_set_polling_thread (clib_file_main_t *fm, uword index, + clib_thread_index_t thread_index) { - clib_file_t *f = pool_elt_at_index (um->file_pool, index); - um->file_update (f, UNIX_FILE_UPDATE_DELETE); + clib_file_t *f = clib_file_get (fm, index); + fm->file_update (f, UNIX_FILE_UPDATE_DELETE); f->polling_thread_index = thread_index; - um->file_update (f, UNIX_FILE_UPDATE_ADD); + fm->file_update (f, UNIX_FILE_UPDATE_ADD); } always_inline uword -clib_file_set_data_available_to_write (clib_file_main_t * um, - u32 clib_file_index, - uword is_available) +clib_file_set_data_available_to_write (clib_file_main_t *fm, + u32 clib_file_index, uword is_available) { - clib_file_t *uf = pool_elt_at_index (um->file_pool, clib_file_index); - uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + clib_file_t *f = clib_file_get (fm, clib_file_index); + uword was_available = (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); if 
((was_available != 0) != (is_available != 0)) { - uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + f->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + fm->file_update (f, UNIX_FILE_UPDATE_MODIFY); } return was_available != 0; } -always_inline clib_file_t * -clib_file_get (clib_file_main_t * fm, u32 file_index) -{ - if (pool_is_free_index (fm->file_pool, file_index)) - return 0; - return pool_elt_at_index (fm->file_pool, file_index); -} - always_inline clib_error_t * clib_file_write (clib_file_t * f) { @@ -166,11 +211,3 @@ clib_file_write (clib_file_t * f) u8 *clib_file_get_resolved_basename (char *fmt, ...); #endif /* included_clib_file_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/lock.h b/src/vppinfra/lock.h index b7b3d00a905..0fa9aaa6e83 100644 --- a/src/vppinfra/lock.h +++ b/src/vppinfra/lock.h @@ -78,18 +78,24 @@ clib_spinlock_free (clib_spinlock_t * p) } } +#define CLIB_SPINLOCK_LOCK(x) \ + { \ + typeof (x) __free = 0; \ + while (!__atomic_compare_exchange_n (&(x), &__free, 1, 0, \ + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) \ + { \ + while (__atomic_load_n (&(x), __ATOMIC_RELAXED)) \ + CLIB_PAUSE (); \ + __free = 0; \ + } \ + } + +#define CLIB_SPINLOCK_UNLOCK(x) __atomic_store_n (&(x), 0, __ATOMIC_RELEASE) + static_always_inline void clib_spinlock_lock (clib_spinlock_t * p) { - u32 free = 0; - while (!clib_atomic_cmp_and_swap_acq_relax_n (&(*p)->lock, &free, 1, 0)) - { - /* atomic load limits number of compare_exchange executions */ - while (clib_atomic_load_relax_n (&(*p)->lock)) - CLIB_PAUSE (); - /* on failure, compare_exchange writes (*p)->lock into free */ - free = 0; - } + CLIB_SPINLOCK_LOCK ((*p)->lock); CLIB_LOCK_DBG (p); } @@ -122,7 +128,7 @@ clib_spinlock_unlock (clib_spinlock_t * p) { CLIB_LOCK_DBG_CLEAR (p); /* Make sure all reads/writes are complete before releasing the lock */ - clib_atomic_release (&(*p)->lock); + CLIB_SPINLOCK_UNLOCK ((*p)->lock); } static_always_inline void diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 6211bb51f0a..893978081d0 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -259,7 +259,7 @@ uword clib_mem_size (void *p); void clib_mem_free_s (void *p); /* Memory allocator which panics when it fails. - Use macro so that clib_panic macro can expand __FUNCTION__ and __LINE__. */ + Use macro so that clib_panic macro can expand __func__ and __LINE__. */ #define clib_mem_alloc_aligned_no_fail(size,align) \ ({ \ uword _clib_mem_alloc_size = (size); \ diff --git a/src/vppinfra/os.h b/src/vppinfra/os.h index cd3b4289da6..229ed375e74 100644 --- a/src/vppinfra/os.h +++ b/src/vppinfra/os.h @@ -56,29 +56,29 @@ void os_out_of_memory (void); /* Estimate, measure or divine CPU timestamp clock frequency. 
*/ f64 os_cpu_clock_frequency (void); -extern __thread uword __os_thread_index; -extern __thread uword __os_numa_index; +extern __thread clib_thread_index_t __os_thread_index; +extern __thread clib_numa_node_index_t __os_numa_index; -static_always_inline uword +static_always_inline clib_thread_index_t os_get_thread_index (void) { return __os_thread_index; } static_always_inline void -os_set_thread_index (uword thread_index) +os_set_thread_index (clib_thread_index_t thread_index) { __os_thread_index = thread_index; } -static_always_inline uword +static_always_inline clib_numa_node_index_t os_get_numa_index (void) { return __os_numa_index; } static_always_inline void -os_set_numa_index (uword numa_index) +os_set_numa_index (clib_numa_node_index_t numa_index) { __os_numa_index = numa_index; } @@ -94,14 +94,6 @@ os_get_cpu_number (void) uword os_get_nthreads (void); -#include <vppinfra/smp.h> +#include <vppinfra/cache.h> #endif /* included_os_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h index 07c9269c6d8..d73a9783ba8 100644 --- a/src/vppinfra/pool.h +++ b/src/vppinfra/pool.h @@ -421,18 +421,20 @@ _pool_free (void **v) #define pool_free(p) _pool_free ((void **) &(p)) static_always_inline uword -pool_get_first_index (void *pool) +_pool_get_first_index (void *pool) { pool_header_t *h = pool_header (pool); return clib_bitmap_first_clear (h->free_bitmap); } +#define pool_get_first_index(p) _pool_get_first_index ((void *) (p)) static_always_inline uword -pool_get_next_index (void *pool, uword last) +_pool_get_next_index (void *pool, uword last) { pool_header_t *h = pool_header (pool); return clib_bitmap_next_clear (h->free_bitmap, last + 1); } +#define pool_get_next_index(p, l) _pool_get_next_index ((void *) (p), l) /** Optimized iteration through pool. diff --git a/src/vppinfra/smp.h b/src/vppinfra/smp.h deleted file mode 100644 index 2b3ed548dfa..00000000000 --- a/src/vppinfra/smp.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2001-2005 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_clib_smp_h -#define included_clib_smp_h - -#include <vppinfra/cache.h> -#include <vppinfra/os.h> /* for os_panic */ - -#if defined (i386) || defined (__x86_64__) -#define clib_smp_pause() do { asm volatile ("pause"); } while (0) -#elif defined (__aarch64__) || defined (__arm__) -#define clib_smp_pause() do { asm volatile ("isb" ::: "memory"); } while (0) -#endif - -#ifndef clib_smp_pause -#define clib_smp_pause() do { } while (0) -#endif - -#ifdef CLIB_UNIX -#include <sched.h> - -always_inline void -os_sched_yield (void) -{ - sched_yield (); -} -#else -always_inline void -os_sched_yield (void) -{ - clib_smp_pause (); -} -#endif - - -#endif /* included_clib_smp_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/socket.c b/src/vppinfra/socket.c index 2abf2b244cd..f4dad844d33 100644 --- a/src/vppinfra/socket.c +++ b/src/vppinfra/socket.c @@ -48,6 +48,7 @@ #include <netdb.h> #include <unistd.h> #include <fcntl.h> +#include <sched.h> #include <vppinfra/mem.h> #include <vppinfra/vec.h> diff --git a/src/vppinfra/string.c b/src/vppinfra/string.c index ea9480875a5..aedaf428a31 100644 --- a/src/vppinfra/string.c +++ b/src/vppinfra/string.c @@ -94,7 +94,7 @@ clib_memswap (void *_a, void *_b, uword bytes) __clib_export void clib_c11_violation (const char *s) { - _clib_error (CLIB_ERROR_WARNING, (char *) __FUNCTION__, 0, (char *) s); + _clib_error (CLIB_ERROR_WARNING, (char *) __func__, 0, (char *) s); } /** diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c index f1736499a0a..7c0ea44b481 100644 --- a/src/vppinfra/time.c +++ b/src/vppinfra/time.c @@ -332,6 +332,7 @@ format_clib_time (u8 * s, va_list * args) clib_time_t *c = va_arg (*args, clib_time_t *); int verbose = va_arg (*args, int); f64 now, reftime, delta_reftime_in_seconds, error; + u32 indent = format_get_indent (s); /* Compute vpp elapsed time from the CPU clock */ reftime = unix_time_now (); @@ -346,8 +347,14 @@ format_clib_time (u8 * s, va_list * args) error = now - delta_reftime_in_seconds; - s = format (s, ", reftime %.6f, error %.6f, clocks/sec %.6f", - delta_reftime_in_seconds, error, c->clocks_per_second); + s = format (s, "\n%Ucpu time %.6f now %lu last %lu since start %lu \n", + format_white_space, indent, now, clib_cpu_time_now (), + c->last_cpu_time, c->total_cpu_time); + s = format (s, "%Ureftime %.6f now %.6f last %.6f init %.6f\n", + format_white_space, indent, delta_reftime_in_seconds, reftime, + c->last_verify_reference_time, c->init_reference_time); + s = format (s, "%Uerror %.6f, clocks/sec %.6f", format_white_space, indent, + error, c->clocks_per_second); return (s); } diff --git a/src/vppinfra/types.h b/src/vppinfra/types.h index ad85af35ac9..9ed1754fbff 100644 --- a/src/vppinfra/types.h +++ b/src/vppinfra/types.h @@ -211,6 +211,10 @@ typedef uword uwordu __attribute__ ((aligned (1), __may_alias__)); __ptr_ptr - (ARRAY_LEN (__ptr_array) - 1) < __ptr_array; \ __var = *++__ptr_ptr) +typedef u16 clib_thread_index_t; +typedef u8 clib_numa_node_index_t; +#define CLIB_INVALID_THREAD_INDEX CLIB_U16_MAX + #endif /* included_clib_types_h */ /* diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c index 05ca2f901c6..2255cc7cc3a 100644 
--- a/src/vppinfra/unix-misc.c +++ b/src/vppinfra/unix-misc.c @@ -64,10 +64,10 @@ #include <stdio.h> /* for sprintf */ #include <limits.h> -__clib_export __thread uword __os_thread_index = 0; -__clib_export __thread uword __os_numa_index = 0; - -__clib_export clib_bitmap_t *os_get_cpu_affinity_bitmap (int pid); +__clib_export __thread clib_thread_index_t __os_thread_index = 0; +__clib_export __thread clib_numa_node_index_t __os_numa_index = 0; +__clib_export cpu_set_t __os_affinity_cpu_set; +__clib_export clib_bitmap_t *os_get_cpu_affinity_bitmap (); clib_error_t * clib_file_n_bytes (char *file, uword * result) @@ -285,29 +285,31 @@ os_get_online_cpu_core_bitmap () } __clib_export clib_bitmap_t * -os_get_cpu_affinity_bitmap (int pid) +os_get_cpu_affinity_bitmap () { #if __linux - int index, ret; - cpu_set_t cpuset; + int cpu; uword *affinity_cpus; - clib_bitmap_alloc (affinity_cpus, sizeof (cpu_set_t)); + clib_bitmap_alloc (affinity_cpus, __CPU_SETSIZE); clib_bitmap_zero (affinity_cpus); - CPU_ZERO_S (sizeof (cpu_set_t), &cpuset); - - ret = sched_getaffinity (0, sizeof (cpu_set_t), &cpuset); - - if (ret < 0) + /* set__os_affinity_cpu_set once on first call to + * os_get_cpu_affinity_bitmap() */ + if (__CPU_COUNT_S (sizeof (cpu_set_t), &__os_affinity_cpu_set) == 0) { - clib_bitmap_free (affinity_cpus); - return 0; + int ret; + ret = sched_getaffinity (0, sizeof (cpu_set_t), &__os_affinity_cpu_set); + if (ret < 0) + { + clib_bitmap_free (affinity_cpus); + return NULL; + } } - for (index = 0; index < sizeof (cpu_set_t); index++) - if (CPU_ISSET_S (index, sizeof (cpu_set_t), &cpuset)) - clib_bitmap_set (affinity_cpus, index, 1); + for (cpu = 0; cpu < __CPU_SETSIZE; cpu++) + if (__CPU_ISSET_S (cpu, sizeof (cpu_set_t), &__os_affinity_cpu_set)) + clib_bitmap_set (affinity_cpus, cpu, 1); return affinity_cpus; #elif defined(__FreeBSD__) cpuset_t mask; @@ -332,6 +334,100 @@ os_get_cpu_affinity_bitmap (int pid) #endif } +__clib_export int +os_translate_cpu_to_affinity_bitmap (int cpu) +{ + uword *affinity_bmp = os_get_cpu_affinity_bitmap (); + int cpu_it = 0; + int cpu_translate_it = 0; + + if (!affinity_bmp) + return -1; + + if (cpu == ~0) + goto err; + + clib_bitmap_foreach (cpu_it, affinity_bmp) + { + + if (cpu == cpu_translate_it) + { + clib_bitmap_free (affinity_bmp); + return cpu_it; + } + + cpu_translate_it += 1; + } + +err: + clib_bitmap_free (affinity_bmp); + return -1; +} + +__clib_export int +os_translate_cpu_from_affinity_bitmap (int cpu_translated) +{ + uword *affinity_bmp = os_get_cpu_affinity_bitmap (); + int cpu_it = 0; + int cpu_translate_it = 0; + + if (!affinity_bmp) + return -1; + + if (cpu_translated == ~0) + goto err; + + clib_bitmap_foreach (cpu_it, affinity_bmp) + { + + if (cpu_translated == cpu_it) + { + clib_bitmap_free (affinity_bmp); + return cpu_translate_it; + } + + cpu_translate_it += 1; + } + +err: + clib_bitmap_free (affinity_bmp); + return -1; +} + +__clib_export clib_bitmap_t * +os_translate_cpu_bmp_to_affinity_bitmap (clib_bitmap_t *cpu_bmp) +{ + uword *affinity_bmp = os_get_cpu_affinity_bitmap (); + + if (!affinity_bmp) + return NULL; + + u32 cpu_count_relative = clib_bitmap_count_set_bits (affinity_bmp); + u32 cpu_max_corelist = clib_bitmap_last_set (cpu_bmp); + + if (cpu_count_relative <= cpu_max_corelist) + return NULL; + + uword *translated_cpulist; + clib_bitmap_alloc (translated_cpulist, __CPU_SETSIZE); + clib_bitmap_zero (translated_cpulist); + + uword cpu_it; + uword cpu_translate_it = 0; + + clib_bitmap_foreach (cpu_it, affinity_bmp) + { + + if 
(clib_bitmap_get (cpu_bmp, cpu_translate_it)) + clib_bitmap_set (translated_cpulist, cpu_it, 1); + + cpu_translate_it++; + } + + vec_free (affinity_bmp); + return translated_cpulist; +} + __clib_export clib_bitmap_t * os_get_online_cpu_node_bitmap () { diff --git a/src/vppinfra/unix.h b/src/vppinfra/unix.h index d0ddb93a46f..db3102e4fee 100644 --- a/src/vppinfra/unix.h +++ b/src/vppinfra/unix.h @@ -56,6 +56,19 @@ clib_error_t *unix_proc_file_contents (char *file, u8 ** result); /* Retrieve bitmap of online cpu cures */ clib_bitmap_t *os_get_online_cpu_core_bitmap (); +/* Retrieve bitmap of cpu affinity */ +clib_bitmap_t *os_get_cpu_affinity_bitmap (); + +/* Translate cpu index in cpu affinity bitmap */ +int os_translate_cpu_to_affinity_bitmap (int cpu); + +/* Retrieve cpu index after translation in cpu affinity bitmap */ +int os_translate_cpu_from_affinity_bitmap (int cpu_translated); + +/* Translate cpu bitmap based on cpu affinity bitmap */ +clib_bitmap_t * +os_translate_cpu_bmp_to_affinity_bitmap (clib_bitmap_t *cpu_bmp); + /* Retrieve bitmap of online cpu nodes (sockets) */ clib_bitmap_t *os_get_online_cpu_node_bitmap (); diff --git a/src/vppinfra/vec.h b/src/vppinfra/vec.h index 1a64a69a1e6..053c5b07aa2 100644 --- a/src/vppinfra/vec.h +++ b/src/vppinfra/vec.h @@ -446,8 +446,8 @@ _vec_dup (void *v, uword hdr_size, uword align, uword elt_sz) @param DST destination @param SRC source */ -#define vec_copy(DST,SRC) clib_memcpy_fast (DST, SRC, vec_len (DST) * \ - sizeof ((DST)[0])) +#define vec_copy(DST, SRC) \ + clib_memcpy_fast (DST, SRC, vec_len (DST) * _vec_elt_sz (DST)) /** \brief Clone a vector. Make a new vector with the same size as a given vector but possibly with a different type. @@ -480,7 +480,7 @@ _vec_zero_elts (void *v, uword first, uword count, uword elt_sz) { clib_memset_u8 (v + (first * elt_sz), 0, count * elt_sz); } -#define vec_zero_elts(V, F, C) _vec_zero_elts (V, F, C, sizeof ((V)[0])) +#define vec_zero_elts(V, F, C) _vec_zero_elts (V, F, C, _vec_elt_sz (V)) static_always_inline void _vec_validate (void **vp, uword index, uword header_size, uword align, @@ -518,7 +518,7 @@ _vec_validate (void **vp, uword index, uword header_size, uword align, } #define vec_validate_hap(V, I, H, A, P) \ - _vec_validate ((void **) &(V), I, H, _vec_align (V, A), 0, sizeof ((V)[0])) + _vec_validate ((void **) &(V), I, H, _vec_align (V, A), 0, _vec_elt_sz (V)) /** \brief Make sure vector is long enough for given index (no header, unspecified alignment) @@ -1228,11 +1228,13 @@ _vec_is_equal (void *v1, void *v2, uword v1_elt_sz, uword v2_elt_sz) @param vec vector to sort @param f comparison function */ -#define vec_sort_with_function(vec,f) \ -do { \ - if (vec_len (vec) > 1) \ - qsort (vec, vec_len (vec), sizeof (vec[0]), (void *) (f)); \ -} while (0) +#define vec_sort_with_function(vec, f) \ + do \ + { \ + if (vec_len (vec) > 1) \ + qsort (vec, vec_len (vec), _vec_elt_sz (vec), (void *) (f)); \ + } \ + while (0) /** \brief Make a vector containing a NULL terminated c-string. 
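
The relative core-numbering support added in src/vpp/vnet/main.c depends on the new affinity helpers declared in src/vppinfra/unix.h above. Below is a minimal sketch of the intended call pattern, assuming only the declarations shown in this diff; resolve_main_core is a hypothetical wrapper for illustration, not code from the patch.

/* Sketch (not part of the patch): resolve a core id given in "relative"
 * numbering, i.e. the N-th cpu of the process affinity mask, to a
 * physical cpu id using the helper declared in vppinfra/unix.h. */
#include <vppinfra/clib.h>
#include <vppinfra/unix.h>

static int
resolve_main_core (int requested_core, int relative)
{
  if (!relative || requested_core == ~0)
    return requested_core;

  /* e.g. under 'taskset -c 4,6,8', relative core 1 resolves to cpu 6;
   * -1 means the requested slot does not exist in the affinity mask */
  return os_translate_cpu_to_affinity_bitmap (requested_core);
}

As in the main() change earlier in this diff, translation only happens when the "relative" startup flag was given and a main core was explicitly requested; otherwise the core id is used as-is.
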
diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h index 5d386b1eaad..a4e07511426 100644 --- a/src/vppinfra/vec_bootstrap.h +++ b/src/vppinfra/vec_bootstrap.h @@ -83,7 +83,8 @@ always_inline uword __vec_elt_sz (uword elt_sz, int is_void); (((s) + sizeof (uword) - 1) &~ (sizeof (uword) - 1)) #define _vec_is_void(P) \ __builtin_types_compatible_p (__typeof__ ((P)[0]), void) -#define _vec_elt_sz(V) __vec_elt_sz (sizeof ((V)[0]), _vec_is_void (V)) +#define _vec_elt_sz(V) \ + __vec_elt_sz (sizeof ((V)[0]), _vec_is_void (V)) /* NOLINT */ #define _vec_align(V, A) __vec_align (__alignof__((V)[0]), A) always_inline __clib_nosanitize_addr uword @@ -136,7 +137,7 @@ u32 vec_len_not_inline (void *v); /** \brief Number of data bytes in vector. */ -#define vec_bytes(v) (vec_len (v) * sizeof (v[0])) +#define vec_bytes(v) (vec_len (v) * _vec_elt_sz (v)) /** * Return size of memory allocated for the vector |