diff options
Diffstat (limited to 'src/vnet')
475 files changed, 19271 insertions, 24611 deletions
diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index ef187dc2f43..fb8d294009d 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -26,6 +26,21 @@ list(APPEND VNET_SOURCES config.c devices/devices.c devices/netlink.c + dev/api.c + dev/args.c + dev/cli.c + dev/config.c + dev/counters.c + dev/dev.c + dev/dev_api.c + dev/error.c + dev/format.c + dev/handlers.c + dev/pci.c + dev/port.c + dev/process.c + dev/queue.c + dev/runtime.c error.c flow/flow.c flow/flow_cli.c @@ -59,6 +74,7 @@ list(APPEND VNET_HEADERS config.h devices/devices.h devices/netlink.h + dev/dev.h flow/flow.h global_funcs.h interface/rx_queue_funcs.h @@ -83,6 +99,7 @@ list(APPEND VNET_HEADERS ) list(APPEND VNET_API_FILES + dev/dev.api interface.api interface_types.api ip/ip_types.api @@ -297,30 +314,6 @@ list(APPEND VNET_HEADERS ) ############################################################################## -# Layer 2 / vxlan -############################################################################## -list(APPEND VNET_SOURCES - vxlan/vxlan.c - vxlan/encap.c - vxlan/decap.c - vxlan/vxlan_api.c -) - -list(APPEND VNET_MULTIARCH_SOURCES - vxlan/encap.c -) - -list(APPEND VNET_HEADERS - vxlan/vxlan.h - vxlan/vxlan_packet.h - vxlan/vxlan_error.def -) - -list(APPEND VNET_MULTIARCH_SOURCES vxlan/decap.c) - -list(APPEND VNET_API_FILES vxlan/vxlan.api) - -############################################################################## # Layer 2 / Bonding ############################################################################## list(APPEND VNET_SOURCES @@ -670,6 +663,7 @@ list(APPEND VNET_SOURCES udp/udp_encap.c udp/udp_decap.c udp/udp_api.c + udp/udp_output.c ) list(APPEND VNET_MULTIARCH_SOURCES @@ -691,27 +685,10 @@ list(APPEND VNET_API_FILES udp/udp.api) ############################################################################## # Tunnel protocol: gre ############################################################################## -list(APPEND VNET_SOURCES - gre/gre.c - 
gre/node.c - gre/interface.c - gre/pg.c - gre/gre_api.c -) - -list(APPEND VNET_MULTIARCH_SOURCES - gre/node.c - gre/gre.c -) - list(APPEND VNET_HEADERS - gre/gre.h gre/packet.h - gre/error.def ) -list(APPEND VNET_API_FILES gre/gre.api) - ############################################################################## # Tunnel protocol: ipip ############################################################################## @@ -819,6 +796,8 @@ list(APPEND VNET_SOURCES srv6/sr_steering.c srv6/sr_api.c srv6/sr_pt.c + srv6/sr_pt_node.c + srv6/sr_pt_api.c ) list(APPEND VNET_HEADERS @@ -830,6 +809,7 @@ list(APPEND VNET_HEADERS list(APPEND VNET_API_FILES srv6/sr.api srv6/sr_types.api + srv6/sr_pt.api ) ############################################################################## @@ -915,23 +895,6 @@ list(APPEND VNET_HEADERS ) ############################################################################## -# lawful intercept -############################################################################## - -list(APPEND VNET_SOURCES - lawful-intercept/lawful_intercept.c - lawful-intercept/node.c -) - -list(APPEND VNET_MULTIARCH_SOURCES - lawful-intercept/node.c -) - -list(APPEND VNET_HEADERS - lawful-intercept/lawful_intercept.h -) - -############################################################################## # SPAN (port mirroring) ############################################################################## @@ -982,10 +945,6 @@ list(APPEND VNET_SOURCES devices/virtio/format.c devices/virtio/node.c devices/virtio/pci.c - devices/virtio/vhost_user.c - devices/virtio/vhost_user_input.c - devices/virtio/vhost_user_output.c - devices/virtio/vhost_user_api.c devices/virtio/virtio.c devices/virtio/virtio_api.c devices/virtio/virtio_pci_legacy.c @@ -1002,20 +961,15 @@ list(APPEND VNET_HEADERS devices/virtio/virtio_pci_legacy.h devices/virtio/virtio_pci_modern.h devices/virtio/vhost_std.h - devices/virtio/vhost_user.h devices/virtio/virtio_types_api.h ) list(APPEND 
VNET_MULTIARCH_SOURCES - devices/virtio/vhost_user_input.c - devices/virtio/vhost_user_output.c devices/virtio/node.c - devices/af_packet/node.c devices/virtio/device.c ) list(APPEND VNET_API_FILES - devices/virtio/vhost_user.api devices/virtio/virtio.api devices/virtio/virtio_types.api ) @@ -1024,6 +978,7 @@ list(APPEND VNET_API_FILES # tap interface (with virtio backend) ############################################################################## +if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") list(APPEND VNET_SOURCES devices/tap/cli.c devices/tap/tap.c @@ -1037,6 +992,7 @@ list(APPEND VNET_HEADERS list(APPEND VNET_API_FILES devices/tap/tapv2.api ) +endif() ############################################################################## # tap interface (with virtio backend) @@ -1064,6 +1020,7 @@ list(APPEND VNET_SOURCES session/session_rules_table.c session/session_lookup.c session/session_node.c + session/session_input.c session/transport.c session/application.c session/application_worker.c @@ -1110,27 +1067,6 @@ list(APPEND VNET_HEADERS tls/tls_test.h ) -############################################################################## -# Linux packet interface -############################################################################## - -list(APPEND VNET_SOURCES - devices/af_packet/af_packet.c - devices/af_packet/device.c - devices/af_packet/node.c - devices/af_packet/cli.c - devices/af_packet/af_packet_api.c -) - -list(APPEND VNET_MULTIARCH_SOURCES - devices/af_packet/device.c -) - -list(APPEND VNET_HEADERS - devices/af_packet/af_packet.h -) - -list(APPEND VNET_API_FILES devices/af_packet/af_packet.api) ############################################################################## # Driver feature graph arc support @@ -1154,6 +1090,7 @@ list(APPEND VNET_API_FILES feature/feature.api) # FIXME: unix/hgshm.c +if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") list(APPEND VNET_SOURCES unix/gdb_funcs.c unix/tuntap.c @@ -1162,6 +1099,7 @@ list(APPEND VNET_SOURCES list(APPEND 
VNET_HEADERS unix/tuntap.h ) +endif() ############################################################################## # FIB diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index 2cb9ec43c00..201561fe485 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -704,7 +704,6 @@ adj_show (vlib_main_t * vm, } else { - /* *INDENT-OFF* */ pool_foreach_index (ai, adj_pool) { if (~0 != sw_if_index && @@ -719,7 +718,6 @@ adj_show (vlib_main_t * vm, FORMAT_IP_ADJACENCY_NONE); } } - /* *INDENT-ON* */ } } return 0; diff --git a/src/vnet/adj/adj_bfd.c b/src/vnet/adj/adj_bfd.c index c1f02dd9073..e54ba6d74ae 100644 --- a/src/vnet/adj/adj_bfd.c +++ b/src/vnet/adj/adj_bfd.c @@ -280,9 +280,7 @@ adj_bfd_main_init (vlib_main_t * vm) return (0); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (adj_bfd_main_init)= { .runs_after = VLIB_INITS("bfd_main_init"), }; -/* *INDENT-ON* */ diff --git a/src/vnet/adj/adj_dp.h b/src/vnet/adj/adj_dp.h index aff1a2b1f43..186044b90ad 100644 --- a/src/vnet/adj/adj_dp.h +++ b/src/vnet/adj/adj_dp.h @@ -36,22 +36,36 @@ adj_midchain_ipip44_fixup (vlib_main_t * vm, ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); if (PREDICT_TRUE(TUNNEL_ENCAP_DECAP_FLAG_NONE == flags)) - { - ip_csum_t sum; - u16 old,new; - - old = 0; - new = ip4->length; - - sum = ip4->checksum; - sum = ip_csum_update (sum, old, new, ip4_header_t, length); - ip4->checksum = ip_csum_fold (sum); - } + { + if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO)) + { + vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data; + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP | + VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM); + } + else + { + ip_csum_t sum; + u16 old,new; + old = 0; + new = ip4->length; + sum = ip4->checksum; + sum = ip_csum_update (sum, old, new, ip4_header_t, length); + ip4->checksum = ip_csum_fold (sum); + } + } else - { + { tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4); - ip4->checksum = ip4_header_checksum (ip4); - } + if (PREDICT_FALSE 
(b->flags & VNET_BUFFER_F_GSO)) + { + vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data; + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP | + VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM); + } + else + ip4->checksum = ip4_header_checksum (ip4); + } } static_always_inline void diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c index 45477649c1a..ceece0d74ed 100644 --- a/src/vnet/adj/adj_glean.c +++ b/src/vnet/adj/adj_glean.c @@ -45,7 +45,7 @@ adj_glean_db_lookup (fib_protocol_t proto, { uword *p; - if (vec_len(adj_gleans[proto]) <= sw_if_index) + if ((proto >= FIB_PROTOCOL_IP_MAX) || vec_len(adj_gleans[proto]) <= sw_if_index) return (ADJ_INDEX_INVALID); p = hash_get_mem (adj_gleans[proto][sw_if_index], nh_addr); @@ -66,6 +66,7 @@ adj_glean_db_insert (fib_protocol_t proto, vlib_worker_thread_barrier_sync(vm); + ASSERT(proto < FIB_PROTOCOL_IP_MAX); vec_validate(adj_gleans[proto], sw_if_index); if (NULL == adj_gleans[proto][sw_if_index]) @@ -186,6 +187,38 @@ adj_glean_update_rewrite_walk (adj_index_t ai, return (ADJ_WALK_RC_CONTINUE); } +static void +adj_glean_walk_proto (fib_protocol_t proto, + u32 sw_if_index, + adj_walk_cb_t cb, + void *data) +{ + adj_index_t ai, *aip, *ais = NULL; + ip46_address_t *conn; + + ASSERT(proto < FIB_PROTOCOL_IP_MAX); + if (vec_len(adj_gleans[proto]) <= sw_if_index || + NULL == adj_gleans[proto][sw_if_index]) + return; + + /* + * Walk first to collect the indices + * then walk the collection. 
This is safe + * to modifications of the hash table + */ + hash_foreach_mem(conn, ai, adj_gleans[proto][sw_if_index], + ({ + vec_add1(ais, ai); + })); + + vec_foreach(aip, ais) + { + if (ADJ_WALK_RC_STOP == cb(*aip, data)) + break; + } + vec_free(ais); +} + void adj_glean_walk (u32 sw_if_index, adj_walk_cb_t cb, @@ -195,29 +228,7 @@ adj_glean_walk (u32 sw_if_index, FOR_EACH_FIB_IP_PROTOCOL(proto) { - adj_index_t ai, *aip, *ais = NULL; - ip46_address_t *conn; - - if (vec_len(adj_gleans[proto]) <= sw_if_index || - NULL == adj_gleans[proto][sw_if_index]) - continue; - - /* - * Walk first to collect the indices - * then walk the collection. This is safe - * to modifications of the hash table - */ - hash_foreach_mem(conn, ai, adj_gleans[proto][sw_if_index], - ({ - vec_add1(ais, ai); - })); - - vec_foreach(aip, ais) - { - if (ADJ_WALK_RC_STOP == cb(*aip, data)) - break; - } - vec_free(ais); + adj_glean_walk_proto (proto, sw_if_index, cb, data); } } @@ -235,6 +246,7 @@ adj_glean_get (fib_protocol_t proto, ip46_address_t *conn; adj_index_t ai; + ASSERT(proto < FIB_PROTOCOL_IP_MAX); if (vec_len(adj_gleans[proto]) <= sw_if_index || NULL == adj_gleans[proto][sw_if_index]) return (ADJ_INDEX_INVALID); @@ -256,6 +268,7 @@ adj_glean_get_src (fib_protocol_t proto, const ip_adjacency_t *adj; adj_index_t ai; + ASSERT(proto < FIB_PROTOCOL_IP_MAX); if (vec_len(adj_gleans[proto]) <= sw_if_index || NULL == adj_gleans[proto][sw_if_index]) return (NULL); @@ -445,7 +458,7 @@ adj_glean_table_bind (fib_protocol_t fproto, }, }; - adj_glean_walk (sw_if_index, adj_glean_start_backwalk, &bw_ctx); + adj_glean_walk_proto (fproto, sw_if_index, adj_glean_start_backwalk, &bw_ctx); } diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c index a20f61f6f6b..573105b7228 100644 --- a/src/vnet/adj/adj_mcast.c +++ b/src/vnet/adj/adj_mcast.c @@ -82,6 +82,8 @@ adj_mcast_add_or_lock (fib_protocol_t proto, */ vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_get_index(adj)); + + 
adj_delegate_adj_created(adj); } else { @@ -89,8 +91,6 @@ adj_mcast_add_or_lock (fib_protocol_t proto, adj_lock(adj_get_index(adj)); } - adj_delegate_adj_created(adj); - return (adj_get_index(adj)); } diff --git a/src/vnet/adj/adj_midchain_delegate.c b/src/vnet/adj/adj_midchain_delegate.c index de57442ac9b..16129ff86ac 100644 --- a/src/vnet/adj/adj_midchain_delegate.c +++ b/src/vnet/adj/adj_midchain_delegate.c @@ -148,12 +148,11 @@ adj_midchain_delegate_remove (adj_index_t ai) { adj_nbr_midchain_unstack(ai); - adj_delegate_remove (ai, ADJ_DELEGATE_MIDCHAIN); - amd = pool_elt_at_index(amd_pool, ad->ad_index); fib_entry_untrack(amd->amd_fei, amd->amd_sibling); - pool_put(amd_pool, amd); + + adj_delegate_remove (ai, ADJ_DELEGATE_MIDCHAIN); } } diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c index 00d945729d8..1b4fa6c15b9 100644 --- a/src/vnet/adj/adj_nsh.c +++ b/src/vnet/adj/adj_nsh.c @@ -190,7 +190,6 @@ VLIB_REGISTER_NODE (adj_nsh_midchain_node) = { }; /* Built-in ip4 tx feature path definition */ -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (nsh_output, static) = { .arc_name = "nsh-output", @@ -204,4 +203,3 @@ VNET_FEATURE_INIT (nsh_tx_drop, static) = .node_name = "error-drop", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h index 5cb90e47318..06b1b00882e 100644 --- a/src/vnet/adj/rewrite.h +++ b/src/vnet/adj/rewrite.h @@ -147,8 +147,8 @@ vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw, int max_size, void *data, int data_bytes) { /* Sanity check values carefully for this clib_memset operation */ - ASSERT ((max_size > 0) && (max_size < VNET_REWRITE_TOTAL_BYTES)); - ASSERT ((data_bytes >= 0) && (data_bytes < max_size)); + ASSERT ((max_size > 0) && (max_size <= VNET_REWRITE_TOTAL_BYTES)); + ASSERT ((data_bytes >= 0) && (data_bytes <= max_size)); rw->data_bytes = data_bytes; clib_memcpy_fast (rw->data, data, data_bytes); diff --git a/src/vnet/api_errno.h 
b/src/vnet/api_errno.h index 4e91e132b89..52f201c081b 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -35,11 +35,21 @@ format_function_t format_vnet_api_errno; static_always_inline vnet_api_error_t vnet_api_error (clib_error_t *err) { + if (err == 0) + return 0; if (err->code >= 0) return VNET_API_ERROR_BUG; return err->code; } +static_always_inline vnet_api_error_t +vnet_get_api_error_and_free (clib_error_t *err) +{ + vnet_api_error_t rv = vnet_api_error (err); + clib_error_free (err); + return rv; +} + #endif /* included_vnet_api_errno_h */ /* diff --git a/src/vnet/arp/arp.c b/src/vnet/arp/arp.c index d39d48e2c77..43b2a93a7b3 100644 --- a/src/vnet/arp/arp.c +++ b/src/vnet/arp/arp.c @@ -191,7 +191,6 @@ always_inline u32 arp_learn (u32 sw_if_index, const ethernet_arp_ip4_over_ethernet_address_t * addr) { - /* *INDENT-OFF* */ ip_neighbor_learn_t l = { .ip = { .ip.ip4 = addr->ip4, @@ -200,7 +199,6 @@ arp_learn (u32 sw_if_index, .mac = addr->mac, .sw_if_index = sw_if_index, }; - /* *INDENT-ON* */ ip_neighbor_learn_dp (&l); @@ -354,7 +352,6 @@ arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags) const fib_entry_t *entry = fib_entry_get (fei); const fib_entry_src_t *entry_src; fib_source_t src; - /* *INDENT-OFF* */ FOR_EACH_SRC_ADDED(entry, entry_src, src, ({ *flags = fib_entry_get_flags_for_source (fei, src); @@ -363,7 +360,6 @@ arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags) else if (FIB_ENTRY_FLAG_CONNECTED & *flags) return ARP_DST_FIB_CONN; })) - /* *INDENT-ON* */ return ARP_DST_FIB_NONE; } @@ -427,6 +423,10 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) } + dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0), + &arp0->ip4_over_ethernet[1].ip4, 32); + conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei); + { /* * we're looking for FIB entries that indicate the source @@ -459,7 +459,6 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, 
vlib_frame_t * frame) * flags we need, or the flags we must not have, * is not the best source, so check then all. */ - /* *INDENT-OFF* */ FOR_EACH_SRC_ADDED(src_fib_entry, src, source, ({ src_flags = fib_entry_get_flags_for_source (src_fei, source); @@ -497,7 +496,6 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) * nor is it a already learned host resp. */ })); - /* *INDENT-ON* */ /* * shorter mask lookup for the next iteration. @@ -515,24 +513,20 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) while (!attached && !fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE)); - if (!attached) + if (!attached && + !arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0)) { /* - * the matching route is a not attached, i.e. it was - * added as a result of routing, rather than interface/ARP - * configuration. If the matching route is not a host route - * (i.e. a /32) + * the matching route is a not attached and not unnumbered, + * i.e. it was added as a result of routing, rather than + * interface/ARP configuration. If the matching route is not + * a host route (i.e. a /32) */ error0 = ARP_ERROR_L3_SRC_ADDRESS_NOT_LOCAL; goto drop; } } - dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0), - &arp0->ip4_over_ethernet[1].ip4, - 32); - conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei); - switch (arp_dst_fib_check (dst_fei, &dst_flags)) { case ARP_DST_FIB_ADJ: @@ -625,9 +619,9 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) sw_if_index0 != fib_entry_get_resolving_interface (src_fei)) { /* - * The interface the ARP is sent to or was received on is not the - * interface on which the covering prefix is configured. - * Maybe this is a case for unnumbered. + * The interface the ARP is sent to or was received on is + * not the interface on which the covering prefix is + * configured. Maybe this is a case for unnumbered. 
*/ if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0)) { @@ -642,8 +636,7 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) goto drop; } - next0 = arp_mk_reply (vnm, p0, sw_if_index0, - if_addr0, arp0, eth_rx); + next0 = arp_mk_reply (vnm, p0, sw_if_index0, if_addr0, arp0, eth_rx); /* We are going to reply to this request, so, in the absence of errors, learn the sender */ @@ -677,7 +670,6 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (arp_input_node, static) = { @@ -764,7 +756,6 @@ VNET_FEATURE_INIT (arp_drop_feat_node, static) = .runs_before = 0, /* last feature */ }; -/* *INDENT-ON* */ typedef struct { @@ -936,13 +927,11 @@ ethernet_arp_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ethernet_arp_init) = { .runs_after = VLIB_INITS("ethernet_init", "ip_neighbor_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/arp/arp_proxy.c b/src/vnet/arp/arp_proxy.c index 184edbf8be8..39f624d5a1d 100644 --- a/src/vnet/arp/arp_proxy.c +++ b/src/vnet/arp/arp_proxy.c @@ -223,7 +223,6 @@ set_arp_proxy (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ /*? * Enable proxy-arp on an interface. The vpp stack will answer ARP * requests for the indicated address range. 
Multiple proxy-arp @@ -249,15 +248,12 @@ VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = { "set interface proxy-arp <intfc> [enable|disable]", .function = set_int_proxy_arp_command_fn, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_arp_proxy_command, static) = { .path = "set arp proxy", .short_help = "set arp proxy [del] table-ID <table-ID> start <start-address> end <end-addres>", .function = set_arp_proxy, }; -/* *INDENT-ON* */ typedef struct { @@ -435,13 +431,11 @@ show_ip4_arp (vlib_main_t * vm, * Fib_index 0 6.0.0.1 - 6.0.0.11 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip4_arp_command, static) = { .path = "show arp proxy", .function = show_ip4_arp, .short_help = "show ip arp", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index f573bc5309a..d3b3ed21a26 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -359,6 +359,7 @@ autoreply define bfd_udp_auth_deactivate bool is_delayed; }; +/* must be compatible with bfd_error_t */ counters bfd_udp { none { severity info; @@ -366,17 +367,23 @@ counters bfd_udp { units "packets"; description "OK"; }; - no_session { + bad { severity error; type counter64; units "packets"; - description "no-session"; + description "bad packet"; }; - bad { + disabled { severity error; type counter64; units "packets"; - description "bad packet"; + description "bfd packets received on disabled interfaces"; + }; + version { + severity error; + type counter64; + units "packets"; + description "version"; }; length { severity error; @@ -384,6 +391,42 @@ counters bfd_udp { units "packets"; description "too short"; }; + detect_multi { + severity error; + type counter64; + units "packets"; + description "detect-multi"; + }; + multi_point { + severity error; + type counter64; + units "packets"; + description "multi-point"; + }; + my_disc { + severity error; + type counter64; + units "packets"; + description "my-disc"; 
+ }; + your_disc { + severity error; + type counter64; + units "packets"; + description "your-disc"; + }; + admin_down { + severity error; + type counter64; + units "packets"; + description "session admin-down"; + }; + no_session { + severity error; + type counter64; + units "packets"; + description "no-session"; + }; failed_verification { severity error; type counter64; diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index 4d76f71fd9f..816e71081ff 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -217,7 +217,6 @@ bfd_event (bfd_main_t * bm, bfd_session_t * bs) vpe_api_main_t *vam = &vpe_api_main; vpe_client_registration_t *reg; vl_api_registration_t *vl_reg; - /* *INDENT-OFF* */ pool_foreach (reg, vam->bfd_events_registrations) { vl_reg = vl_api_client_index_to_registration (reg->client_index); if (vl_reg) @@ -231,7 +230,6 @@ bfd_event (bfd_main_t * bm, bfd_session_t * bs) } } } - /* *INDENT-ON* */ } static void @@ -244,13 +242,11 @@ vl_api_bfd_udp_session_dump_t_handler (vl_api_bfd_udp_session_dump_t * mp) return; bfd_session_t *bs = NULL; - /* *INDENT-OFF* */ pool_foreach (bs, bfd_main.sessions) { if (bs->transport == BFD_TRANSPORT_UDP4 || bs->transport == BFD_TRANSPORT_UDP6) send_bfd_udp_session_details (reg, mp->context, bs); } - /* *INDENT-ON* */ } static void @@ -301,7 +297,6 @@ vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp) bfd_auth_key_t *key = NULL; vl_api_bfd_auth_keys_details_t *rmp = NULL; - /* *INDENT-OFF* */ pool_foreach (key, bfd_main.auth_keys) { rmp = vl_msg_api_alloc (sizeof (*rmp)); clib_memset (rmp, 0, sizeof (*rmp)); @@ -312,7 +307,6 @@ vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp) rmp->use_count = clib_host_to_net_u32 (key->use_count); vl_api_send_msg (reg, (u8 *)rmp); } - /* *INDENT-ON* */ } static void @@ -394,7 +388,6 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t * bfd_udp_get_echo_source (&is_set, &sw_if_index, &have_usable_ip4, 
&ip4, &have_usable_ip6, &ip6); - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_BFD_UDP_GET_ECHO_SOURCE_REPLY, ({ rmp->sw_if_index = ntohl (sw_if_index); @@ -428,7 +421,6 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t * rmp->have_usable_ip6 = false; } })) - /* *INDENT-ON* */ } #include <vnet/bfd/bfd.api.c> diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c index 1d100b077eb..33942bb89e6 100644 --- a/src/vnet/bfd/bfd_cli.c +++ b/src/vnet/bfd/bfd_cli.c @@ -134,12 +134,10 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input, bfd_auth_key_t *key = NULL; u8 *s = format (NULL, "%=10s %=25s %=10s\n", "Configuration Key ID", "Type", "Use Count"); - /* *INDENT-OFF* */ pool_foreach (key, bm->auth_keys) { s = format (s, "%10u %-25s %10u\n", key->conf_key_id, bfd_auth_type_str (key->auth_type), key->use_count); } - /* *INDENT-ON* */ vlib_cli_output (vm, "%v\n", s); vec_free (s); vlib_cli_output (vm, "Number of configured BFD keys: %lu\n", @@ -149,11 +147,9 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input, { u8 *s = format (NULL, "%=10s %=32s %=20s %=20s\n", "Index", "Property", "Local value", "Remote value"); - /* *INDENT-OFF* */ pool_foreach (bs, bm->sessions) { s = format (s, "%U", format_bfd_session_cli, vm, bs); } - /* *INDENT-ON* */ vlib_cli_output (vm, "%v", s); vec_free (s); vlib_cli_output (vm, "Number of configured BFD sessions: %lu\n", @@ -212,13 +208,11 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_bfd_command, static) = { .path = "show bfd", .short_help = "show bfd [keys|sessions|echo-source]", .function = show_bfd, }; -/* *INDENT-ON* */ static clib_error_t * bfd_cli_key_add (vlib_main_t * vm, unformat_input_t * input, @@ -310,7 +304,6 @@ out: return ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = { .path = "bfd key set", .short_help = "bfd key set" @@ -319,7 +312,6 @@ VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = { " 
secret <secret>", .function = bfd_cli_key_add, }; -/* *INDENT-ON* */ static clib_error_t * bfd_cli_key_del (vlib_main_t * vm, unformat_input_t * input, @@ -355,13 +347,11 @@ out: return ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_key_del_command, static) = { .path = "bfd key del", .short_help = "bfd key del conf-key-id <id>", .function = bfd_cli_key_del, }; -/* *INDENT-ON* */ #define INTERFACE_STR "interface" #define LOCAL_ADDR_STR "local-addr" @@ -397,23 +387,30 @@ WARN_OFF(tautological-compare) \ goto out; \ } +static uword +bfd_cli_unformat_ip46_address (unformat_input_t *input, va_list *args) +{ + ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); + return unformat_user (input, unformat_ip46_address, ip46, IP46_TYPE_ANY); +} + static clib_error_t * bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input, CLIB_UNUSED (vlib_cli_command_t * lmd)) { clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; -#define foreach_bfd_cli_udp_session_add_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ - unformat_vnet_sw_interface, &vnet_main) \ - F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \ - F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \ - F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \ - F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \ +#define foreach_bfd_cli_udp_session_add_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \ + F 
(u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \ + F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \ + F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \ F (u32, bfd_key_id, BFD_KEY_ID_STR, optional, "%u") foreach_bfd_cli_udp_session_add_cli_param (DECLARE); @@ -477,7 +474,6 @@ out: return ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = { .path = "bfd udp session add", .short_help = "bfd udp session add" @@ -493,7 +489,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = { "]", .function = bfd_cli_udp_session_add, }; -/* *INDENT-ON* */ static clib_error_t * bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, @@ -501,15 +496,15 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, { clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; -#define foreach_bfd_cli_udp_session_mod_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ - unformat_vnet_sw_interface, &vnet_main) \ - F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \ - F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \ +#define foreach_bfd_cli_udp_session_mod_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \ + F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \ F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") foreach_bfd_cli_udp_session_mod_cli_param (DECLARE); @@ -556,7 +551,6 @@ out: return 
ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = { .path = "bfd udp session mod", .short_help = "bfd udp session mod interface" @@ -568,7 +562,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = { " <detect multiplier> ", .function = bfd_cli_udp_session_mod, }; -/* *INDENT-ON* */ static clib_error_t * bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input, @@ -576,13 +569,13 @@ bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input, { clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; -#define foreach_bfd_cli_udp_session_del_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ - unformat_vnet_sw_interface, &vnet_main) \ - F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) +#define foreach_bfd_cli_udp_session_del_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) foreach_bfd_cli_udp_session_del_cli_param (DECLARE); @@ -620,7 +613,6 @@ out: return ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = { .path = "bfd udp session del", .short_help = "bfd udp session del interface" @@ -629,7 +621,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = { "<peer-address> ", .function = bfd_cli_udp_session_del, }; -/* *INDENT-ON* */ static clib_error_t * bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, @@ -637,14 +628,14 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, { clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; 
-#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ - unformat_vnet_sw_interface, &vnet_main) \ - F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \ +#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \ &admin_up_down_token) foreach_bfd_cli_udp_session_set_flags_cli_param (DECLARE); @@ -702,7 +693,6 @@ out: return ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = { .path = "bfd udp session set-flags", .short_help = "bfd udp session set-flags" @@ -712,7 +702,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = { " admin <up|down>", .function = bfd_cli_udp_session_set_flags, }; -/* *INDENT-ON* */ static clib_error_t * bfd_cli_udp_session_auth_activate (vlib_main_t * vm, @@ -721,15 +710,15 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm, { clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; -#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ - unformat_vnet_sw_interface, &vnet_main) \ - F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \ - F (u32, conf_key_id, CONF_KEY_ID_STR, 
mandatory, "%u") \ +#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \ + F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \ F (u32, bfd_key_id, BFD_KEY_ID_STR, mandatory, "%u") foreach_bfd_cli_udp_session_auth_activate_cli_param (DECLARE); @@ -799,7 +788,6 @@ out: return ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = { .path = "bfd udp session auth activate", .short_help = "bfd udp session auth activate" @@ -818,13 +806,13 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input, { clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; -#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ - unformat_vnet_sw_interface, &vnet_main) \ - F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ - F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ - unformat_ip46_address) \ +#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + bfd_cli_unformat_ip46_address) \ F (u8 *, delayed_token, DELAYED_STR, optional, "%v") foreach_bfd_cli_udp_session_auth_deactivate_cli_param (DECLARE); @@ -884,7 +872,6 @@ out: return ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, 
static) = { .path = "bfd udp session auth deactivate", .short_help = "bfd udp session auth deactivate" @@ -894,7 +881,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = { "[ delayed <yes|no> ]", .function = bfd_cli_udp_session_auth_deactivate, }; -/* *INDENT-ON* */ static clib_error_t * bfd_cli_udp_set_echo_source (vlib_main_t * vm, unformat_input_t * input, @@ -941,13 +927,11 @@ out: return ret; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_udp_set_echo_source_cmd, static) = { .path = "bfd udp echo-source set", .short_help = "bfd udp echo-source set interface <interface>", .function = bfd_cli_udp_set_echo_source, }; -/* *INDENT-ON* */ static clib_error_t * bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input, @@ -964,13 +948,11 @@ bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bfd_cli_udp_del_echo_source_cmd, static) = { .path = "bfd udp echo-source del", .short_help = "bfd udp echo-source del", .function = bfd_cli_udp_del_echo_source, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 1ca1d7ec0ab..1423da91158 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -500,30 +500,29 @@ bfd_session_set_flags (vlib_main_t * vm, bfd_session_t * bs, u8 admin_up_down) } u8 * -bfd_input_format_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *); - const bfd_pkt_t *pkt = (bfd_pkt_t *) t->data; - if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head)) - { - s = format (s, "BFD v%u, diag=%u(%s), state=%u(%s),\n" - " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), " - "detect_mult=%u, length=%u\n", - bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt), - bfd_diag_code_string 
(bfd_pkt_get_diag_code (pkt)), - bfd_pkt_get_state (pkt), - bfd_state_string (bfd_pkt_get_state (pkt)), - bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt), - bfd_pkt_get_control_plane_independent (pkt), - bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt), - bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult, - pkt->head.length); - if (t->len >= sizeof (bfd_pkt_t) && - pkt->head.length >= sizeof (bfd_pkt_t)) +format_bfd_pkt (u8 *s, va_list *args) +{ + u32 len = va_arg (*args, u32); + u8 *data = va_arg (*args, u8 *); + + const bfd_pkt_t *pkt = (bfd_pkt_t *) data; + if (len > STRUCT_SIZE_OF (bfd_pkt_t, head)) + { + s = format ( + s, + "BFD v%u, diag=%u(%s), state=%u(%s),\n" + " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), " + "detect_mult=%u, length=%u", + bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt), + bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)), + bfd_pkt_get_state (pkt), bfd_state_string (bfd_pkt_get_state (pkt)), + bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt), + bfd_pkt_get_control_plane_independent (pkt), + bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt), + bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult, pkt->head.length); + if (len >= sizeof (bfd_pkt_t) && pkt->head.length >= sizeof (bfd_pkt_t)) { - s = format (s, " my discriminator: %u\n", + s = format (s, "\n my discriminator: %u\n", clib_net_to_host_u32 (pkt->my_disc)); s = format (s, " your discriminator: %u\n", clib_net_to_host_u32 (pkt->your_disc)); @@ -534,16 +533,16 @@ bfd_input_format_trace (u8 * s, va_list * args) s = format (s, " required min echo rx interval: %u", clib_net_to_host_u32 (pkt->req_min_echo_rx)); } - if (t->len >= sizeof (bfd_pkt_with_common_auth_t) && + if (len >= sizeof (bfd_pkt_with_common_auth_t) && pkt->head.length >= sizeof (bfd_pkt_with_common_auth_t) && bfd_pkt_get_auth_present (pkt)) { const bfd_pkt_with_common_auth_t *with_auth = (void *) pkt; const bfd_auth_common_t *common = &with_auth->common_auth; s = format (s, "\n auth len: %u\n", 
common->len); - s = format (s, " auth type: %u:%s\n", common->type, + s = format (s, " auth type: %u:%s", common->type, bfd_auth_type_str (common->type)); - if (t->len >= sizeof (bfd_pkt_with_sha1_auth_t) && + if (len >= sizeof (bfd_pkt_with_sha1_auth_t) && pkt->head.length >= sizeof (bfd_pkt_with_sha1_auth_t) && (BFD_AUTH_TYPE_keyed_sha1 == common->type || BFD_AUTH_TYPE_meticulous_keyed_sha1 == common->type)) @@ -557,15 +556,23 @@ bfd_input_format_trace (u8 * s, va_list * args) sizeof (sha1->hash)); } } - else - { - s = format (s, "\n"); - } } return s; } +u8 * +bfd_input_format_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *); + + s = format (s, "%U", format_bfd_pkt, t->len, t->data); + + return s; +} + typedef struct { u32 bs_idx; @@ -739,17 +746,18 @@ bfd_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs) } static int -bfd_transport_control_frame (vlib_main_t * vm, u32 bi, bfd_session_t * bs) +bfd_transport_control_frame (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi, + bfd_session_t *bs) { switch (bs->transport) { case BFD_TRANSPORT_UDP4: BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx); - return bfd_transport_udp4 (vm, bi, bs, 0 /* is_echo */); + return bfd_transport_udp4 (vm, rt, bi, bs, 0 /* is_echo */); break; case BFD_TRANSPORT_UDP6: BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx); - return bfd_transport_udp6 (vm, bi, bs, 0 /* is_echo */); + return bfd_transport_udp6 (vm, rt, bi, bs, 0 /* is_echo */); break; } return 0; @@ -773,17 +781,18 @@ bfd_echo_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs) } static int -bfd_transport_echo (vlib_main_t * vm, u32 bi, bfd_session_t * bs) +bfd_transport_echo (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi, + bfd_session_t *bs) { switch (bs->transport) { case BFD_TRANSPORT_UDP4: 
BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx); - return bfd_transport_udp4 (vm, bi, bs, 1 /* is_echo */); + return bfd_transport_udp4 (vm, rt, bi, bs, 1 /* is_echo */); break; case BFD_TRANSPORT_UDP6: BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx); - return bfd_transport_udp6 (vm, bi, bs, 1 /* is_echo */); + return bfd_transport_udp6 (vm, rt, bi, bs, 1 /* is_echo */); break; } return 0; @@ -902,8 +911,39 @@ bfd_init_control_frame (bfd_session_t *bs, vlib_buffer_t *b) b->current_length = bfd_length; } +typedef struct +{ + u32 bs_idx; + u32 len; + u8 data[400]; +} bfd_process_trace_t; + static void -bfd_send_echo (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) +bfd_process_trace_buf (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_buffer_t *b, bfd_session_t *bs) +{ + u32 n_trace = vlib_get_trace_count (vm, rt); + if (n_trace > 0) + { + bfd_process_trace_t *tr; + if (vlib_trace_buffer (vm, rt, 0, b, 0)) + { + tr = vlib_add_trace (vm, rt, b, sizeof (*tr)); + tr->bs_idx = bs->bs_idx; + u64 len = (b->current_length < sizeof (tr->data)) ? 
+ b->current_length : + sizeof (tr->data); + tr->len = len; + clib_memcpy_fast (tr->data, vlib_buffer_get_current (b), len); + --n_trace; + vlib_set_trace_count (vm, rt, n_trace); + } + } +} + +static void +bfd_send_echo (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm, + bfd_session_t *bs, u64 now) { if (!bfd_is_echo_possible (bs)) { @@ -931,6 +971,7 @@ bfd_send_echo (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_nsec, bs->echo_secret); b->current_length = sizeof (*pkt); + bfd_process_trace_buf (vm, rt, b, bs); if (!bfd_echo_add_transport_layer (vm, bi, bs)) { BFD_ERR ("cannot send echo packet out, turning echo off"); @@ -938,7 +979,7 @@ bfd_send_echo (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) vlib_buffer_free_one (vm, bi); return; } - if (!bfd_transport_echo (vm, bi, bs)) + if (!bfd_transport_echo (vm, rt, bi, bs)) { BFD_ERR ("cannot send echo packet out, turning echo off"); bs->echo = 0; @@ -957,7 +998,8 @@ bfd_send_echo (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) } static void -bfd_send_periodic (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) +bfd_send_periodic (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm, + bfd_session_t *bs, u64 now) { if (!bs->remote_min_rx_usec && BFD_POLL_NOT_NEEDED == bs->poll_state) { @@ -1014,8 +1056,9 @@ bfd_send_periodic (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) break; } bfd_add_auth_section (vm, b, bs); + bfd_process_trace_buf (vm, rt, b, bs); bfd_add_transport_layer (vm, bi, bs); - if (!bfd_transport_control_frame (vm, bi, bs)) + if (!bfd_transport_control_frame (vm, rt, bi, bs)) { vlib_buffer_free_one (vm, bi); } @@ -1090,7 +1133,8 @@ bfd_check_rx_timeout (vlib_main_t * vm, bfd_main_t * bm, bfd_session_t * bs, } void -bfd_on_timeout (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) +bfd_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm, + 
bfd_session_t *bs, u64 now) { BFD_DBG ("Timeout for bs_idx=%lu", bs->bs_idx); switch (bs->local_state) @@ -1098,11 +1142,11 @@ bfd_on_timeout (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) case BFD_STATE_admin_down: /* fallthrough */ case BFD_STATE_down: - bfd_send_periodic (vm, bm, bs, now); + bfd_send_periodic (vm, rt, bm, bs, now); break; case BFD_STATE_init: bfd_check_rx_timeout (vm, bm, bs, now, 1); - bfd_send_periodic (vm, bm, bs, now); + bfd_send_periodic (vm, rt, bm, bs, now); break; case BFD_STATE_up: bfd_check_rx_timeout (vm, bm, bs, now, 1); @@ -1119,20 +1163,33 @@ bfd_on_timeout (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now) bs->config_required_min_rx_nsec)); bfd_set_poll_state (bs, BFD_POLL_NEEDED); } - bfd_send_periodic (vm, bm, bs, now); + bfd_send_periodic (vm, rt, bm, bs, now); if (bs->echo) { - bfd_send_echo (vm, bm, bs, now); + bfd_send_echo (vm, rt, bm, bs, now); } break; } } +u8 * +format_bfd_process_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + bfd_process_trace_t *t = va_arg (*args, bfd_process_trace_t *); + + s = + format (s, "bs_idx=%u => %U", t->bs_idx, format_bfd_pkt, t->len, t->data); + + return s; +} + /* * bfd process node function */ static uword -bfd_process (vlib_main_t *vm, CLIB_UNUSED (vlib_node_runtime_t *rt), +bfd_process (vlib_main_t *vm, vlib_node_runtime_t *rt, CLIB_UNUSED (vlib_frame_t *f)) { bfd_main_t *bm = &bfd_main; @@ -1213,7 +1270,7 @@ bfd_process (vlib_main_t *vm, CLIB_UNUSED (vlib_node_runtime_t *rt), { bfd_session_t *bs = pool_elt_at_index (bm->sessions, *session_index); - bfd_send_periodic (vm, bm, bs, now); + bfd_send_periodic (vm, rt, bm, bs, now); bfd_set_timer (bm, bs, now, 1); } else @@ -1259,7 +1316,7 @@ bfd_process (vlib_main_t *vm, CLIB_UNUSED (vlib_node_runtime_t *rt), { bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx); bs->tw_id = 0; /* timer 
is gone because it expired */ - bfd_on_timeout (vm, bm, bs, now); + bfd_on_timeout (vm, rt, bm, bs, now); bfd_set_timer (bm, bs, now, 1); } } @@ -1280,13 +1337,25 @@ bfd_process (vlib_main_t *vm, CLIB_UNUSED (vlib_node_runtime_t *rt), /* * bfd process node declaration */ -VLIB_REGISTER_NODE (bfd_process_node, static) = { +// clang-format off +VLIB_REGISTER_NODE (bfd_process_node, static) = +{ .function = bfd_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "bfd-process", - .n_next_nodes = 0, - .next_nodes = {}, + .flags = (VLIB_NODE_FLAG_TRACE_SUPPORTED), + .format_trace = format_bfd_process_trace, + .n_next_nodes = BFD_TX_N_NEXT, + .next_nodes = { + [BFD_TX_IP4_ARP] = "ip4-arp", + [BFD_TX_IP6_NDP] = "ip6-discover-neighbor", + [BFD_TX_IP4_REWRITE] = "ip4-rewrite", + [BFD_TX_IP6_REWRITE] = "ip6-rewrite", + [BFD_TX_IP4_MIDCHAIN] = "ip4-midchain", + [BFD_TX_IP6_MIDCHAIN] = "ip6-midchain", + } }; +// clang-format on static clib_error_t * bfd_sw_interface_up_down (CLIB_UNUSED (vnet_main_t *vnm), diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 4fc4ef81260..1d4617e1d7c 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -366,7 +366,6 @@ typedef enum BFD_EVENT_CONFIG_CHANGED, } bfd_process_event_e; -/* *INDENT-OFF* */ /** echo packet structure */ typedef CLIB_PACKED (struct { /** local discriminator */ @@ -376,7 +375,6 @@ typedef CLIB_PACKED (struct { /** checksum - based on discriminator, local secret and expire time */ u64 checksum; }) bfd_echo_pkt_t; -/* *INDENT-ON* */ static inline void bfd_lock (bfd_main_t * bm) @@ -476,6 +474,17 @@ const char *bfd_poll_state_string (bfd_poll_state_e state); */ void bfd_register_listener (bfd_notify_fn_t fn); +typedef enum +{ + BFD_TX_IP4_ARP, + BFD_TX_IP6_NDP, + BFD_TX_IP4_REWRITE, + BFD_TX_IP6_REWRITE, + BFD_TX_IP4_MIDCHAIN, + BFD_TX_IP6_MIDCHAIN, + BFD_TX_N_NEXT, +} bfd_tx_next_t; + #endif /* __included_bfd_main_h__ */ /* diff --git a/src/vnet/bfd/bfd_protocol.h b/src/vnet/bfd/bfd_protocol.h 
index 210c561b430..16ee3231ef0 100644 --- a/src/vnet/bfd/bfd_protocol.h +++ b/src/vnet/bfd/bfd_protocol.h @@ -46,14 +46,11 @@ typedef enum u32 bfd_max_key_len_for_auth_type (bfd_auth_type_e auth_type); const char *bfd_auth_type_str (bfd_auth_type_e auth_type); -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u8 type; u8 len; }) bfd_auth_common_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /* * 4.4. Keyed SHA1 and Meticulous Keyed SHA1 Authentication Section Format @@ -88,9 +85,7 @@ typedef CLIB_PACKED (struct { */ u8 hash[20]; }) bfd_auth_sha1_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /* * The Mandatory Section of a BFD Control packet has the following @@ -125,21 +120,16 @@ typedef CLIB_PACKED (struct { u32 req_min_rx; u32 req_min_echo_rx; }) bfd_pkt_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { bfd_pkt_t pkt; bfd_auth_common_t common_auth; }) bfd_pkt_with_common_auth_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { bfd_pkt_t pkt; bfd_auth_sha1_t sha1_auth; }) bfd_pkt_with_sha1_auth_t; -/* *INDENT-ON* */ u8 bfd_pkt_get_version (const bfd_pkt_t * pkt); void bfd_pkt_set_version (bfd_pkt_t * pkt, int version); diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 36ecdf1dbc9..ec42cda1bc4 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -42,6 +42,14 @@ #include <vnet/bfd/bfd_api.h> #include <vnet/bfd/bfd.api_enum.h> +#define F(sym, str) \ + STATIC_ASSERT ((int) BFD_ERROR_##sym == (int) BFD_UDP_ERROR_##sym, \ + "BFD error enums mismatch"); +foreach_bfd_error (F) +#undef F + STATIC_ASSERT ((int) BFD_N_ERROR <= (int) BFD_UDP_N_ERROR, + "BFD error enum sizes mismatch"); + typedef struct { bfd_main_t *bfd_main; @@ -54,18 +62,6 @@ typedef struct int echo_source_is_set; /* loopback interface used to get echo source ip */ u32 echo_source_sw_if_index; - /* node index of "ip4-arp" node */ - u32 ip4_arp_idx; - /* node index of 
"ip6-discover-neighbor" node */ - u32 ip6_ndp_idx; - /* node index of "ip4-rewrite" node */ - u32 ip4_rewrite_idx; - /* node index of "ip6-rewrite" node */ - u32 ip6_rewrite_idx; - /* node index of "ip4-midchain" node */ - u32 ip4_midchain_idx; - /* node index of "ip6-midchain" node */ - u32 ip6_midchain_idx; /* log class */ vlib_log_class_t log_class; /* number of active udp4 sessions */ @@ -135,7 +131,6 @@ bfd_udp_is_echo_available (bfd_transport_e transport) { ip4_main_t *im = &ip4_main; ip_interface_address_t *ia = NULL; - /* *INDENT-OFF* */ foreach_ip_interface_address (&im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index, 0 /* honor unnumbered */, ({ @@ -144,13 +139,11 @@ bfd_udp_is_echo_available (bfd_transport_e transport) return 1; } })); - /* *INDENT-ON* */ } else if (BFD_TRANSPORT_UDP6 == transport) { ip6_main_t *im = &ip6_main; ip_interface_address_t *ia = NULL; - /* *INDENT-OFF* */ foreach_ip_interface_address (&im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index, 0 /* honor unnumbered */, ({ @@ -159,7 +152,6 @@ bfd_udp_is_echo_available (bfd_transport_e transport) return 1; } })); - /* *INDENT-ON* */ } } BFD_DBG ("No usable IP address for UDP echo - echo not available"); @@ -191,7 +183,6 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr) ip_interface_address_t *ia = NULL; ip4_main_t *im = &ip4_main; - /* *INDENT-OFF* */ foreach_ip_interface_address ( &im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index, 0 /* honor unnumbered */, ({ @@ -209,7 +200,6 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr) return 1; } })); - /* *INDENT-ON* */ BFD_ERR ("cannot find ip4 address, no usable address found"); return 0; } @@ -225,7 +215,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr) ip_interface_address_t *ia = NULL; ip6_main_t *im = &ip6_main; - /* *INDENT-OFF* */ foreach_ip_interface_address ( &im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index, 0 /* honor unnumbered */, ({ @@ -238,7 +227,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t 
* addr) return 1; } })); - /* *INDENT-ON* */ BFD_ERR ("cannot find ip6 address, no usable address found"); return 0; } @@ -384,16 +372,23 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, } static void -bfd_create_frame_to_next_node (vlib_main_t *vm, bfd_main_t *bm, - const bfd_session_t *bs, u32 bi, u32 next_node, +bfd_create_frame_to_next_node (vlib_main_t *vm, vlib_node_runtime_t *rt, + u32 bi, const bfd_session_t *bs, u32 next, vlib_combined_counter_main_t *tx_counter) { - vlib_frame_t *f = vlib_get_frame_to_node (vm, next_node); + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_node_t *from_node = vlib_get_node (vm, rt->node_index); + ASSERT (next < vec_len (from_node->next_nodes)); + u32 to_node_index = from_node->next_nodes[next]; + vlib_frame_t *f = vlib_get_frame_to_node (vm, to_node_index); u32 *to_next = vlib_frame_vector_args (f); to_next[0] = bi; f->n_vectors = 1; - vlib_put_frame_to_node (vm, next_node, f); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); + if (b->flags & VLIB_BUFFER_IS_TRACED) + { + f->frame_flags |= VLIB_NODE_FLAG_TRACE; + } + vlib_put_frame_to_node (vm, to_node_index, f); vlib_increment_combined_counter (tx_counter, vm->thread_index, bs->bs_idx, 1, vlib_buffer_length_in_chain (vm, b)); } @@ -415,10 +410,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node) switch (bs->transport) { case BFD_TRANSPORT_UDP4: - *next_node = bfd_udp_main.ip4_arp_idx; + *next_node = BFD_TX_IP4_ARP; return 1; case BFD_TRANSPORT_UDP6: - *next_node = bfd_udp_main.ip6_ndp_idx; + *next_node = BFD_TX_IP6_NDP; return 1; } break; @@ -426,10 +421,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node) switch (bs->transport) { case BFD_TRANSPORT_UDP4: - *next_node = bfd_udp_main.ip4_rewrite_idx; + *next_node = BFD_TX_IP4_REWRITE; return 1; case BFD_TRANSPORT_UDP6: - *next_node = bfd_udp_main.ip6_rewrite_idx; + *next_node = BFD_TX_IP6_REWRITE; return 1; } break; @@ -437,10 +432,10 @@ 
bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node) switch (bs->transport) { case BFD_TRANSPORT_UDP4: - *next_node = bfd_udp_main.ip4_midchain_idx; + *next_node = BFD_TX_IP4_MIDCHAIN; return 1; case BFD_TRANSPORT_UDP6: - *next_node = bfd_udp_main.ip6_midchain_idx; + *next_node = BFD_TX_IP6_MIDCHAIN; return 1; } break; @@ -452,35 +447,35 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node) } int -bfd_transport_udp4 (vlib_main_t *vm, u32 bi, const struct bfd_session_s *bs, - int is_echo) +bfd_transport_udp4 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi, + const struct bfd_session_s *bs, int is_echo) { u32 next_node; int rv = bfd_udp_calc_next_node (bs, &next_node); bfd_main_t *bm = bfd_udp_main.bfd_main; if (rv) { - bfd_create_frame_to_next_node (vm, bm, bs, bi, next_node, + bfd_create_frame_to_next_node (vm, rt, bi, bs, next_node, is_echo ? &bm->tx_echo_counter : - &bm->tx_counter); + &bm->tx_counter); } return rv; } int -bfd_transport_udp6 (vlib_main_t *vm, u32 bi, const struct bfd_session_s *bs, - int is_echo) +bfd_transport_udp6 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi, + const struct bfd_session_s *bs, int is_echo) { u32 next_node; int rv = bfd_udp_calc_next_node (bs, &next_node); bfd_main_t *bm = bfd_udp_main.bfd_main; if (rv) { - bfd_create_frame_to_next_node ( - vm, bfd_udp_main.bfd_main, bs, bi, next_node, - is_echo ? &bm->tx_echo_counter : &bm->tx_counter); + bfd_create_frame_to_next_node (vm, rt, bi, bs, next_node, + is_echo ? 
&bm->tx_echo_counter : + &bm->tx_counter); } - return 1; + return rv; } static bfd_session_t * @@ -1354,7 +1349,6 @@ bfd_udp4_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* * bfd input graph node declaration */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = { .function = bfd_udp4_input, .name = "bfd-udp4-input", @@ -1375,7 +1369,6 @@ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = { [BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN] = "ip4-midchain", }, }; -/* *INDENT-ON* */ static uword bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) @@ -1383,7 +1376,6 @@ bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) return bfd_udp_input (vm, rt, f, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = { .function = bfd_udp6_input, .name = "bfd-udp6-input", @@ -1404,7 +1396,6 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = { [BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN] = "ip6-midchain", }, }; -/* *INDENT-ON* */ /* * Process a frame of bfd echo packets @@ -1509,7 +1500,6 @@ bfd_echo_input_format_trace (u8 * s, va_list * args) /* * bfd input graph node declaration */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = { .function = bfd_udp_echo4_input, .name = "bfd-udp-echo4-input", @@ -1529,7 +1519,6 @@ VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = { [BFD_UDP_ECHO_INPUT_NEXT_REPLY_REWRITE] = "ip4-lookup", }, }; -/* *INDENT-ON* */ static uword bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, @@ -1538,7 +1527,6 @@ bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, return bfd_udp_echo_input (vm, rt, f, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = { .function = bfd_udp_echo6_input, .name = "bfd-udp-echo6-input", @@ -1559,7 +1547,6 @@ VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = { }, }; -/* *INDENT-ON* */ static clib_error_t * bfd_udp_sw_if_add_del 
(CLIB_UNUSED (vnet_main_t *vnm), u32 sw_if_index, @@ -1639,25 +1626,6 @@ bfd_udp_init (vlib_main_t * vm) sizeof (bfd_udp_key_t)); bfd_udp_main.bfd_main = &bfd_main; bfd_udp_main.vnet_main = vnet_get_main (); - vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip4-arp"); - ASSERT (node); - bfd_udp_main.ip4_arp_idx = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "ip6-discover-neighbor"); - ASSERT (node); - bfd_udp_main.ip6_ndp_idx = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "ip4-rewrite"); - ASSERT (node); - bfd_udp_main.ip4_rewrite_idx = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite"); - ASSERT (node); - bfd_udp_main.ip6_rewrite_idx = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "ip4-midchain"); - ASSERT (node); - bfd_udp_main.ip4_midchain_idx = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "ip6-midchain"); - ASSERT (node); - bfd_udp_main.ip6_midchain_idx = node->index; - bfd_udp_stats_init (&bfd_udp_main); bfd_udp_main.log_class = vlib_log_register_class ("bfd", "udp"); diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 866b5868b00..8f4bfee2bd7 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -82,7 +82,7 @@ int bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, * * @return 1 on success, 0 on failure */ -int bfd_transport_udp4 (vlib_main_t *vm, u32 bi, +int bfd_transport_udp4 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi, const struct bfd_session_s *bs, int is_echo); /** @@ -90,7 +90,7 @@ int bfd_transport_udp4 (vlib_main_t *vm, u32 bi, * * @return 1 on success, 0 on failure */ -int bfd_transport_udp6 (vlib_main_t *vm, u32 bi, +int bfd_transport_udp6 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi, const struct bfd_session_s *bs, int is_echo); /** diff --git a/src/vnet/bier/bier_update.c b/src/vnet/bier/bier_update.c index 4108d09f51e..fdb7c5c0865 100644 --- a/src/vnet/bier/bier_update.c +++ b/src/vnet/bier/bier_update.c @@ -129,7 +129,14 @@ done: 
VLIB_CLI_COMMAND (bier_route_command) = { .path = "bier route", - .short_help = "bier route [add|del] sd <sud-domain> set <set> bsl <bit-string-length> bp <bit-position> via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]", + .short_help = + "bier route [add|del] sd <sud-domain> set <set> bsl <bit-string-length> " + "bp <bit-position> via [next-hop-address] [next-hop-interface] " + "[next-hop-table <value>] [weight <value>] [preference <value>] " + "[udp-encap-id <value>] [ip4-lookup-in-table <value>] " + "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] " + "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] " + "[out-labels <value value value>]", .function = vnet_bier_route_cmd, }; diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c index 3fd73d7995f..d9287a8e23d 100644 --- a/src/vnet/bonding/bond_api.c +++ b/src/vnet/bonding/bond_api.c @@ -43,8 +43,11 @@ vl_api_bond_delete_t_handler (vl_api_bond_delete_t * mp) vl_api_bond_delete_reply_t *rmp; u32 sw_if_index = ntohl (mp->sw_if_index); + VALIDATE_SW_IF_INDEX (mp); + rv = bond_delete_if (vm, sw_if_index); + BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_BOND_DELETE_REPLY); } @@ -72,12 +75,10 @@ vl_api_bond_create_t_handler (vl_api_bond_create_t * mp) int rv = ap->rv; - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_BOND_CREATE_REPLY, ({ rmp->sw_if_index = ntohl (ap->sw_if_index); })); - /* *INDENT-ON* */ } static void @@ -105,12 +106,10 @@ vl_api_bond_create2_t_handler (vl_api_bond_create2_t * mp) int rv = ap->rv; - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_BOND_CREATE2_REPLY, ({ rmp->sw_if_index = ntohl (ap->sw_if_index); })); - /* *INDENT-ON* */ } static void @@ -168,6 +167,8 @@ static void 
vl_api_sw_interface_set_bond_weight_reply_t *rmp; int rv = 0; + VALIDATE_SW_IF_INDEX (mp); + clib_memset (ap, 0, sizeof (*ap)); ap->sw_if_index = ntohl (mp->sw_if_index); @@ -176,6 +177,7 @@ static void bond_set_intf_weight (vm, ap); rv = ap->rv; + BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SW_INTERFACE_SET_BOND_WEIGHT_REPLY); } @@ -187,12 +189,15 @@ vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp) bond_detach_member_args_t _a, *ap = &_a; int rv = 0; + VALIDATE_SW_IF_INDEX (mp); + clib_memset (ap, 0, sizeof (*ap)); ap->member = ntohl (mp->sw_if_index); bond_detach_member (vm, ap); rv = ap->rv; + BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_BOND_DETACH_SLAVE_REPLY); } @@ -204,12 +209,15 @@ vl_api_bond_detach_member_t_handler (vl_api_bond_detach_member_t * mp) bond_detach_member_args_t _a, *ap = &_a; int rv = 0; + VALIDATE_SW_IF_INDEX (mp); + clib_memset (ap, 0, sizeof (*ap)); ap->member = ntohl (mp->sw_if_index); bond_detach_member (vm, ap); rv = ap->rv; + BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_BOND_DETACH_MEMBER_REPLY); } diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index b0ded4734dd..cdc935ff10f 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -183,7 +183,6 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs) bond_interface_details_t *r_bondifs = NULL; bond_interface_details_t *bondif = NULL; - /* *INDENT-OFF* */ pool_foreach (bif, bm->interfaces) { vec_add2(r_bondifs, bondif, 1); clib_memset (bondif, 0, sizeof (*bondif)); @@ -201,7 +200,6 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs) bondif->active_members = vec_len (bif->active_members); bondif->members = vec_len (bif->members); } - /* *INDENT-ON* */ *out_bondifs = r_bondifs; @@ -547,7 +545,6 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input, return args.error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bond_create_command, static) = { .path = "create bond", .short_help = "create bond mode {round-robin | active-backup | 
broadcast | " @@ -555,7 +552,6 @@ VLIB_CLI_COMMAND (bond_create_command, static) = { "[hw-addr <mac-address>] [id <if-id>] [gso]", .function = bond_create_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -596,14 +592,12 @@ bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bond_delete__command, static) = { .path = "delete bond", .short_help = "delete bond {<interface> | sw_if_index <sw_idx>}", .function = bond_delete_command_fn, }; -/* *INDENT-ON* */ void bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args) @@ -823,14 +817,12 @@ add_member_interface_command_fn (vlib_main_t * vm, unformat_input_t * input, return args.error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (add_member_interface_command, static) = { .path = "bond add", .short_help = "bond add <BondEthernetx> <member-interface> " "[passive] [long-timeout]", .function = add_member_interface_command_fn, }; -/* *INDENT-ON* */ void bond_detach_member (vlib_main_t * vm, bond_detach_member_args_t * args) @@ -887,13 +879,11 @@ detach_interface_command_fn (vlib_main_t * vm, unformat_input_t * input, return args.error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (detach_interface_command, static) = { .path = "bond del", .short_help = "bond del <member-interface>", .function = detach_interface_command_fn, }; -/* *INDENT-ON* */ static void show_bond (vlib_main_t * vm) @@ -905,7 +895,6 @@ show_bond (vlib_main_t * vm) "interface name", "sw_if_index", "mode", "load balance", "active members", "members"); - /* *INDENT-OFF* */ pool_foreach (bif, bm->interfaces) { vlib_cli_output (vm, "%-16U %-12d %-13U %-13U %-14u %u", @@ -914,7 +903,6 @@ show_bond (vlib_main_t * vm) format_bond_load_balance, bif->lb, vec_len (bif->active_members), vec_len (bif->members)); } - /* *INDENT-ON* */ } static void @@ -924,7 +912,6 @@ show_bond_details (vlib_main_t * vm) bond_if_t *bif; u32 
*sw_if_index; - /* *INDENT-OFF* */ pool_foreach (bif, bm->interfaces) { vlib_cli_output (vm, "%U", format_bond_interface_name, bif->dev_instance); @@ -963,7 +950,6 @@ show_bond_details (vlib_main_t * vm) vlib_cli_output (vm, " sw_if_index: %d", bif->sw_if_index); vlib_cli_output (vm, " hw_if_index: %d", bif->hw_if_index); } - /* *INDENT-ON* */ } static clib_error_t * @@ -991,13 +977,11 @@ show_bond_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_bond_command, static) = { .path = "show bond", .short_help = "show bond [details]", .function = show_bond_fn, }; -/* *INDENT-ON* */ void bond_set_intf_weight (vlib_main_t * vm, bond_set_intf_weight_args_t * args) @@ -1097,14 +1081,12 @@ bond_set_intf_cmd (vlib_main_t * vm, unformat_input_t * input, return args.error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(set_interface_bond_cmd, static) = { .path = "set interface bond", .short_help = "set interface bond <interface> | sw_if_index <idx>" " weight <value>", .function = bond_set_intf_cmd, }; -/* *INDENT-ON* */ clib_error_t * bond_cli_init (vlib_main_t * vm) diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c index ca48585fa0a..a0b93fccde1 100644 --- a/src/vnet/bonding/device.c +++ b/src/vnet/bonding/device.c @@ -111,14 +111,6 @@ bond_set_l2_mode_function (vnet_main_t * vnm, return 0; } -static __clib_unused clib_error_t * -bond_subif_add_del_function (vnet_main_t * vnm, u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - /* Nothing for now */ - return 0; -} - static clib_error_t * bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { @@ -616,16 +608,13 @@ bond_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (bond_process_node) = { .function = bond_process, .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, .type = VLIB_NODE_TYPE_PROCESS, .name = "bond-process", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ 
VNET_DEVICE_CLASS (bond_dev_class) = { .name = "bond", .tx_function_n_errors = BOND_TX_N_ERROR, @@ -633,12 +622,10 @@ VNET_DEVICE_CLASS (bond_dev_class) = { .format_device_name = format_bond_interface_name, .set_l2_mode_function = bond_set_l2_mode_function, .admin_up_down_function = bond_interface_admin_up_down, - .subif_add_del_function = bond_subif_add_del_function, .format_tx_trace = format_bond_tx_trace, .mac_addr_add_del_function = bond_add_del_mac_address, }; -/* *INDENT-ON* */ static clib_error_t * bond_member_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c index 21a968177fe..66de1e4dd80 100644 --- a/src/vnet/bonding/node.c +++ b/src/vnet/bonding/node.c @@ -397,7 +397,6 @@ bond_input_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (bond_input_node) = { .name = "bond-input", .vector_size = sizeof (u32), @@ -421,7 +420,6 @@ VNET_FEATURE_INIT (bond_input, static) = .node_name = "bond-input", .runs_before = VNET_FEATURES ("ethernet-input"), }; -/* *INDENT-ON* */ static clib_error_t * bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 144f62ac17a..2f34aa4b5fc 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -467,7 +467,7 @@ typedef struct } qos; u8 loop_counter; - u8 __unused[5]; + u8 pad[5]; /* unused */ /** * The L4 payload size set on input on GSO enabled interfaces diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c index 9353a647277..fc57b006d37 100644 --- a/src/vnet/classify/classify_api.c +++ b/src/vnet/classify/classify_api.c @@ -115,9 +115,8 @@ static void vl_api_classify_pcap_set_table_t_handler u32 table_index = ntohl (mp->table_index); u32 sw_if_index = ntohl (mp->sw_if_index); - if (sw_if_index == ~0 - || sw_if_index >= vec_len (cm->classify_table_index_by_sw_if_index) - || (table_index != ~0 && pool_is_free_index (cm->tables, 
table_index))) + if (sw_if_index == ~0 || + (table_index != ~0 && pool_is_free_index (cm->tables, table_index))) { rv = VNET_API_ERROR_INVALID_VALUE; goto out; @@ -380,7 +379,6 @@ static void vl_api_classify_add_del_table_t_handler current_data_flag, current_data_offset, mp->is_add, mp->del_chain); out: - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_CLASSIFY_ADD_DEL_TABLE_REPLY, ({ if (rv == 0 && mp->is_add) @@ -397,7 +395,6 @@ out: rmp->new_table_index = ~0; } })); - /* *INDENT-ON* */ } static void vl_api_classify_add_del_session_t_handler @@ -534,12 +531,10 @@ vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp) u32 *table_ids = 0; u32 count; - /* *INDENT-OFF* */ pool_foreach (t, cm->tables) { vec_add1 (table_ids, ntohl(t - cm->tables)); } - /* *INDENT-ON* */ count = vec_len (table_ids); vl_api_classify_table_ids_reply_t *rmp; @@ -596,7 +591,6 @@ static void BAD_SW_IF_INDEX_LABEL; - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_CLASSIFY_TABLE_BY_INTERFACE_REPLY, ({ rmp->sw_if_index = ntohl(sw_if_index); @@ -604,7 +598,6 @@ static void rmp->ip4_table_id = ntohl(acl[IN_OUT_ACL_TABLE_IP4]); rmp->ip6_table_id = ntohl(acl[IN_OUT_ACL_TABLE_IP6]); })); - /* *INDENT-ON* */ vec_free (acl); } @@ -695,7 +688,6 @@ vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (t, cm->tables) { if (table_id == t - cm->tables) @@ -729,7 +721,6 @@ vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp) break; } } - /* *INDENT-ON* */ } static void diff --git a/src/vnet/classify/flow_classify.c b/src/vnet/classify/flow_classify.c index afdadc66235..7197558a77a 100644 --- a/src/vnet/classify/flow_classify.c +++ b/src/vnet/classify/flow_classify.c @@ -150,7 +150,6 @@ set_flow_classify_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_input_acl_command, static) = { .path = "set flow classify", .short_help = @@ -158,7 +157,6 @@ VLIB_CLI_COMMAND 
(set_input_acl_command, static) = { " [ip6-table <index>] [del]", .function = set_flow_classify_command_fn, }; -/* *INDENT-ON* */ static uword unformat_table_type (unformat_input_t * input, va_list * va) @@ -215,13 +213,11 @@ show_flow_classify_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_flow_classify_command, static) = { .path = "show classify flow", .short_help = "show classify flow type [ip4|ip6]", .function = show_flow_classify_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/classify/flow_classify_node.c b/src/vnet/classify/flow_classify_node.c index c0a29992fb4..a34bab6190b 100644 --- a/src/vnet/classify/flow_classify_node.c +++ b/src/vnet/classify/flow_classify_node.c @@ -279,7 +279,6 @@ VLIB_NODE_FN (ip4_flow_classify_node) (vlib_main_t * vm, return flow_classify_inline (vm, node, frame, FLOW_CLASSIFY_TABLE_IP4); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_flow_classify_node) = { .name = "ip4-flow-classify", .vector_size = sizeof (u32), @@ -291,7 +290,6 @@ VLIB_REGISTER_NODE (ip4_flow_classify_node) = { [FLOW_CLASSIFY_NEXT_INDEX_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_flow_classify_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -300,7 +298,6 @@ VLIB_NODE_FN (ip6_flow_classify_node) (vlib_main_t * vm, return flow_classify_inline (vm, node, frame, FLOW_CLASSIFY_TABLE_IP6); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_flow_classify_node) = { .name = "ip6-flow-classify", .vector_size = sizeof (u32), @@ -313,7 +310,6 @@ VLIB_REGISTER_NODE (ip6_flow_classify_node) = { }, }; -/* *INDENT-ON* */ static clib_error_t * diff --git a/src/vnet/classify/in_out_acl.c b/src/vnet/classify/in_out_acl.c index 752305e1cc2..af765139332 100644 --- a/src/vnet/classify/in_out_acl.c +++ b/src/vnet/classify/in_out_acl.c @@ -255,7 +255,6 @@ set_output_acl_command_fn (vlib_main_t * vm, * Note: Only one table index per API call is allowed. 
* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_input_acl_command, static) = { .path = "set interface input acl", .short_help = @@ -271,7 +270,6 @@ VLIB_CLI_COMMAND (set_output_acl_command, static) = { " [ip6-table <index>] [l2-table <index>] [del]", .function = set_output_acl_command_fn, }; -/* *INDENT-ON* */ clib_error_t * in_out_acl_init (vlib_main_t * vm) @@ -284,12 +282,10 @@ in_out_acl_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (in_out_acl_init) = { .runs_after = VLIB_INITS("ip_in_out_acl_init"), }; -/* *INDENT-ON* */ uword unformat_acl_type (unformat_input_t * input, va_list * args) @@ -392,7 +388,6 @@ show_outacl_command_fn (vlib_main_t * vm, IN_OUT_ACL_OUTPUT_TABLE_GROUP); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_inacl_command, static) = { .path = "show inacl", .short_help = "show inacl type [ip4|ip6|l2]", @@ -403,7 +398,6 @@ VLIB_CLI_COMMAND (show_outacl_command, static) = { .short_help = "show outacl type [ip4|ip6|l2]", .function = show_outacl_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/classify/ip_classify.c b/src/vnet/classify/ip_classify.c index 9454ae91937..e8562c6912c 100644 --- a/src/vnet/classify/ip_classify.c +++ b/src/vnet/classify/ip_classify.c @@ -309,7 +309,6 @@ VLIB_NODE_FN (ip4_classify_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_classify_node) = { .name = "ip4-classify", .vector_size = sizeof (u32), @@ -320,7 +319,6 @@ VLIB_REGISTER_NODE (ip4_classify_node) = { .n_next_nodes = 0, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_classify_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -330,7 +328,6 @@ VLIB_NODE_FN (ip6_classify_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_classify_node) = { .name = "ip6-classify", .vector_size = sizeof (u32), @@ -341,7 +338,6 @@ VLIB_REGISTER_NODE (ip6_classify_node) = { .n_next_nodes = 0, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT static clib_error_t * diff 
--git a/src/vnet/classify/pcap_classify.h b/src/vnet/classify/pcap_classify.h index e079816f62c..a4ebcd1241c 100644 --- a/src/vnet/classify/pcap_classify.h +++ b/src/vnet/classify/pcap_classify.h @@ -47,11 +47,11 @@ vnet_is_packet_pcaped (vnet_pcap_t *pp, vlib_buffer_t *b, u32 sw_if_index) return 0; /* wrong error */ if (filter_classify_table_index != ~0 && - vnet_is_packet_traced_inline (b, filter_classify_table_index, - 0 /* full classify */) != 1) + pp->current_filter_function (b, filter_classify_table_index, + 0 /* full classify */) != 1) return 0; /* not matching the filter, skip */ - return 1; /* success */ + return 1; } /* diff --git a/src/vnet/classify/policer_classify.c b/src/vnet/classify/policer_classify.c index 4cf12a24e9e..814adefc987 100644 --- a/src/vnet/classify/policer_classify.c +++ b/src/vnet/classify/policer_classify.c @@ -164,7 +164,6 @@ set_policer_classify_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_policer_classify_command, static) = { .path = "set policer classify", .short_help = @@ -172,7 +171,6 @@ VLIB_CLI_COMMAND (set_policer_classify_command, static) = { " [ip6-table <index>] [l2-table <index>] [del]", .function = set_policer_classify_command_fn, }; -/* *INDENT-ON* */ static uword unformat_table_type (unformat_input_t * input, va_list * va) @@ -231,13 +229,11 @@ show_policer_classify_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_policer_classify_command, static) = { .path = "show classify policer", .short_help = "show classify policer type [ip4|ip6|l2]", .function = show_policer_classify_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/classify/trace_classify.h b/src/vnet/classify/trace_classify.h index bc25ecd0ff7..03421210d03 100644 --- a/src/vnet/classify/trace_classify.h +++ b/src/vnet/classify/trace_classify.h @@ -29,6 +29,8 @@ * @param u32 classify_table_index - classifier table index * @return 0 => 
no trace, 1 => trace, -1 => error */ +int vnet_is_packet_traced (vlib_buffer_t *b, u32 classify_table_index, + int func); static inline int vnet_is_packet_traced_inline (vlib_buffer_t * b, @@ -43,6 +45,9 @@ vnet_is_packet_traced_inline (vlib_buffer_t * b, if (func != 0) return -1; + if (classify_table_index == ~0) + return -1; + /* This will happen... */ if (pool_is_free_index (vcm->tables, classify_table_index)) return -1; diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 305521be267..77c1c81f9c4 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -640,12 +640,10 @@ unlock: return rv; } -/* *INDENT-OFF* */ typedef CLIB_PACKED(struct { ethernet_header_t eh; ip4_header_t ip; }) classify_data_or_mask_t; -/* *INDENT-ON* */ u32 vnet_classify_hash_packet (const vnet_classify_table_t *t, u8 *h) @@ -777,8 +775,10 @@ vnet_classify_add_del_table (vnet_classify_main_t *cm, const u8 *mask, else /* update */ { vnet_classify_main_t *cm = &vnet_classify_main; - t = pool_elt_at_index (cm->tables, *table_index); + if (pool_is_free_index (cm->tables, *table_index)) + return VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND; + t = pool_elt_at_index (cm->tables, *table_index); t->next_table_index = next_table_index; } return 0; @@ -1331,12 +1331,11 @@ unformat_classify_mask (unformat_input_t * input, va_list * args) return 0; } -#define foreach_l2_input_next \ -_(drop, DROP) \ -_(ethernet, ETHERNET_INPUT) \ -_(ip4, IP4_INPUT) \ -_(ip6, IP6_INPUT) \ -_(li, LI) +#define foreach_l2_input_next \ + _ (drop, DROP) \ + _ (ethernet, ETHERNET_INPUT) \ + _ (ip4, IP4_INPUT) \ + _ (ip6, IP6_INPUT) uword unformat_l2_input_next_index (unformat_input_t * input, va_list * args) @@ -1636,7 +1635,6 @@ classify_table_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (classify_table, static) = { .path = "classify table", @@ -1648,7 +1646,6 @@ VLIB_CLI_COMMAND (classify_table, static) = "\n [del] [del-chain]", 
.function = classify_table_command_fn, }; -/* *INDENT-ON* */ static int filter_table_mask_compare (void *a1, void *a2) @@ -2052,7 +2049,7 @@ vlib_enable_disable_pkt_trace_filter (int enable) /*? * Construct an arbitrary set of packet classifier tables for use with - * "pcap rx | tx trace," and with the vpp packet tracer + * "pcap trace rx | tx," and with the vpp packet tracer * * Packets which match a rule in the classifier table chain * will be traced. The tables are automatically ordered so that @@ -2095,10 +2092,10 @@ vlib_enable_disable_pkt_trace_filter (int enable) * @cliexpar * Configuring the classify filter * - * Configure a simple classify filter, and configure pcap rx trace to use it: + * Configure a simple classify filter, and configure pcap trace rx to use it: * * @cliexcmd{classify filter rx mask l3 ip4 src match l3 ip4 src 192.168.1.11} - * <b><em>pcap rx trace on max 100 filter</em></b> + * <b><em>pcap trace rx max 100 filter</em></b> * * Configure another fairly simple filter * @@ -2124,7 +2121,6 @@ vlib_enable_disable_pkt_trace_filter (int enable) * The verbose form displays all of the match rules, with hit-counters * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (classify_filter, static) = { .path = "classify filter", @@ -2134,7 +2130,6 @@ VLIB_CLI_COMMAND (classify_filter, static) = " [buckets <nn>] [memory-size <n>]", .function = classify_filter_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_classify_filter_command_fn (vlib_main_t * vm, @@ -2214,14 +2209,12 @@ show_classify_filter_command_fn (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_classify_filter, static) = { .path = "show classify filter", .short_help = "show classify filter [verbose [nn]]", .function = show_classify_filter_command_fn, }; -/* *INDENT-ON* */ u8 * format_vnet_classify_table (u8 *s, va_list *args) @@ -2284,13 +2277,11 @@ show_classify_tables_command_fn (vlib_main_t * vm, break; } - /* *INDENT-OFF* */ pool_foreach (t, cm->tables) { if 
(match_index == ~0 || (match_index == t - cm->tables)) vec_add1 (indices, t - cm->tables); } - /* *INDENT-ON* */ if (vec_len (indices)) { @@ -2310,13 +2301,11 @@ show_classify_tables_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_classify_table_command, static) = { .path = "show classify tables", .short_help = "show classify tables [index <nn>]", .function = show_classify_tables_command_fn, }; -/* *INDENT-ON* */ uword unformat_l4_match (unformat_input_t * input, va_list * args) @@ -2783,9 +2772,9 @@ unformat_classify_match (unformat_input_t * input, va_list * args) int vnet_classify_add_del_session (vnet_classify_main_t *cm, u32 table_index, - const u8 *match, u32 hit_next_index, + const u8 *match, u16 hit_next_index, u32 opaque_index, i32 advance, u8 action, - u16 metadata, int is_add) + u32 metadata, int is_add) { vnet_classify_table_t *t; vnet_classify_entry_5_t _max_e __attribute__ ((aligned (16))); @@ -2929,7 +2918,6 @@ classify_session_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (classify_session_command, static) = { .path = "classify session", .short_help = @@ -2939,7 +2927,6 @@ VLIB_CLI_COMMAND (classify_session_command, static) = { "\n [action set-ip4-fib-id|set-ip6-fib-id|set-sr-policy-index <n>] [del]", .function = classify_session_command_fn, }; -/* *INDENT-ON* */ static uword unformat_opaque_sw_if_index (unformat_input_t * input, va_list * args) @@ -3083,7 +3070,12 @@ vnet_is_packet_traced (vlib_buffer_t * b, u32 classify_table_index, int func) { return vnet_is_packet_traced_inline (b, classify_table_index, func); } - +VLIB_REGISTER_TRACE_FILTER_FUNCTION (vnet_is_packet_traced_fn, static) = { + .name = "vnet_is_packet_traced", + .description = "classifier based filter", + .priority = 50, + .function = vnet_is_packet_traced +}; #define TEST_CODE 0 @@ -3352,7 +3344,6 @@ test_classify_command_fn (vlib_main_t * vm, return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND 
(test_classify_command, static) = { .path = "test classify", .short_help = @@ -3361,7 +3352,6 @@ VLIB_CLI_COMMAND (test_classify_command, static) = { " [churn-test]", .function = test_classify_command_fn, }; -/* *INDENT-ON* */ #endif /* TEST_CODE */ /* diff --git a/src/vnet/classify/vnet_classify.h b/src/vnet/classify/vnet_classify.h index 143833dfb20..768593c45af 100644 --- a/src/vnet/classify/vnet_classify.h +++ b/src/vnet/classify/vnet_classify.h @@ -89,15 +89,17 @@ typedef struct _vnet_classify_entry /* last heard time */ f64 last_heard; + u32 metadata; + + /* Graph node next index */ + u16 next_index; + + vnet_classify_action_t action; + /* Really only need 1 bit */ u8 flags; #define VNET_CLASSIFY_ENTRY_FREE (1<<0) - vnet_classify_action_t action; - u16 metadata; - /* Graph node next index */ - u32 next_index; - /* Must be aligned to a 16-octet boundary */ u32x4 key[0]; } vnet_classify_entry_t; @@ -586,9 +588,9 @@ vnet_classify_table_t *vnet_classify_new_table (vnet_classify_main_t *cm, u32 match_n_vectors); int vnet_classify_add_del_session (vnet_classify_main_t *cm, u32 table_index, - const u8 *match, u32 hit_next_index, + const u8 *match, u16 hit_next_index, u32 opaque_index, i32 advance, u8 action, - u16 metadata, int is_add); + u32 metadata, int is_add); int vnet_classify_add_del_table (vnet_classify_main_t *cm, const u8 *mask, u32 nbuckets, u32 memory_size, u32 skip, diff --git a/src/vnet/crypto/cli.c b/src/vnet/crypto/cli.c index 4ee14ac1100..2ca66f228c3 100644 --- a/src/vnet/crypto/cli.c +++ b/src/vnet/crypto/cli.c @@ -36,16 +36,13 @@ show_crypto_engines_command_fn (vlib_main_t * vm, } vlib_cli_output (vm, "%-20s%-8s%s", "Name", "Prio", "Description"); - /* *INDENT-OFF* */ vec_foreach (p, cm->engines) { vlib_cli_output (vm, "%-20s%-8u%s", p->name, p->priority, p->desc); } - /* *INDENT-ON* */ return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_crypto_engines_command, static) = { .path = "show crypto engines", @@ -145,20 +142,18 @@ 
show_crypto_handlers_command_fn (vlib_main_t * vm, "Chained"); for (i = 0; i < VNET_CRYPTO_N_ALGS; i++) - vlib_cli_output (vm, "%-16U%U", format_vnet_crypto_alg, i, + vlib_cli_output (vm, "%-20U%U", format_vnet_crypto_alg, i, format_vnet_crypto_handlers, i); return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_crypto_handlers_command, static) = { .path = "show crypto handlers", .short_help = "show crypto handlers", .function = show_crypto_handlers_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * set_crypto_handler_command_fn (vlib_main_t * vm, @@ -209,13 +204,11 @@ set_crypto_handler_command_fn (vlib_main_t * vm, char *key; u8 *value; - /* *INDENT-OFF* */ hash_foreach_mem (key, value, cm->alg_index_by_name, ({ (void) value; rc += vnet_crypto_set_handler2 (key, engine, oct); })); - /* *INDENT-ON* */ if (rc) vlib_cli_output (vm, "failed to set crypto engine!"); @@ -241,7 +234,6 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_crypto_handler_command, static) = { .path = "set crypto handler", @@ -249,7 +241,6 @@ VLIB_CLI_COMMAND (set_crypto_handler_command, static) = " [simple|chained]", .function = set_crypto_handler_command_fn, }; -/* *INDENT-ON* */ static u8 * format_vnet_crypto_async_handlers (u8 * s, va_list * args) @@ -300,14 +291,12 @@ show_crypto_async_handlers_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_crypto_async_handlers_command, static) = { .path = "show crypto async handlers", .short_help = "show crypto async handlers", .function = show_crypto_async_handlers_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * @@ -316,7 +305,6 @@ show_crypto_async_status_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { vnet_crypto_main_t *cm = &crypto_main; - u32 skip_master = vlib_num_workers () > 0; vlib_thread_main_t *tm = vlib_get_thread_main (); unformat_input_t _line_input, *line_input = &_line_input; int i; @@ -324,12 +312,7 @@ show_crypto_async_status_command_fn (vlib_main_t * vm, if 
(unformat_user (input, unformat_line_input, line_input)) unformat_free (line_input); - vlib_cli_output (vm, "Crypto async dispatch mode: %s", - cm->dispatch_mode == - VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? "POLLING" : - "INTERRUPT"); - - for (i = skip_master; i < tm->n_vlib_mains; i++) + for (i = 0; i < tm->n_vlib_mains; i++) { vlib_node_state_t state = vlib_node_get_state ( vlib_get_main_by_index (i), cm->crypto_node_index); @@ -343,14 +326,12 @@ show_crypto_async_status_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_crypto_async_status_command, static) = { .path = "show crypto async status", .short_help = "show crypto async status", .function = show_crypto_async_status_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * set_crypto_async_handler_command_fn (vlib_main_t * vm, @@ -394,13 +375,11 @@ set_crypto_async_handler_command_fn (vlib_main_t * vm, char *key; u8 *value; - /* *INDENT-OFF* */ hash_foreach_mem (key, value, cm->async_alg_index_by_name, ({ (void) value; rc += vnet_crypto_set_async_handler2 (key, engine); })); - /* *INDENT-ON* */ if (rc) vlib_cli_output (vm, "failed to set crypto engine!"); @@ -426,57 +405,52 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_crypto_async_handler_command, static) = { .path = "set crypto async handler", .short_help = "set crypto async handler type [type2 type3 ...] 
engine", .function = set_crypto_async_handler_command_fn, }; -/* *INDENT-ON* */ - -static inline void -print_crypto_async_dispatch_warning () -{ - clib_warning ("Switching dispatch mode might not work is some situations."); - clib_warning - ("Use 'show crypto async status' to verify that the nodes' states were set"); - clib_warning ("and if not, set 'crypto async dispatch' mode again."); -} static clib_error_t * -set_crypto_async_dispatch_polling_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) +set_crypto_async_dispatch_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) { - print_crypto_async_dispatch_warning (); - vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_POLLING); - return 0; -} + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + u8 adaptive = 0; + u8 mode = VLIB_NODE_STATE_INTERRUPT; -static clib_error_t * -set_crypto_async_dispatch_interrupt_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - print_crypto_async_dispatch_warning (); - vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT); - return 0; + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "polling")) + mode = VLIB_NODE_STATE_POLLING; + else if (unformat (line_input, "interrupt")) + mode = VLIB_NODE_STATE_INTERRUPT; + else if (unformat (line_input, "adaptive")) + adaptive = 1; + else + { + error = clib_error_return (0, "invalid params"); + goto done; + } + } + + vnet_crypto_set_async_dispatch (mode, adaptive); +done: + unformat_free (line_input); + return error; } -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_crypto_async_dispatch_polling_command, static) = -{ - .path = "set crypto async dispatch polling", - .short_help = "set crypto async dispatch polling|interrupt", - .function = 
set_crypto_async_dispatch_polling_command_fn, -}; -VLIB_CLI_COMMAND (set_crypto_async_dispatch_interrupt_command, static) = -{ - .path = "set crypto async dispatch interrupt", - .short_help = "set crypto async dispatch polling|interrupt", - .function = set_crypto_async_dispatch_interrupt_command_fn, +VLIB_CLI_COMMAND (set_crypto_async_dispatch_mode_command, static) = { + .path = "set crypto async dispatch mode", + .short_help = "set crypto async dispatch mode <polling|interrupt|adaptive>", + .function = set_crypto_async_dispatch_command_fn, }; + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/crypto/crypto.api b/src/vnet/crypto/crypto.api index 6eccd8524ba..8fec805dcfc 100644 --- a/src/vnet/crypto/crypto.api +++ b/src/vnet/crypto/crypto.api @@ -28,7 +28,8 @@ enum crypto_op_class_type:u8 CRYPTO_API_OP_BOTH, }; - /** \brief crypto: use polling or interrupt dispatch + /** \brief crypto: Use polling or interrupt dispatch. + Always unset the adaptive flag (that is why it is deprecated). @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param mode - dispatch mode @@ -36,11 +37,29 @@ enum crypto_op_class_type:u8 autoreply define crypto_set_async_dispatch { + option deprecated; + option replaced_by="crypto_set_async_dispatch_v2"; u32 client_index; u32 context; vl_api_crypto_dispatch_mode_t mode; }; + /** \brief crypto: Change the way crypto operations are dispatched. + Use adaptive (or not) mode, starting in polling or interrupt state. 
+ @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mode - dispatch initial state + @param adaptive - whether on not the state shall change depending on load +*/ + +autoreply define crypto_set_async_dispatch_v2 +{ + u32 client_index; + u32 context; + vl_api_crypto_dispatch_mode_t mode; + bool adaptive; +}; + /** \brief crypto: set crypto handler @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/crypto/crypto.c b/src/vnet/crypto/crypto.c index 1c724a346c2..c8e7ca90c9d 100644 --- a/src/vnet/crypto/crypto.c +++ b/src/vnet/crypto/crypto.c @@ -192,13 +192,16 @@ vnet_crypto_is_set_handler (vnet_crypto_alg_t alg) vnet_crypto_op_id_t opt = 0; int i; - if (alg > vec_len (cm->algs)) + if (alg >= vec_len (cm->algs)) return 0; for (i = 0; i < VNET_CRYPTO_OP_N_TYPES; i++) if ((opt = cm->algs[alg].op_by_type[i]) != 0) break; + if (opt >= vec_len (cm->ops_handlers)) + return 0; + return NULL != cm->ops_handlers[opt]; } @@ -284,8 +287,6 @@ vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index, vnet_crypto_async_op_data_t *otd = cm->async_opt_data + opt; vec_validate_aligned (cm->enqueue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (cm->dequeue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS, - CLIB_CACHE_LINE_BYTES); if (!enqueue_hdl) return; @@ -370,6 +371,8 @@ vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index, e->dequeue_handler = deq_fn; + vnet_crypto_update_cm_dequeue_handlers (); + return; } @@ -446,11 +449,9 @@ vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data, key->alg = alg; vec_validate_aligned (key->data, length - 1, CLIB_CACHE_LINE_BYTES); clib_memcpy (key->data, data, length); - /* *INDENT-OFF* */ vec_foreach (engine, cm->engines) if (engine->key_op_handler) engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, 
index); - /* *INDENT-ON* */ return index; } @@ -461,25 +462,34 @@ vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index) vnet_crypto_engine_t *engine; vnet_crypto_key_t *key = pool_elt_at_index (cm->keys, index); - /* *INDENT-OFF* */ vec_foreach (engine, cm->engines) if (engine->key_op_handler) engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_DEL, index); - /* *INDENT-ON* */ if (key->type == VNET_CRYPTO_KEY_TYPE_DATA) { - clib_memset (key->data, 0, vec_len (key->data)); + clib_memset (key->data, 0xfe, vec_len (key->data)); vec_free (key->data); } else if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) { - key->index_crypto = key->index_integ = 0; + key->index_crypto = key->index_integ = ~0; } pool_put (cm->keys, key); } +void +vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index) +{ + vnet_crypto_main_t *cm = &crypto_main; + vnet_crypto_engine_t *engine; + + vec_foreach (engine, cm->engines) + if (engine->key_op_handler) + engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_MODIFY, index); +} + vnet_crypto_async_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg, vnet_crypto_alg_t integ_alg) @@ -518,50 +528,13 @@ vnet_crypto_key_add_linked (vlib_main_t * vm, key->index_integ = index_integ; key->async_alg = linked_alg; - /* *INDENT-OFF* */ vec_foreach (engine, cm->engines) if (engine->key_op_handler) engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, index); - /* *INDENT-ON* */ return index; } -clib_error_t * -crypto_dispatch_enable_disable (int is_enable) -{ - vnet_crypto_main_t *cm = &crypto_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 skip_master = vlib_num_workers () > 0, i; - vlib_node_state_t state = VLIB_NODE_STATE_DISABLED; - u8 state_change = 0; - - CLIB_MEMORY_STORE_BARRIER (); - if (is_enable && cm->async_refcnt > 0) - { - state_change = 1; - state = - cm->dispatch_mode == - VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? 
VLIB_NODE_STATE_POLLING : - VLIB_NODE_STATE_INTERRUPT; - } - - if (!is_enable && cm->async_refcnt == 0) - { - state_change = 1; - state = VLIB_NODE_STATE_DISABLED; - } - - if (state_change) - for (i = skip_master; i < tm->n_vlib_mains; i++) - { - vlib_main_t *ovm = vlib_get_main_by_index (i); - if (state != vlib_node_get_state (ovm, cm->crypto_node_index)) - vlib_node_set_state (ovm, cm->crypto_node_index, state); - } - return 0; -} - static_always_inline void crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od, vnet_crypto_async_op_id_t id, u32 ei) @@ -573,7 +546,6 @@ crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od, { od->active_engine_index_async = ei; cm->enqueue_handlers[id] = ce->enqueue_handlers[id]; - cm->dequeue_handlers[id] = ce->dequeue_handler; } } @@ -585,9 +557,6 @@ vnet_crypto_set_async_handler2 (char *alg_name, char *engine) vnet_crypto_async_alg_data_t *ad; int i; - if (cm->async_refcnt) - return -EBUSY; - p = hash_get_mem (cm->async_alg_index_by_name, alg_name); if (!p) return -1; @@ -626,13 +595,11 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name) if (!pn) return ~0; - /* *INDENT-OFF* */ - vec_foreach (cm->next_nodes, nn) - { - if (nn->node_idx == pn->index) - return nn->next_idx; - } - /* *INDENT-ON* */ + vec_foreach (nn, cm->next_nodes) + { + if (nn->node_idx == pn->index) + return nn->next_idx; + } vec_validate (cm->next_nodes, index); nn = vec_elt_at_index (cm->next_nodes, index); @@ -645,76 +612,19 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name) } void -vnet_crypto_request_async_mode (int is_enable) -{ - vnet_crypto_main_t *cm = &crypto_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 skip_master = vlib_num_workers () > 0, i; - vlib_node_state_t state = VLIB_NODE_STATE_DISABLED; - u8 state_change = 0; - - CLIB_MEMORY_STORE_BARRIER (); - if (is_enable && cm->async_refcnt == 0) - { - state_change = 1; - state = - cm->dispatch_mode == 
VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? - VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_INTERRUPT; - } - if (!is_enable && cm->async_refcnt == 1) - { - state_change = 1; - state = VLIB_NODE_STATE_DISABLED; - } - - if (state_change) - { - - for (i = skip_master; i < tm->n_vlib_mains; i++) - { - vlib_main_t *ovm = vlib_get_main_by_index (i); - if (state != vlib_node_get_state (ovm, cm->crypto_node_index)) - vlib_node_set_state (ovm, cm->crypto_node_index, state); - } - - if (is_enable) - vnet_crypto_update_cm_dequeue_handlers (); - } - - if (is_enable) - cm->async_refcnt += 1; - else if (cm->async_refcnt > 0) - cm->async_refcnt -= 1; -} - -void -vnet_crypto_set_async_dispatch_mode (u8 mode) +vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive) { - vnet_crypto_main_t *cm = &crypto_main; - u32 skip_master = vlib_num_workers () > 0, i; vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_node_state_t state = VLIB_NODE_STATE_DISABLED; + u32 i, node_index = crypto_main.crypto_node_index; + vlib_node_state_t state = + mode ? VLIB_NODE_STATE_INTERRUPT : VLIB_NODE_STATE_POLLING; - CLIB_MEMORY_STORE_BARRIER (); - cm->dispatch_mode = mode; - if (mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT) - { - state = - cm->async_refcnt == 0 ? - VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_INTERRUPT; - } - else if (mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING) - { - state = - cm->async_refcnt == 0 ? 
- VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_POLLING; - } - - for (i = skip_master; i < tm->n_vlib_mains; i++) + for (i = vlib_num_workers () > 0; i < tm->n_vlib_mains; i++) { vlib_main_t *ovm = vlib_get_main_by_index (i); - if (state != vlib_node_get_state (ovm, cm->crypto_node_index)) - vlib_node_set_state (ovm, cm->crypto_node_index, state); + vlib_node_set_state (ovm, node_index, state); + vlib_node_set_flag (ovm, node_index, VLIB_NODE_FLAG_ADAPTIVE_MODE, + adaptive); } } @@ -813,15 +723,13 @@ vnet_crypto_init (vlib_main_t * vm) vlib_thread_main_t *tm = vlib_get_thread_main (); vnet_crypto_thread_t *ct = 0; - cm->dispatch_mode = VNET_CRYPTO_ASYNC_DISPATCH_POLLING; cm->engine_index_by_name = hash_create_string ( /* size */ 0, sizeof (uword)); cm->alg_index_by_name = hash_create_string (0, sizeof (uword)); cm->async_alg_index_by_name = hash_create_string (0, sizeof (uword)); vec_validate_aligned (cm->threads, tm->n_vlib_mains, CLIB_CACHE_LINE_BYTES); vec_foreach (ct, cm->threads) - pool_alloc_aligned (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE, - CLIB_CACHE_LINE_BYTES); + pool_init_fixed (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE); vec_validate (cm->algs, VNET_CRYPTO_N_ALGS); vec_validate (cm->async_algs, VNET_CRYPTO_N_ASYNC_ALGS); diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h index e24ad1091f3..89cf70d19e3 100644 --- a/src/vnet/crypto/crypto.h +++ b/src/vnet/crypto/crypto.h @@ -33,11 +33,14 @@ _(AES_256_CTR, "aes-256-ctr", 32) /* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES */ -#define foreach_crypto_aead_alg \ - _(AES_128_GCM, "aes-128-gcm", 16) \ - _(AES_192_GCM, "aes-192-gcm", 24) \ - _(AES_256_GCM, "aes-256-gcm", 32) \ - _(CHACHA20_POLY1305, "chacha20-poly1305", 32) +#define foreach_crypto_aead_alg \ + _ (AES_128_GCM, "aes-128-gcm", 16) \ + _ (AES_192_GCM, "aes-192-gcm", 24) \ + _ (AES_256_GCM, "aes-256-gcm", 32) \ + _ (AES_128_NULL_GMAC, "aes-128-null-gmac", 16) \ + _ (AES_192_NULL_GMAC, "aes-192-null-gmac", 24) \ + _ 
(AES_256_NULL_GMAC, "aes-256-null-gmac", 32) \ + _ (CHACHA20_POLY1305, "chacha20-poly1305", 32) #define foreach_crypto_hash_alg \ _ (SHA1, "sha-1") \ @@ -89,6 +92,12 @@ typedef enum _ (AES_192_GCM, "aes-192-gcm-aad12", 24, 16, 12) \ _ (AES_256_GCM, "aes-256-gcm-aad8", 32, 16, 8) \ _ (AES_256_GCM, "aes-256-gcm-aad12", 32, 16, 12) \ + _ (AES_128_NULL_GMAC, "aes-128-null-gmac-aad8", 16, 16, 8) \ + _ (AES_128_NULL_GMAC, "aes-128-null-gmac-aad12", 16, 16, 12) \ + _ (AES_192_NULL_GMAC, "aes-192-null-gmac-aad8", 24, 16, 8) \ + _ (AES_192_NULL_GMAC, "aes-192-null-gmac-aad12", 24, 16, 12) \ + _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad8", 32, 16, 8) \ + _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad12", 32, 16, 12) \ _ (CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \ _ (CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12) \ _ (CHACHA20_POLY1305, "chacha20-poly1305", 32, 16, 0) @@ -142,7 +151,6 @@ typedef enum VNET_CRYPTO_OP_N_STATUS, } vnet_crypto_op_status_t; -/* *INDENT-OFF* */ typedef enum { VNET_CRYPTO_ALG_NONE = 0, @@ -231,7 +239,6 @@ typedef enum #undef _ VNET_CRYPTO_N_OP_IDS, } vnet_crypto_op_id_t; -/* *INDENT-ON* */ typedef enum { @@ -260,9 +267,8 @@ typedef struct vnet_crypto_op_id_t op:16; vnet_crypto_op_status_t status:8; u8 flags; -#define VNET_CRYPTO_OP_FLAG_INIT_IV (1 << 0) -#define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 1) -#define VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS (1 << 2) +#define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 0) +#define VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS (1 << 1) union { @@ -338,7 +344,7 @@ typedef struct i16 crypto_start_offset; /* first buffer offset */ i16 integ_start_offset; /* adj total_length for integ, e.g.4 bytes for IPSec ESN */ - u16 integ_length_adj; + i16 integ_length_adj; vnet_crypto_op_status_t status : 8; u8 flags; /**< share same VNET_CRYPTO_OP_FLAG_* values */ } vnet_crypto_async_frame_elt_t; @@ -468,12 +474,8 @@ typedef struct uword *alg_index_by_name; uword *async_alg_index_by_name; vnet_crypto_async_alg_data_t 
*async_algs; - u32 async_refcnt; vnet_crypto_async_next_node_t *next_nodes; u32 crypto_node_index; -#define VNET_CRYPTO_ASYNC_DISPATCH_POLLING 0 -#define VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT 1 - u8 dispatch_mode; } vnet_crypto_main_t; extern vnet_crypto_main_t crypto_main; @@ -484,7 +486,7 @@ u32 vnet_crypto_process_chained_ops (vlib_main_t * vm, vnet_crypto_op_t ops[], u32 vnet_crypto_process_ops (vlib_main_t * vm, vnet_crypto_op_t ops[], u32 n_ops); - +void vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive); int vnet_crypto_set_handler2 (char *ops_handler_name, char *engine, crypto_op_class_type_t oct); int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg); @@ -492,6 +494,7 @@ int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg); u32 vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data, u16 length); void vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index); +void vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index); /** * Use 2 created keys to generate new key for linked algs (cipher + integ) @@ -501,21 +504,13 @@ u32 vnet_crypto_key_add_linked (vlib_main_t * vm, vnet_crypto_key_index_t index_crypto, vnet_crypto_key_index_t index_integ); -clib_error_t *crypto_dispatch_enable_disable (int is_enable); - int vnet_crypto_set_async_handler2 (char *alg_name, char *engine); int vnet_crypto_is_set_async_handler (vnet_crypto_async_op_id_t opt); -void vnet_crypto_request_async_mode (int is_enable); - -void vnet_crypto_set_async_dispatch_mode (u8 mode); - vnet_crypto_async_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg, vnet_crypto_alg_t integ_alg); -clib_error_t *crypto_dispatch_enable_disable (int is_enable); - format_function_t format_vnet_crypto_alg; format_function_t format_vnet_crypto_engine; format_function_t format_vnet_crypto_op; @@ -569,12 +564,16 @@ vnet_crypto_async_get_frame (vlib_main_t * vm, vnet_crypto_async_op_id_t opt) vnet_crypto_thread_t *ct = cm->threads + vm->thread_index; 
vnet_crypto_async_frame_t *f = NULL; - pool_get_aligned (ct->frame_pool, f, CLIB_CACHE_LINE_BYTES); - if (CLIB_DEBUG > 0) - clib_memset (f, 0xfe, sizeof (*f)); - f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED; - f->op = opt; - f->n_elts = 0; + if (PREDICT_TRUE (pool_free_elts (ct->frame_pool))) + { + pool_get_aligned (ct->frame_pool, f, CLIB_CACHE_LINE_BYTES); +#if CLIB_DEBUG > 0 + clib_memset (f, 0xfe, sizeof (*f)); +#endif + f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED; + f->op = opt; + f->n_elts = 0; + } return f; } @@ -594,7 +593,8 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm, { vnet_crypto_main_t *cm = &crypto_main; vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 i = vlib_num_workers () > 0; + u32 i; + vlib_node_t *n; frame->state = VNET_CRYPTO_FRAME_STATE_PENDING; frame->enqueue_thread_index = vm->thread_index; @@ -609,9 +609,10 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm, if (PREDICT_TRUE (ret == 0)) { - if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT) + n = vlib_get_node (vm, cm->crypto_node_index); + if (n->state == VLIB_NODE_STATE_INTERRUPT) { - for (; i < tm->n_vlib_mains; i++) + for (i = 0; i < tm->n_vlib_mains; i++) vlib_node_set_interrupt_pending (vlib_get_main_by_index (i), cm->crypto_node_index); } @@ -628,7 +629,7 @@ static_always_inline void vnet_crypto_async_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f, u32 key_index, u32 crypto_len, i16 integ_len_adj, i16 crypto_start_offset, - u16 integ_start_offset, u32 buffer_index, + i16 integ_start_offset, u32 buffer_index, u16 next_node, u8 *iv, u8 *tag, u8 *aad, u8 flags) { diff --git a/src/vnet/crypto/crypto_api.c b/src/vnet/crypto/crypto_api.c index 49b12a3d377..e701864a5ba 100644 --- a/src/vnet/crypto/crypto_api.c +++ b/src/vnet/crypto/crypto_api.c @@ -46,12 +46,24 @@ vl_api_crypto_set_async_dispatch_t_handler (vl_api_crypto_set_async_dispatch_t vl_api_crypto_set_async_dispatch_reply_t *rmp; int rv = 0; - 
vnet_crypto_set_async_dispatch_mode ((u8) mp->mode); + vnet_crypto_set_async_dispatch ((u8) mp->mode, 0); REPLY_MACRO (VL_API_CRYPTO_SET_ASYNC_DISPATCH_REPLY); } static void +vl_api_crypto_set_async_dispatch_v2_t_handler ( + vl_api_crypto_set_async_dispatch_v2_t *mp) +{ + vl_api_crypto_set_async_dispatch_v2_reply_t *rmp; + int rv = 0; + + vnet_crypto_set_async_dispatch ((u8) mp->mode, mp->adaptive ? 1 : 0); + + REPLY_MACRO (VL_API_CRYPTO_SET_ASYNC_DISPATCH_V2_REPLY); +} + +static void vl_api_crypto_set_handler_t_handler (vl_api_crypto_set_handler_t * mp) { vl_api_crypto_set_handler_reply_t *rmp; diff --git a/src/vnet/crypto/node.c b/src/vnet/crypto/node.c index 216b924f96e..ee7f344ce68 100644 --- a/src/vnet/crypto/node.c +++ b/src/vnet/crypto/node.c @@ -135,8 +135,11 @@ crypto_dequeue_frame (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_crypto_async_free_frame (vm, cf); } /* signal enqueue-thread to dequeue the processed frame (n_elts>0) */ - if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT - && n_elts > 0) + if (n_elts > 0 && + ((node->state == VLIB_NODE_STATE_POLLING && + (node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)) || + node->state == VLIB_NODE_STATE_INTERRUPT)) { vlib_node_set_interrupt_pending ( vlib_get_main_by_index (enqueue_thread_idx), @@ -161,24 +164,32 @@ VLIB_NODE_FN (crypto_dispatch_node) (vlib_main_t * vm, u32 n_dispatched = 0, n_cache = 0, index; vec_foreach_index (index, cm->dequeue_handlers) { - if (PREDICT_FALSE (cm->dequeue_handlers[index] == 0)) - continue; n_cache = crypto_dequeue_frame ( vm, node, ct, cm->dequeue_handlers[index], n_cache, &n_dispatched); } - /* *INDENT-ON* */ if (n_cache) vlib_buffer_enqueue_to_next_vec (vm, node, &ct->buffer_indices, &ct->nexts, n_cache); + /* if there are still pending tasks and node in interrupt mode, + sending current thread signal to dequeue next loop */ + if (pool_elts (ct->frame_pool) > 0 && + ((node->state == VLIB_NODE_STATE_POLLING && + (node->flags & + 
VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)) || + node->state == VLIB_NODE_STATE_INTERRUPT)) + { + vlib_node_set_interrupt_pending (vm, node->node_index); + } + return n_dispatched; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (crypto_dispatch_node) = { .name = "crypto-dispatch", .type = VLIB_NODE_TYPE_INPUT, - .state = VLIB_NODE_STATE_DISABLED, + .flags = VLIB_NODE_FLAG_ADAPTIVE_MODE, + .state = VLIB_NODE_STATE_INTERRUPT, .format_trace = format_crypto_dispatch_trace, .n_errors = ARRAY_LEN(vnet_crypto_async_error_strings), @@ -192,7 +203,6 @@ VLIB_REGISTER_NODE (crypto_dispatch_node) = { #undef _ }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c new file mode 100644 index 00000000000..114b63d6662 --- /dev/null +++ b/src/vnet/dev/api.c @@ -0,0 +1,275 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include "vppinfra/pool.h" +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/log.h> +#include <vnet/dev/api.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", + .subclass_name = "api", +}; + +static int +_vnet_dev_queue_size_validate (u32 size, vnet_dev_queue_config_t c) +{ + if (size < c.min_size) + return 0; + if (size > c.max_size) + return 0; + if (c.size_is_power_of_two && count_set_bits (size) != 1) + return 0; + if (c.multiplier && size % c.multiplier) + return 0; + + return 1; +} + +vnet_dev_rv_t +vnet_dev_api_attach (vlib_main_t *vm, vnet_dev_api_attach_args_t *args) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_t *dev = 0; + vnet_dev_rv_t rv = VNET_DEV_OK; + vnet_dev_bus_t *bus; + vnet_dev_driver_t *driver; + void *bus_dev_info = 0; + u8 *dev_desc = 0; + + log_debug (0, "%s driver %s flags '%U' args '%v'", args->device_id, + args->driver_name, format_vnet_dev_flags, &args->flags, + args->args); + + if (vnet_dev_by_id 
(args->device_id)) + return VNET_DEV_ERR_ALREADY_IN_USE; + + bus = vnet_dev_find_device_bus (vm, args->device_id); + if (!bus) + { + log_err (dev, "unknown bus"); + rv = VNET_DEV_ERR_INVALID_BUS; + goto done; + } + + bus_dev_info = vnet_dev_get_device_info (vm, args->device_id); + if (!bus_dev_info) + { + log_err (dev, "invalid or unsupported device id"); + rv = VNET_DEV_ERR_INVALID_DEVICE_ID; + goto done; + } + + vec_foreach (driver, dm->drivers) + { + if (args->driver_name[0] && + strcmp (args->driver_name, driver->registration->name)) + continue; + if (driver->ops.probe && + (dev_desc = driver->ops.probe (vm, bus->index, bus_dev_info))) + break; + } + + if (!dev_desc) + { + log_err (dev, "driver not available for %s", args->device_id); + rv = VNET_DEV_ERR_DRIVER_NOT_AVAILABLE; + goto done; + } + + dev = vnet_dev_alloc (vm, args->device_id, driver); + if (!dev) + { + log_err (dev, "dev alloc failed for %s", args->device_id); + rv = VNET_DEV_ERR_BUG; + goto done; + } + dev->description = dev_desc; + + if (driver->registration->args) + for (vnet_dev_arg_t *a = driver->registration->args; + a->type != VNET_DEV_ARG_END; a++) + vec_add1 (dev->args, *a); + + if (args->args) + { + if ((rv = vnet_dev_arg_parse (vm, dev, dev->args, args->args)) != + VNET_DEV_OK) + goto done; + } + + if ((args->flags.e & VNET_DEV_F_NO_STATS) == 0) + dev->poll_stats = 1; + + log_debug (0, "found '%v'", dev->description); + + rv = vnet_dev_process_call_op (vm, dev, vnet_dev_init); + +done: + if (bus_dev_info) + bus->ops.free_device_info (vm, bus_dev_info); + + if (rv != VNET_DEV_OK && dev) + vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_free); + else if (dev) + args->dev_index = dev->index; + + return rv; +} + +vnet_dev_rv_t +vnet_dev_api_detach (vlib_main_t *vm, vnet_dev_api_detach_args_t *args) +{ + vnet_dev_t *dev = vnet_dev_by_index (args->dev_index); + + log_debug (dev, "detach"); + + if (dev) + return vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_detach); + + return 
VNET_DEV_ERR_NOT_FOUND; +} + +vnet_dev_rv_t +vnet_dev_api_reset (vlib_main_t *vm, vnet_dev_api_reset_args_t *args) +{ + vnet_dev_t *dev = vnet_dev_by_id (args->device_id); + + log_debug (dev, "detach"); + + if (!dev) + return VNET_DEV_ERR_NOT_FOUND; + + if (dev->ops.reset) + return VNET_DEV_ERR_NOT_SUPPORTED; + + return vnet_dev_process_call_op (vm, dev, vnet_dev_reset); +} + +vnet_dev_rv_t +vnet_dev_api_create_port_if (vlib_main_t *vm, + vnet_dev_api_create_port_if_args_t *args) +{ + vnet_dev_t *dev = vnet_dev_by_index (args->dev_index); + vnet_dev_port_t *port = 0; + u16 n_threads = vlib_get_n_threads (); + int default_is_intr_mode; + vnet_dev_rv_t rv; + + log_debug (dev, + "create_port_if: dev_index %u port %u intf_name '%s' num_rx_q %u " + "num_tx_q %u rx_q_sz %u tx_q_sz %u, flags '%U' args '%v'", + args->dev_index, args->port_id, args->intf_name, + args->num_rx_queues, args->num_tx_queues, args->rx_queue_size, + args->tx_queue_size, format_vnet_dev_port_flags, &args->flags, + args->args); + + if (dev == 0) + return VNET_DEV_ERR_NOT_FOUND; + + foreach_vnet_dev_port (p, dev) + if (p->port_id == args->port_id) + { + port = p; + break; + } + + if (!port) + return VNET_DEV_ERR_INVALID_DEVICE_ID; + + if (port->interface_created) + return VNET_DEV_ERR_ALREADY_EXISTS; + + if (args->args) + { + rv = vnet_dev_arg_parse (vm, dev, port->args, args->args); + if (rv != VNET_DEV_OK) + return rv; + } + + default_is_intr_mode = (args->flags.e & VNET_DEV_PORT_F_INTERRUPT_MODE) != 0; + if (default_is_intr_mode && port->attr.caps.interrupt_mode == 0) + { + log_err (dev, "interrupt mode requested and port doesn't support it"); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + + if (args->num_rx_queues) + { + if (args->num_rx_queues > port->attr.max_rx_queues) + return VNET_DEV_ERR_INVALID_NUM_RX_QUEUES; + port->intf.num_rx_queues = args->num_rx_queues; + } + else + port->intf.num_rx_queues = clib_min (port->attr.max_tx_queues, 1); + + if (args->num_tx_queues) + { + if 
(args->num_tx_queues > port->attr.max_tx_queues) + return VNET_DEV_ERR_INVALID_NUM_TX_QUEUES; + port->intf.num_tx_queues = args->num_tx_queues; + } + else + port->intf.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads); + + if (args->rx_queue_size) + { + if (!_vnet_dev_queue_size_validate (args->rx_queue_size, + port->rx_queue_config)) + return VNET_DEV_ERR_INVALID_RX_QUEUE_SIZE; + port->intf.rxq_sz = args->rx_queue_size; + } + else + port->intf.rxq_sz = port->rx_queue_config.default_size; + + if (args->tx_queue_size) + { + if (!_vnet_dev_queue_size_validate (args->tx_queue_size, + port->tx_queue_config)) + return VNET_DEV_ERR_INVALID_TX_QUEUE_SIZE; + port->intf.txq_sz = args->tx_queue_size; + } + else + port->intf.txq_sz = port->tx_queue_config.default_size; + + clib_memcpy (port->intf.name, args->intf_name, sizeof (port->intf.name)); + port->intf.default_is_intr_mode = default_is_intr_mode; + + rv = vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_create); + args->sw_if_index = (rv == VNET_DEV_OK) ? 
port->intf.sw_if_index : ~0; + + return rv; +} + +vnet_dev_rv_t +vnet_dev_api_remove_port_if (vlib_main_t *vm, + vnet_dev_api_remove_port_if_args_t *args) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *si; + vnet_hw_interface_t *hi; + vnet_dev_port_t *port; + + si = vnet_get_sw_interface_or_null (vnm, args->sw_if_index); + if (!si) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index); + if (!hi) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + if (pool_is_free_index (dm->ports_by_dev_instance, hi->dev_instance)) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + port = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + + if (port->intf.hw_if_index != si->hw_if_index) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove); +} diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h new file mode 100644 index 00000000000..1b7bf27d62a --- /dev/null +++ b/src/vnet/dev/api.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#ifndef _VNET_DEV_API_H_ +#define _VNET_DEV_API_H_ + +#include <vppinfra/clib.h> +#include <vnet/vnet.h> +#include <vnet/dev/types.h> + +typedef struct +{ + vnet_dev_device_id_t device_id; + vnet_dev_driver_name_t driver_name; + vnet_dev_flags_t flags; + u8 *args; + + /* return */ + u32 dev_index; +} vnet_dev_api_attach_args_t; + +vnet_dev_rv_t vnet_dev_api_attach (vlib_main_t *, + vnet_dev_api_attach_args_t *); + +typedef struct +{ + u32 dev_index; +} vnet_dev_api_detach_args_t; +vnet_dev_rv_t vnet_dev_api_detach (vlib_main_t *, + vnet_dev_api_detach_args_t *); + +typedef struct +{ + vnet_dev_device_id_t device_id; +} vnet_dev_api_reset_args_t; +vnet_dev_rv_t vnet_dev_api_reset (vlib_main_t *, vnet_dev_api_reset_args_t *); + +typedef struct +{ + u32 dev_index; + vnet_dev_if_name_t intf_name; + u16 num_rx_queues; + u16 num_tx_queues; + u16 rx_queue_size; + u16 tx_queue_size; + vnet_dev_port_id_t port_id; + vnet_dev_port_flags_t flags; + u8 *args; + + /* return */ + u32 sw_if_index; +} vnet_dev_api_create_port_if_args_t; + +vnet_dev_rv_t +vnet_dev_api_create_port_if (vlib_main_t *, + vnet_dev_api_create_port_if_args_t *); + +typedef struct +{ + u32 sw_if_index; +} vnet_dev_api_remove_port_if_args_t; + +vnet_dev_rv_t +vnet_dev_api_remove_port_if (vlib_main_t *, + vnet_dev_api_remove_port_if_args_t *); + +#endif /* _VNET_DEV_API_H_ */ diff --git a/src/vnet/dev/args.c b/src/vnet/dev/args.c new file mode 100644 index 00000000000..e302517cc61 --- /dev/null +++ b/src/vnet/dev/args.c @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include "vppinfra/pool.h" +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/log.h> +#include <vnet/dev/types.h> +#include <vppinfra/format_table.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", + .subclass_name = "args", +}; + +void +vnet_dev_arg_clear_value (vnet_dev_arg_t *a) +{ + if (a->type == VNET_DEV_ARG_TYPE_STRING) + vec_free (a->val.string); + a->val = (typeof (a->val)){}; + a->val_set = 0; +} + +void +vnet_dev_arg_free (vnet_dev_arg_t **vp) +{ + vnet_dev_arg_t *v; + vec_foreach (v, *vp) + vnet_dev_arg_clear_value (v); + vec_free (*vp); +} + +vnet_dev_rv_t +vnet_dev_arg_parse (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_arg_t *args, + u8 *str) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + unformat_input_t in; + u8 *name = 0; + u8 *err = 0; + + log_debug (dev, "input '%v'", str); + if (args == 0) + return rv; + + unformat_init_string (&in, (char *) str, vec_len (str)); + + while (unformat (&in, "%U=", unformat_token, "a-zA-Z0-9_", &name)) + { + vnet_dev_arg_t *a = args; + vec_add1 (name, 0); + while (a < vec_end (args)) + if (strcmp (a->name, (char *) name) == 0) + break; + else + a++; + + if (a->type == VNET_DEV_ARG_TYPE_BOOL) + { + + if (unformat (&in, "true") || unformat (&in, "1") || + unformat (&in, "on") || unformat (&in, "yes")) + a->val.boolean = 1; + else if (unformat (&in, "false") || unformat (&in, "0") || + unformat (&in, "off") || unformat (&in, "no")) + a->val.boolean = 0; + else + { + log_err (dev, "unable to parse args: %U", format_unformat_error, + &in); + err = format ( + 0, + "boolean value expected ('yes', 'no', '0', '1', 'on', " + "'off', 'true' or 'false') for argument '%s', found '%U'", + a->name, format_unformat_error, &in); + goto done; + } + } + else if (a->type == VNET_DEV_ARG_TYPE_UINT32) + { + u32 val, min = 0, max = CLIB_U32_MAX; + if (!unformat (&in, "%u", &val)) + { + err = format (0, + "unsigned integer in range %u - %u expected for " + "argument 
'%s', found '%U'", + min, max, a->name, format_unformat_error, &in); + goto done; + } + + if (a->min || a->max) + { + min = a->min; + max = a->max; + } + + if (val < min || val > max) + { + err = format (0, + "unsigned integer in range %u - %u expected for " + "argument '%s', found '%u'", + min, max, a->name, val); + goto done; + } + a->val.uint32 = val; + } + else if (a->type == VNET_DEV_ARG_TYPE_STRING) + { + if (!unformat (&in, "%U", unformat_double_quoted_string, + &a->val.string)) + { + err = format ( + 0, + "double quoted string expected for argument '%s', found '%U'", + a->name, format_unformat_error, &in); + goto done; + } + + if (a->min && vec_len (a->val.string) < a->min) + { + err = + format (0, "string '%v' too short, must be at least %u chars", + a->val.string, a->min); + goto done; + } + if (a->max && vec_len (a->val.string) > a->max) + { + err = format ( + 0, "string '%v' too long, must be no longer than %u chars", + a->val.string, a->max); + goto done; + } + } + else + { + err = format (0, "unknown argument '%s'", name); + goto done; + } + + a->val_set = 1; + log_debug (dev, "name '%s' type %U value %U", name, + format_vnet_dev_arg_type, a->type, format_vnet_dev_arg_value, + a->type, &a->val); + vec_free (name); + unformat (&in, ","); + } + + if (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) + err = format (0, "unable to parse argument name '%U'", + format_unformat_error, &in); + +done: + if (err) + { + vnet_dev_arg_t *a = 0; + log_err (dev, "%v", err); + vec_free (err); + vec_foreach (a, args) + vnet_dev_arg_clear_value (a); + rv = VNET_DEV_ERR_INVALID_ARG; + } + + vec_free (name); + unformat_free (&in); + return rv; +} + +u8 * +format_vnet_dev_arg_type (u8 *s, va_list *args) +{ + vnet_dev_arg_type_t t = va_arg (*args, u32); + switch (t) + { +#define _(n, f, val) \ + case VNET_DEV_ARG_TYPE_##n: \ + return format (s, #n); + foreach_vnet_dev_arg_type +#undef _ + default : ASSERT (0); + break; + } + return s; +} + +u8 * 
+format_vnet_dev_arg_value (u8 *s, va_list *args) +{ + vnet_dev_arg_type_t t = va_arg (*args, u32); + vnet_dev_arg_value_t *v = va_arg (*args, vnet_dev_arg_value_t *); + + switch (t) + { +#define _(n, f, value) \ + case VNET_DEV_ARG_TYPE_##n: \ + s = format (s, f, v->value); \ + break; + foreach_vnet_dev_arg_type +#undef _ + default : break; + } + return s; +} + +u8 * +format_vnet_dev_args (u8 *s, va_list *va) +{ + vnet_dev_arg_t *a, *args = va_arg (*va, vnet_dev_arg_t *); + table_t t = { .no_ansi = 1 }; + + table_add_header_col (&t, 4, "Name", "Value", "Default", "Description"); + table_set_cell_align (&t, -1, 0, TTAA_LEFT); + table_set_cell_align (&t, -1, 3, TTAA_LEFT); + vec_foreach (a, args) + { + int r = a - args; + table_format_cell (&t, r, 0, "%s", a->name); + if (a->val_set) + table_format_cell (&t, r, 1, "%U", format_vnet_dev_arg_value, a->type, + &a->val); + else + table_format_cell (&t, r, 1, "<not set>"); + + table_format_cell (&t, r, 2, "%U", format_vnet_dev_arg_value, a->type, + &a->default_val); + table_format_cell (&t, r, 3, "%s", a->desc); + table_set_cell_align (&t, r, 0, TTAA_LEFT); + table_set_cell_align (&t, r, 3, TTAA_LEFT); + } + + s = format (s, "%U", format_table, &t); + + table_free (&t); + return s; +} diff --git a/src/vnet/dev/args.h b/src/vnet/dev/args.h new file mode 100644 index 00000000000..a256cfe8e0e --- /dev/null +++ b/src/vnet/dev/args.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#ifndef _VNET_DEV_ARGS_H_ +#define _VNET_DEV_ARGS_H_ + +#include <vppinfra/clib.h> +#include <vnet/dev/errors.h> + +#define foreach_vnet_dev_arg_type \ + _ (BOOL, "%u", boolean) \ + _ (UINT32, "%u", uint32) \ + _ (STRING, "\'%v\'", string) + +typedef enum +{ + VNET_DEV_ARG_END, +#define _(n, f, v) VNET_DEV_ARG_TYPE_##n, + foreach_vnet_dev_arg_type +#undef _ +} __clib_packed vnet_dev_arg_type_t; + +typedef union +{ + u8 boolean; + u32 uint32; + u8 *string; +} vnet_dev_arg_value_t; + +typedef struct +{ + char *name; + char *desc; + vnet_dev_arg_type_t type; + u8 val_set; + u32 min; + u32 max; + u64 id; + vnet_dev_arg_value_t val; + vnet_dev_arg_value_t default_val; +} vnet_dev_arg_t; + +#define VNET_DEV_ARG_BOOL(ud, n, d, ...) \ + { \ + .type = VNET_DEV_ARG_TYPE_BOOL, .id = ud, .name = n, .desc = d, \ + __VA_ARGS__ \ + } +#define VNET_DEV_ARG_UINT32(ud, n, d, ...) \ + { \ + .type = VNET_DEV_ARG_TYPE_UINT32, .id = ud, .name = n, .desc = d, \ + __VA_ARGS__ \ + } +#define VNET_DEV_ARG_STRING(ud, n, d, ...) \ + { \ + .type = VNET_DEV_ARG_TYPE_STRING, .id = ud, .name = n, .desc = d, \ + __VA_ARGS__ \ + } +#define VNET_DEV_ARG_END() \ + { \ + .type = VNET_DEV_ARG_END \ + } + +#define VNET_DEV_ARGS(...) \ + (vnet_dev_arg_t[]) { __VA_ARGS__, VNET_DEV_ARG_END () } + +#define foreach_vnet_dev_args(a, d) \ + for (typeof ((d)->args[0]) *(a) = (d)->args; (a) < vec_end ((d)->args); \ + (a)++) +#define foreach_vnet_dev_port_args(a, p) \ + for (typeof ((p)->args[0]) *(a) = (p)->args; (a) < vec_end ((p)->args); \ + (a)++) + +#endif /* _VNET_DEV_ARGS_H_ */ diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c new file mode 100644 index 00000000000..53be4483183 --- /dev/null +++ b/src/vnet/dev/cli.c @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/api.h> + +static clib_error_t * +device_attach_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_attach_args_t a = {}; + vnet_dev_rv_t rv; + + if (!unformat_user (input, unformat_c_string_array, a.device_id, + sizeof (a.device_id))) + return clib_error_return (0, "please specify valid device id"); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!a.driver_name[0] && + unformat (input, "driver %U", unformat_c_string_array, a.driver_name, + sizeof (a.driver_name))) + ; + else if (!a.flags.n && + unformat (input, "flags %U", unformat_vnet_dev_flags, &a.flags)) + ; + else if (!a.args && unformat (input, "args %v", &a.args)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + rv = vnet_dev_api_attach (vm, &a); + + vec_free (a.args); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to attach '%s': %U", a.device_id, + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_attach_cmd, static) = { + .path = "device attach", + .short_help = "device attach <device-id> [driver <name>] " + "[args <dev-args>]", + .function = device_attach_cmd_fn, +}; + +static clib_error_t * +device_detach_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_rv_t rv; + vnet_dev_device_id_t device_id = {}; + vnet_dev_t *dev; + + if (!unformat_user (input, unformat_c_string_array, device_id, + sizeof (device_id))) + return clib_error_return (0, "please specify valid device id"); + + dev = vnet_dev_by_id (device_id); + + if (dev) + { + vnet_dev_api_detach_args_t a = { .dev_index = dev->index }; + rv = vnet_dev_api_detach (vm, &a); + } + else + rv = VNET_DEV_ERR_UNKNOWN_DEVICE; + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to detach '%s': %U", device_id, + format_vnet_dev_rv, rv); 
+ + return 0; +} + +VLIB_CLI_COMMAND (device_detach_cmd, static) = { + .path = "device detach", + .short_help = "device detach <device-id>", + .function = device_detach_cmd_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * +device_reset_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_reset_args_t a = {}; + vnet_dev_rv_t rv; + + if (!unformat_user (input, unformat_c_string_array, a.device_id, + sizeof (a.device_id))) + return clib_error_return (0, "please specify valid device id"); + + rv = vnet_dev_api_reset (vm, &a); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to reset '%s': %U", a.device_id, + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_reset_cmd, static) = { + .path = "device reset", + .short_help = "device reset <device-id>", + .function = device_reset_cmd_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * +device_create_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_create_port_if_args_t a = {}; + vnet_dev_rv_t rv; + vnet_dev_device_id_t device_id = {}; + vnet_dev_t *dev = 0; + u32 n; + + if (unformat_user (input, unformat_c_string_array, device_id, + sizeof (device_id))) + dev = vnet_dev_by_id (device_id); + + if (!dev) + return clib_error_return (0, "please specify valid device id"); + + a.dev_index = dev->index; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!a.intf_name[0] && + unformat (input, "if-name %U", unformat_c_string_array, a.intf_name, + sizeof (a.intf_name))) + ; + else if (!a.port_id && unformat (input, "port %u", &n)) + a.port_id = n; + else if (!a.flags.n && unformat (input, "flags %U", + unformat_vnet_dev_port_flags, &a.flags)) + ; + else if (!a.num_rx_queues && unformat (input, "num-rx-queues %u", &n)) + a.num_rx_queues = n; + else if (!a.num_tx_queues && unformat (input, "num-tx-queues %u", &n)) + a.num_tx_queues = n; + else if (!a.rx_queue_size && unformat (input, 
"rx-queues-size %u", &n)) + a.rx_queue_size = n; + else if (!a.tx_queue_size && unformat (input, "tx-queues-size %u", &n)) + a.tx_queue_size = n; + else if (!a.intf_name[0] && + unformat (input, "name %U", unformat_c_string_array, + &a.intf_name, sizeof (a.intf_name))) + ; + else if (!a.args && unformat (input, "args %v", &a.args)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + rv = vnet_dev_api_create_port_if (vm, &a); + + vec_free (a.args); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to create_if '%s': %U", device_id, + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_create_if_cmd, static) = { + .path = "device create-interface", + .short_help = "device create-interface <device-id> [port <port-id>] " + "[args <iface-args>]", + .function = device_create_if_cmd_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * +device_remove_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_remove_port_if_args_t a = { .sw_if_index = ~0 }; + vnet_main_t *vnm = vnet_get_main (); + vnet_dev_rv_t rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &a.sw_if_index)) + ; + else if (unformat (input, "sw-if-index %u", &a.sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (a.sw_if_index == ~0) + return clib_error_return (0, "please specify existing interface name"); + + rv = vnet_dev_api_remove_port_if (vm, &a); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to remove interface: %U", + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_remove_if_cmd, static) = { + .path = "device remove-interface", + .short_help = "device remove-interface [<interface-name> | sw-if-index <n>]", + .function = device_remove_if_cmd_fn, + .is_mp_safe = 1, +}; + +static 
clib_error_t * +show_devices_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_format_args_t fa = {}, *a = &fa; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "counters")) + fa.counters = 1; + else if (unformat (input, "all")) + fa.show_zero_counters = 1; + else if (unformat (input, "debug")) + fa.debug = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + pool_foreach_pointer (dev, dm->devices) + { + vlib_cli_output (vm, "device '%s':", dev->device_id); + vlib_cli_output (vm, " %U", format_vnet_dev_info, a, dev); + foreach_vnet_dev_port (p, dev) + { + vlib_cli_output (vm, " Port %u:", p->port_id); + vlib_cli_output (vm, " %U", format_vnet_dev_port_info, a, p); + if (fa.counters) + vlib_cli_output (vm, " %U", format_vnet_dev_counters, a, + p->counter_main); + + foreach_vnet_dev_port_rx_queue (q, p) + { + vlib_cli_output (vm, " RX queue %u:", q->queue_id); + vlib_cli_output (vm, " %U", format_vnet_dev_rx_queue_info, + a, q); + } + + foreach_vnet_dev_port_tx_queue (q, p) + { + vlib_cli_output (vm, " TX queue %u:", q->queue_id); + vlib_cli_output (vm, " %U", format_vnet_dev_tx_queue_info, + a, q); + } + } + } + return 0; +} + +VLIB_CLI_COMMAND (show_devices_cmd, static) = { + .path = "show device", + .short_help = "show device [counters]", + .function = show_devices_cmd_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * +show_device_counters_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_format_args_t fa = { .counters = 1 }; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "all")) + fa.show_zero_counters = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + pool_foreach_pointer (dev, dm->devices) + { + 
vlib_cli_output (vm, "device '%s':", dev->device_id); + foreach_vnet_dev_port (p, dev) + { + vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, + p->counter_main); + + foreach_vnet_dev_port_rx_queue (q, p) + if (q->counter_main) + { + vlib_cli_output (vm, " RX queue %u:", q->queue_id); + vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, + q->counter_main); + } + + foreach_vnet_dev_port_tx_queue (q, p) + if (q->counter_main) + { + vlib_cli_output (vm, " TX queue %u:", q->queue_id); + vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, + q->counter_main); + } + } + } + return 0; +} + +VLIB_CLI_COMMAND (show_device_counters_cmd, static) = { + .path = "show device counters", + .short_help = "show device counters [all]", + .function = show_device_counters_cmd_fn, + .is_mp_safe = 1, +}; diff --git a/src/vnet/dev/config.c b/src/vnet/dev/config.c new file mode 100644 index 00000000000..8883e727ac2 --- /dev/null +++ b/src/vnet/dev/config.c @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include "vppinfra/error.h" +#include "vppinfra/pool.h" +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/api.h> +#include <vnet/dev/log.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", + .subclass_name = "config", +}; + +static clib_error_t * +vnet_dev_config_one_interface (vlib_main_t *vm, unformat_input_t *input, + vnet_dev_api_create_port_if_args_t *args) +{ + clib_error_t *err = 0; + + log_debug (0, "port %u %U", args->port_id, format_unformat_input, input); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + u32 n; + + if (unformat (input, "name %U", unformat_c_string_array, args->intf_name, + sizeof (args->intf_name))) + ; + else if (unformat (input, "num-rx-queues %u", &n)) + args->num_rx_queues = n; + else if (unformat (input, "num-tx-queues %u", &n)) + args->num_tx_queues = n; + else if (unformat (input, "rx-queue-size %u", &n)) + args->rx_queue_size = n; + else if (unformat (input, "tx-queue-size %u", &n)) + args->tx_queue_size = n; + else if (unformat (input, "flags %U", unformat_vnet_dev_port_flags, + &args->flags)) + ; + else if (unformat (input, "args %U", unformat_single_quoted_string, + &args->args)) + ; + else + { + err = clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + break; + } + } + return err; +} +static clib_error_t * +vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input, + char *device_id) +{ + log_debug (0, "device %s %U", device_id, format_unformat_input, input); + clib_error_t *err = 0; + vnet_dev_api_attach_args_t args = {}; + vnet_dev_api_create_port_if_args_t *if_args_vec = 0, *if_args; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + unformat_input_t sub_input; + u32 n; + + if (unformat (input, "driver %U", unformat_c_string_array, + args.driver_name, sizeof (args.driver_name))) + ; + else if (unformat (input, "flags %U", unformat_vnet_dev_flags, + 
&args.flags)) + ; + else if (unformat (input, "args %U", unformat_single_quoted_string, + &args.args)) + ; + else if (unformat (input, "port %u %U", &n, unformat_vlib_cli_sub_input, + &sub_input)) + { + vnet_dev_api_create_port_if_args_t *if_args; + vec_add2 (if_args_vec, if_args, 1); + if_args->port_id = n; + err = vnet_dev_config_one_interface (vm, &sub_input, if_args); + unformat_free (&sub_input); + if (err) + break; + } + else + { + err = clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + break; + } + } + + if (err == 0) + { + vnet_dev_rv_t rv; + + clib_memcpy (args.device_id, device_id, sizeof (args.device_id)); + rv = vnet_dev_api_attach (vm, &args); + vec_free (args.args); + + if (rv == VNET_DEV_OK) + { + vec_foreach (if_args, if_args_vec) + { + if_args->dev_index = args.dev_index; + rv = vnet_dev_api_create_port_if (vm, if_args); + if (rv != VNET_DEV_OK) + break; + } + } + + if (rv != VNET_DEV_OK) + err = clib_error_return (0, "error: %U for device '%s'", + format_vnet_dev_rv, rv, device_id); + } + + vec_free (if_args_vec); + return err; +} + +uword +dev_config_process_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_frame_t *f) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + unformat_input_t input; + clib_error_t *err = 0; + + if (dm->startup_config == 0) + return 0; + + unformat_init_vector (&input, dm->startup_config); + dm->startup_config = 0; + + while (!err && unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + { + unformat_input_t sub_input; + vnet_dev_device_id_t device_id; + if (unformat (&input, "dev %U %U", unformat_c_string_array, device_id, + sizeof (device_id), unformat_vlib_cli_sub_input, + &sub_input)) + { + err = vnet_dev_config_one_device (vm, &sub_input, device_id); + unformat_free (&sub_input); + } + else if (unformat (&input, "dev %U", unformat_c_string_array, device_id, + sizeof (device_id))) + { + unformat_input_t no_input = {}; + unformat_init_vector (&no_input, 0); + err = 
vnet_dev_config_one_device (vm, &no_input, device_id); + unformat_free (&no_input); + } + else + err = clib_error_return (0, "unknown input '%U'", + format_unformat_error, &input); + } + + unformat_free (&input); + + if (err) + { + log_err (0, "%U", format_clib_error, err); + clib_error_free (err); + } + + vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED); + vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index); + vec_add1 (dm->free_process_node_indices, rt->node_index); + return 0; +} + +VLIB_REGISTER_NODE (dev_config_process_node) = { + .function = dev_config_process_node_fn, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dev-config", +}; + +static clib_error_t * +devices_config (vlib_main_t *vm, unformat_input_t *input) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + uword c; + + while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT) + vec_add1 (dm->startup_config, c); + + return 0; +} + +VLIB_CONFIG_FUNCTION (devices_config, "devices"); diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c new file mode 100644 index 00000000000..0a1e0a7419d --- /dev/null +++ b/src/vnet/dev/counters.c @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/log.h> +#include <vnet/interface/rx_queue_funcs.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", + .subclass_name = "counters", +}; + +vnet_dev_counter_main_t * +vnet_dev_counters_alloc (vlib_main_t *vm, vnet_dev_counter_t *counters, + u16 n_counters, char *fmt, ...) 
+{ + vnet_dev_counter_t *c; + vnet_dev_counter_main_t *cm; + u32 alloc_sz; + + alloc_sz = sizeof (*cm) + n_counters * sizeof (*c); + cm = clib_mem_alloc_aligned (alloc_sz, CLIB_CACHE_LINE_BYTES); + clib_memset (cm, 0, sizeof (*cm)); + cm->n_counters = n_counters; + + if (fmt && strlen (fmt)) + { + va_list va; + va_start (va, fmt); + cm->desc = va_format (0, fmt, &va); + va_end (va); + } + + for (u32 i = 0; i < n_counters; i++) + { + cm->counters[i] = counters[i]; + cm->counters[i].index = i; + } + + vec_validate_aligned (cm->counter_data, n_counters - 1, + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->counter_start, n_counters - 1, + CLIB_CACHE_LINE_BYTES); + + return cm; +} + +void +vnet_dev_counters_clear (vlib_main_t *vm, vnet_dev_counter_main_t *cm) +{ + for (int i = 0; i < cm->n_counters; i++) + { + cm->counter_start[i] = cm->counter_data[i]; + cm->counter_data[i] = 0; + } +} + +void +vnet_dev_counters_free (vlib_main_t *vm, vnet_dev_counter_main_t *cm) +{ + vec_free (cm->desc); + vec_free (cm->counter_data); + vec_free (cm->counter_start); + clib_mem_free (cm); +} + +u8 * +format_vnet_dev_counter_name (u8 *s, va_list *va) +{ + vnet_dev_counter_t *c = va_arg (*va, vnet_dev_counter_t *); + + char *std_counters[] = { + [VNET_DEV_CTR_TYPE_RX_BYTES] = "total bytes received", + [VNET_DEV_CTR_TYPE_TX_BYTES] = "total bytes transmitted", + [VNET_DEV_CTR_TYPE_RX_PACKETS] = "total packets received", + [VNET_DEV_CTR_TYPE_TX_PACKETS] = "total packets transmitted", + [VNET_DEV_CTR_TYPE_RX_DROPS] = "total drops received", + [VNET_DEV_CTR_TYPE_TX_DROPS] = "total drops transmitted", + }; + + char *directions[] = { + [VNET_DEV_CTR_DIR_RX] = "received", + [VNET_DEV_CTR_DIR_TX] = "sent", + }; + char *units[] = { + [VNET_DEV_CTR_UNIT_BYTES] = "bytes", + [VNET_DEV_CTR_UNIT_PACKETS] = "packets", + }; + + if (c->type == VNET_DEV_CTR_TYPE_VENDOR) + { + s = format (s, "%s", c->name); + + if (c->unit < ARRAY_LEN (units) && units[c->unit]) + s = format (s, " %s", units[c->unit]); 
+ + if (c->dir < ARRAY_LEN (directions) && directions[c->dir]) + s = format (s, " %s", directions[c->dir]); + } + else if (c->type < ARRAY_LEN (std_counters) && std_counters[c->type]) + s = format (s, "%s", std_counters[c->type]); + else + ASSERT (0); + + return s; +} + +u8 * +format_vnet_dev_counters (u8 *s, va_list *va) +{ + vnet_dev_format_args_t *a = va_arg (*va, vnet_dev_format_args_t *); + vnet_dev_counter_main_t *cm = va_arg (*va, vnet_dev_counter_main_t *); + u32 line = 0, indent = format_get_indent (s); + + foreach_vnet_dev_counter (c, cm) + { + if (a->show_zero_counters == 0 && cm->counter_data[c->index] == 0) + continue; + + if (line++) + s = format (s, "\n%U", format_white_space, indent); + + s = format (s, "%-45U%lu", format_vnet_dev_counter_name, c, + cm->counter_data[c->index]); + } + + return s; +} diff --git a/src/vnet/dev/counters.h b/src/vnet/dev/counters.h new file mode 100644 index 00000000000..33d08ffbecd --- /dev/null +++ b/src/vnet/dev/counters.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
/* A set of counters allocated as one block by vnet_dev_counters_alloc().
 * The descriptors live in the trailing flexible array, which lets
 * vnet_dev_counter_get_main() recover this structure from a pointer to any
 * of its counters.
 */
typedef struct vnet_dev_counter_main
{
  /* optional description vector, built from the format string passed to
   * vnet_dev_counters_alloc(); freed by vnet_dev_counters_free() */
  u8 *desc;
  /* current value of each counter, indexed by vnet_dev_counter_t.index */
  u64 *counter_data;
  /* per-counter baseline subtracted by vnet_dev_counter_value_update() */
  u64 *counter_start;
  /* number of elements in counters[], counter_data and counter_start */
  u16 n_counters;
  /* counter descriptors, copied from the caller's array at allocation */
  vnet_dev_counter_t counters[];
} vnet_dev_counter_main_t;
\ + { \ + .type = VNET_DEV_CTR_TYPE_RX_DROPS, .dir = VNET_DEV_CTR_DIR_RX, \ + .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \ + } +#define VNET_DEV_CTR_TX_DROPS(p, ...) \ + { \ + .type = VNET_DEV_CTR_TYPE_TX_DROPS, .dir = VNET_DEV_CTR_DIR_TX, \ + .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \ + } +#define VNET_DEV_CTR_VENDOR(p, d, u, n, ...) \ + { \ + .type = VNET_DEV_CTR_TYPE_VENDOR, .user_data = (p), .name = n, \ + .dir = VNET_DEV_CTR_DIR_##d, .unit = VNET_DEV_CTR_UNIT_##u, __VA_ARGS__ \ + } + +vnet_dev_counter_main_t *vnet_dev_counters_alloc (vlib_main_t *, + vnet_dev_counter_t *, u16, + char *, ...); +void vnet_dev_counters_clear (vlib_main_t *, vnet_dev_counter_main_t *); +void vnet_dev_counters_free (vlib_main_t *, vnet_dev_counter_main_t *); + +format_function_t format_vnet_dev_counters; +format_function_t format_vnet_dev_counters_all; + +static_always_inline vnet_dev_counter_main_t * +vnet_dev_counter_get_main (vnet_dev_counter_t *counter) +{ + return (vnet_dev_counter_main_t *) ((u8 *) (counter - counter->index) - + STRUCT_OFFSET_OF ( + vnet_dev_counter_main_t, counters)); +} + +static_always_inline void +vnet_dev_counter_value_add (vlib_main_t *vm, vnet_dev_counter_t *counter, + u64 val) +{ + vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter); + cm->counter_data[counter->index] += val; +} + +static_always_inline void +vnet_dev_counter_value_update (vlib_main_t *vm, vnet_dev_counter_t *counter, + u64 val) +{ + vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter); + cm->counter_data[counter->index] = val - cm->counter_start[counter->index]; +} + +#define foreach_vnet_dev_counter(c, cm) \ + if (cm) \ + for (typeof (*(cm)->counters) *(c) = (cm)->counters; \ + (c) < (cm)->counters + (cm)->n_counters; (c)++) + +#endif /* _VNET_DEV_COUNTERS_H_ */ diff --git a/src/vnet/dev/dev.api b/src/vnet/dev/dev.api new file mode 100644 index 00000000000..552b778949b --- /dev/null +++ b/src/vnet/dev/dev.api @@ 
/* Reply to dev_create_port_if: carries the sw_if_index of the created
 * interface, a return value and an optional error string.
 *
 * NOTE(review): this is the only *_reply message in this file that declares
 * a client_index field; dev_attach_reply, dev_detach_reply and
 * dev_remove_port_if_reply start with 'context'. This looks unintentional,
 * but removing the field changes the message layout, so confirm with API
 * consumers before changing it.
 */
autoendian define dev_create_port_if_reply
{
  u32 client_index;
  u32 context;
  u32 sw_if_index;
  i32 retval;
  string error_string[];
};
+ */ + +#include "vppinfra/pool.h" +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/log.h> +#include <vnet/dev/counters.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", +}; + +vnet_dev_main_t vnet_dev_main = { .next_rx_queue_thread = 1 }; + +vnet_dev_bus_t * +vnet_dev_find_device_bus (vlib_main_t *vm, vnet_dev_device_id_t id) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_bus_t *bus; + + pool_foreach (bus, dm->buses) + { + int n = strlen (bus->registration->name); + int l = strlen (id); + int dl = strlen (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER); + + if (l <= n + dl) + continue; + + if (strncmp (bus->registration->name, id, n)) + continue; + + if (strncmp (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER, id + n, dl)) + continue; + + return bus; + } + + return 0; +} + +void * +vnet_dev_get_device_info (vlib_main_t *vm, vnet_dev_device_id_t id) +{ + vnet_dev_bus_t *bus; + + bus = vnet_dev_find_device_bus (vm, id); + if (bus == 0) + return 0; + + return bus->ops.get_device_info (vm, id); +} + +vnet_dev_t * +vnet_dev_alloc (vlib_main_t *vm, vnet_dev_device_id_t id, + vnet_dev_driver_t *driver) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_t *dev = 0, **devp = 0; + + dev = vnet_dev_alloc_with_data (sizeof (vnet_dev_t), + driver->registration->device_data_sz); + + pool_get (dm->devices, devp); + devp[0] = dev; + dev->index = devp - dm->devices; + dev->driver_index = driver->index; + dev->ops = driver->registration->ops; + dev->bus_index = driver->bus_index; + clib_memcpy (dev->device_id, id, sizeof (dev->device_id)); + hash_set (dm->device_index_by_id, dev->device_id, dev->index); + + if ((vnet_dev_process_create (vm, dev)) == VNET_DEV_OK) + return dev; + + vnet_dev_free (vm, dev); + return 0; +} + +vnet_dev_rv_t +vnet_dev_init (vlib_main_t *vm, vnet_dev_t *dev) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index); + 
vnet_dev_rv_t rv; + + vnet_dev_validate (vm, dev); + + if ((rv = bus->ops.device_open (vm, dev)) != VNET_DEV_OK) + return rv; + + if (dev->ops.alloc) + { + rv = dev->ops.alloc (vm, dev); + if (rv != VNET_DEV_OK) + { + log_err (dev, "device init failed [rv %d]", rv); + if (dev->ops.deinit) + dev->ops.deinit (vm, dev); + if (dev->ops.free) + dev->ops.free (vm, dev); + return rv; + } + } + + if ((rv = dev->ops.init (vm, dev)) != VNET_DEV_OK) + { + log_err (dev, "device init failed [rv %d]", rv); + if (dev->ops.deinit) + dev->ops.deinit (vm, dev); + if (dev->ops.free) + dev->ops.free (vm, dev); + return rv; + } + + dev->initialized = 1; + dev->not_first_init = 1; + return VNET_DEV_OK; +} + +void +vnet_dev_deinit (vlib_main_t *vm, vnet_dev_t *dev) +{ + ASSERT (dev->initialized == 1); + vnet_dev_bus_t *bus; + + vnet_dev_validate (vm, dev); + + foreach_vnet_dev_port (p, dev) + ASSERT (p->interface_created == 0); + + if (dev->ops.deinit) + dev->ops.deinit (vm, dev); + + bus = vnet_dev_get_bus (dev); + if (bus->ops.device_close) + bus->ops.device_close (vm, dev); + + vnet_dev_process_quit (vm, dev); + + dev->initialized = 0; +} + +void +vnet_dev_free (vlib_main_t *vm, vnet_dev_t *dev) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + + vnet_dev_validate (vm, dev); + + ASSERT (dev->initialized == 0); + + foreach_vnet_dev_port (p, dev) + vnet_dev_port_free (vm, p); + + vec_free (dev->description); + pool_free (dev->ports); + pool_free (dev->periodic_ops); + hash_unset (dm->device_index_by_id, dev->device_id); + vnet_dev_arg_free (&dev->args); + pool_put_index (dm->devices, dev->index); +} + +vnet_dev_rv_t +vnet_dev_reset (vlib_main_t *vm, vnet_dev_t *dev) +{ + vnet_dev_rv_t rv; + + ASSERT (dev->initialized == 1); + vnet_dev_validate (vm, dev); + + if (dev->ops.reset == 0) + return VNET_DEV_ERR_NOT_SUPPORTED; + + if ((rv = dev->ops.reset (vm, dev)) != VNET_DEV_OK) + { + log_err (dev, "device reset failed [rv %d]", rv); + return rv; + } + + return VNET_DEV_OK; +} + +void 
+vnet_dev_detach (vlib_main_t *vm, vnet_dev_t *dev) +{ + foreach_vnet_dev_port (p, dev) + if (p->interface_created) + vnet_dev_port_if_remove (vm, p); + vnet_dev_deinit (vm, dev); + vnet_dev_free (vm, dev); +} + +vnet_dev_rv_t +vnet_dev_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size, u32 align, + void **pp) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index); + vnet_dev_rv_t rv; + + vnet_dev_validate (vm, dev); + + if (!bus->ops.dma_mem_alloc_fn) + return VNET_DEV_ERR_NOT_SUPPORTED; + + rv = bus->ops.dma_mem_alloc_fn (vm, dev, size, align, pp); + if (rv == VNET_DEV_OK) + log_debug (dev, "%u bytes va %p dma-addr 0x%lx numa %u align %u", size, + *pp, vnet_dev_get_dma_addr (vm, dev, *pp), dev->numa_node, + align); + return rv; +} + +void +vnet_dev_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index); + + vnet_dev_validate (vm, dev); + + if (p == 0 || !bus->ops.dma_mem_free_fn) + return; + + return bus->ops.dma_mem_free_fn (vm, dev, p); +} + +clib_error_t * +vnet_dev_admin_up_down_fn (vnet_main_t *vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + vlib_main_t *vm = vlib_get_main (); + vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_rv_t rv = VNET_DEV_OK; + u32 is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + if (is_up && p->started == 0) + rv = vnet_dev_process_call_port_op (vm, p, vnet_dev_port_start); + else if (!is_up && p->started) + rv = vnet_dev_process_call_port_op_no_rv (vm, p, vnet_dev_port_stop); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "failed to change port admin state: %U", + format_vnet_dev_rv, rv); + + return 0; +} + +static void +vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable, + void *cb) +{ + vlib_main_t *vm = 
vlib_get_main (); + vnet_main_t *vnm = vnet_get_main (); + vnet_feature_main_t *fm = &feature_main; + vnet_feature_config_main_t *cm; + vnet_dev_main_t *vdm = &vnet_dev_main; + vnet_dev_port_t *port; + vnet_hw_interface_t *hw; + u32 current_config_index = ~0; + u32 next_index = ~0; + int update_runtime = 0; + + if (arc_index != vdm->eth_port_rx_feature_arc_index) + return; + + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + port = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + + if (port == 0 || port->intf.sw_if_index != sw_if_index) + return; + + if (vnet_have_features (arc_index, sw_if_index)) + { + cm = &fm->feature_config_mains[arc_index]; + current_config_index = + vec_elt (cm->config_index_by_sw_if_index, sw_if_index); + vnet_get_config_data (&cm->config_main, ¤t_config_index, + &next_index, 0); + if (port->intf.feature_arc == 0 || + port->intf.rx_next_index != next_index || + port->intf.current_config_index != current_config_index) + { + port->intf.current_config_index = current_config_index; + port->intf.rx_next_index = next_index; + port->intf.feature_arc_index = arc_index; + port->intf.feature_arc = 1; + update_runtime = 1; + } + } + else + { + if (port->intf.feature_arc) + { + port->intf.current_config_index = 0; + port->intf.rx_next_index = + port->intf.redirect_to_node ? 
+ port->intf.redirect_to_node_next_index : + vnet_dev_default_next_index_by_port_type[port->attr.type]; + port->intf.feature_arc_index = 0; + port->intf.feature_arc = 0; + update_runtime = 1; + } + } + + if (update_runtime) + { + foreach_vnet_dev_port_rx_queue (rxq, port) + vnet_dev_rx_queue_rt_request ( + vm, rxq, + (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1, + .update_feature_arc = 1 }); + log_debug (port->dev, "runtime update requested due to chgange in " + "feature arc configuration"); + } +} + +static int +sort_driver_registrations (void *a0, void *a1) +{ + vnet_dev_driver_registration_t **r0 = a0; + vnet_dev_driver_registration_t **r1 = a1; + + if (r0[0]->priority > r1[0]->priority) + return -1; + else if (r0[0]->priority < r1[0]->priority) + return 1; + + return 0; +} + +static clib_error_t * +vnet_dev_main_init (vlib_main_t *vm) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_driver_registration_t **drv = 0; + u32 temp_space_sz = 0; + + dm->device_index_by_id = hash_create_string (0, sizeof (uword)); + + for (vnet_dev_bus_registration_t *r = dm->bus_registrations; r; + r = r->next_registration) + { + vnet_dev_bus_t *bus; + pool_get_zero (dm->buses, bus); + bus->registration = r; + bus->index = bus - dm->buses; + bus->ops = r->ops; + if (!r->device_data_size || + r->device_data_size > STRUCT_SIZE_OF (vnet_dev_t, bus_data)) + return clib_error_return ( + 0, "bus device data for bus '%s' is too big not specified", r->name); + + log_debug (0, "bus '%s' registered", r->name); + } + + for (vnet_dev_driver_registration_t *r = dm->driver_registrations; r; + r = r->next_registration) + vec_add1 (drv, r); + + vec_sort_with_function (drv, sort_driver_registrations); + + vec_foreach_pointer (r, drv) + { + vnet_dev_driver_t *driver; + vnet_dev_bus_t *bus; + vnet_device_class_t *dev_class; + int bus_index = -1; + + pool_foreach (bus, dm->buses) + { + if (strcmp (bus->registration->name, r->bus) == 0) + { + bus_index = bus->index; + break; + } + } + + if 
(bus_index < 0) + return clib_error_return (0, "unknown bus '%s'", r->bus); + + pool_get_zero (dm->drivers, driver); + driver->registration = r; + driver->index = driver - dm->drivers; + driver->bus_index = bus_index; + driver->ops = r->ops; + dev_class = clib_mem_alloc (sizeof (vnet_device_class_t)); + *dev_class = (vnet_device_class_t){ + .name = r->name, + .format_device_name = format_vnet_dev_interface_name, + .format_device = format_vnet_dev_interface_info, + .admin_up_down_function = vnet_dev_admin_up_down_fn, + .rx_redirect_to_node = vnet_dev_set_interface_next_node, + .clear_counters = vnet_dev_clear_hw_interface_counters, + .mac_addr_change_function = vnet_dev_port_mac_change, + .mac_addr_add_del_function = vnet_dev_add_del_mac_address, + .flow_ops_function = vnet_dev_flow_ops_fn, + .format_flow = format_vnet_dev_flow, + .set_rss_queues_function = vnet_dev_interface_set_rss_queues, + }; + driver->dev_class_index = vnet_register_device_class (vm, dev_class); + log_debug (0, "driver '%s' registered on bus '%s'", r->name, + bus->registration->name); + + if (temp_space_sz < r->runtime_temp_space_sz) + temp_space_sz = r->runtime_temp_space_sz; + } + + if (dm->startup_config) + log_debug (0, "startup config: %v", dm->startup_config); + + vec_free (drv); + + if (temp_space_sz > 0) + { + const u32 align = CLIB_CACHE_LINE_BYTES; + u32 sz = round_pow2 (temp_space_sz, align); + dm->log2_runtime_temp_space_sz = + get_lowest_set_bit_index (max_pow2 (sz)); + sz = 1 << dm->log2_runtime_temp_space_sz; + sz *= vlib_get_n_threads (); + dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align); + clib_memset (dm->runtime_temp_spaces, 0, sz); + log_debug (0, + "requested %u bytes for runtime temp storage, allocated %u " + "per thread (total %u)", + temp_space_sz, 1 << dm->log2_runtime_temp_space_sz, sz); + } + + vnet_feature_register (vnet_dev_feature_update_cb, 0); + + return 0; +} + +VLIB_INIT_FUNCTION (vnet_dev_main_init); + +clib_error_t * +vnet_dev_num_workers_change 
(vlib_main_t *vm) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + + if (dm->log2_runtime_temp_space_sz > 0) + { + const u32 align = CLIB_CACHE_LINE_BYTES; + uword sz = + (1ULL << dm->log2_runtime_temp_space_sz) * vlib_get_n_threads (); + if (dm->runtime_temp_spaces) + clib_mem_free (dm->runtime_temp_spaces); + dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align); + clib_memset (dm->runtime_temp_spaces, 0, sz); + log_debug (0, "runtime temp storage resized to %u", sz); + } + + return 0; +} + +VLIB_NUM_WORKERS_CHANGE_FN (vnet_dev_num_workers_change); diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h new file mode 100644 index 00000000000..bbf2f9dff21 --- /dev/null +++ b/src/vnet/dev/dev.h @@ -0,0 +1,753 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#ifndef _VNET_DEV_H_ +#define _VNET_DEV_H_ + +#include <vppinfra/clib.h> +#include <vppinfra/error_bootstrap.h> +#include <vppinfra/format.h> +#include <vnet/vnet.h> +#include <vnet/dev/types.h> +#include <vnet/dev/args.h> + +#define VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "/" + +#define foreach_vnet_dev_port_type \ + _ (0, UNKNOWN) \ + _ (1, ETHERNET) + +typedef enum +{ +#define _(b, n) VNET_DEV_PORT_TYPE_##n = (1U << (b)), + foreach_vnet_dev_port_type +#undef _ +} vnet_dev_port_type_t; + +#define foreach_vnet_dev_port_caps \ + _ (interrupt_mode) \ + _ (rss) \ + _ (change_max_rx_frame_size) \ + _ (mac_filter) + +#define foreach_vnet_dev_port_rx_offloads _ (ip4_cksum) + +#define foreach_vnet_dev_port_tx_offloads \ + _ (ip4_cksum) \ + _ (tcp_gso) \ + _ (udp_gso) + +typedef union +{ + struct + { +#define _(n) u8 n : 1; + foreach_vnet_dev_port_caps +#undef _ + }; + u8 as_number; +} vnet_dev_port_caps_t; + +typedef union +{ + struct + { +#define _(n) u8 n : 1; + foreach_vnet_dev_port_rx_offloads +#undef _ + }; + u8 as_number; +} vnet_dev_port_rx_offloads_t; + +typedef union +{ + struct + { +#define _(n) u8 n : 1; + foreach_vnet_dev_port_tx_offloads +#undef _ + }; + u8 
as_number; +} vnet_dev_port_tx_offloads_t; + +typedef union +{ + u8 eth_mac[6]; + u8 raw[8]; +} vnet_dev_hw_addr_t; + +typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t; +typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t; + +typedef struct vnet_dev vnet_dev_t; +typedef struct vnet_dev_port vnet_dev_port_t; +typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t; +typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t; +typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t; +typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t; +typedef struct vnet_dev_counter vnet_dev_counter_t; +typedef struct vnet_dev_counter_main vnet_dev_counter_main_t; +typedef struct vnet_dev_port_cfg_change_req vnet_dev_port_cfg_change_req_t; + +typedef vnet_dev_rv_t (vnet_dev_op_t) (vlib_main_t *, vnet_dev_t *); +typedef vnet_dev_rv_t (vnet_dev_port_op_t) (vlib_main_t *, vnet_dev_port_t *); +typedef vnet_dev_rv_t (vnet_dev_port_cfg_change_op_t) ( + vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); +typedef vnet_dev_rv_t (vnet_dev_rx_queue_op_t) (vlib_main_t *, + vnet_dev_rx_queue_t *); +typedef vnet_dev_rv_t (vnet_dev_tx_queue_op_t) (vlib_main_t *, + vnet_dev_tx_queue_t *); +typedef void (vnet_dev_op_no_rv_t) (vlib_main_t *, vnet_dev_t *); +typedef void (vnet_dev_port_op_no_rv_t) (vlib_main_t *, vnet_dev_port_t *); +typedef void (vnet_dev_rx_queue_op_no_rv_t) (vlib_main_t *, + vnet_dev_rx_queue_t *); +typedef void (vnet_dev_tx_queue_op_no_rv_t) (vlib_main_t *, + vnet_dev_tx_queue_t *); + +typedef u16 vnet_dev_queue_id_t; +typedef u16 vnet_dev_bus_index_t; +typedef u16 vnet_dev_driver_index_t; + +typedef struct +{ + vnet_dev_rx_queue_op_t *alloc; + vnet_dev_rx_queue_op_t *start; + vnet_dev_rx_queue_op_no_rv_t *stop; + vnet_dev_rx_queue_op_no_rv_t *free; + format_function_t *format_info; +} vnet_dev_rx_queue_ops_t; + +typedef struct +{ + vnet_dev_tx_queue_op_t *alloc; + vnet_dev_tx_queue_op_t 
*start; + vnet_dev_tx_queue_op_no_rv_t *stop; + vnet_dev_tx_queue_op_no_rv_t *free; + format_function_t *format_info; +} vnet_dev_tx_queue_ops_t; + +typedef struct +{ + u16 data_size; + u16 min_size; + u16 max_size; + u16 default_size; + u8 multiplier; + u8 size_is_power_of_two : 1; +} vnet_dev_queue_config_t; + +#define foreach_vnet_dev_port_cfg_type \ + _ (PROMISC_MODE) \ + _ (MAX_RX_FRAME_SIZE) \ + _ (CHANGE_PRIMARY_HW_ADDR) \ + _ (ADD_SECONDARY_HW_ADDR) \ + _ (REMOVE_SECONDARY_HW_ADDR) \ + _ (RXQ_INTR_MODE_ENABLE) \ + _ (RXQ_INTR_MODE_DISABLE) \ + _ (ADD_RX_FLOW) \ + _ (DEL_RX_FLOW) \ + _ (GET_RX_FLOW_COUNTER) \ + _ (RESET_RX_FLOW_COUNTER) + +typedef enum +{ + VNET_DEV_PORT_CFG_UNKNOWN, +#define _(n) VNET_DEV_PORT_CFG_##n, + foreach_vnet_dev_port_cfg_type +#undef _ +} __clib_packed vnet_dev_port_cfg_type_t; + +typedef struct vnet_dev_port_cfg_change_req +{ + vnet_dev_port_cfg_type_t type; + u8 validated : 1; + u8 all_queues : 1; + + union + { + u8 promisc : 1; + vnet_dev_hw_addr_t addr; + u16 max_rx_frame_size; + vnet_dev_queue_id_t queue_id; + struct + { + u32 flow_index; + uword *private_data; + }; + }; + +} vnet_dev_port_cfg_change_req_t; + +typedef struct +{ + vnet_dev_hw_addr_t hw_addr; + u16 max_rx_queues; + u16 max_tx_queues; + u16 max_supported_rx_frame_size; + vnet_dev_port_type_t type; + vnet_dev_port_caps_t caps; + vnet_dev_port_rx_offloads_t rx_offloads; + vnet_dev_port_tx_offloads_t tx_offloads; +} vnet_dev_port_attr_t; + +typedef enum +{ + VNET_DEV_PERIODIC_OP_TYPE_DEV = 1, + VNET_DEV_PERIODIC_OP_TYPE_PORT = 2, +} __clib_packed vnet_dev_periodic_op_type_t; + +typedef struct +{ + f64 interval; + f64 last_run; + vnet_dev_periodic_op_type_t type; + union + { + vnet_dev_t *dev; + vnet_dev_port_t *port; + void *arg; + }; + union + { + vnet_dev_op_no_rv_t *dev_op; + vnet_dev_port_op_no_rv_t *port_op; + void *op; + }; +} vnet_dev_periodic_op_t; + +typedef struct +{ + struct _vlib_node_fn_registration *registrations; + format_function_t *format_trace; + 
vlib_error_desc_t *error_counters; + u16 n_error_counters; +} vnet_dev_node_t; + +typedef struct +{ + vnet_dev_op_t *alloc; + vnet_dev_op_t *init; + vnet_dev_op_no_rv_t *deinit; + vnet_dev_op_t *reset; + vnet_dev_op_no_rv_t *free; + u8 *(*probe) (vlib_main_t *, vnet_dev_bus_index_t, void *); + format_function_t *format_info; +} vnet_dev_ops_t; + +typedef struct +{ + vnet_dev_port_op_t *alloc; + vnet_dev_port_op_t *init; + vnet_dev_port_cfg_change_op_t *config_change; + vnet_dev_port_cfg_change_op_t *config_change_validate; + vnet_dev_port_op_t *start; + vnet_dev_port_op_no_rv_t *stop; + vnet_dev_port_op_no_rv_t *deinit; + vnet_dev_port_op_no_rv_t *free; + format_function_t *format_status; + format_function_t *format_flow; +} vnet_dev_port_ops_t; + +typedef union +{ + struct + { + u8 update_next_index : 1; + u8 update_feature_arc : 1; + u8 suspend_off : 1; + u8 suspend_on : 1; + }; + u8 as_number; +} vnet_dev_rx_queue_rt_req_t; + +typedef struct vnet_dev_rx_queue +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + vnet_dev_port_t *port; + u16 rx_thread_index; + u16 index; + vnet_dev_counter_main_t *counter_main; + CLIB_CACHE_LINE_ALIGN_MARK (runtime0); + vnet_dev_rx_queue_t *next_on_thread; + u8 interrupt_mode : 1; + u8 enabled : 1; + u8 started : 1; + u8 suspended : 1; + vnet_dev_queue_id_t queue_id; + u16 size; + u16 next_index; + vnet_dev_rx_queue_rt_req_t runtime_request; + CLIB_CACHE_LINE_ALIGN_MARK (runtime1); + vlib_buffer_template_t buffer_template; + CLIB_CACHE_LINE_ALIGN_MARK (driver_data); + u8 data[]; +} vnet_dev_rx_queue_t; + +STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 3 * CLIB_CACHE_LINE_BYTES); + +typedef struct vnet_dev_tx_queue +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + vnet_dev_port_t *port; + clib_bitmap_t *assigned_threads; + u16 index; + vnet_dev_counter_main_t *counter_main; + CLIB_CACHE_LINE_ALIGN_MARK (runtime0); + vnet_dev_queue_id_t queue_id; + u8 started : 1; + u8 enabled : 1; + u8 lock_needed : 1; + u8 lock; + u16 size; + CLIB_ALIGN_MARK 
(private_data, 16); + u8 data[]; +} vnet_dev_tx_queue_t; + +STATIC_ASSERT_SIZEOF (vnet_dev_tx_queue_t, 2 * CLIB_CACHE_LINE_BYTES); + +typedef struct vnet_dev_port +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + vnet_dev_t *dev; + vnet_dev_port_id_t port_id; + vnet_dev_driver_index_t driver_index; + u8 initialized : 1; + u8 started : 1; + u8 link_up : 1; + u8 promisc : 1; + u8 interface_created : 1; + u8 rx_node_assigned : 1; + vnet_dev_counter_main_t *counter_main; + vnet_dev_queue_config_t rx_queue_config; + vnet_dev_queue_config_t tx_queue_config; + vnet_dev_port_attr_t attr; + u32 max_rx_frame_size; + vnet_dev_hw_addr_t primary_hw_addr; + vnet_dev_hw_addr_t *secondary_hw_addr; + u32 index; + u32 speed; + vnet_dev_rx_queue_t **rx_queues; + vnet_dev_tx_queue_t **tx_queues; + vnet_dev_port_ops_t port_ops; + vnet_dev_arg_t *args; + vnet_dev_rx_queue_ops_t rx_queue_ops; + vnet_dev_tx_queue_ops_t tx_queue_ops; + vnet_dev_node_t rx_node; + vnet_dev_node_t tx_node; + + struct + { + vnet_dev_if_name_t name; + u32 dev_instance; + u32 rx_node_index; + u32 current_config_index; + u16 rx_next_index; + u16 redirect_to_node_next_index; + u8 feature_arc_index; + u8 feature_arc : 1; + u8 redirect_to_node : 1; + u8 default_is_intr_mode : 1; + u32 tx_node_index; + u32 hw_if_index; + u32 sw_if_index; + u16 num_rx_queues; + u16 num_tx_queues; + u16 txq_sz; + u16 rxq_sz; + } intf; + + CLIB_CACHE_LINE_ALIGN_MARK (data0); + u8 data[]; +} vnet_dev_port_t; + +typedef struct vnet_dev +{ + vnet_dev_device_id_t device_id; + u16 initialized : 1; + u16 not_first_init : 1; + u16 va_dma : 1; + u16 process_node_quit : 1; + u16 process_node_periodic : 1; + u16 poll_stats : 1; + u16 bus_index; + u8 numa_node; + u16 max_rx_queues; + u16 max_tx_queues; + vnet_dev_driver_index_t driver_index; + u32 index; + u32 process_node_index; + u8 bus_data[32] __clib_aligned (16); + vnet_dev_ops_t ops; + vnet_dev_port_t **ports; + vnet_dev_periodic_op_t *periodic_ops; + u8 *description; + vnet_dev_arg_t *args; + 
u8 __clib_aligned (16) + data[]; +} vnet_dev_t; + +typedef struct +{ + u16 vendor_id, device_id; + char *description; +} vnet_dev_match_t; + +#define VNET_DEV_MATCH(...) \ + (vnet_dev_match_t[]) \ + { \ + __VA_ARGS__, {} \ + } + +typedef struct +{ + vnet_dev_op_t *device_open; + vnet_dev_op_no_rv_t *device_close; + vnet_dev_rv_t (*dma_mem_alloc_fn) (vlib_main_t *, vnet_dev_t *, u32, u32, + void **); + void (*dma_mem_free_fn) (vlib_main_t *, vnet_dev_t *, void *); + void *(*get_device_info) (vlib_main_t *, char *); + void (*free_device_info) (vlib_main_t *, void *); + format_function_t *format_device_info; + format_function_t *format_device_addr; +} vnet_dev_bus_ops_t; + +struct vnet_dev_bus_registration +{ + vnet_dev_bus_registration_t *next_registration; + vnet_dev_driver_name_t name; + u16 device_data_size; + vnet_dev_bus_ops_t ops; +}; + +struct vnet_dev_driver_registration +{ + vnet_dev_driver_registration_t *next_registration; + u8 bus_master_enable : 1; + vnet_dev_driver_name_t name; + vnet_dev_bus_name_t bus; + u16 device_data_sz; + u16 runtime_temp_space_sz; + vnet_dev_match_t *match; + int priority; + vnet_dev_ops_t ops; + vnet_dev_arg_t *args; +}; + +typedef struct +{ + u32 index; + vnet_dev_bus_registration_t *registration; + vnet_dev_bus_ops_t ops; +} vnet_dev_bus_t; + +typedef struct +{ + u32 index; + void *dev_data; + vnet_dev_driver_registration_t *registration; + u32 dev_class_index; + vnet_dev_bus_index_t bus_index; + vnet_dev_ops_t ops; +} vnet_dev_driver_t; + +typedef struct +{ + vnet_dev_bus_t *buses; + vnet_dev_driver_t *drivers; + vnet_dev_t **devices; + vnet_dev_port_t **ports_by_dev_instance; + vnet_dev_bus_registration_t *bus_registrations; + vnet_dev_driver_registration_t *driver_registrations; + void *runtime_temp_spaces; + u32 log2_runtime_temp_space_sz; + u32 *free_process_node_indices; + u32 *free_rx_node_indices; + uword *device_index_by_id; + + u8 *startup_config; + u16 next_rx_queue_thread; + u8 eth_port_rx_feature_arc_index; +} 
vnet_dev_main_t; + +extern vnet_dev_main_t vnet_dev_main; + +typedef struct +{ + struct + { + vnet_dev_port_attr_t attr; + vnet_dev_port_ops_t ops; + vnet_dev_arg_t *args; + u16 data_size; + void *initial_data; + } port; + + vnet_dev_node_t *rx_node; + vnet_dev_node_t *tx_node; + + struct + { + vnet_dev_queue_config_t config; + vnet_dev_rx_queue_ops_t ops; + } rx_queue; + + struct + { + vnet_dev_queue_config_t config; + vnet_dev_tx_queue_ops_t ops; + } tx_queue; +} vnet_dev_port_add_args_t; + +typedef struct +{ + union + { + struct + { + u8 link_speed : 1; + u8 link_state : 1; + u8 link_duplex : 1; + }; + u8 any; + } change; + u8 link_state : 1; + u8 full_duplex : 1; + u32 link_speed; +} vnet_dev_port_state_changes_t; + +/* args.c */ +vnet_dev_rv_t vnet_dev_arg_parse (vlib_main_t *, vnet_dev_t *, + vnet_dev_arg_t *, u8 *); +void vnet_dev_arg_free (vnet_dev_arg_t **); +void vnet_dev_arg_clear_value (vnet_dev_arg_t *); +format_function_t format_vnet_dev_arg_type; +format_function_t format_vnet_dev_arg_value; +format_function_t format_vnet_dev_args; + +/* dev.c */ +vnet_dev_t *vnet_dev_alloc (vlib_main_t *, vnet_dev_device_id_t, + vnet_dev_driver_t *); +void vnet_dev_free (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t vnet_dev_init (vlib_main_t *, vnet_dev_t *); +void vnet_dev_deinit (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t vnet_dev_reset (vlib_main_t *, vnet_dev_t *); +void vnet_dev_detach (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t vnet_dev_port_add (vlib_main_t *, vnet_dev_t *, + vnet_dev_port_id_t, + vnet_dev_port_add_args_t *); +vnet_dev_rv_t vnet_dev_dma_mem_alloc (vlib_main_t *, vnet_dev_t *, u32, u32, + void **); +void vnet_dev_dma_mem_free (vlib_main_t *, vnet_dev_t *, void *); +vnet_dev_bus_t *vnet_dev_find_device_bus (vlib_main_t *, vnet_dev_device_id_t); +void *vnet_dev_get_device_info (vlib_main_t *, vnet_dev_device_id_t); + +/* error.c */ +clib_error_t *vnet_dev_port_err (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_rv_t, char *, ...); +int 
vnet_dev_flow_err (vlib_main_t *, vnet_dev_rv_t); + +/* handlers.c */ +clib_error_t *vnet_dev_port_set_max_frame_size (vnet_main_t *, + vnet_hw_interface_t *, u32); +u32 vnet_dev_port_eth_flag_change (vnet_main_t *, vnet_hw_interface_t *, u32); +clib_error_t *vnet_dev_port_mac_change (vnet_hw_interface_t *, const u8 *, + const u8 *); +clib_error_t *vnet_dev_add_del_mac_address (vnet_hw_interface_t *, const u8 *, + u8); +int vnet_dev_flow_ops_fn (vnet_main_t *, vnet_flow_dev_op_t, u32, u32, + uword *); +clib_error_t *vnet_dev_interface_set_rss_queues (vnet_main_t *, + vnet_hw_interface_t *, + clib_bitmap_t *); +void vnet_dev_clear_hw_interface_counters (u32); +void vnet_dev_set_interface_next_node (vnet_main_t *, u32, u32); + +/* port.c */ +vnet_dev_rv_t vnet_dev_port_start (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t vnet_dev_port_start_all_rx_queues (vlib_main_t *, + vnet_dev_port_t *); +vnet_dev_rv_t vnet_dev_port_start_all_tx_queues (vlib_main_t *, + vnet_dev_port_t *); +void vnet_dev_port_stop (vlib_main_t *, vnet_dev_port_t *); +void vnet_dev_port_deinit (vlib_main_t *, vnet_dev_port_t *); +void vnet_dev_port_free (vlib_main_t *, vnet_dev_port_t *); +void vnet_dev_port_add_counters (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_counter_t *, u16); +void vnet_dev_port_free_counters (vlib_main_t *, vnet_dev_port_t *); +void vnet_dev_port_update_tx_node_runtime (vlib_main_t *, vnet_dev_port_t *); +void vnet_dev_port_state_change (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_state_changes_t); +void vnet_dev_port_clear_counters (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t +vnet_dev_port_cfg_change_req_validate (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); +vnet_dev_rv_t vnet_dev_port_cfg_change (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); +vnet_dev_rv_t vnet_dev_port_if_create (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t vnet_dev_port_if_remove (vlib_main_t *, vnet_dev_port_t *); + +/* 
queue.c */ +vnet_dev_rv_t vnet_dev_rx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16); +vnet_dev_rv_t vnet_dev_tx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16); +void vnet_dev_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *); +void vnet_dev_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *); +void vnet_dev_rx_queue_add_counters (vlib_main_t *, vnet_dev_rx_queue_t *, + vnet_dev_counter_t *, u16); +void vnet_dev_rx_queue_free_counters (vlib_main_t *, vnet_dev_rx_queue_t *); +void vnet_dev_tx_queue_add_counters (vlib_main_t *, vnet_dev_tx_queue_t *, + vnet_dev_counter_t *, u16); +void vnet_dev_tx_queue_free_counters (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_rv_t vnet_dev_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *); +vnet_dev_rv_t vnet_dev_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *); +void vnet_dev_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *); +void vnet_dev_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *); + +/* process.c */ +vnet_dev_rv_t vnet_dev_process_create (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t vnet_dev_process_call_op (vlib_main_t *, vnet_dev_t *, + vnet_dev_op_t *); +vnet_dev_rv_t vnet_dev_process_call_op_no_rv (vlib_main_t *, vnet_dev_t *, + vnet_dev_op_no_rv_t *); +void vnet_dev_process_call_op_no_wait (vlib_main_t *, vnet_dev_t *, + vnet_dev_op_no_rv_t *); +vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_op_t *); +vnet_dev_rv_t vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, + vnet_dev_port_t *, + vnet_dev_port_op_no_rv_t *); +void vnet_dev_process_call_port_op_no_wait (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_op_no_rv_t *); +vnet_dev_rv_t +vnet_dev_process_port_cfg_change_req (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); +void vnet_dev_process_quit (vlib_main_t *, vnet_dev_t *); +void vnet_dev_poll_dev_add (vlib_main_t *, vnet_dev_t *, f64, + vnet_dev_op_no_rv_t *); +void vnet_dev_poll_dev_remove 
(vlib_main_t *, vnet_dev_t *, + vnet_dev_op_no_rv_t *); +void vnet_dev_poll_port_add (vlib_main_t *, vnet_dev_port_t *, f64, + vnet_dev_port_op_no_rv_t *); +void vnet_dev_poll_port_remove (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_op_no_rv_t *); + +typedef struct +{ + u16 thread_index; + u8 completed; + u8 in_order; + vnet_dev_port_t *port; +} vnet_dev_rt_op_t; + +vnet_dev_rv_t vnet_dev_rt_exec_ops (vlib_main_t *, vnet_dev_t *, + vnet_dev_rt_op_t *, u32); + +/* format.c */ +typedef struct +{ + u8 counters : 1; + u8 show_zero_counters : 1; + u8 debug : 1; +} vnet_dev_format_args_t; + +format_function_t format_vnet_dev_addr; +format_function_t format_vnet_dev_flags; +format_function_t format_vnet_dev_hw_addr; +format_function_t format_vnet_dev_info; +format_function_t format_vnet_dev_interface_info; +format_function_t format_vnet_dev_interface_name; +format_function_t format_vnet_dev_log; +format_function_t format_vnet_dev_port_caps; +format_function_t format_vnet_dev_port_flags; +format_function_t format_vnet_dev_port_info; +format_function_t format_vnet_dev_port_rx_offloads; +format_function_t format_vnet_dev_port_tx_offloads; +format_function_t format_vnet_dev_rv; +format_function_t format_vnet_dev_rx_queue_info; +format_function_t format_vnet_dev_tx_queue_info; +format_function_t format_vnet_dev_flow; +unformat_function_t unformat_vnet_dev_flags; +unformat_function_t unformat_vnet_dev_port_flags; + +typedef struct +{ + vnet_dev_rx_queue_t *first_rx_queue; +} vnet_dev_rx_node_runtime_t; + +STATIC_ASSERT (sizeof (vnet_dev_rx_node_runtime_t) <= + VLIB_NODE_RUNTIME_DATA_SIZE, + "must fit into runtime data"); + +#define foreach_vnet_dev_port_rx_next \ + _ (ETH_INPUT, "ethernet-input") \ + _ (DROP, "error-drop") + +typedef enum +{ +#define _(n, s) VNET_DEV_ETH_RX_PORT_NEXT_##n, + foreach_vnet_dev_port_rx_next +#undef _ + VNET_DEV_ETH_RX_PORT_N_NEXTS +} vnet_dev_eth_port_rx_next_t; + +extern u16 vnet_dev_default_next_index_by_port_type[]; +extern 
vlib_node_registration_t port_rx_eth_node; + +typedef vnet_interface_output_runtime_t vnet_dev_tx_node_runtime_t; + +STATIC_ASSERT (sizeof (vnet_dev_tx_node_runtime_t) <= + VLIB_NODE_RUNTIME_DATA_SIZE, + "must fit into runtime data"); + +#define VNET_DEV_REGISTER_BUS(x, ...) \ + __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x; \ + static void __clib_constructor __vnet_dev_bus_registration_fn_##x (void) \ + { \ + vnet_dev_main_t *dm = &vnet_dev_main; \ + __vnet_dev_bus_registration_##x.next_registration = \ + dm->bus_registrations; \ + dm->bus_registrations = &__vnet_dev_bus_registration_##x; \ + } \ + __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x + +#define VNET_DEV_REGISTER_DRIVER(x, ...) \ + __VA_ARGS__ vnet_dev_driver_registration_t \ + __vnet_dev_driver_registration_##x; \ + static void __clib_constructor __vnet_dev_driver_registration_fn_##x (void) \ + { \ + vnet_dev_main_t *dm = &vnet_dev_main; \ + __vnet_dev_driver_registration_##x.next_registration = \ + dm->driver_registrations; \ + dm->driver_registrations = &__vnet_dev_driver_registration_##x; \ + } \ + __VA_ARGS__ vnet_dev_driver_registration_t __vnet_dev_driver_registration_##x + +#define VNET_DEV_NODE_FN(node) \ + uword CLIB_MARCH_SFX (node##_fn) (vlib_main_t *, vlib_node_runtime_t *, \ + vlib_frame_t *); \ + static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \ + node##_fn_registration) = { \ + .function = &CLIB_MARCH_SFX (node##_fn), \ + }; \ + \ + static void __clib_constructor CLIB_MARCH_SFX ( \ + node##_fn_multiarch_register) (void) \ + { \ + extern vnet_dev_node_t node; \ + vlib_node_fn_registration_t *r; \ + r = &CLIB_MARCH_SFX (node##_fn_registration); \ + r->march_variant = CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE); \ + r->next_registration = (node).registrations; \ + (node).registrations = r; \ + } \ + uword CLIB_MARCH_SFX (node##_fn) + +#define foreach_vnet_dev_port(p, d) pool_foreach_pointer (p, d->ports) +#define 
foreach_vnet_dev_port_rx_queue(q, p) \ + pool_foreach_pointer (q, p->rx_queues) +#define foreach_vnet_dev_port_tx_queue(q, p) \ + pool_foreach_pointer (q, p->tx_queues) + +#include <vnet/dev/dev_funcs.h> + +#endif /* _VNET_DEV_H_ */ diff --git a/src/vnet/dev/dev_api.c b/src/vnet/dev/dev_api.c new file mode 100644 index 00000000000..5e9ac502b5d --- /dev/null +++ b/src/vnet/dev/dev_api.c @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/api.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> + +/* define message IDs */ +#include <dev/dev.api_enum.h> +#include <dev/dev.api_types.h> + +static u16 vnet_dev_api_msg_id_base; + +#define REPLY_MSG_ID_BASE (vnet_dev_api_msg_id_base) +#include <vlibapi/api_helper_macros.h> + +#define _(b, n, d) \ + STATIC_ASSERT ((int) VL_API_DEV_FLAG_##n == (int) VNET_DEV_F_##n, ""); +foreach_vnet_dev_flag; +#undef _ + +#define _(b, n, d) \ + STATIC_ASSERT ((int) VL_API_DEV_PORT_FLAG_##n == (int) VNET_DEV_PORT_F_##n, \ + ""); +foreach_vnet_dev_port_flag; +#undef _ + +static void +vl_api_dev_attach_t_handler (vl_api_dev_attach_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_dev_attach_reply_t *rmp; + vnet_dev_api_attach_args_t a = {}; + vnet_dev_rv_t rv; + u8 *error_string = 0; + + STATIC_ASSERT (sizeof (mp->device_id) == sizeof (a.device_id), ""); + STATIC_ASSERT (sizeof (mp->driver_name) == sizeof (a.driver_name), ""); + STATIC_ASSERT (sizeof (mp->flags) == sizeof (a.flags), ""); + + a.flags.n = mp->flags; + strncpy (a.device_id, (char *) mp->device_id, sizeof (a.device_id)); + strncpy (a.driver_name, (char *) mp->driver_name, sizeof (a.driver_name)); + vec_add (a.args, mp->args.buf, mp->args.length); + + rv = vnet_dev_api_attach (vm, &a); + + if (rv != VNET_DEV_OK) + error_string = format (0, "%U", format_vnet_dev_rv, rv); + + vec_free (a.args); + + REPLY_MACRO3_END (VL_API_DEV_ATTACH_REPLY, 
vec_len (error_string), ({ + rmp->retval = rv; + if (error_string) + { + rmp->dev_index = ~0; + vl_api_vec_to_api_string (error_string, + &rmp->error_string); + } + else + rmp->dev_index = a.dev_index; + })); + + vec_free (a.args); + vec_free (error_string); +} + +static void +vl_api_dev_detach_t_handler (vl_api_dev_detach_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_dev_detach_reply_t *rmp; + vnet_dev_api_detach_args_t a = {}; + vnet_dev_rv_t rv; + u8 *error_string = 0; + + a.dev_index = mp->dev_index; + + rv = vnet_dev_api_detach (vm, &a); + + if (rv != VNET_DEV_OK) + error_string = format (0, "%U", format_vnet_dev_rv, rv); + + REPLY_MACRO3_END (VL_API_DEV_DETACH_REPLY, vec_len (error_string), ({ + rmp->retval = rv; + if (error_string) + vl_api_vec_to_api_string (error_string, + &rmp->error_string); + })); + + vec_free (error_string); +} + +static void +vl_api_dev_create_port_if_t_handler (vl_api_dev_create_port_if_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_dev_create_port_if_reply_t *rmp; + vnet_dev_api_create_port_if_args_t a = {}; + vnet_dev_rv_t rv; + u8 *error_string = 0; + + STATIC_ASSERT (sizeof (mp->intf_name) == sizeof (a.intf_name), ""); + STATIC_ASSERT (sizeof (mp->flags) == sizeof (a.flags), ""); + + a.flags.n = mp->flags; +#define _(n) a.n = mp->n; + _ (dev_index) + _ (port_id) + _ (num_rx_queues) + _ (num_tx_queues) + _ (rx_queue_size) + _ (tx_queue_size) +#undef _ + + strncpy (a.intf_name, (char *) mp->intf_name, sizeof (a.intf_name)); + vec_add (a.args, mp->args.buf, mp->args.length); + + rv = vnet_dev_api_create_port_if (vm, &a); + + if (rv != VNET_DEV_OK) + error_string = format (0, "%U", format_vnet_dev_rv, rv); + + vec_free (a.args); + + REPLY_MACRO3_END (VL_API_DEV_CREATE_PORT_IF_REPLY, vec_len (error_string), ({ + rmp->retval = rv; + if (error_string) + { + rmp->sw_if_index = ~0; + vl_api_vec_to_api_string (error_string, + &rmp->error_string); + } + else + rmp->sw_if_index = a.sw_if_index; + })); + + vec_free 
(a.args); + vec_free (error_string); +} + +static void +vl_api_dev_remove_port_if_t_handler (vl_api_dev_remove_port_if_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_dev_remove_port_if_reply_t *rmp; + vnet_dev_api_remove_port_if_args_t a = {}; + vnet_dev_rv_t rv; + u8 *error_string = 0; + + a.sw_if_index = mp->sw_if_index; + + rv = vnet_dev_api_remove_port_if (vm, &a); + + if (rv != VNET_DEV_OK) + error_string = format (0, "%U", format_vnet_dev_rv, rv); + + REPLY_MACRO3_END (VL_API_DEV_REMOVE_PORT_IF_REPLY, vec_len (error_string), ({ + rmp->retval = rv; + if (error_string) + vl_api_vec_to_api_string (error_string, + &rmp->error_string); + })); + + vec_free (error_string); +} + +/* set tup the API message handling tables */ + +#include <dev/dev.api.c> + +static clib_error_t * +vnet_dev_api_hookup (vlib_main_t *vm) +{ + api_main_t *am = vlibapi_get_main (); + + /* ask for a correctly-sized block of API message decode slots */ + vnet_dev_api_msg_id_base = setup_message_id_table (); + + foreach_int (i, VL_API_DEV_ATTACH, VL_API_DEV_DETACH, + VL_API_DEV_CREATE_PORT_IF, VL_API_DEV_REMOVE_PORT_IF) + vl_api_set_msg_thread_safe (am, vnet_dev_api_msg_id_base + i, 1); + + return 0; +} + +VLIB_API_INIT_FUNCTION (vnet_dev_api_hookup); diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h new file mode 100644 index 00000000000..521157abbec --- /dev/null +++ b/src/vnet/dev/dev_funcs.h @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#ifndef _VNET_DEV_FUNCS_H_ +#define _VNET_DEV_FUNCS_H_ + +#include <vppinfra/clib.h> +#include <vnet/dev/dev.h> + +static_always_inline void * +vnet_dev_get_data (vnet_dev_t *dev) +{ + return dev->data; +} + +static_always_inline vnet_dev_t * +vnet_dev_from_data (void *p) +{ + return (void *) ((u8 *) p - STRUCT_OFFSET_OF (vnet_dev_t, data)); +} + +static_always_inline void * +vnet_dev_get_port_data (vnet_dev_port_t *port) +{ + return port->data; +} + +static_always_inline void * +vnet_dev_get_rx_queue_data (vnet_dev_rx_queue_t *rxq) +{ + return rxq->data; +} + +static_always_inline void * +vnet_dev_get_tx_queue_data (vnet_dev_tx_queue_t *txq) +{ + return txq->data; +} + +static_always_inline vnet_dev_t * +vnet_dev_get_by_index (u32 index) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + return pool_elt_at_index (dm->devices, index)[0]; +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_by_index (vnet_dev_t *dev, u32 index) +{ + return pool_elt_at_index (dev->ports, index)[0]; +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_from_dev_instance (u32 dev_instance) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + if (pool_is_free_index (dm->ports_by_dev_instance, dev_instance)) + return 0; + return pool_elt_at_index (dm->ports_by_dev_instance, dev_instance)[0]; +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_from_hw_if_index (u32 hw_if_index) +{ + vnet_hw_interface_t *hw; + vnet_dev_port_t *port; + hw = vnet_get_hw_interface (vnet_get_main (), hw_if_index); + port = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + + if (!port || port->intf.hw_if_index != hw_if_index) + return 0; + + return port; +} + +static_always_inline vnet_dev_t * +vnet_dev_by_index (u32 index) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + if (pool_is_free_index (dm->devices, index)) + return 0; + + return *pool_elt_at_index (dm->devices, index); +} + +static_always_inline vnet_dev_t * +vnet_dev_by_id (char *id) +{ + vnet_dev_main_t *dm = 
&vnet_dev_main; + uword *p = hash_get (dm->device_index_by_id, id); + if (p) + return *pool_elt_at_index (dm->devices, p[0]); + return 0; +} + +static_always_inline uword +vnet_dev_get_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, void *p) +{ + return dev->va_dma ? pointer_to_uword (p) : vlib_physmem_get_pa (vm, p); +} + +static_always_inline void * +vnet_dev_get_bus_data (vnet_dev_t *dev) +{ + return (void *) dev->bus_data; +} + +static_always_inline vnet_dev_bus_t * +vnet_dev_get_bus (vnet_dev_t *dev) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + return pool_elt_at_index (dm->buses, dev->bus_index); +} + +static_always_inline void +vnet_dev_validate (vlib_main_t *vm, vnet_dev_t *dev) +{ + ASSERT (dev->process_node_index == vlib_get_current_process_node_index (vm)); + ASSERT (vm->thread_index == 0); +} + +static_always_inline void +vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port) +{ + ASSERT (port->dev->process_node_index == + vlib_get_current_process_node_index (vm)); + ASSERT (vm->thread_index == 0); +} + +static_always_inline u32 +vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port) +{ + return port->intf.sw_if_index; +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id) +{ + foreach_vnet_dev_port (p, dev) + if (p->port_id == port_id) + return p; + return 0; +} + +static_always_inline vnet_dev_rx_queue_t * +vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port, + vnet_dev_queue_id_t queue_id) +{ + foreach_vnet_dev_port_rx_queue (q, port) + if (q->queue_id == queue_id) + return q; + return 0; +} + +static_always_inline vnet_dev_tx_queue_t * +vnet_dev_port_get_tx_queue_by_id (vnet_dev_port_t *port, + vnet_dev_queue_id_t queue_id) +{ + foreach_vnet_dev_port_tx_queue (q, port) + if (q->queue_id == queue_id) + return q; + return 0; +} + +static_always_inline void * +vnet_dev_alloc_with_data (u32 sz, u32 data_sz) +{ + void *p; + sz += data_sz; + sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES); + 
p = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES); + clib_memset (p, 0, sz); + return p; +} + +static_always_inline void +vnet_dev_tx_queue_lock_if_needed (vnet_dev_tx_queue_t *txq) +{ + u8 free = 0; + + if (!txq->lock_needed) + return; + + while (!__atomic_compare_exchange_n (&txq->lock, &free, 1, 0, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + { + while (__atomic_load_n (&txq->lock, __ATOMIC_RELAXED)) + CLIB_PAUSE (); + free = 0; + } +} + +static_always_inline void +vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq) +{ + if (!txq->lock_needed) + return; + __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE); +} + +static_always_inline u8 +vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->buffer_template.buffer_pool_index; +} + +static_always_inline u32 +vnet_dev_get_rx_queue_buffer_data_size (vlib_main_t *vm, + vnet_dev_rx_queue_t *rxq) +{ + u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq); + return vlib_get_buffer_pool (vm, bpi)->data_size; +} + +static_always_inline void +vnet_dev_rx_queue_rt_request (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, + vnet_dev_rx_queue_rt_req_t req) +{ + __atomic_fetch_or (&rxq->runtime_request.as_number, req.as_number, + __ATOMIC_RELEASE); +} + +static_always_inline vnet_dev_rx_node_runtime_t * +vnet_dev_get_rx_node_runtime (vlib_node_runtime_t *node) +{ + return (void *) node->runtime_data; +} + +static_always_inline vnet_dev_tx_node_runtime_t * +vnet_dev_get_tx_node_runtime (vlib_node_runtime_t *node) +{ + return (void *) node->runtime_data; +} + +static_always_inline vnet_dev_rx_queue_t * +foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node, + vnet_dev_rx_queue_t *rxq) +{ + vnet_dev_port_t *port; + vnet_dev_rx_queue_rt_req_t req; + + if (rxq == 0) + rxq = vnet_dev_get_rx_node_runtime (node)->first_rx_queue; + else + next: + rxq = rxq->next_on_thread; + + if (PREDICT_FALSE (rxq == 0)) + return 0; + + if (PREDICT_TRUE (rxq->runtime_request.as_number == 0)) + return 
rxq; + + req.as_number = + __atomic_exchange_n (&rxq->runtime_request.as_number, 0, __ATOMIC_ACQUIRE); + + port = rxq->port; + if (req.update_next_index) + rxq->next_index = port->intf.rx_next_index; + + if (req.update_feature_arc) + { + vlib_buffer_template_t *bt = &rxq->buffer_template; + bt->current_config_index = port->intf.current_config_index; + vnet_buffer (bt)->feature_arc_index = port->intf.feature_arc_index; + } + + if (req.suspend_on) + { + rxq->suspended = 1; + goto next; + } + + if (req.suspend_off) + rxq->suspended = 0; + + return rxq; +} + +#define foreach_vnet_dev_rx_queue_runtime(q, node) \ + for (vnet_dev_rx_queue_t * (q) = \ + foreach_vnet_dev_rx_queue_runtime_helper (node, 0); \ + q; (q) = foreach_vnet_dev_rx_queue_runtime_helper (node, q)) + +static_always_inline void * +vnet_dev_get_rt_temp_space (vlib_main_t *vm) +{ + return vnet_dev_main.runtime_temp_spaces + + ((uword) vm->thread_index + << vnet_dev_main.log2_runtime_temp_space_sz); +} + +static_always_inline void +vnet_dev_set_hw_addr_eth_mac (vnet_dev_hw_addr_t *addr, const u8 *eth_mac_addr) +{ + vnet_dev_hw_addr_t ha = {}; + clib_memcpy_fast (&ha.eth_mac, eth_mac_addr, sizeof (ha.eth_mac)); + *addr = ha; +} + +static_always_inline vnet_dev_arg_t * +vnet_dev_get_port_arg_by_id (vnet_dev_port_t *port, u32 id) +{ + foreach_vnet_dev_port_args (a, port) + if (a->id == id) + return a; + return 0; +} + +static_always_inline int +vnet_dev_arg_get_bool (vnet_dev_arg_t *arg) +{ + ASSERT (arg->type == VNET_DEV_ARG_TYPE_BOOL); + return arg->val_set ? arg->val.boolean : arg->default_val.boolean; +} + +static_always_inline u32 +vnet_dev_arg_get_uint32 (vnet_dev_arg_t *arg) +{ + ASSERT (arg->type == VNET_DEV_ARG_TYPE_UINT32); + return arg->val_set ? arg->val.uint32 : arg->default_val.uint32; +} + +static_always_inline u8 * +vnet_dev_arg_get_string (vnet_dev_arg_t *arg) +{ + ASSERT (arg->type == VNET_DEV_ARG_TYPE_STRING); + return arg->val_set ? 
arg->val.string : arg->default_val.string; +} + +#endif /* _VNET_DEV_FUNCS_H_ */ diff --git a/src/vnet/dev/error.c b/src/vnet/dev/error.c new file mode 100644 index 00000000000..4e057010af0 --- /dev/null +++ b/src/vnet/dev/error.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/flow/flow.h> + +clib_error_t * +vnet_dev_port_err (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_rv_t rv, + char *fmt, ...) +{ + clib_error_t *err; + va_list va; + u8 *s; + + if (rv == VNET_DEV_OK) + return 0; + + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + err = clib_error_return (0, "%s port %u: %U (%v)", port->dev->device_id, + port->port_id, format_vnet_dev_rv, rv, s); + vec_free (s); + return err; +} + +int +vnet_dev_flow_err (vlib_main_t *vm, vnet_dev_rv_t rv) +{ + if (rv == VNET_DEV_OK) + return 0; + + switch (rv) + { + /* clang-format off */ +#define _(n, e, s) \ + case VNET_DEV_ERR_##e: \ + return VNET_FLOW_ERROR_##e; + foreach_flow_error; +#undef _ + /* clang-format on */ + default: + ASSERT (0); + } + + ASSERT (0); + + return 0; +} diff --git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h new file mode 100644 index 00000000000..430a6aef282 --- /dev/null +++ b/src/vnet/dev/errors.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#ifndef _VNET_DEV_ERRORS_H_ +#define _VNET_DEV_ERRORS_H_ + +#define foreach_vnet_dev_rv_type \ + _ (ALREADY_EXISTS, "already exists") \ + _ (ALREADY_IN_USE, "already in use") \ + _ (BUFFER_ALLOC_FAIL, "packet buffer allocation failure") \ + _ (BUG, "bug") \ + _ (BUS, "bus error") \ + _ (DEVICE_NO_REPLY, "no reply from device") \ + _ (DMA_MEM_ALLOC_FAIL, "DMA memory allocation error") \ + _ (DRIVER_NOT_AVAILABLE, "driver not available") \ + _ (INVALID_ARG, "invalid argument") \ + _ (INVALID_BUS, "invalid bus") \ + _ (INVALID_DATA, "invalid data") \ + _ (INVALID_DEVICE_ID, "invalid device id") \ + _ (INVALID_NUM_RX_QUEUES, "invalid number of rx queues") \ + _ (INVALID_NUM_TX_QUEUES, "invalid number of tx queues") \ + _ (INVALID_PORT_ID, "invalid port id") \ + _ (INVALID_RX_QUEUE_SIZE, "invalid rx queue size") \ + _ (INVALID_TX_QUEUE_SIZE, "invalid tx queue size") \ + _ (INVALID_VALUE, "invalid value") \ + _ (INTERNAL, "internal error") \ + _ (NOT_FOUND, "not found") \ + _ (NOT_READY, "not ready") \ + _ (NOT_SUPPORTED, "not supported") \ + _ (NO_CHANGE, "no change") \ + _ (NO_AVAIL_QUEUES, "no queues available") \ + _ (NO_SUCH_ENTRY, "no such enty") \ + _ (PORT_STARTED, "port started") \ + _ (PROCESS_REPLY, "dev process reply error") \ + _ (RESOURCE_NOT_AVAILABLE, "resource not available") \ + _ (TIMEOUT, "timeout") \ + _ (UNKNOWN_DEVICE, "unknown device") \ + _ (UNKNOWN_INTERFACE, "unknown interface") \ + _ (UNSUPPORTED_CONFIG, "unsupported config") \ + _ (UNSUPPORTED_DEVICE, "unsupported device") \ + _ (UNSUPPORTED_DEVICE_VER, "unsupported device version") \ + _ (ALREADY_DONE, "already done") \ + _ (NO_SUCH_INTERFACE, "no such interface") + +#endif /* _VNET_DEV_ERRORS_H_ */ diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c new file mode 100644 index 00000000000..ed83a0eba95 --- /dev/null +++ b/src/vnet/dev/format.c @@ -0,0 +1,507 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/ethernet/ethernet.h> + +u8 * +format_vnet_dev_rv (u8 *s, va_list *args) +{ + vnet_dev_rv_t rv = va_arg (*args, vnet_dev_rv_t); + u32 index = -rv; + + char *strings[] = { [0] = "OK", +#define _(n, d) [-VNET_DEV_ERR_##n] = d, + foreach_vnet_dev_rv_type +#undef _ + }; + + if (index >= ARRAY_LEN (strings)) + return format (s, "unknown return value (%d)", rv); + return format (s, "%s", strings[index]); +} + +u8 * +format_vnet_dev_addr (u8 *s, va_list *args) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + vnet_dev_bus_t *bus; + + if (dev == 0) + return 0; + + bus = pool_elt_at_index (dm->buses, dev->bus_index); + s = format (s, "%U", bus->ops.format_device_addr, dev); + + return s; +} + +u8 * +format_vnet_dev_interface_name (u8 *s, va_list *args) +{ + u32 i = va_arg (*args, u32); + vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i); + + return format (s, "%s", port->intf.name); +} + +u8 * +format_vnet_dev_info (u8 *s, va_list *args) +{ + vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *); + vlib_main_t *vm = vlib_get_main (); + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + vnet_dev_driver_t *dr = pool_elt_at_index (dm->drivers, dev->driver_index); + vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index); + + u32 indent = format_get_indent (s); + s = format (s, "Driver is '%s', bus is '%s'", dr->registration->name, + bus->registration->name); + + if (dev->description) + s = format (s, ", description is '%v'", dev->description); + + if (bus->ops.format_device_info) + s = format (s, "\n%U%U", format_white_space, indent, + bus->ops.format_device_info, a, dev); + + s = format (s, "\n%UAssigned process node is '%U'", format_white_space, + indent, format_vlib_node_name, vm, dev->process_node_index); + if (dev->args) + s = format (s, 
"\n%UDevice Specific Arguments:\n%U%U", format_white_space, + indent, format_white_space, indent + 2, format_vnet_dev_args, + dev->args); + if (dev->ops.format_info) + s = + format (s, "\n%UDevice Specific Info:\n%U%U", format_white_space, indent, + format_white_space, indent + 2, dev->ops.format_info, a, dev); + return s; +} + +u8 * +format_vnet_dev_hw_addr (u8 *s, va_list *args) +{ + vnet_dev_hw_addr_t *addr = va_arg (*args, vnet_dev_hw_addr_t *); + return format (s, "%U", format_ethernet_address, addr->eth_mac); +} + +u8 * +format_vnet_dev_port_info (u8 *s, va_list *args) +{ + vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *); + vlib_main_t *vm = vlib_get_main (); + vnet_main_t *vnm = vnet_get_main (); + vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *); + + u32 indent = format_get_indent (s); + + s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr, + &port->attr.hw_addr); + s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)", + pool_elts (port->rx_queues), port->attr.max_rx_queues, + pool_elts (port->tx_queues), port->attr.max_tx_queues); + if (pool_elts (port->secondary_hw_addr)) + { + u32 i = 0; + vnet_dev_hw_addr_t *a; + s = format (s, "\n%USecondary Hardware Address%s:", format_white_space, + indent, + pool_elts (port->secondary_hw_addr) > 1 ? 
"es are" : " is"); + pool_foreach (a, port->secondary_hw_addr) + { + if (i++ % 6 == 0) + s = format (s, "\n%U", format_white_space, indent + 1); + s = format (s, " %U", format_vnet_dev_hw_addr, a); + } + } + s = format (s, "\n%UMax RX frame size is %u (max supported %u)", + format_white_space, indent, port->max_rx_frame_size, + port->attr.max_supported_rx_frame_size); + s = format (s, "\n%UCaps: %U", format_white_space, indent, + format_vnet_dev_port_caps, &port->attr.caps); + s = format (s, "\n%URX Offloads: %U", format_white_space, indent, + format_vnet_dev_port_rx_offloads, &port->attr.rx_offloads); + s = format (s, "\n%UTX Offloads: %U", format_white_space, indent, + format_vnet_dev_port_tx_offloads, &port->attr.tx_offloads); + if (port->port_ops.format_status) + s = format (s, "\n%UDevice Specific Port Status:\n%U%U", + format_white_space, indent, format_white_space, indent + 2, + port->port_ops.format_status, a, port); + if (port->args) + s = format (s, "\n%UDevice Specific Port Arguments:\n%U%U", + format_white_space, indent, format_white_space, indent + 2, + format_vnet_dev_args, port->args); + + s = format (s, "\n%UInterface ", format_white_space, indent); + if (port->interface_created) + { + s = format (s, "assigned, interface name is '%U', RX node is '%U'", + format_vnet_sw_if_index_name, vnm, port->intf.sw_if_index, + format_vlib_node_name, vm, port->intf.rx_node_index); + } + else + s = format (s, "not assigned"); + return s; +} + +u8 * +format_vnet_dev_rx_queue_info (u8 *s, va_list *args) +{ + vnet_dev_format_args_t __clib_unused *a = + va_arg (*args, vnet_dev_format_args_t *); + vnet_dev_rx_queue_t *rxq = va_arg (*args, vnet_dev_rx_queue_t *); + u32 indent = format_get_indent (s); + + s = format (s, "Size is %u, buffer pool index is %u", rxq->size, + vnet_dev_get_rx_queue_buffer_pool_index (rxq)); + s = format (s, "\n%UPolling thread is %u, %sabled, %sstarted, %s mode", + format_white_space, indent, rxq->rx_thread_index, + rxq->enabled ? 
"en" : "dis", rxq->started ? "" : "not-", + rxq->interrupt_mode ? "interrupt" : "polling"); + if (rxq->port->rx_queue_ops.format_info) + s = format (s, "\n%U%U", format_white_space, indent, + rxq->port->rx_queue_ops.format_info, a, rxq); + + return s; +} + +u8 * +format_vnet_dev_tx_queue_info (u8 *s, va_list *args) +{ + vnet_dev_format_args_t __clib_unused *a = + va_arg (*args, vnet_dev_format_args_t *); + vnet_dev_tx_queue_t *txq = va_arg (*args, vnet_dev_tx_queue_t *); + u32 indent = format_get_indent (s); + u32 n; + + s = format (s, "Size is %u", txq->size); + s = format (s, "\n%U", format_white_space, indent); + n = clib_bitmap_count_set_bits (txq->assigned_threads); + if (n == 0) + s = format (s, "Not used by any thread"); + else + s = format (s, "Used by thread%s %U", n > 1 ? "s" : "", format_bitmap_list, + txq->assigned_threads); + if (txq->port->tx_queue_ops.format_info) + s = format (s, "\n%U%U", format_white_space, indent, + txq->port->tx_queue_ops.format_info, a, txq); + + return s; +} + +u8 * +format_vnet_dev_interface_info (u8 *s, va_list *args) +{ + u32 i = va_arg (*args, u32); + vnet_dev_format_args_t fa = {}, *a = &fa; + vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i); + vnet_dev_t *dev = port->dev; + u32 indent = format_get_indent (s); + + s = format (s, "Device:"); + s = format (s, "\n%U%U", format_white_space, indent + 2, + format_vnet_dev_info, a, dev); + + s = format (s, "\n%UPort %u:", format_white_space, indent, port->port_id); + s = format (s, "\n%U%U", format_white_space, indent + 2, + format_vnet_dev_port_info, a, port); + + foreach_vnet_dev_port_rx_queue (q, port) + { + s = format (s, "\n%URX queue %u:", format_white_space, indent + 2, + q->queue_id); + s = format (s, "\n%U%U", format_white_space, indent + 4, + format_vnet_dev_rx_queue_info, a, q); + } + + foreach_vnet_dev_port_tx_queue (q, port) + { + s = format (s, "\n%UTX queue %u:", format_white_space, indent + 2, + q->queue_id); + s = format (s, "\n%U%U", 
format_white_space, indent + 4, + format_vnet_dev_tx_queue_info, a, q); + } + return s; +} + +static u64 +unformat_flags (unformat_input_t *input, char *names[], u64 val[], u32 n_flags) +{ + u64 rv = 0; + uword c = 0; + u8 *s = 0; + + while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT) + { + switch (c) + { + case 'a' ... 'z': + c -= 'a' - 'A'; + case '0' ... '9': + case 'A' ... 'Z': + vec_add1 (s, c); + break; + case '-': + vec_add1 (s, '_'); + break; + case ',': + vec_add1 (s, 0); + break; + default: + goto end_of_string; + } + } +end_of_string: + + if (s == 0) + return 0; + + vec_add1 (s, 0); + + for (u8 *p = s, *end = vec_end (s); p < end; p += strlen ((char *) p) + 1) + { + for (c = 0; c < n_flags; c++) + if (strcmp (names[c], (char *) p) == 0) + { + rv |= val[c]; + break; + } + if (c == n_flags) + goto done; + } + +done: + vec_free (s); + return rv; +} + +uword +unformat_vnet_dev_flags (unformat_input_t *input, va_list *args) +{ + vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *); + u64 val; + + char *names[] = { +#define _(b, n, d) #n, + foreach_vnet_dev_flag +#undef _ + }; + u64 vals[] = { +#define _(b, n, d) 1ull << (b) + foreach_vnet_dev_flag +#undef _ + }; + + val = unformat_flags (input, names, vals, ARRAY_LEN (names)); + + if (!val) + return 0; + + fp->n = val; + return 1; +} + +uword +unformat_vnet_dev_port_flags (unformat_input_t *input, va_list *args) +{ + vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *); + u64 val; + + char *flag_names[] = { +#define _(b, n, d) #n, + foreach_vnet_dev_port_flag +#undef _ + }; + u64 flag_values[] = { +#define _(b, n, d) 1ull << (b) + foreach_vnet_dev_port_flag +#undef _ + }; + + val = + unformat_flags (input, flag_names, flag_values, ARRAY_LEN (flag_names)); + + if (!val) + return 0; + + fp->n = val; + return 1; +} + +static u8 * +format_flags (u8 *s, u64 val, char *flag_names[], u64 flag_values[], + u32 n_flags) +{ + u32 n = 0; + for (int i = 0; i < n_flags; i++) + { + if 
((val & flag_values[i]) == 0) + continue; + + if (n++) + vec_add1 (s, ' '); + + for (char *c = flag_names[i]; c[0] != 0; c++) + { + switch (c[0]) + { + case 'A' ... 'Z': + vec_add1 (s, c[0] + 'a' - 'A'); + break; + case '_': + vec_add1 (s, '-'); + break; + default: + vec_add1 (s, c[0]); + } + } + } + + return s; +} + +u8 * +format_vnet_dev_flags (u8 *s, va_list *args) +{ + vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *); + char *flag_names[] = { +#define _(b, n, d) #n, + foreach_vnet_dev_flag +#undef _ + }; + u64 flag_values[] = { +#define _(b, n, d) 1ull << (b) + foreach_vnet_dev_flag +#undef _ + }; + + return format_flags (s, fp->n, flag_names, flag_values, + ARRAY_LEN (flag_names)); +} + +u8 * +format_vnet_dev_port_flags (u8 *s, va_list *args) +{ + vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *); + char *flag_names[] = { +#define _(b, n, d) #n, + foreach_vnet_dev_port_flag +#undef _ + }; + u64 flag_values[] = { +#define _(b, n, d) 1ull << (b) + foreach_vnet_dev_port_flag +#undef _ + }; + + return format_flags (s, fp->n, flag_names, flag_values, + ARRAY_LEN (flag_names)); +} + +u8 * +format_vnet_dev_log (u8 *s, va_list *args) +{ + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + char *func = va_arg (*args, char *); + + if (dev) + s = format (s, "%U", format_vnet_dev_addr, dev); + if (dev && func) + vec_add1 (s, ' '); + if (func) + s = format (s, "%s", func); + vec_add1 (s, ':'); + vec_add1 (s, ' '); + return s; +} + +u8 * +format_vnet_dev_port_caps (u8 *s, va_list *args) +{ + vnet_dev_port_caps_t *c = va_arg (*args, vnet_dev_port_caps_t *); + u32 line = 0; + + if (c->as_number == 0) + return s; + +#define _(n) \ + if (c->n) \ + { \ + if (line++) \ + vec_add1 (s, ' '); \ + for (char *str = #n; *str; str++) \ + vec_add1 (s, *str == '_' ? 
'-' : *str); \ + } + foreach_vnet_dev_port_caps; +#undef _ + + return s; +} + +u8 * +format_vnet_dev_port_rx_offloads (u8 *s, va_list *args) +{ + vnet_dev_port_rx_offloads_t *c = + va_arg (*args, vnet_dev_port_rx_offloads_t *); + u32 line = 0; + + if (c->as_number == 0) + return s; + +#define _(n) \ + if (c->n) \ + { \ + if (line++) \ + vec_add1 (s, ' '); \ + for (char *str = #n; *str; str++) \ + vec_add1 (s, *str == '_' ? '-' : *str); \ + } + foreach_vnet_dev_port_rx_offloads; +#undef _ + + return s; +} + +u8 * +format_vnet_dev_port_tx_offloads (u8 *s, va_list *args) +{ + vnet_dev_port_tx_offloads_t *c = + va_arg (*args, vnet_dev_port_tx_offloads_t *); + u32 line = 0; + + if (c->as_number == 0) + return s; + +#define _(n) \ + if (c->n) \ + { \ + if (line++) \ + vec_add1 (s, ' '); \ + for (char *str = #n; *str; str++) \ + vec_add1 (s, *str == '_' ? '-' : *str); \ + } + foreach_vnet_dev_port_tx_offloads; +#undef _ + + return s; +} + +u8 * +format_vnet_dev_flow (u8 *s, va_list *args) +{ + u32 dev_instance = va_arg (*args, u32); + u32 flow_index = va_arg (*args, u32); + uword private_data = va_arg (*args, uword); + vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (dev_instance); + + if (port->port_ops.format_flow) + s = format (s, "%U", port->port_ops.format_flow, port, flow_index, + private_data); + + return s; +} diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c new file mode 100644 index 00000000000..2a55affe3e3 --- /dev/null +++ b/src/vnet/dev/handlers.c @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/log.h> +#include <vnet/flow/flow.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", + .subclass_name = "handler", +}; + +clib_error_t * +vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw, + u32 frame_size) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + vnet_dev_rv_t rv; + + vnet_dev_port_cfg_change_req_t req = { + .type = VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE, + .max_rx_frame_size = frame_size, + }; + + log_debug (p->dev, "size %u", frame_size); + + rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req); + if (rv == VNET_DEV_ERR_NO_CHANGE) + return 0; + + if (rv != VNET_DEV_OK) + return vnet_dev_port_err (vm, p, rv, + "new max frame size is not valid for port"); + + if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK) + return vnet_dev_port_err (vm, p, rv, + "device failed to change max frame size"); + + return 0; +} + +u32 +vnet_dev_port_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw, + u32 flags) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + vnet_dev_rv_t rv; + + vnet_dev_port_cfg_change_req_t req = { + .type = VNET_DEV_PORT_CFG_PROMISC_MODE, + }; + + switch (flags) + { + case ETHERNET_INTERFACE_FLAG_DEFAULT_L3: + log_debug (p->dev, "promisc off"); + break; + case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL: + log_debug (p->dev, "promisc on"); + req.promisc = 1; + break; + default: + return ~0; + } + + rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req); + if (rv == VNET_DEV_ERR_NO_CHANGE) + return 0; + + if (rv != VNET_DEV_OK) + return ~0; + + rv = vnet_dev_process_port_cfg_change_req (vm, p, &req); + if (rv == VNET_DEV_OK || rv == VNET_DEV_ERR_NO_CHANGE) + return 0; + return ~0; +} + 
+clib_error_t * +vnet_dev_port_mac_change (vnet_hw_interface_t *hi, const u8 *old, + const u8 *new) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_rv_t rv; + + vnet_dev_port_cfg_change_req_t req = { + .type = VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR, + }; + + vnet_dev_set_hw_addr_eth_mac (&req.addr, new); + + log_debug (p->dev, "new mac %U", format_vnet_dev_hw_addr, &req.addr); + + rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req); + if (rv == VNET_DEV_ERR_NO_CHANGE) + return 0; + + if (rv != VNET_DEV_OK) + return vnet_dev_port_err (vm, p, rv, "hw address is not valid for port"); + + if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK) + return vnet_dev_port_err (vm, p, rv, "device failed to change hw address"); + + return 0; +} + +clib_error_t * +vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address, + u8 is_add) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_rv_t rv; + + vnet_dev_port_cfg_change_req_t req = { + .type = is_add ? 
VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR : + VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR, + }; + + vnet_dev_set_hw_addr_eth_mac (&req.addr, address); + + log_debug (p->dev, "received (addr %U is_add %u", format_vnet_dev_hw_addr, + &req.addr, is_add); + + rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req); + if (rv != VNET_DEV_OK) + return vnet_dev_port_err (vm, p, rv, + "provided secondary hw addresses cannot " + "be added/removed"); + + if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK) + return vnet_dev_port_err ( + vm, p, rv, "device failed to add/remove secondary hw address"); + + return 0; +} + +int +vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op, + u32 dev_instance, u32 flow_index, uword *private_data) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance); + vnet_dev_port_cfg_change_req_t req; + vnet_dev_rv_t rv; + + switch (op) + { + case VNET_FLOW_DEV_OP_ADD_FLOW: + req.type = VNET_DEV_PORT_CFG_ADD_RX_FLOW; + break; + case VNET_FLOW_DEV_OP_DEL_FLOW: + req.type = VNET_DEV_PORT_CFG_DEL_RX_FLOW; + break; + case VNET_FLOW_DEV_OP_GET_COUNTER: + req.type = VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER; + break; + case VNET_FLOW_DEV_OP_RESET_COUNTER: + req.type = VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER; + break; + default: + log_warn (p->dev, "unsupported request for flow_ops received"); + return VNET_FLOW_ERROR_NOT_SUPPORTED; + } + + req.flow_index = flow_index; + req.private_data = private_data; + + rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req); + if (rv != VNET_DEV_OK) + { + log_err (p->dev, "validation failed for flow_ops"); + return VNET_FLOW_ERROR_NOT_SUPPORTED; + } + + if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK) + { + log_err (p->dev, "request for flow_ops failed"); + return vnet_dev_flow_err (vm, rv); + } + + return 0; +} + +clib_error_t * +vnet_dev_interface_set_rss_queues (vnet_main_t *vnm, vnet_hw_interface_t 
*hi, + clib_bitmap_t *bitmap) +{ + vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + log_warn (p->dev, "unsupported request for flow_ops received"); + return vnet_error (VNET_ERR_UNSUPPORTED, "not implemented"); +} + +void +vnet_dev_clear_hw_interface_counters (u32 instance) +{ + vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (instance); + vlib_main_t *vm = vlib_get_main (); + + vnet_dev_process_call_port_op_no_rv (vm, port, vnet_dev_port_clear_counters); +} + +void +vnet_dev_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index, + u32 node_index) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hw->dev_instance); + int runtime_update = 0; + + if (node_index == ~0) + { + port->intf.redirect_to_node_next_index = 0; + if (port->intf.feature_arc == 0) + { + port->intf.rx_next_index = + vnet_dev_default_next_index_by_port_type[port->attr.type]; + runtime_update = 1; + } + port->intf.redirect_to_node = 0; + } + else + { + u16 next_index = vlib_node_add_next (vlib_get_main (), + port_rx_eth_node.index, node_index); + port->intf.redirect_to_node_next_index = next_index; + if (port->intf.feature_arc == 0) + { + port->intf.rx_next_index = next_index; + runtime_update = 1; + } + port->intf.redirect_to_node = 1; + } + port->intf.rx_next_index = + node_index == ~0 ? 
+ vnet_dev_default_next_index_by_port_type[port->attr.type] : + node_index; + + if (runtime_update) + { + foreach_vnet_dev_port_rx_queue (rxq, port) + vnet_dev_rx_queue_rt_request ( + vm, rxq, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 }); + log_debug (port->dev, "runtime update requested due to chgange in " + "reditect-to-next configuration"); + } +} diff --git a/src/vnet/dev/log.h b/src/vnet/dev/log.h new file mode 100644 index 00000000000..5ca7b6620e9 --- /dev/null +++ b/src/vnet/dev/log.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#ifndef _VNET_DEV_LOG_H_ +#define _VNET_DEV_LOG_H_ + +#define log_debug(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_DEBUG, dev_log.class, "%U" f, format_vnet_dev_log, \ + dev, clib_string_skip_prefix (__func__, "vnet_dev_"), \ + ##__VA_ARGS__) +#define log_notice(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_NOTICE, dev_log.class, "%U" f, \ + format_vnet_dev_log, dev, 0, ##__VA_ARGS__) +#define log_warn(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_WARNING, dev_log.class, "%U" f, \ + format_vnet_dev_log, dev, 0, ##__VA_ARGS__) +#define log_err(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_ERR, dev_log.class, "%U" f, format_vnet_dev_log, \ + dev, 0, ##__VA_ARGS__) + +#endif /* _VNET_DEV_LOG_H_ */ diff --git a/src/vnet/dev/mgmt.h b/src/vnet/dev/mgmt.h new file mode 100644 index 00000000000..f13f4075255 --- /dev/null +++ b/src/vnet/dev/mgmt.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#ifndef _VNET_DEV_MGMT_H_ +#define _VNET_DEV_MGMT_H_ + +#include <vppinfra/clib.h> + +#endif /* _VNET_DEV_MGMT_H_ */ diff --git a/src/vnet/dev/pci.c b/src/vnet/dev/pci.c new file mode 100644 index 00000000000..3cc0cba5003 --- /dev/null +++ b/src/vnet/dev/pci.c @@ -0,0 +1,458 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/pci.h> +#include <vnet/dev/log.h> +#include <vlib/unix/unix.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", + .subclass_name = "pci", +}; + +static int +vnet_dev_bus_pci_device_id_to_pci_addr (vlib_pci_addr_t *addr, char *str) +{ + unformat_input_t input; + uword rv; + unformat_init_string (&input, str, strlen (str)); + rv = unformat (&input, "pci" VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "%U", + unformat_vlib_pci_addr, addr); + unformat_free (&input); + return rv; +} + +static void * +vnet_dev_bus_pci_get_device_info (vlib_main_t *vm, char *device_id) +{ + vnet_dev_bus_pci_device_info_t *info; + vlib_pci_addr_t addr = {}; + clib_error_t *err = 0; + vlib_pci_device_info_t *di = 0; + + vlib_log_debug (dev_log.class, "device %s", device_id); + + if (vnet_dev_bus_pci_device_id_to_pci_addr (&addr, device_id) == 0) + return 0; + + di = vlib_pci_get_device_info (vm, &addr, &err); + if (err) + { + vlib_log_err (dev_log.class, "get_device_info: %U", format_clib_error, + err); + clib_error_free (err); + return 0; + } + + info = clib_mem_alloc (sizeof (vnet_dev_bus_pci_device_info_t)); + info->addr = addr; + info->vendor_id = di->vendor_id; + info->device_id = di->device_id; + info->revision = di->revision; + + vlib_pci_free_device_info (di); + return info; +} + +static void +vnet_dev_bus_pci_free_device_info (vlib_main_t *vm, void *dev_info) +{ + clib_mem_free (dev_info); +} + +static vnet_dev_rv_t +vnet_dev_bus_pci_open (vlib_main_t *vm, vnet_dev_t *dev) +{ + clib_error_t *err = 0; + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + + if (vnet_dev_bus_pci_device_id_to_pci_addr (&pdd->addr, dev->device_id) == 0) + return VNET_DEV_ERR_INVALID_DEVICE_ID; + + if ((err = vlib_pci_device_open (vm, &pdd->addr, 0, &pdd->handle))) + { + log_err (dev, "device_open: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + 
dev->numa_node = vlib_pci_get_numa_node (vm, pdd->handle); + + if (vlib_pci_supports_virtual_addr_dma (vm, pdd->handle)) + { + dev->va_dma = 1; + log_debug (dev, "device supports VA DMA"); + } + + vlib_pci_set_private_data (vm, pdd->handle, (uword) dev); + + pdd->n_msix_int = vlib_pci_get_num_msix_interrupts (vm, pdd->handle); + if (pdd->n_msix_int) + { + u32 sz = sizeof (pdd->msix_handlers[0]) * pdd->n_msix_int; + sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES); + pdd->msix_handlers = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES); + clib_memset (pdd->msix_handlers, 0, sz); + } + + return VNET_DEV_OK; +} + +static void +vnet_dev_bus_pci_close (vlib_main_t *vm, vnet_dev_t *dev) +{ + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + + if (pdd->intx_handler) + vnet_dev_pci_intx_remove_handler (vm, dev); + + if (pdd->msix_handlers) + { + for (u16 i = 0; i < pdd->n_msix_int; i++) + if (pdd->msix_handlers[i]) + vnet_dev_pci_msix_remove_handler (vm, dev, i, 1); + clib_mem_free (pdd->msix_handlers); + pdd->msix_handlers = 0; + } + + if (pdd->pci_handle_valid) + vlib_pci_device_close (vm, pdd->handle); +} + +static vnet_dev_rv_t +vnet_dev_bus_pci_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size, + u32 align, void **pp) +{ + clib_error_t *err; + void *p; + + align = align ? 
align : CLIB_CACHE_LINE_BYTES; + size = round_pow2 (size, align); + + p = vlib_physmem_alloc_aligned_on_numa (vm, size, align, dev->numa_node); + + if (p == 0) + { + err = vlib_physmem_last_error (vm); + log_err (dev, "dev_dma_mem_alloc: physmem_alloc_aligned error %U", + format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL; + } + + if ((err = vlib_pci_map_dma (vm, vnet_dev_get_pci_handle (dev), p))) + { + log_err (dev, "dev_dma_mem_alloc: pci_map_dma: %U", format_clib_error, + err); + clib_error_free (err); + return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL; + } + + clib_memset (p, 0, size); + pp[0] = p; + return VNET_DEV_OK; +} + +static void +vnet_dev_bus_pci_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p) +{ + if (p) + vlib_physmem_free (vm, p); +} + +vnet_dev_rv_t +vnet_dev_pci_read_config_header (vlib_main_t *vm, vnet_dev_t *dev, + vlib_pci_config_hdr_t *hdr) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + clib_error_t *err; + + err = vlib_pci_read_write_config (vm, h, VLIB_READ, 0, hdr, sizeof (*hdr)); + if (err) + { + log_err (dev, "pci_read_config_header: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_pci_map_region (vlib_main_t *vm, vnet_dev_t *dev, u8 region, + void **pp) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + clib_error_t *err; + + if ((err = vlib_pci_map_region (vm, h, region, pp))) + { + log_err (dev, "pci_map_region: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_pci_function_level_reset (vlib_main_t *vm, vnet_dev_t *dev) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + clib_error_t *err; + + if ((err = vlib_pci_function_level_reset (vm, h))) + { + log_err (dev, "pci_function_level_reset: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + 
return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_pci_bus_master_enable (vlib_main_t *vm, vnet_dev_t *dev) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + clib_error_t *err; + + if ((err = vlib_pci_bus_master_enable (vm, h))) + { + log_err (dev, "pci_bus_master_enable: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + return VNET_DEV_OK; +} + +static void +vnet_dev_pci_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h); + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + + if (pdd->intx_handler) + pdd->intx_handler (vm, dev); +} + +vnet_dev_rv_t +vnet_dev_pci_intx_add_handler (vlib_main_t *vm, vnet_dev_t *dev, + vnet_dev_pci_intx_handler_fn_t *fn) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + clib_error_t *err; + + err = vlib_pci_register_intx_handler (vm, h, vnet_dev_pci_intx_handler); + + if (err) + { + log_err (dev, "pci_register_intx_handler: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_pci_intx_remove_handler (vlib_main_t *vm, vnet_dev_t *dev) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + clib_error_t *err; + + err = vlib_pci_unregister_intx_handler (vm, h); + + if (err) + { + log_err (dev, "pci_unregister_intx_handler: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + pdd->intx_handler = 0; + + return VNET_DEV_OK; +} + +static void +vnet_dev_pci_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 line) +{ + vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h); + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + + if (line < pdd->n_msix_int && pdd->msix_handlers[line]) + pdd->msix_handlers[line](vm, dev, 
line); +} + +vnet_dev_rv_t +vnet_dev_pci_msix_add_handler (vlib_main_t *vm, vnet_dev_t *dev, + vnet_dev_pci_msix_handler_fn_t *fn, u16 first, + u16 count) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + clib_error_t *err; + + err = vlib_pci_register_msix_handler (vm, h, first, count, + vnet_dev_pci_msix_handler); + + if (err) + { + log_err (dev, "pci_register_msix_handler: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + for (u16 i = first; i < first + count; i++) + { + ASSERT (pdd->msix_handlers[i] == 0); + pdd->msix_handlers[i] = fn; + } + + return VNET_DEV_OK; +} + +void +vnet_dev_pci_msix_set_polling_thread (vlib_main_t *vm, vnet_dev_t *dev, + u16 line, u16 thread_index) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + u32 index; + + index = vlib_pci_get_msix_file_index (vm, h, line); + + clib_file_set_polling_thread (&file_main, index, thread_index); +} + +vnet_dev_rv_t +vnet_dev_pci_msix_remove_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 first, + u16 count) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + clib_error_t *err; + + err = vlib_pci_unregister_msix_handler (vm, h, first, count); + + if (err) + { + log_err (dev, "pci_unregister_msix_handler: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + for (u16 i = first; i < first + count; i++) + { + ASSERT (pdd->msix_handlers[i] != 0); + pdd->msix_handlers[i] = 0; + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_pci_msix_enable (vlib_main_t *vm, vnet_dev_t *dev, u16 first, + u16 count) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + clib_error_t *err; + + err = vlib_pci_enable_msix_irq (vm, h, first, count); + + if (err) + { + log_err (dev, "pci_enable_msix_irq: %U", format_clib_error, err); + 
clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_pci_msix_disable (vlib_main_t *vm, vnet_dev_t *dev, u16 first, + u16 count) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + clib_error_t *err; + + err = vlib_pci_disable_msix_irq (vm, h, first, count); + + if (err) + { + log_err (dev, "pci_disble_msix_irq: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_pci_bus_master_disable (vlib_main_t *vm, vnet_dev_t *dev) +{ + vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev); + clib_error_t *err; + + if ((err = vlib_pci_bus_master_disable (vm, h))) + { + log_err (dev, "pci_bus_master_disable: %U", format_clib_error, err); + clib_error_free (err); + return VNET_DEV_ERR_BUS; + } + return VNET_DEV_OK; +} + +static u8 * +format_dev_pci_device_info (u8 *s, va_list *args) +{ + vnet_dev_format_args_t __clib_unused *a = + va_arg (*args, vnet_dev_format_args_t *); + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + vlib_main_t *vm = vlib_get_main (); + vlib_pci_config_t cfg = {}; + clib_error_t *err; + + s = format (s, "PCIe address is %U", format_vlib_pci_addr, &pdd->addr); + + err = vlib_pci_read_write_config (vm, pdd->handle, VLIB_READ, 0, &cfg, + sizeof (cfg)); + if (!err) + { + s = format (s, ", port is %U, speed is %U (max %U)", + format_vlib_pci_link_port, &cfg, format_vlib_pci_link_speed, + &cfg, format_vlib_pci_link_speed_cap, &cfg); + } + else + clib_error_free (err); + + return s; +} + +static u8 * +format_dev_pci_device_addr (u8 *s, va_list *args) +{ + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev); + return format (s, "%U", format_vlib_pci_addr, &pdd->addr); +} + +VNET_DEV_REGISTER_BUS (pci) = { + .name = "pci", + .device_data_size = sizeof 
(vnet_dev_bus_pci_device_info_t), + .ops = { + .device_open = vnet_dev_bus_pci_open, + .device_close = vnet_dev_bus_pci_close, + .get_device_info = vnet_dev_bus_pci_get_device_info, + .free_device_info = vnet_dev_bus_pci_free_device_info, + .dma_mem_alloc_fn = vnet_dev_bus_pci_dma_mem_alloc, + .dma_mem_free_fn = vnet_dev_bus_pci_dma_mem_free, + .format_device_info = format_dev_pci_device_info, + .format_device_addr = format_dev_pci_device_addr, + }, +}; diff --git a/src/vnet/dev/pci.h b/src/vnet/dev/pci.h new file mode 100644 index 00000000000..ce9a53aa273 --- /dev/null +++ b/src/vnet/dev/pci.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#ifndef _VNET_DEV_PCI_H_ +#define _VNET_DEV_PCI_H_ + +#include <vppinfra/clib.h> +#include <vlib/pci/pci.h> +#include <vnet/dev/dev.h> + +typedef void (vnet_dev_pci_intx_handler_fn_t) (vlib_main_t *vm, + vnet_dev_t *dev); +typedef void (vnet_dev_pci_msix_handler_fn_t) (vlib_main_t *vm, + vnet_dev_t *dev, u16 line); + +typedef struct +{ + vlib_pci_addr_t addr; + u16 vendor_id; + u16 device_id; + u8 revision; +} vnet_dev_bus_pci_device_info_t; + +typedef struct +{ + u8 pci_handle_valid : 1; + u16 n_msix_int; + vlib_pci_addr_t addr; + vlib_pci_dev_handle_t handle; + vnet_dev_pci_intx_handler_fn_t *intx_handler; + vnet_dev_pci_msix_handler_fn_t **msix_handlers; +} vnet_dev_bus_pci_device_data_t; + +static_always_inline vnet_dev_bus_pci_device_data_t * +vnet_dev_get_bus_pci_device_data (vnet_dev_t *dev) +{ + return (void *) dev->bus_data; +} +static_always_inline vlib_pci_dev_handle_t +vnet_dev_get_pci_handle (vnet_dev_t *dev) +{ + return ((vnet_dev_bus_pci_device_data_t *) (dev->bus_data))->handle; +} + +static_always_inline vlib_pci_addr_t +vnet_dev_get_pci_addr (vnet_dev_t *dev) +{ + return ((vnet_dev_bus_pci_device_data_t *) (dev->bus_data))->addr; +} + +static_always_inline vlib_pci_dev_handle_t +vnet_dev_get_pci_n_msix_interrupts (vnet_dev_t *dev) +{ + return 
vnet_dev_get_bus_pci_device_data (dev)->n_msix_int; +} + +vnet_dev_rv_t vnet_dev_pci_read_config_header (vlib_main_t *, vnet_dev_t *, + vlib_pci_config_hdr_t *); + +vnet_dev_rv_t vnet_dev_pci_map_region (vlib_main_t *, vnet_dev_t *, u8, + void **); +vnet_dev_rv_t vnet_dev_pci_function_level_reset (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t vnet_dev_pci_bus_master_enable (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t vnet_dev_pci_bus_master_disable (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t vnet_dev_pci_intx_add_handler (vlib_main_t *, vnet_dev_t *, + vnet_dev_pci_intx_handler_fn_t *); +vnet_dev_rv_t vnet_dev_pci_intx_remove_handler (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t vnet_dev_pci_msix_add_handler (vlib_main_t *, vnet_dev_t *, + vnet_dev_pci_msix_handler_fn_t *, + u16, u16); +vnet_dev_rv_t vnet_dev_pci_msix_remove_handler (vlib_main_t *, vnet_dev_t *, + u16, u16); +vnet_dev_rv_t vnet_dev_pci_msix_enable (vlib_main_t *, vnet_dev_t *, u16, u16); +vnet_dev_rv_t vnet_dev_pci_msix_disable (vlib_main_t *, vnet_dev_t *, u16, + u16); +void vnet_dev_pci_msix_set_polling_thread (vlib_main_t *, vnet_dev_t *, u16, + u16); + +#endif /* _VNET_DEV_PCI_H_ */ diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c new file mode 100644 index 00000000000..8a6df54cbc8 --- /dev/null +++ b/src/vnet/dev/port.c @@ -0,0 +1,748 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/log.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", + .subclass_name = "port", +}; + +static uword +dummy_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + ASSERT (0); + return 0; +} + +VLIB_REGISTER_NODE (port_rx_eth_node) = { + .function = dummy_input_fn, + .name = "port-rx-eth", + .runtime_data_bytes = sizeof (vnet_dev_rx_node_runtime_t), + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + .n_next_nodes = VNET_DEV_ETH_RX_PORT_N_NEXTS, + .next_nodes = { +#define _(n, s) [VNET_DEV_ETH_RX_PORT_NEXT_##n] = s, + foreach_vnet_dev_port_rx_next +#undef _ + }, +}; + +u16 vnet_dev_default_next_index_by_port_type[] = { + [VNET_DEV_PORT_TYPE_ETHERNET] = VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT, +}; + +VNET_FEATURE_ARC_INIT (eth_port_rx, static) = { + .arc_name = "port-rx-eth", + .start_nodes = VNET_FEATURES ("port-rx-eth"), + .last_in_arc = "ethernet-input", + .arc_index_ptr = &vnet_dev_main.eth_port_rx_feature_arc_index, +}; + +VNET_FEATURE_INIT (l2_patch, static) = { + .arc_name = "port-rx-eth", + .node_name = "l2-patch", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VNET_FEATURE_INIT (worker_handoff, static) = { + .arc_name = "port-rx-eth", + .node_name = "worker-handoff", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VNET_FEATURE_INIT (span_input, static) = { + .arc_name = "port-rx-eth", + .node_name = "span-input", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VNET_FEATURE_INIT (p2p_ethernet_node, static) = { + .arc_name = "port-rx-eth", + .node_name = "p2p-ethernet-input", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VNET_FEATURE_INIT (ethernet_input, static) = { + .arc_name = "port-rx-eth", + .node_name = "ethernet-input", + .runs_before = 0, /* not before any other features */ +}; + +void 
+vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + + vnet_dev_port_validate (vm, port); + + ASSERT (port->started == 0); + + log_debug (dev, "port %u", port->port_id); + + if (port->port_ops.free) + port->port_ops.free (vm, port); + + pool_free (port->secondary_hw_addr); + pool_free (port->rx_queues); + pool_free (port->tx_queues); + vnet_dev_arg_free (&port->args); + pool_put_index (dev->ports, port->index); + clib_mem_free (port); +} + +void +vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_port_validate (vm, port); + + foreach_vnet_dev_port_tx_queue (q, port) + { + u32 ti; + clib_bitmap_foreach (ti, q->assigned_threads) + { + vlib_main_t *tvm = vlib_get_main_by_index (ti); + vlib_node_runtime_t *nr = + vlib_node_get_runtime (tvm, port->intf.tx_node_index); + vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (nr); + tnr->hw_if_index = port->intf.hw_if_index; + tnr->tx_queue = q; + } + } +} + +void +vnet_dev_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + vnet_dev_rt_op_t *ops = 0; + u16 n_threads = vlib_get_n_threads (); + + log_debug (dev, "stopping port %u", port->port_id); + + for (u16 i = 0; i < n_threads; i++) + { + vnet_dev_rt_op_t op = { .thread_index = i, .port = port }; + vec_add1 (ops, op); + } + + vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops)); + vec_free (ops); + + port->port_ops.stop (vm, port); + + foreach_vnet_dev_port_rx_queue (q, port) + { + q->started = 0; + log_debug (dev, "port %u rx queue %u stopped", port->port_id, + q->queue_id); + } + + foreach_vnet_dev_port_tx_queue (q, port) + { + q->started = 0; + log_debug (dev, "port %u tx queue %u stopped", port->port_id, + q->queue_id); + } + + log_debug (dev, "port %u stopped", port->port_id); + port->started = 0; +} + +vnet_dev_rv_t +vnet_dev_port_start_all_rx_queues (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + + 
vnet_dev_port_validate (vm, port); + + foreach_vnet_dev_port_rx_queue (q, port) + { + rv = vnet_dev_rx_queue_start (vm, q); + if (rv != VNET_DEV_OK) + return rv; + } + return rv; +} + +vnet_dev_rv_t +vnet_dev_port_start_all_tx_queues (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + + vnet_dev_port_validate (vm, port); + + foreach_vnet_dev_port_tx_queue (q, port) + { + rv = vnet_dev_tx_queue_start (vm, q); + if (rv != VNET_DEV_OK) + return rv; + } + return rv; +} + +vnet_dev_rv_t +vnet_dev_port_start (vlib_main_t *vm, vnet_dev_port_t *port) +{ + u16 n_threads = vlib_get_n_threads (); + vnet_dev_t *dev = port->dev; + vnet_dev_rt_op_t *ops = 0; + vnet_dev_rv_t rv; + + vnet_dev_port_validate (vm, port); + + log_debug (dev, "starting port %u", port->port_id); + + vnet_dev_port_update_tx_node_runtime (vm, port); + + if ((rv = port->port_ops.start (vm, port)) != VNET_DEV_OK) + { + vnet_dev_port_stop (vm, port); + return rv; + } + + for (u16 i = 0; i < n_threads; i++) + { + vnet_dev_rt_op_t op = { .thread_index = i, .port = port }; + vec_add1 (ops, op); + } + + vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops)); + vec_free (ops); + + foreach_vnet_dev_port_rx_queue (q, port) + if (q->enabled) + { + log_debug (dev, "port %u rx queue %u started", port->port_id, + q->queue_id); + q->started = 1; + } + + foreach_vnet_dev_port_tx_queue (q, port) + if (q->enabled) + { + log_debug (dev, "port %u tx queue %u started", port->port_id, + q->queue_id); + q->started = 1; + } + + port->started = 1; + log_debug (dev, "port %u started", port->port_id); + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id, + vnet_dev_port_add_args_t *args) +{ + vnet_dev_port_t **pp, *port; + vnet_dev_rv_t rv = VNET_DEV_OK; + + ASSERT (args->port.attr.type != VNET_DEV_PORT_TYPE_UNKNOWN); + ASSERT (args->port.attr.max_supported_rx_frame_size); + + port = + vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t), 
args->port.data_size); + pool_get (dev->ports, pp); + pp[0] = port; + clib_memcpy (vnet_dev_get_port_data (port), args->port.initial_data, + args->port.data_size); + port->port_id = id; + port->index = pp - dev->ports; + port->dev = dev; + port->attr = args->port.attr; + port->rx_queue_config = args->rx_queue.config; + port->tx_queue_config = args->tx_queue.config; + port->rx_queue_ops = args->rx_queue.ops; + port->tx_queue_ops = args->tx_queue.ops; + port->port_ops = args->port.ops; + port->rx_node = *args->rx_node; + port->tx_node = *args->tx_node; + + if (args->port.args) + for (vnet_dev_arg_t *a = args->port.args; a->type != VNET_DEV_ARG_END; a++) + vec_add1 (port->args, *a); + + /* defaults out of port attributes */ + port->max_rx_frame_size = args->port.attr.max_supported_rx_frame_size; + port->primary_hw_addr = args->port.attr.hw_addr; + + if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET) + { + if (port->max_rx_frame_size > 1514 && + port->attr.caps.change_max_rx_frame_size) + port->max_rx_frame_size = 1514; + } + + if (port->port_ops.alloc) + rv = port->port_ops.alloc (vm, port); + + if (rv == VNET_DEV_OK) + port->initialized = 1; + + return rv; +} + +vnet_dev_rv_t +vnet_dev_port_cfg_change_req_validate (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_cfg_change_req_t *req) +{ + vnet_dev_rv_t rv; + vnet_dev_hw_addr_t *addr; + int found; + + if (req->validated) + return VNET_DEV_OK; + + switch (req->type) + { + case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: + if (req->max_rx_frame_size > port->attr.max_supported_rx_frame_size) + return VNET_DEV_ERR_INVALID_VALUE; + if (req->max_rx_frame_size == port->max_rx_frame_size) + return VNET_DEV_ERR_NO_CHANGE; + break; + + case VNET_DEV_PORT_CFG_PROMISC_MODE: + if (req->promisc == port->promisc) + return VNET_DEV_ERR_NO_CHANGE; + break; + + case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + if (clib_memcmp (&req->addr, &port->primary_hw_addr, + sizeof (vnet_dev_hw_addr_t)) == 0) + return VNET_DEV_ERR_NO_CHANGE; + 
break; + + case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: + pool_foreach (addr, port->secondary_hw_addr) + if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0) + return VNET_DEV_ERR_ALREADY_EXISTS; + break; + + case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR: + found = 0; + pool_foreach (addr, port->secondary_hw_addr) + if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0) + found = 1; + if (!found) + return VNET_DEV_ERR_NO_SUCH_ENTRY; + break; + + default: + break; + } + + if (port->port_ops.config_change_validate) + { + rv = port->port_ops.config_change_validate (vm, port, req); + if (rv != VNET_DEV_OK) + return rv; + } + else + return VNET_DEV_ERR_NOT_SUPPORTED; + + req->validated = 1; + return VNET_DEV_OK; +} + +vnet_dev_rv_t +vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_cfg_change_req_t *req) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + vnet_dev_hw_addr_t *a; + vnet_dev_rx_queue_t *rxq = 0; + u8 enable = 0; + + vnet_dev_port_validate (vm, port); + + if (req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE || + req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE) + { + if (req->all_queues == 0) + { + rxq = vnet_dev_port_get_rx_queue_by_id (port, req->queue_id); + if (rxq == 0) + return VNET_DEV_ERR_BUG; + } + } + + if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, req))) + return rv; + + if (port->port_ops.config_change) + rv = port->port_ops.config_change (vm, port, req); + else + return VNET_DEV_ERR_NOT_SUPPORTED; + + if (rv != VNET_DEV_OK) + return rv; + + switch (req->type) + { + case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: + port->max_rx_frame_size = req->max_rx_frame_size; + break; + + case VNET_DEV_PORT_CFG_PROMISC_MODE: + port->promisc = req->promisc; + break; + + case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE: + enable = 1; + case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE: + if (req->all_queues) + { + clib_bitmap_t *bmp = 0; + vnet_dev_rt_op_t *ops = 0; + u32 i; + + foreach_vnet_dev_port_rx_queue (q, port) + { + 
q->interrupt_mode = enable; + bmp = clib_bitmap_set (bmp, q->rx_thread_index, 1); + } + + clib_bitmap_foreach (i, bmp) + { + vnet_dev_rt_op_t op = { .port = port, .thread_index = i }; + vec_add1 (ops, op); + } + + vnet_dev_rt_exec_ops (vm, port->dev, ops, vec_len (ops)); + clib_bitmap_free (bmp); + vec_free (ops); + } + else + { + rxq->interrupt_mode = enable; + vnet_dev_rt_exec_ops (vm, port->dev, + &(vnet_dev_rt_op_t){ + .port = port, + .thread_index = rxq->rx_thread_index, + }, + 1); + } + break; + + case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + clib_memcpy (&port->primary_hw_addr, &req->addr, + sizeof (vnet_dev_hw_addr_t)); + break; + + case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: + pool_get (port->secondary_hw_addr, a); + clib_memcpy (a, &req->addr, sizeof (vnet_dev_hw_addr_t)); + break; + + case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR: + pool_foreach (a, port->secondary_hw_addr) + if (clib_memcmp (a, &req->addr, sizeof (vnet_dev_hw_addr_t)) == 0) + { + pool_put (port->secondary_hw_addr, a); + break; + } + break; + + default: + break; + } + + return VNET_DEV_OK; +} + +void +vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_state_changes_t changes) +{ + vnet_main_t *vnm = vnet_get_main (); + + vnet_dev_port_validate (vm, port); + + if (changes.change.link_speed) + { + port->speed = changes.link_speed; + if (port->interface_created) + vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index, + changes.link_speed); + log_debug (port->dev, "port speed changed to %u", changes.link_speed); + } + + if (changes.change.link_state) + { + port->link_up = changes.link_state; + if (port->interface_created) + vnet_hw_interface_set_flags ( + vnm, port->intf.hw_if_index, + changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + log_debug (port->dev, "port link state changed to %s", + changes.link_state ? 
"up" : "down"); + } +} + +void +vnet_dev_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_counter_t *counters, u16 n_counters) +{ + vnet_dev_port_validate (vm, port); + + port->counter_main = + vnet_dev_counters_alloc (vm, counters, n_counters, "%s port %u counters", + port->dev->device_id, port->port_id); +} + +void +vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_port_validate (vm, port); + + if (port->counter_main) + vnet_dev_counters_free (vm, port->counter_main); +} + +vnet_dev_rv_t +vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_main_t *vnm = vnet_get_main (); + u16 n_threads = vlib_get_n_threads (); + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_t *dev = port->dev; + vnet_dev_port_t **pp; + vnet_dev_rv_t rv; + u16 ti = 0; + + if (port->intf.name[0] == 0) + { + u8 *s; + s = format (0, "%s%u/%u", + dm->drivers[port->dev->driver_index].registration->name, + port->dev->index, port->index); + u32 n = vec_len (s); + + if (n >= sizeof (port->intf.name)) + { + vec_free (s); + return VNET_DEV_ERR_BUG; + } + clib_memcpy (port->intf.name, s, n); + port->intf.name[n] = 0; + vec_free (s); + } + + log_debug ( + dev, "allocating %u rx queues with size %u and %u tx queues with size %u", + port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues, + port->intf.txq_sz); + + for (int i = 0; i < port->intf.num_rx_queues; i++) + if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) != + VNET_DEV_OK) + goto error; + + for (u32 i = 0; i < port->intf.num_tx_queues; i++) + if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) != + VNET_DEV_OK) + goto error; + + foreach_vnet_dev_port_tx_queue (q, port) + { + q->assigned_threads = clib_bitmap_set (q->assigned_threads, ti, 1); + log_debug (dev, "port %u tx queue %u assigned to thread %u", + port->port_id, q->queue_id, ti); + if (++ti >= n_threads) + break; + } + + /* pool of port pointers helps us to assign unique 
dev_instance */ + pool_get (dm->ports_by_dev_instance, pp); + port->intf.dev_instance = pp - dm->ports_by_dev_instance; + pp[0] = port; + + if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET) + { + vnet_device_class_t *dev_class; + vnet_dev_driver_t *driver; + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hw; + vnet_hw_if_caps_t caps = 0; + u32 rx_node_index; + + driver = pool_elt_at_index (dm->drivers, dev->driver_index); + + /* hack to provide per-port tx node function */ + dev_class = vnet_get_device_class (vnm, driver->dev_class_index); + dev_class->tx_fn_registrations = port->tx_node.registrations; + dev_class->format_tx_trace = port->tx_node.format_trace; + dev_class->tx_function_error_counters = port->tx_node.error_counters; + dev_class->tx_function_n_errors = port->tx_node.n_error_counters; + + /* create new interface including tx and output nodes */ + port->intf.hw_if_index = vnet_eth_register_interface ( + vnm, &(vnet_eth_interface_registration_t){ + .address = port->primary_hw_addr.eth_mac, + .max_frame_size = port->max_rx_frame_size, + .dev_class_index = driver->dev_class_index, + .dev_instance = port->intf.dev_instance, + .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size, + .cb.flag_change = vnet_dev_port_eth_flag_change, + }); + + sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index); + hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index); + port->intf.sw_if_index = sw->sw_if_index; + vnet_hw_interface_set_flags ( + vnm, port->intf.hw_if_index, + port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + if (port->speed) + vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index, + port->speed); + + port->intf.tx_node_index = hw->tx_node_index; + + caps |= port->attr.caps.interrupt_mode ? VNET_HW_IF_CAP_INT_MODE : 0; + caps |= port->attr.caps.mac_filter ? VNET_HW_IF_CAP_MAC_FILTER : 0; + caps |= port->attr.tx_offloads.tcp_gso ? VNET_HW_IF_CAP_TCP_GSO : 0; + caps |= port->attr.tx_offloads.ip4_cksum ? 
VNET_HW_IF_CAP_TX_CKSUM : 0; + + if (caps) + vnet_hw_if_set_caps (vnm, port->intf.hw_if_index, caps); + + /* create / reuse rx node */ + if (vec_len (dm->free_rx_node_indices)) + { + vlib_node_t *n; + rx_node_index = vec_pop (dm->free_rx_node_indices); + vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name); + n = vlib_get_node (vm, rx_node_index); + n->function = vlib_node_get_preferred_node_fn_variant ( + vm, port->rx_node.registrations); + n->format_trace = port->rx_node.format_trace; + vlib_register_errors (vm, rx_node_index, + port->rx_node.n_error_counters, 0, + port->rx_node.error_counters); + } + else + { + dev_class->format_tx_trace = port->tx_node.format_trace; + dev_class->tx_function_error_counters = port->tx_node.error_counters; + dev_class->tx_function_n_errors = port->tx_node.n_error_counters; + vlib_node_registration_t rx_node_reg = { + .sibling_of = "port-rx-eth", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, + .node_fn_registrations = port->rx_node.registrations, + .format_trace = port->rx_node.format_trace, + .error_counters = port->rx_node.error_counters, + .n_errors = port->rx_node.n_error_counters, + }; + rx_node_index = + vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name); + } + port->rx_node_assigned = 1; + port->intf.rx_node_index = rx_node_index; + port->intf.rx_next_index = + vnet_dev_default_next_index_by_port_type[port->attr.type]; + + vlib_worker_thread_node_runtime_update (); + log_debug (dev, + "ethernet interface created, hw_if_index %u sw_if_index %u " + "rx_node_index %u tx_node_index %u", + port->intf.hw_if_index, port->intf.sw_if_index, + port->intf.rx_node_index, port->intf.tx_node_index); + } + + port->interface_created = 1; + foreach_vnet_dev_port_rx_queue (q, port) + { + vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] = + port->intf.sw_if_index; + /* poison to catch node not calling runtime update function */ + q->next_index = ~0; 
+ q->interrupt_mode = port->intf.default_is_intr_mode; + vnet_dev_rx_queue_rt_request ( + vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 }); + } + + vnet_dev_port_update_tx_node_runtime (vm, port); + + if (port->port_ops.init) + rv = port->port_ops.init (vm, port); + +error: + if (rv != VNET_DEV_OK) + vnet_dev_port_if_remove (vm, port); + return rv; +} + +vnet_dev_rv_t +vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_main_t *vnm = vnet_get_main (); + + vnet_dev_port_validate (vm, port); + + if (port->started) + vnet_dev_port_stop (vm, port); + + if (port->rx_node_assigned) + { + vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u", + port->intf.rx_node_index); + vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index); + port->rx_node_assigned = 0; + } + + if (port->interface_created) + { + vlib_worker_thread_barrier_sync (vm); + vnet_delete_hw_interface (vnm, port->intf.hw_if_index); + vlib_worker_thread_barrier_release (vm); + pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance); + port->interface_created = 0; + } + + port->intf = (typeof (port->intf)){}; + + if (port->port_ops.deinit) + port->port_ops.deinit (vm, port); + + foreach_vnet_dev_port_tx_queue (q, port) + vnet_dev_tx_queue_free (vm, q); + + foreach_vnet_dev_port_rx_queue (q, port) + vnet_dev_rx_queue_free (vm, q); + + vnet_dev_port_free_counters (vm, port); + + foreach_vnet_dev_port_args (v, port) + vnet_dev_arg_clear_value (v); + + return VNET_DEV_OK; +} +void +vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + if (port->counter_main) + vnet_dev_counters_clear (vm, port->counter_main); + + foreach_vnet_dev_port_rx_queue (q, port) + if (q->counter_main) + vnet_dev_counters_clear (vm, q->counter_main); + + foreach_vnet_dev_port_tx_queue (q, port) + if (q->counter_main) + vnet_dev_counters_clear (vm, q->counter_main); + + log_notice (port->dev, "counters cleared on port 
%u", port->port_id); +} diff --git a/src/vnet/dev/process.c b/src/vnet/dev/process.c new file mode 100644 index 00000000000..3c1f0b8d2d8 --- /dev/null +++ b/src/vnet/dev/process.c @@ -0,0 +1,474 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include "vppinfra/error.h" +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/log.h> + +VLIB_REGISTER_LOG_CLASS (dev_log, static) = { + .class_name = "dev", + .subclass_name = "process", +}; + +typedef enum +{ + VNET_DEV_EVENT_PERIODIC_STOP, + VNET_DEV_EVENT_PERIODIC_START, + VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ, + VNET_DEV_EVENT_PROCESS_QUIT, + VNET_DEV_EVENT_CALL_OP, + VNET_DEV_EVENT_CALL_OP_NO_RV, + VNET_DEV_EVENT_CALL_OP_NO_WAIT, + VNET_DEV_EVENT_CALL_PORT_OP, + VNET_DEV_EVENT_CALL_PORT_OP_NO_RV, + VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT, + VNET_DEV_EVENT_CLOCK = ~0 +} __clib_packed vnet_dev_event_t; + +typedef struct +{ + vnet_dev_event_t event; + u8 reply_needed : 1; + u32 calling_process_index; + union + { + struct + { + vnet_dev_port_t *port; + vnet_dev_port_cfg_change_req_t *change_req; + } port_cfg_change; + struct + { + vnet_dev_op_t *op; + } call_op; + struct + { + vnet_dev_op_no_rv_t *op; + } call_op_no_rv; + struct + { + vnet_dev_op_no_rv_t *op; + } call_op_no_wait; + struct + { + vnet_dev_port_op_t *op; + vnet_dev_port_t *port; + } call_port_op; + struct + { + vnet_dev_port_op_no_rv_t *op; + vnet_dev_port_t *port; + } call_port_op_no_rv; + struct + { + vnet_dev_port_op_no_rv_t *op; + vnet_dev_port_t *port; + } call_port_op_no_wait; + }; +} vnet_dev_event_data_t; + +static vnet_dev_rv_t +vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev, + vnet_dev_event_data_t *ed) +{ + vnet_dev_port_t *p; + vnet_dev_rv_t rv = VNET_DEV_OK; + + switch (ed->event) + { + case VNET_DEV_EVENT_CLOCK: + break; + case VNET_DEV_EVENT_PROCESS_QUIT: + log_debug (dev, "quit requested"); + dev->process_node_quit = 1; + break; + case VNET_DEV_EVENT_PERIODIC_START: + 
log_debug (dev, "periodic start"); + dev->process_node_periodic = 1; + break; + case VNET_DEV_EVENT_PERIODIC_STOP: + log_debug (dev, "periodic stop"); + dev->process_node_periodic = 0; + break; + case VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ: + log_debug (dev, "port config change"); + p = ed->port_cfg_change.port; + rv = vnet_dev_port_cfg_change (vm, p, ed->port_cfg_change.change_req); + break; + case VNET_DEV_EVENT_CALL_OP: + log_debug (dev, "call op"); + rv = ed->call_op.op (vm, dev); + break; + case VNET_DEV_EVENT_CALL_OP_NO_RV: + log_debug (dev, "call op no rv"); + ed->call_op_no_rv.op (vm, dev); + break; + case VNET_DEV_EVENT_CALL_OP_NO_WAIT: + log_debug (dev, "call op no wait"); + ed->call_op_no_wait.op (vm, dev); + break; + case VNET_DEV_EVENT_CALL_PORT_OP: + log_debug (dev, "call port op"); + rv = ed->call_port_op.op (vm, ed->call_port_op.port); + break; + case VNET_DEV_EVENT_CALL_PORT_OP_NO_RV: + log_debug (dev, "call port op no rv"); + ed->call_port_op_no_rv.op (vm, ed->call_port_op_no_rv.port); + break; + case VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT: + log_debug (dev, "call port op no wait"); + ed->call_port_op_no_wait.op (vm, ed->call_port_op_no_wait.port); + break; + default: + ASSERT (0); + } + return rv; +} + +static uword +vnet_dev_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_periodic_op_t *pop, *pops = 0; + f64 next = CLIB_F64_MAX; + vnet_dev_event_data_t *event_data = 0, *new_event_data, *ed; + + vnet_dev_t *dev = + *((vnet_dev_t **) vlib_node_get_runtime_data (vm, rt->node_index)); + + log_debug (dev, "process '%U' started", format_vlib_node_name, vm, + rt->node_index); + + while (dev->process_node_quit == 0) + { + uword event_type; + f64 now = vlib_time_now (vm); + + if (dev->process_node_periodic) + vlib_process_wait_for_event_or_clock (vm, next > now ? 
next - now : 0);
+      else
+        vlib_process_wait_for_event (vm);
+
+      new_event_data = vlib_process_get_event_data (vm, &event_type);
+
+      if (new_event_data)
+        {
+          vec_append (event_data, new_event_data);
+          vlib_process_put_event_data (vm, new_event_data);
+
+          ASSERT (event_type == 0);
+
+          vec_foreach (ed, event_data)
+            {
+              vnet_dev_rv_t rv;
+              rv = vnet_dev_process_one_event (vm, dev, ed);
+              if (ed->reply_needed)
+                vlib_process_signal_event (vm, ed->calling_process_index,
+                                           ed->event, rv);
+            }
+          vec_reset_length (event_data);
+        }
+
+      next = CLIB_F64_MAX;
+      pool_foreach (pop, dev->periodic_ops)
+        {
+          if (pop->last_run + pop->interval < now)
+            {
+              vec_add1 (pops, *pop);
+              pop->last_run = now;
+            }
+          if (pop->last_run + pop->interval < next)
+            next = pop->last_run + pop->interval;
+        }
+
+      vec_foreach (pop, pops)
+        {
+          switch (pop->type)
+            {
+            case VNET_DEV_PERIODIC_OP_TYPE_DEV:
+              pop->dev_op (vm, pop->dev);
+              break;
+            case VNET_DEV_PERIODIC_OP_TYPE_PORT:
+              pop->port_op (vm, pop->port);
+              break;
+            default:
+              ASSERT (0);
+            }
+        }
+      vec_reset_length (pops);
+    }
+
+  log_debug (dev, "process '%U' quit", format_vlib_node_name, vm,
+             rt->node_index);
+  vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+  vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+
+  /* add node index to the freelist */
+  vec_add1 (dm->free_process_node_indices, rt->node_index);
+  vec_free (pops);
+  vec_free (event_data);
+  return 0;
+}
+
+/* Attach a process node to `dev` and start it.
+ *
+ * A node index parked on dm->free_process_node_indices is recycled when one
+ * is available (renamed, re-pointed at vnet_dev_process and set to polling);
+ * otherwise a new process node named "<device_id>-process" is registered.
+ * The device pointer is stored in the node's runtime data.
+ * Always returns VNET_DEV_OK.
+ */
+vnet_dev_rv_t
+vnet_dev_process_create (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  uword n_free = vec_len (dm->free_process_node_indices);
+  vlib_node_t *node;
+
+  if (n_free == 0)
+    {
+      /* nothing to recycle - register a brand new process node */
+      vlib_node_registration_t reg = {
+        .function = vnet_dev_process,
+        .type = VLIB_NODE_TYPE_PROCESS,
+        .process_log2_n_stack_bytes = 16,
+        .runtime_data_bytes = sizeof (void *),
+      };
+
+      vlib_register_node (vm, &reg, "%s-process", dev->device_id);
+
+      node = vlib_get_node (vm, reg.index);
+      log_debug (dev, "process node '%U' (%u) created",
+                 format_vlib_node_name, vm, reg.index, reg.index);
+    }
+  else
+    {
+      /* reuse the node index stored last in the freelist */
+      node = vlib_get_node (vm, dm->free_process_node_indices[n_free - 1]);
+      if (node->function != vnet_dev_process)
+        {
+          vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node->index);
+          node->function = vnet_dev_process;
+          rt->function = vnet_dev_process;
+        }
+      vlib_node_rename (vm, node->index, "%s-process", dev->device_id);
+      vlib_node_set_state (vm, node->index, VLIB_NODE_STATE_POLLING);
+      vec_set_len (dm->free_process_node_indices, n_free - 1);
+      log_debug (dev, "process node '%U' (%u) reused", format_vlib_node_name,
+                 vm, node->index, node->index);
+    }
+
+  dev->process_node_index = node->index;
+  *(vnet_dev_t **) vlib_node_get_runtime_data (vm, node->index) = dev;
+  vlib_start_process (vm, node->runtime_index);
+
+  return VNET_DEV_OK;
+}
+
+/* Post `ed` (copied by value) to the device's process node as event type 0;
+ * does not wait for the event to be handled. */
+static void
+vnet_dev_process_event_send (vlib_main_t *vm, vnet_dev_t *dev,
+                             vnet_dev_event_data_t ed)
+{
+  vnet_dev_event_data_t *slot;
+
+  slot = vlib_process_signal_event_data (vm, dev->process_node_index, 0, 1,
+                                         sizeof (ed));
+  *slot = ed;
+}
+
+/* Post `ed` to the device's process node and wait up to 5 seconds for the
+ * reply event.
+ *
+ * When the caller is the device process itself the event is handled inline.
+ * Returns the value carried by the reply, or VNET_DEV_ERR_PROCESS_REPLY when
+ * the wait ends with any event other than ed.event.
+ */
+static vnet_dev_rv_t
+vnet_dev_process_event_send_and_wait (vlib_main_t *vm, vnet_dev_t *dev,
+                                      vnet_dev_event_data_t ed)
+{
+  uword *reply_data = 0;
+  uword reply_event;
+  vnet_dev_rv_t rv;
+
+  ed.calling_process_index = vlib_get_current_process_node_index (vm);
+
+  /* caller is the device process - handle the event directly */
+  if (ed.calling_process_index == dev->process_node_index)
+    return vnet_dev_process_one_event (vm, dev, &ed);
+
+  ed.reply_needed = 1;
+  vnet_dev_process_event_send (vm, dev, ed);
+  vlib_process_wait_for_event_or_clock (vm, 5.0);
+  reply_event = vlib_process_get_events (vm, &reply_data);
+
+  if (reply_event == ed.event)
+    rv = reply_data[0];
+  else
+    {
+      const char *msg = "unexpected event received";
+
+      if (reply_event == VNET_DEV_EVENT_CLOCK)
+        msg = "timeout waiting for process node to respond";
+      log_err (dev, "%s", msg);
+      rv = VNET_DEV_ERR_PROCESS_REPLY;
+    }
+
+  vec_free (reply_data);
+  return rv;
+}
+
+/* Send VNET_DEV_EVENT_PROCESS_QUIT to the device process and wait for the
+ * reply; the result is discarded. */
+void
+vnet_dev_process_quit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_event_data_t quit = { .event = VNET_DEV_EVENT_PROCESS_QUIT };
+
+  vnet_dev_process_event_send_and_wait (vm, dev, quit);
+}
+
+/* Add `pop` to dev->periodic_ops unless an entry with the same op/arg pair
+ * already exists. Sends VNET_DEV_EVENT_PERIODIC_START when the pool goes
+ * from empty to one element. Returns 1 if added, 0 if it was a duplicate. */
+static int
+_vnet_dev_poll_add (vlib_main_t *vm, vnet_dev_t *dev,
+                    vnet_dev_periodic_op_t pop)
+{
+  vnet_dev_periodic_op_t *slot;
+
+  pool_foreach (slot, dev->periodic_ops)
+    {
+      if (slot->op == pop.op && slot->arg == pop.arg)
+        return 0;
+    }
+
+  pool_get_zero (dev->periodic_ops, slot);
+  *slot = pop;
+
+  if (pool_elts (dev->periodic_ops) == 1)
+    {
+      const vnet_dev_event_data_t start = {
+        .event = VNET_DEV_EVENT_PERIODIC_START
+      };
+      vnet_dev_process_event_send (vm, dev, start);
+    }
+  return 1;
+}
+
+/* Remove the periodic op matching op/arg from dev->periodic_ops. Sends
+ * VNET_DEV_EVENT_PERIODIC_STOP when the pool becomes empty. Returns 1 if an
+ * entry was removed, 0 if none matched. */
+static int
+_vnet_dev_poll_remove (vlib_main_t *vm, vnet_dev_t *dev, void *op, void *arg)
+{
+  vnet_dev_periodic_op_t *cur;
+
+  pool_foreach (cur, dev->periodic_ops)
+    {
+      if (cur->op == op && cur->arg == arg)
+        {
+          pool_put (dev->periodic_ops, cur);
+          if (pool_elts (dev->periodic_ops) == 0)
+            {
+              const vnet_dev_event_data_t stop = {
+                .event = VNET_DEV_EVENT_PERIODIC_STOP
+              };
+              vnet_dev_process_event_send (vm, dev, stop);
+            }
+          return 1;
+        }
+    }
+  return 0;
+}
+
+/* Register `dev_op` to be run on `dev` every `interval`; logs a warning if
+ * the same op is already registered. */
+void
+vnet_dev_poll_dev_add (vlib_main_t *vm, vnet_dev_t *dev, f64 interval,
+                       vnet_dev_op_no_rv_t *dev_op)
+{
+  vnet_dev_periodic_op_t pop = {
+    .interval = interval,
+    .type = VNET_DEV_PERIODIC_OP_TYPE_DEV,
+    .dev_op = dev_op,
+    .dev = dev,
+  };
+  int added = _vnet_dev_poll_add (vm, dev, pop);
+
+  if (!added)
+    log_warn (dev, "poll_dev_add: op already exists, not added");
+}
+
+/* Unregister a device-level periodic op; logs a warning if not found. */
+void
+vnet_dev_poll_dev_remove (vlib_main_t *vm, vnet_dev_t *dev,
+                          vnet_dev_op_no_rv_t *dev_op)
+{
+  int removed = _vnet_dev_poll_remove (vm, dev, (void *) dev_op, (void *) dev);
+
+  if (!removed)
+    log_warn (dev, "poll_dev_remove: op not found, not removed");
+}
+
+/* Register `port_op` to be run on `port` every `interval`; logs a warning
+ * if the same op is already registered. */
+void
+vnet_dev_poll_port_add (vlib_main_t *vm, vnet_dev_port_t *port, f64 interval,
+                        vnet_dev_port_op_no_rv_t *port_op)
+{
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_periodic_op_t pop = {
+    .interval = interval,
+    .type = VNET_DEV_PERIODIC_OP_TYPE_PORT,
+    .port_op = port_op,
+    .port = port,
+  };
+  int added = _vnet_dev_poll_add (vm, dev, pop);
+
+  if (!added)
+    log_warn (dev, "poll_port_add: op already exists, not added");
+}
+
+/* Unregister a port-level periodic op; logs a warning if not found. */
+void
+vnet_dev_poll_port_remove (vlib_main_t *vm, vnet_dev_port_t *port,
+                           vnet_dev_port_op_no_rv_t *port_op)
+{
+  vnet_dev_t *dev = port->dev;
+  int removed =
+    _vnet_dev_poll_remove (vm, dev, (void *) port_op, (void *) port);
+
+  if (!removed)
+    log_warn (dev, "poll_port_remove: op not found, not removed");
+}
+
+/* Hand a port configuration change request to the device process and wait
+ * for its result. */
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *vm, vnet_dev_port_t *port,
+                                      vnet_dev_port_cfg_change_req_t *pccr)
+{
+  vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+  };
+
+  ed.port_cfg_change.port = port;
+  ed.port_cfg_change.change_req = pccr;
+
+  return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+/* Run `op` in the device process and return its result. */
+vnet_dev_rv_t
+vnet_dev_process_call_op (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_op_t *op)
+{
+  vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_CALL_OP };
+
+  ed.call_op.op = op;
+
+  return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+/* Run a no-return-value `op` in the device process and wait until the
+ * process replies. */
+vnet_dev_rv_t
+vnet_dev_process_call_op_no_rv (vlib_main_t *vm, vnet_dev_t *dev,
+                                vnet_dev_op_no_rv_t *op)
+{
+  vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_CALL_OP_NO_RV };
+
+  ed.call_op_no_rv.op = op;
+
+  return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+/* Queue a no-return-value `op` for the device process without waiting. */
+void
+vnet_dev_process_call_op_no_wait (vlib_main_t *vm, vnet_dev_t *dev,
+                                  vnet_dev_op_no_rv_t *op)
+{
+  vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_CALL_OP_NO_WAIT };
+
+  ed.call_op_no_rv.op = op;
+
+  vnet_dev_process_event_send (vm, dev, ed);
+}
+
+/* Run port `op` in the owning device's process and return its result. */
+vnet_dev_rv_t
+vnet_dev_process_call_port_op (vlib_main_t *vm, vnet_dev_port_t *port,
+                               vnet_dev_port_op_t *op)
+{
+  vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_CALL_PORT_OP };
+
+  ed.call_port_op.op = op;
+  ed.call_port_op.port = port;
+
+  return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+/* Run a no-return-value port `op` in the owning device's process and wait
+ * until the process replies. */
+vnet_dev_rv_t
+vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *port,
+                                     vnet_dev_port_op_no_rv_t *op)
+{
+  vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_RV };
+
+  ed.call_port_op_no_rv.op = op;
+  ed.call_port_op_no_rv.port = port;
+
+  return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+/* Queue a no-return-value port `op` for the owning device's process without
+ * waiting. */
+void
+vnet_dev_process_call_port_op_no_wait (vlib_main_t *vm, vnet_dev_port_t *port,
+                                       vnet_dev_port_op_no_rv_t *op)
+{
+  vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT };
+
+  ed.call_port_op_no_wait.op = op;
+  ed.call_port_op_no_wait.port = port;
+
+  vnet_dev_process_event_send (vm, port->dev, ed);
+}
diff --git a/src/vnet/dev/process.h b/src/vnet/dev/process.h
new file mode 100644
index 00000000000..9223973dffc
--- /dev/null
+++ b/src/vnet/dev/process.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PROCESS_H_
+#define _VNET_DEV_PROCESS_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_PROCESS_H_ */
diff --git a/src/vnet/dev/queue.c b/src/vnet/dev/queue.c
new file mode 100644
index 00000000000..9a016a626fb
--- /dev/null
+++ b/src/vnet/dev/queue.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+/* NOTE(review): subclass "error" in queue.c looks like a copy-paste;
+ * confirm whether "queue" was intended. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "error",
+};
+
+/* Release an rx queue: call the driver's free hook (if any), free its
+ * counters, return its slot to port->rx_queues and free the queue memory. */
+void
+vnet_dev_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_port_t *port = rxq->port;
+  vnet_dev_t *dev = port->dev;
+  log_debug (dev, "queue %u", rxq->queue_id);
+  if (port->rx_queue_ops.free)
+    port->rx_queue_ops.free (vm, rxq);
+
+  vnet_dev_rx_queue_free_counters (vm, rxq);
+  pool_put_index (port->rx_queues, rxq->index);
+  clib_mem_free (rxq);
+}
+
+/* Allocate and initialise a new rx queue of `queue_size` on `port`.
+ *
+ * Returns VNET_DEV_ERR_NO_AVAIL_QUEUES when the port already has
+ * attr.max_rx_queues queues, otherwise the driver alloc hook's return value
+ * (VNET_DEV_OK when the driver has no hook). If the driver rejects the
+ * queue it is freed again before returning.
+ */
+vnet_dev_rv_t
+vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+                         u16 queue_size)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_rx_queue_t *rxq, **qp;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  u16 n_threads = vlib_get_n_threads ();
+  u8 buffer_pool_index;
+
+  vnet_dev_port_validate (vm, port);
+
+  log_debug (dev, "port %u queue_size %u", port->port_id, queue_size);
+
+  if (pool_elts (port->rx_queues) == port->attr.max_rx_queues)
+    return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+  /* size the allocation by the queue struct itself (was sized by
+   * vnet_dev_port_t), followed by the driver's private data */
+  rxq = vnet_dev_alloc_with_data (sizeof (*rxq),
+                                  port->rx_queue_config.data_size);
+  pool_get (port->rx_queues, qp);
+  qp[0] = rxq;
+  rxq->enabled = 1;
+  rxq->port = port;
+  rxq->size = queue_size;
+  rxq->index = qp - port->rx_queues;
+
+  /* default queue id - can be changed by driver */
+  rxq->queue_id = qp - port->rx_queues;
+  ASSERT (rxq->queue_id < port->attr.max_rx_queues);
+
+  if (n_threads > 1)
+    {
+      /* hand out threads in sequence; the counter wraps back to 1 */
+      rxq->rx_thread_index = dm->next_rx_queue_thread++;
+      if (dm->next_rx_queue_thread >= n_threads)
+        dm->next_rx_queue_thread = 1;
+    }
+
+  buffer_pool_index =
+    vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node);
+  vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
+
+  rxq->buffer_template = bp->buffer_template;
+  vnet_buffer (&rxq->buffer_template)->sw_if_index[VLIB_TX] = ~0;
+
+  rxq->next_index = vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+  if (port->rx_queue_ops.alloc)
+    rv = port->rx_queue_ops.alloc (vm, rxq);
+
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (dev, "driver rejected rx queue add with rv %d", rv);
+      vnet_dev_rx_queue_free (vm, rxq);
+    }
+  else
+    log_debug (dev, "queue %u added, assigned to thread %u", rxq->queue_id,
+               rxq->rx_thread_index);
+
+  return rv;
+}
+
+/* Start an rx queue via the driver's start hook (if any); marks the queue
+ * started when the result is VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  if (rxq->port->rx_queue_ops.start)
+    rv = rxq->port->rx_queue_ops.start (vm, rxq);
+
+  if (rv == VNET_DEV_OK)
+    rxq->started = 1;
+
+  return rv;
+}
+
+/* Stop an rx queue via the driver's stop hook (if any), set the port's rx
+ * node to disabled and clear the started flag. */
+void
+vnet_dev_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  if (rxq->port->rx_queue_ops.stop)
+    rxq->port->rx_queue_ops.stop (vm, rxq);
+  vlib_node_set_state (vm, rxq->port->intf.rx_node_index,
+                       VLIB_NODE_STATE_DISABLED);
+  rxq->started = 0;
+}
+
+/* Release a tx queue: call the driver's free hook (if any), free the thread
+ * assignment bitmap and counters, return its slot to port->tx_queues and
+ * free the queue memory. */
+void
+vnet_dev_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_port_t *port = txq->port;
+  vnet_dev_t *dev = port->dev;
+
+  vnet_dev_port_validate (vm, port);
+
+  log_debug (dev, "queue %u", txq->queue_id);
+  if (port->tx_queue_ops.free)
+    port->tx_queue_ops.free (vm, txq);
+
+  clib_bitmap_free (txq->assigned_threads);
+  vnet_dev_tx_queue_free_counters (vm, txq);
+  pool_put_index (port->tx_queues, txq->index);
+  clib_mem_free (txq);
+}
+
+/* Allocate and initialise a new tx queue of `queue_size` on `port`.
+ *
+ * Returns VNET_DEV_ERR_NO_AVAIL_QUEUES when the port already has
+ * attr.max_tx_queues queues, otherwise the driver alloc hook's return value
+ * (VNET_DEV_OK when the driver has no hook). If the driver rejects the
+ * queue it is freed again before returning.
+ */
+vnet_dev_rv_t
+vnet_dev_tx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+                         u16 queue_size)
+{
+  vnet_dev_tx_queue_t *txq, **qp;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  log_debug (dev, "port %u size %u", port->port_id, queue_size);
+
+  if (pool_elts (port->tx_queues) == port->attr.max_tx_queues)
+    return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+  /* size the allocation by the queue struct itself (was sized by
+   * vnet_dev_port_t), followed by the driver's private data */
+  txq = vnet_dev_alloc_with_data (sizeof (*txq),
+                                  port->tx_queue_config.data_size);
+  pool_get (port->tx_queues, qp);
+  qp[0] = txq;
+  txq->enabled = 1;
+  txq->port = port;
+  txq->size = queue_size;
+  txq->index = qp - port->tx_queues;
+
+  /* default queue id - can be changed by driver */
+  txq->queue_id = qp - port->tx_queues;
+  ASSERT (txq->queue_id < port->attr.max_tx_queues);
+
+  if (port->tx_queue_ops.alloc)
+    rv = port->tx_queue_ops.alloc (vm, txq);
+
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (dev, "driver rejected tx queue alloc with rv %d", rv);
+      vnet_dev_tx_queue_free (vm, txq);
+    }
+  else
+    log_debug (dev, "queue %u added", txq->queue_id);
+
+  return rv;
+}
+
+/* Start a tx queue via the driver's start hook (if any); marks the queue
+ * started when the result is VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  if (txq->port->tx_queue_ops.start)
+    rv = txq->port->tx_queue_ops.start (vm, txq);
+
+  if (rv == VNET_DEV_OK)
+    txq->started = 1;
+
+  return rv;
+}
+
+/* Stop a tx queue via the driver's stop hook (if any) and clear the started
+ * flag. */
+void
+vnet_dev_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  if (txq->port->tx_queue_ops.stop)
+    txq->port->tx_queue_ops.stop (vm, txq);
+  txq->started = 0;
+}
+
+/* Allocate the counter set for an rx queue, named after device id, port id
+ * and queue id. */
+void
+vnet_dev_rx_queue_add_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+                                vnet_dev_counter_t *counters, u16 n_counters)
+{
+  rxq->counter_main = vnet_dev_counters_alloc (
+    vm, counters, n_counters, "%s port %u rx-queue %u counters",
+    rxq->port->dev->device_id, rxq->port->port_id, rxq->queue_id);
+}
+
+/* Free the rx queue's counter set, if one was allocated. */
+void
+vnet_dev_rx_queue_free_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  if (rxq->counter_main)
+    vnet_dev_counters_free (vm, rxq->counter_main);
+}
+
+/* Allocate the counter set for a tx queue, named after device id, port id
+ * and queue id. */
+void
+vnet_dev_tx_queue_add_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+                                vnet_dev_counter_t *counters, u16 n_counters)
+{
+  txq->counter_main = vnet_dev_counters_alloc (
+    vm, counters, n_counters, "%s port %u tx-queue %u counters",
+    txq->port->dev->device_id, txq->port->port_id, txq->queue_id);
+}
+
+/* Free the tx queue's counter set, if one was allocated. */
+void
+vnet_dev_tx_queue_free_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  if (!txq->counter_main)
+    return;
+
+  log_debug (txq->port->dev, "free");
+  vnet_dev_counters_free (vm, txq->counter_main);
+}
diff --git a/src/vnet/dev/runtime.c
b/src/vnet/dev/runtime.c
new file mode 100644
index 00000000000..79c55cfbd53
--- /dev/null
+++ b/src/vnet/dev/runtime.c
@@ -0,0 +1,180 @@
+
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/bitmap.h"
+#include "vppinfra/lock.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "runtime",
+};
+
+/* Ops published for execution by the dev-rt-mgmt node; 0 when idle. */
+static vnet_dev_rt_op_t *rt_ops;
+
+/* Rebuild the rx node runtime of op->port for the thread owning `vm`.
+ *
+ * Links every rx queue of the port assigned to vm->thread_index into a
+ * list (first_rx_queue / next_on_thread) and sets the rx node state:
+ * polling if any such queue is not in interrupt mode, interrupt if all of
+ * them are, disabled if the thread has no queue. Finally marks the op as
+ * completed with a release store.
+ */
+static void
+_vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op)
+{
+  vnet_dev_port_t *port = op->port;
+  vnet_dev_rx_queue_t *previous = 0, *first = 0;
+  vnet_dev_rx_node_runtime_t *rtd;
+  vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
+  u32 node_index = port->intf.rx_node_index;
+
+  rtd = vlib_node_get_runtime_data (vm, node_index);
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    {
+      if (q->rx_thread_index != vm->thread_index)
+        continue;
+
+      if (q->interrupt_mode == 0)
+        state = VLIB_NODE_STATE_POLLING;
+      else if (state != VLIB_NODE_STATE_POLLING)
+        state = VLIB_NODE_STATE_INTERRUPT;
+
+      q->next_on_thread = 0;
+      if (previous == 0)
+        first = q;
+      else
+        previous->next_on_thread = q;
+
+      previous = q;
+    }
+
+  rtd->first_rx_queue = first;
+  vlib_node_set_state (vm, port->intf.rx_node_index, state);
+  __atomic_store_n (&op->completed, 1, __ATOMIC_RELEASE);
+}
+
+/* Pre-input node: execute the published ops addressed to this thread.
+ *
+ * An op flagged in_order is not executed while an earlier op in the vector
+ * is still incomplete; the node re-arms its own interrupt and returns.
+ * Returns the number of ops executed in this invocation.
+ */
+static uword
+vnet_dev_rt_mgmt_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+                          vlib_frame_t *frame)
+{
+  u16 thread_index = vm->thread_index;
+  vnet_dev_rt_op_t *op, *ops = __atomic_load_n (&rt_ops, __ATOMIC_ACQUIRE);
+  u32 n_pending = 0;
+  uword rv = 0;
+
+  vec_foreach (op, ops)
+    {
+      if (!op->completed && op->thread_index == thread_index)
+        {
+          if (op->in_order == 1 && n_pending)
+            {
+              vlib_node_set_interrupt_pending (vm, node->node_index);
+              return rv;
+            }
+          _vnet_dev_rt_exec_op (vm, op);
+          rv++;
+        }
+
+      if (op->completed == 0)
+        n_pending++;
+    }
+
+  return rv;
+}
+
+VLIB_REGISTER_NODE (vnet_dev_rt_mgmt_node, static) = {
+  .function = vnet_dev_rt_mgmt_node_fn,
+  .name = "dev-rt-mgmt",
+  .type = VLIB_NODE_TYPE_PRE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+};
+
+/* Execute `n_ops` rx node runtime updates from `ops`.
+ *
+ * With the worker barrier held every op is applied directly on its target
+ * thread's vlib_main. Otherwise the leading run of ops for the calling
+ * thread is executed locally, the remaining ops are either executed locally
+ * or copied into a vector published through rt_ops, the dev-rt-mgmt node is
+ * interrupted on each thread involved, and the caller suspends until every
+ * published op is marked completed. Always returns VNET_DEV_OK.
+ */
+vnet_dev_rv_t
+vnet_dev_rt_exec_ops (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_rt_op_t *ops,
+                      u32 n_ops)
+{
+  vnet_dev_rt_op_t *op = ops;
+  vnet_dev_rt_op_t *remote_ops = 0;
+  clib_bitmap_t *remote_bmp = 0;
+  u32 i;
+
+  ASSERT (rt_ops == 0);
+
+  if (vlib_worker_thread_barrier_held ())
+    {
+      for (op = ops; op < (ops + n_ops); op++)
+        {
+          vlib_main_t *tvm = vlib_get_main_by_index (op->thread_index);
+          _vnet_dev_rt_exec_op (tvm, op);
+          log_debug (
+            dev,
+            "port %u rx node runtime update on thread %u executed locally",
+            op->port->port_id, op->thread_index);
+        }
+      return VNET_DEV_OK;
+    }
+
+  while (n_ops)
+    {
+      if (op->thread_index != vm->thread_index)
+        break;
+
+      _vnet_dev_rt_exec_op (vm, op);
+      log_debug (
+        dev, "port %u rx node runtime update on thread %u executed locally",
+        op->port->port_id, op->thread_index);
+      op++;
+      n_ops--;
+    }
+
+  if (n_ops == 0)
+    return VNET_DEV_OK;
+
+  /* continue from the first op not handled above: restarting at the
+   * original `ops` with the reduced count would re-run the ops already
+   * executed and never reach the last ones */
+  ops = op;
+
+  for (op = ops; op < (ops + n_ops); op++)
+    {
+      if (op->thread_index == vm->thread_index &&
+          (op->in_order == 0 || vec_len (remote_ops) == 0))
+        {
+          _vnet_dev_rt_exec_op (vm, op);
+          log_debug (dev,
+                     "port %u rx node runtime update on thread "
+                     "%u executed locally",
+                     op->port->port_id, op->thread_index);
+        }
+      else
+        {
+          vec_add1 (remote_ops, *op);
+          log_debug (dev,
+                     "port %u rx node runtime update on thread %u "
+                     "enqueued for remote execution",
+                     op->port->port_id, op->thread_index);
+          remote_bmp = clib_bitmap_set (remote_bmp, op->thread_index, 1);
+        }
+    }
+
+  if (remote_ops == 0)
+    return VNET_DEV_OK;
+
+  __atomic_store_n (&rt_ops, remote_ops, __ATOMIC_RELEASE);
+
+  clib_bitmap_foreach (i, remote_bmp)
+    {
+      vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
+                                       vnet_dev_rt_mgmt_node.index);
+      log_debug (dev, "interrupt sent to %s node on thread %u",
+                 vnet_dev_rt_mgmt_node.name, i);
+    }
+
+  vec_foreach (op, remote_ops)
+    {
+      /* acquire load pairs with the release store in _vnet_dev_rt_exec_op */
+      while (__atomic_load_n (&op->completed, __ATOMIC_ACQUIRE) == 0)
+        vlib_process_suspend (vm, 5e-5);
+
+      log_debug (dev, "port %u rx node runtime update on thread %u completed",
+                 op->port->port_id, op->thread_index);
+    }
+
+  /* NOTE(review): a dev-rt-mgmt node on another thread may still be walking
+   * this vector when it is freed below - confirm this cannot happen */
+  __atomic_store_n (&rt_ops, 0, __ATOMIC_RELAXED);
+  vec_free (remote_ops);
+  clib_bitmap_free (remote_bmp);
+  return VNET_DEV_OK;
+}
diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h
new file mode 100644
index 00000000000..006d18e5bc5
--- /dev/null
+++ b/src/vnet/dev/types.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_TYPES_H_
+#define _VNET_DEV_TYPES_H_
+
+#include <vppinfra/types.h>
+#include <vnet/dev/errors.h>
+
+typedef char vnet_dev_device_id_t[48];
+typedef char vnet_dev_if_name_t[32];
+typedef char vnet_dev_driver_name_t[16];
+typedef char vnet_dev_bus_name_t[8];
+typedef u16 vnet_dev_port_id_t;
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+
+typedef enum
+{
+  VNET_DEV_MINUS_OK = 0,
+#define _(n, d) VNET_DEV_ERR_MINUS_##n,
+  foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_minus_rv_t;
+
+typedef enum
+{
+  VNET_DEV_OK = 0,
+#define _(n, d) VNET_DEV_ERR_##n = -(VNET_DEV_ERR_MINUS_##n),
+  foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_rv_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_flag _ (0, NO_STATS, "don't poll device stats")
+
+typedef union
+{
+  enum
+  {
+#define _(b, n, d) VNET_DEV_F_##n = 1ull << (b),
+    foreach_vnet_dev_flag
+#undef _
+  } e;
+  u32 n;
+} vnet_dev_flags_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_port_flag                                            \
+  _ (0, INTERRUPT_MODE, "enable interrupt mode")
+
+typedef union
+{
+  enum
+  {
+#define _(b, n, d) VNET_DEV_PORT_F_##n = 1ull << (b),
+
foreach_vnet_dev_port_flag +#undef _ + } e; + u32 n; +} vnet_dev_port_flags_t; + +#endif /* _VNET_DEV_TYPES_H_ */ diff --git a/src/vnet/devices/af_packet/FEATURE.yaml b/src/vnet/devices/af_packet/FEATURE.yaml deleted file mode 100644 index 4a11ea2beb5..00000000000 --- a/src/vnet/devices/af_packet/FEATURE.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -name: host-interface Device AF_PACKET -maintainer: Damjan Marion <damarion@cisco.com> -features: - - L4 checksum offload - - GSO offload -description: "Create a host interface that will attach to a linux AF_PACKET - interface, one side of a veth pair. The veth pair must - already exist. Once created, a new host interface will - exist in VPP with the name 'host-<ifname>', where '<ifname>' - is the name of the specified veth pair. Use the 'show interface' - command to display host interface details." -missing: - - API dump details beyond sw_if_index and name -state: production -properties: [API, CLI, STATS, MULTITHREAD] diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api deleted file mode 100644 index 4a5cfb0fc3a..00000000000 --- a/src/vnet/devices/af_packet/af_packet.api +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -option version = "2.0.0"; - -import "vnet/interface_types.api"; -import "vnet/ethernet/ethernet_types.api"; - -/** \brief Create host-interface - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param hw_addr - interface MAC - @param use_random_hw_addr - use random generated MAC - @param host_if_name - interface name -*/ -define af_packet_create -{ - u32 client_index; - u32 context; - - vl_api_mac_address_t hw_addr; - bool use_random_hw_addr; - string host_if_name[64]; -}; - -/** \brief Create host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define af_packet_create_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; - -/** \brief Create host-interface - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param hw_addr - interface MAC - @param use_random_hw_addr - use random generated MAC - @param host_if_name - interface name - @param rx_frame_size - frame size for RX - @param tx_frame_size - frame size for TX - @param rx_frames_per_block - frames per block for RX - @param tx_frames_per_block - frames per block for TX - @param flags - flags for the af_packet interface creation - @param num_rx_queues - number of rx queues -*/ -define af_packet_create_v2 -{ - u32 client_index; - u32 context; - - vl_api_mac_address_t hw_addr; - bool use_random_hw_addr; - string host_if_name[64]; - u32 rx_frame_size; - u32 tx_frame_size; - u32 rx_frames_per_block; - u32 tx_frames_per_block; - u32 flags; - u16 num_rx_queues [default=1]; -}; - -/** \brief Create host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define af_packet_create_v2_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; - -enum af_packet_mode { - 
AF_PACKET_API_MODE_ETHERNET = 1, /* mode ethernet */ - AF_PACKET_API_MODE_IP = 2, /* mode ip */ -}; - -enum af_packet_flags { - AF_PACKET_API_FLAG_QDISC_BYPASS = 1, /* enable the qdisc bypass */ - AF_PACKET_API_FLAG_CKSUM_GSO = 2, /* enable checksum/gso */ -}; - -/** \brief Create host-interface - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param mode - 1 - Ethernet, 2 - IP - @param hw_addr - interface MAC - @param use_random_hw_addr - use random generated MAC - @param host_if_name - interface name - @param rx_frame_size - frame size for RX - @param tx_frame_size - frame size for TX - @param rx_frames_per_block - frames per block for RX - @param tx_frames_per_block - frames per block for TX - @param flags - flags for the af_packet interface creation - @param num_rx_queues - number of rx queues - @param num_tx_queues - number of tx queues -*/ -define af_packet_create_v3 -{ - u32 client_index; - u32 context; - - vl_api_af_packet_mode_t mode; - vl_api_mac_address_t hw_addr; - bool use_random_hw_addr; - string host_if_name[64]; - u32 rx_frame_size; - u32 tx_frame_size; - u32 rx_frames_per_block; - u32 tx_frames_per_block; - vl_api_af_packet_flags_t flags; - u16 num_rx_queues [default=1]; - u16 num_tx_queues [default=1]; -}; - -/** \brief Create host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define af_packet_create_v3_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; - -/** \brief Delete host-interface - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param host_if_name - interface name -*/ -autoreply define af_packet_delete -{ - u32 client_index; - u32 context; - - string host_if_name[64]; -}; - -/** \brief Set l4 offload checksum calculation - @param client_index - opaque cookie to identify the sender 
- @param context - sender context, to match reply w/ request -*/ -autoreply define af_packet_set_l4_cksum_offload -{ - u32 client_index; - u32 context; - - vl_api_interface_index_t sw_if_index; - bool set; -}; - -/** \brief Dump af_packet interfaces request */ -define af_packet_dump -{ - u32 client_index; - u32 context; -}; - -/** \brief Reply for af_packet dump request - @param sw_if_index - software index of af_packet interface - @param host_if_name - interface name -*/ -define af_packet_details -{ - u32 context; - vl_api_interface_index_t sw_if_index; - string host_if_name[64]; -}; - -/* - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c deleted file mode 100644 index ec65bf6d493..00000000000 --- a/src/vnet/devices/af_packet/af_packet.c +++ /dev/null @@ -1,849 +0,0 @@ -/* - *------------------------------------------------------------------ - * af_packet.c - linux kernel packet interface - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <sys/ioctl.h> -#include <net/if.h> -#include <dirent.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <fcntl.h> - -#include <vppinfra/linux/sysfs.h> -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ip/ip.h> -#include <vnet/devices/netlink.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/interface/rx_queue_funcs.h> -#include <vnet/interface/tx_queue_funcs.h> - -#include <vnet/devices/af_packet/af_packet.h> - -af_packet_main_t af_packet_main; - -VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = { - .name = "af-packet-ip-device", - .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, -}; - -#define AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK 1024 -#define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 33) // GSO packet of 64KB -#define AF_PACKET_TX_BLOCK_NR 1 - -#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 32 -#define AF_PACKET_DEFAULT_RX_FRAME_SIZE 2048 -#define AF_PACKET_RX_BLOCK_NR 160 - -/*defined in net/if.h but clashes with dpdk headers */ -unsigned int if_nametoindex (const char *ifname); - -static clib_error_t * -af_packet_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi, - u32 frame_size) -{ - clib_error_t *error, *rv; - af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, hi->dev_instance); - - error = vnet_netlink_set_link_mtu (apif->host_if_index, - frame_size + hi->frame_overhead); - - if (error) - { - vlib_log_err (apm->log_class, "netlink failed to change MTU: %U", - format_clib_error, error); - rv = vnet_error (VNET_ERR_SYSCALL_ERROR_1, "netlink error: %U", - format_clib_error, error); - clib_error_free (error); - return rv; - } - else - apif->host_mtu = frame_size + hi->frame_overhead; - return 0; -} - -static int -af_packet_read_mtu (af_packet_if_t *apif) -{ - af_packet_main_t *apm = &af_packet_main; - 
clib_error_t *error; - error = vnet_netlink_get_link_mtu (apif->host_if_index, &apif->host_mtu); - if (error) - { - vlib_log_err (apm->log_class, "netlink failed to get MTU: %U", - format_clib_error, error); - clib_error_free (error); - return VNET_API_ERROR_SYSCALL_ERROR_1; - } - return 0; -} - -static clib_error_t * -af_packet_fd_read_ready (clib_file_t * uf) -{ - vnet_main_t *vnm = vnet_get_main (); - - /* Schedule the rx node */ - vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data); - return 0; -} - -static int -is_bridge (const u8 * host_if_name) -{ - u8 *s; - DIR *dir = NULL; - - s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0); - dir = opendir ((char *) s); - vec_free (s); - - if (dir) - { - closedir (dir); - return 0; - } - - return -1; -} - -static void -af_packet_set_rx_queues (vlib_main_t *vm, af_packet_if_t *apif) -{ - vnet_main_t *vnm = vnet_get_main (); - af_packet_queue_t *rx_queue; - - vnet_hw_if_set_input_node (vnm, apif->hw_if_index, - af_packet_input_node.index); - - vec_foreach (rx_queue, apif->rx_queues) - { - rx_queue->queue_index = vnet_hw_if_register_rx_queue ( - vnm, apif->hw_if_index, rx_queue->queue_id, VNET_HW_IF_RXQ_THREAD_ANY); - - { - clib_file_t template = { 0 }; - template.read_function = af_packet_fd_read_ready; - template.file_descriptor = rx_queue->fd; - template.private_data = rx_queue->queue_index; - template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; - template.description = - format (0, "%U queue %u", format_af_packet_device_name, - apif->dev_instance, rx_queue->queue_id); - rx_queue->clib_file_index = clib_file_add (&file_main, &template); - } - vnet_hw_if_set_rx_queue_file_index (vnm, rx_queue->queue_index, - rx_queue->clib_file_index); - vnet_hw_if_set_rx_queue_mode (vnm, rx_queue->queue_index, - VNET_HW_IF_RX_MODE_INTERRUPT); - rx_queue->mode = VNET_HW_IF_RX_MODE_INTERRUPT; - } - vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index); -} - -static void -af_packet_set_tx_queues (vlib_main_t *vm, 
af_packet_if_t *apif) -{ - vnet_main_t *vnm = vnet_get_main (); - af_packet_main_t *apm = &af_packet_main; - af_packet_queue_t *tx_queue; - - vec_foreach (tx_queue, apif->tx_queues) - { - tx_queue->queue_index = vnet_hw_if_register_tx_queue ( - vnm, apif->hw_if_index, tx_queue->queue_id); - } - - if (apif->num_txqs == 0) - { - vlib_log_err (apm->log_class, "Interface %U has 0 txq", - format_vnet_hw_if_index_name, vnm, apif->hw_if_index); - return; - } - - for (u32 j = 0; j < vlib_get_n_threads (); j++) - { - u32 qi = apif->tx_queues[j % apif->num_txqs].queue_index; - vnet_hw_if_tx_queue_assign_thread (vnm, qi, j); - } - - vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index); -} - -static int -create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, - tpacket_req3_t *tx_req, int *fd, af_packet_ring_t *ring, - u32 fanout_id, af_packet_if_flags_t *flags) -{ - af_packet_main_t *apm = &af_packet_main; - struct sockaddr_ll sll; - socklen_t req_sz = sizeof (tpacket_req3_t); - int ret; - int ver = TPACKET_V3; - u32 ring_sz = 0; - - if (rx_req) - ring_sz += rx_req->tp_block_size * rx_req->tp_block_nr; - - if (tx_req) - ring_sz += tx_req->tp_block_size * tx_req->tp_block_nr; - - if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0) - { - vlib_log_err (apm->log_class, - "Failed to create AF_PACKET socket: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - /* bind before rx ring is cfged so we don't receive packets from other interfaces */ - clib_memset (&sll, 0, sizeof (sll)); - sll.sll_family = PF_PACKET; - sll.sll_protocol = htons (ETH_P_ALL); - sll.sll_ifindex = host_if_index; - if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0) - { - vlib_log_err (apm->log_class, - "Failed to bind rx packet socket: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0) - { - 
vlib_log_err (apm->log_class, - "Failed to set rx packet interface version: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - int opt = 1; - if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0) - { - vlib_log_err ( - apm->log_class, - "Failed to set packet tx ring error handling option: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - if (*flags & AF_PACKET_IF_FLAGS_CKSUM_GSO) - { - - int opt2 = 1; - if (setsockopt (*fd, SOL_PACKET, PACKET_VNET_HDR, &opt2, sizeof (opt2)) < - 0) - { - // remove the flag - *flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO; - vlib_log_debug (apm->log_class, - "Failed to set packet vnet hdr error handling " - "option: %s (errno %d)", - strerror (errno), errno); - } - } - -#if defined(PACKET_QDISC_BYPASS) - if (*flags & AF_PACKET_IF_FLAGS_QDISC_BYPASS) - /* Introduced with Linux 3.14 so the ifdef should eventually be removed */ - if (setsockopt (*fd, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof (opt)) < - 0) - { - // remove the flag - *flags &= ~AF_PACKET_IF_FLAGS_QDISC_BYPASS; - vlib_log_debug (apm->log_class, - "Failed to set qdisc bypass error " - "handling option: %s (errno %d)", - strerror (errno), errno); - } -#endif - - if (rx_req) - { - if (*flags & AF_PACKET_IF_FLAGS_FANOUT) - { - int fanout = ((fanout_id & 0xffff) | ((PACKET_FANOUT_HASH) << 16)); - if (setsockopt (*fd, SOL_PACKET, PACKET_FANOUT, &fanout, - sizeof (fanout)) < 0) - { - // remove the flag - *flags &= ~AF_PACKET_IF_FLAGS_FANOUT; - vlib_log_err (apm->log_class, - "Failed to set fanout options: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - } - - if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0) - { - vlib_log_err (apm->log_class, - "Failed to set packet rx ring options: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } 
- } - - if (tx_req) - if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0) - { - vlib_log_err (apm->log_class, - "Failed to set packet tx ring options: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - ring->ring_start_addr = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_LOCKED, *fd, 0); - if (ring->ring_start_addr == MAP_FAILED) - { - vlib_log_err (apm->log_class, "mmap failure: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - ring->ring_size = ring_sz; - - return 0; -error: - if (*fd >= 0) - { - close (*fd); - *fd = -1; - } - return ret; -} - -int -af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif, - af_packet_create_if_arg_t *arg, - af_packet_queue_t *rx_queue, af_packet_queue_t *tx_queue, - u8 queue_id) -{ - af_packet_main_t *apm = &af_packet_main; - tpacket_req3_t *rx_req = 0; - tpacket_req3_t *tx_req = 0; - int ret, fd = -1; - af_packet_ring_t ring = { 0 }; - u8 *ring_addr = 0; - u32 rx_frames_per_block, tx_frames_per_block; - u32 rx_frame_size, tx_frame_size; - u32 i = 0; - - if (rx_queue) - { - rx_frames_per_block = arg->rx_frames_per_block ? - arg->rx_frames_per_block : - AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK; - - rx_frame_size = arg->rx_frame_size ? arg->rx_frame_size : - AF_PACKET_DEFAULT_RX_FRAME_SIZE; - vec_validate (rx_queue->rx_req, 0); - rx_queue->rx_req->tp_block_size = rx_frame_size * rx_frames_per_block; - rx_queue->rx_req->tp_frame_size = rx_frame_size; - rx_queue->rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR; - rx_queue->rx_req->tp_frame_nr = - AF_PACKET_RX_BLOCK_NR * rx_frames_per_block; - rx_queue->rx_req->tp_retire_blk_tov = 1; // 1 ms block timout - rx_queue->rx_req->tp_feature_req_word = 0; - rx_queue->rx_req->tp_sizeof_priv = 0; - rx_req = rx_queue->rx_req; - } - - if (tx_queue) - { - tx_frames_per_block = arg->tx_frames_per_block ? 
- arg->tx_frames_per_block : - AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK; - tx_frame_size = arg->tx_frame_size ? arg->tx_frame_size : - AF_PACKET_DEFAULT_TX_FRAME_SIZE; - - vec_validate (tx_queue->tx_req, 0); - tx_queue->tx_req->tp_block_size = tx_frame_size * tx_frames_per_block; - tx_queue->tx_req->tp_frame_size = tx_frame_size; - tx_queue->tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR; - tx_queue->tx_req->tp_frame_nr = - AF_PACKET_TX_BLOCK_NR * tx_frames_per_block; - tx_queue->tx_req->tp_retire_blk_tov = 0; - tx_queue->tx_req->tp_sizeof_priv = 0; - tx_queue->tx_req->tp_feature_req_word = 0; - tx_req = tx_queue->tx_req; - } - - if (rx_queue || tx_queue) - { - ret = create_packet_v3_sock (apif->host_if_index, rx_req, tx_req, &fd, - &ring, apif->dev_instance, &arg->flags); - - if (ret != 0) - goto error; - - vec_add1 (apif->rings, ring); - ring_addr = ring.ring_start_addr; - } - - if (rx_queue) - { - rx_queue->fd = fd; - vec_validate (rx_queue->rx_ring, rx_queue->rx_req->tp_block_nr - 1); - vec_foreach_index (i, rx_queue->rx_ring) - { - rx_queue->rx_ring[i] = - ring_addr + i * rx_queue->rx_req->tp_block_size; - } - - rx_queue->next_rx_block = 0; - rx_queue->queue_id = queue_id; - rx_queue->is_rx_pending = 0; - ring_addr = ring_addr + rx_queue->rx_req->tp_block_size * - rx_queue->rx_req->tp_block_nr; - } - - if (tx_queue) - { - tx_queue->fd = fd; - vec_validate (tx_queue->tx_ring, tx_queue->tx_req->tp_block_nr - 1); - vec_foreach_index (i, tx_queue->tx_ring) - { - tx_queue->tx_ring[i] = - ring_addr + i * tx_queue->tx_req->tp_block_size; - } - - tx_queue->next_tx_frame = 0; - tx_queue->queue_id = queue_id; - tx_queue->is_tx_pending = 0; - clib_spinlock_init (&tx_queue->lockp); - } - - return 0; -error: - vlib_log_err (apm->log_class, "Failed to set queue %u error", queue_id); - if (rx_queue) - vec_free (rx_queue->rx_req); - if (tx_queue) - vec_free (tx_queue->tx_req); - return ret; -} - -int -af_packet_device_init (vlib_main_t *vm, af_packet_if_t *apif, - 
af_packet_create_if_arg_t *args) -{ - af_packet_main_t *apm = &af_packet_main; - af_packet_queue_t *rx_queue = 0; - af_packet_queue_t *tx_queue = 0; - u16 nq = clib_min (args->num_rxqs, args->num_txqs); - u16 i = 0; - int ret = 0; - - // enable fanout feature for multi-rxqs - if (args->num_rxqs > 1) - args->flags |= AF_PACKET_IF_FLAGS_FANOUT; - - vec_validate (apif->rx_queues, args->num_rxqs - 1); - vec_validate (apif->tx_queues, args->num_txqs - 1); - - for (; i < nq; i++) - { - rx_queue = vec_elt_at_index (apif->rx_queues, i); - tx_queue = vec_elt_at_index (apif->tx_queues, i); - ret = af_packet_queue_init (vm, apif, args, rx_queue, tx_queue, i); - if (ret != 0) - goto error; - } - - if (args->num_rxqs > args->num_txqs) - { - for (; i < args->num_rxqs; i++) - { - rx_queue = vec_elt_at_index (apif->rx_queues, i); - ret = af_packet_queue_init (vm, apif, args, rx_queue, 0, i); - if (ret != 0) - goto error; - } - } - else if (args->num_txqs > args->num_rxqs) - { - for (; i < args->num_txqs; i++) - { - tx_queue = vec_elt_at_index (apif->tx_queues, i); - ret = af_packet_queue_init (vm, apif, args, 0, tx_queue, i); - if (ret != 0) - goto error; - } - } - - apif->num_rxqs = args->num_rxqs; - apif->num_txqs = args->num_txqs; - - return 0; -error: - vlib_log_err (apm->log_class, "Failed to init device error"); - return ret; -} - -int -af_packet_create_if (af_packet_create_if_arg_t *arg) -{ - af_packet_main_t *apm = &af_packet_main; - vlib_main_t *vm = vlib_get_main (); - int fd2 = -1; - struct ifreq ifr; - af_packet_if_t *apif = 0; - u8 hw_addr[6]; - vnet_sw_interface_t *sw; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_if_caps_t caps = VNET_HW_IF_CAP_INT_MODE; - uword *p; - uword if_index; - u8 *host_if_name_dup = 0; - int host_if_index = -1; - int ret = 0; - - p = mhash_get (&apm->if_index_by_host_if_name, arg->host_if_name); - if (p) - { - apif = vec_elt_at_index (apm->interfaces, p[0]); - arg->sw_if_index = apif->sw_if_index; - return 
VNET_API_ERROR_IF_ALREADY_EXISTS; - } - - host_if_name_dup = vec_dup (arg->host_if_name); - - /* - * make sure host side of interface is 'UP' before binding AF_PACKET - * socket on it. - */ - if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0) - { - vlib_log_debug (apm->log_class, - "Failed to create AF_UNIX socket: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - clib_memcpy (ifr.ifr_name, (const char *) arg->host_if_name, - vec_len (arg->host_if_name)); - if (ioctl (fd2, SIOCGIFINDEX, &ifr) < 0) - { - vlib_log_debug ( - apm->log_class, - "Failed to retrieve the interface (%s) index: %s (errno %d)", - arg->host_if_name, strerror (errno), errno); - ret = VNET_API_ERROR_INVALID_INTERFACE; - goto error; - } - - host_if_index = ifr.ifr_ifindex; - if (ioctl (fd2, SIOCGIFFLAGS, &ifr) < 0) - { - vlib_log_debug (apm->log_class, - "Failed to get the active flag: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - if (!(ifr.ifr_flags & IFF_UP)) - { - ifr.ifr_flags |= IFF_UP; - if (ioctl (fd2, SIOCSIFFLAGS, &ifr) < 0) - { - vlib_log_debug (apm->log_class, - "Failed to set the active flag: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - } - - if (fd2 > -1) - { - close (fd2); - fd2 = -1; - } - - ret = is_bridge (arg->host_if_name); - if (ret == 0) /* is a bridge, ignore state */ - host_if_index = -1; - - /* So far everything looks good, let's create interface */ - pool_get (apm->interfaces, apif); - if_index = apif - apm->interfaces; - - apif->dev_instance = if_index; - apif->host_if_index = host_if_index; - apif->host_if_name = host_if_name_dup; - apif->per_interface_next_index = ~0; - apif->mode = arg->mode; - - ret = af_packet_device_init (vm, apif, arg); - if (ret != 0) - goto error; - - ret = af_packet_read_mtu (apif); - if (ret != 0) - goto error; - - - if (apif->mode != AF_PACKET_IF_MODE_IP) - { - 
vnet_eth_interface_registration_t eir = {}; - /*use configured or generate random MAC address */ - if (arg->hw_addr) - clib_memcpy (hw_addr, arg->hw_addr, 6); - else - { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - - clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd)); - hw_addr[0] = 2; - hw_addr[1] = 0xfe; - } - - eir.dev_class_index = af_packet_device_class.index; - eir.dev_instance = apif->dev_instance; - eir.address = hw_addr; - eir.cb.set_max_frame_size = af_packet_eth_set_max_frame_size; - apif->hw_if_index = vnet_eth_register_interface (vnm, &eir); - } - else - { - apif->hw_if_index = vnet_register_interface ( - vnm, af_packet_device_class.index, apif->dev_instance, - af_packet_ip_device_hw_interface_class.index, apif->dev_instance); - } - - sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index); - apif->sw_if_index = sw->sw_if_index; - - af_packet_set_rx_queues (vm, apif); - af_packet_set_tx_queues (vm, apif); - - if (arg->flags & AF_PACKET_IF_FLAGS_FANOUT) - apif->is_fanout_enabled = 1; - - apif->is_qdisc_bypass_enabled = - (arg->flags & AF_PACKET_IF_FLAGS_QDISC_BYPASS); - - if (arg->flags & AF_PACKET_IF_FLAGS_CKSUM_GSO) - apif->is_cksum_gso_enabled = 1; - - if (apif->is_cksum_gso_enabled) - caps |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_IP4_CKSUM | - VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM; - - vnet_hw_if_set_caps (vnm, apif->hw_if_index, caps); - vnet_hw_interface_set_flags (vnm, apif->hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); - - mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index, - 0); - arg->sw_if_index = apif->sw_if_index; - - return 0; - -error: - if (fd2 > -1) - { - close (fd2); - fd2 = -1; - } - vec_free (host_if_name_dup); - if (apif) - { - memset (apif, 0, sizeof (*apif)); - pool_put (apm->interfaces, apif); - } - return ret; -} - -static int -af_packet_rx_queue_free (af_packet_if_t *apif, af_packet_queue_t *rx_queue) -{ - clib_file_del_by_index 
(&file_main, rx_queue->clib_file_index); - close (rx_queue->fd); - rx_queue->fd = -1; - rx_queue->rx_ring = NULL; - vec_free (rx_queue->rx_req); - rx_queue->rx_req = NULL; - return 0; -} - -static int -af_packet_tx_queue_free (af_packet_if_t *apif, af_packet_queue_t *tx_queue) -{ - close (tx_queue->fd); - tx_queue->fd = -1; - clib_spinlock_free (&tx_queue->lockp); - tx_queue->tx_ring = NULL; - vec_free (tx_queue->tx_req); - tx_queue->tx_req = NULL; - return 0; -} - -static int -af_packet_ring_free (af_packet_if_t *apif, af_packet_ring_t *ring) -{ - af_packet_main_t *apm = &af_packet_main; - - if (ring) - { - // FIXME: unmap the memory - if (munmap (ring->ring_start_addr, ring->ring_size)) - vlib_log_warn (apm->log_class, - "Host interface %s could not free ring %p of size %u", - apif->host_if_name, ring->ring_start_addr, - ring->ring_size); - else - ring->ring_start_addr = 0; - } - - return 0; -} - -int -af_packet_delete_if (u8 *host_if_name) -{ - vnet_main_t *vnm = vnet_get_main (); - af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif; - af_packet_queue_t *rx_queue; - af_packet_queue_t *tx_queue; - af_packet_ring_t *ring; - uword *p; - - p = mhash_get (&apm->if_index_by_host_if_name, host_if_name); - if (p == NULL) - { - vlib_log_warn (apm->log_class, "Host interface %s does not exist", - host_if_name); - return VNET_API_ERROR_SYSCALL_ERROR_1; - } - apif = pool_elt_at_index (apm->interfaces, p[0]); - - /* bring down the interface */ - vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0); - - /* clean up */ - vec_foreach (rx_queue, apif->rx_queues) - af_packet_rx_queue_free (apif, rx_queue); - vec_foreach (tx_queue, apif->tx_queues) - af_packet_tx_queue_free (apif, tx_queue); - vec_foreach (ring, apif->rings) - af_packet_ring_free (apif, ring); - - vec_free (apif->rx_queues); - apif->rx_queues = NULL; - vec_free (apif->tx_queues); - apif->tx_queues = NULL; - vec_free (apif->rings); - apif->rings = NULL; - - vec_free (apif->host_if_name); - 
apif->host_if_name = NULL; - apif->host_if_index = -1; - - mhash_unset (&apm->if_index_by_host_if_name, host_if_name, p); - - if (apif->mode != AF_PACKET_IF_MODE_IP) - ethernet_delete_interface (vnm, apif->hw_if_index); - else - vnet_delete_hw_interface (vnm, apif->hw_if_index); - - memset (apif, 0, sizeof (*apif)); - pool_put (apm->interfaces, apif); - - return 0; -} - -int -af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set) -{ - // deprecated ... - return 0; -} - -int -af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs) -{ - af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif; - af_packet_if_detail_t *r_af_packet_ifs = NULL; - af_packet_if_detail_t *af_packet_if = NULL; - - pool_foreach (apif, apm->interfaces) - { - vec_add2 (r_af_packet_ifs, af_packet_if, 1); - af_packet_if->sw_if_index = apif->sw_if_index; - if (apif->host_if_name) - { - clib_memcpy (af_packet_if->host_if_name, apif->host_if_name, - MIN (ARRAY_LEN (af_packet_if->host_if_name) - 1, - strlen ((const char *) apif->host_if_name))); - } - } - - *out_af_packet_ifs = r_af_packet_ifs; - - return 0; -} - -static clib_error_t * -af_packet_init (vlib_main_t * vm) -{ - af_packet_main_t *apm = &af_packet_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - clib_memset (apm, 0, sizeof (af_packet_main_t)); - - mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword)); - - vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - apm->log_class = vlib_log_register_class ("af_packet", 0); - vlib_log_debug (apm->log_class, "initialized"); - - return 0; -} - -VLIB_INIT_FUNCTION (af_packet_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h deleted file mode 100644 index 940acbb1372..00000000000 --- a/src/vnet/devices/af_packet/af_packet.h +++ /dev/null @@ -1,168 
+0,0 @@ -/* - *------------------------------------------------------------------ - * af_packet.h - linux kernel packet interface header file - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <linux/if_packet.h> - -#include <vppinfra/lock.h> -#include <vlib/log.h> - -typedef struct tpacket_block_desc block_desc_t; -typedef struct tpacket_req3 tpacket_req3_t; -typedef struct tpacket3_hdr tpacket3_hdr_t; - -typedef enum -{ - AF_PACKET_IF_MODE_ETHERNET = 1, - AF_PACKET_IF_MODE_IP = 2 -} af_packet_if_mode_t; - -typedef enum -{ - AF_PACKET_IF_FLAGS_QDISC_BYPASS = 1, - AF_PACKET_IF_FLAGS_CKSUM_GSO = 2, - AF_PACKET_IF_FLAGS_FANOUT = 4, -} af_packet_if_flags_t; - -typedef struct -{ - u32 sw_if_index; - u8 host_if_name[64]; -} af_packet_if_detail_t; - -typedef struct -{ - u8 *ring_start_addr; - u32 ring_size; -} af_packet_ring_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - clib_spinlock_t lockp; - int fd; - union - { - tpacket_req3_t *rx_req; - tpacket_req3_t *tx_req; - }; - - union - { - u8 **rx_ring; - u8 **tx_ring; - }; - - union - { - u32 next_rx_block; - u32 next_tx_frame; - }; - - u16 queue_id; - u32 queue_index; - - u32 clib_file_index; - - u32 rx_frame_offset; - u16 num_rx_pkts; - u8 is_rx_pending; - u8 is_tx_pending; - vnet_hw_if_rx_mode mode; -} af_packet_queue_t; - -typedef struct -{ - 
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 hw_if_index; - u32 sw_if_index; - u32 per_interface_next_index; - af_packet_if_mode_t mode; - u8 is_admin_up; - u8 is_cksum_gso_enabled; - - af_packet_queue_t *rx_queues; - af_packet_queue_t *tx_queues; - - u8 num_rxqs; - u8 num_txqs; - - u8 *host_if_name; - int host_if_index; - - u32 host_mtu; - u32 dev_instance; - - af_packet_ring_t *rings; - u8 is_qdisc_bypass_enabled; - u8 is_fanout_enabled; -} af_packet_if_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - af_packet_if_t *interfaces; - - u32 polling_count; - /* rx buffer cache */ - u32 **rx_buffers; - - /* hash of host interface names */ - mhash_t if_index_by_host_if_name; - - /** log class */ - vlib_log_class_t log_class; -} af_packet_main_t; - -typedef struct -{ - u8 *host_if_name; - u8 *hw_addr; - u32 rx_frame_size; - u32 tx_frame_size; - u32 rx_frames_per_block; - u32 tx_frames_per_block; - u8 num_rxqs; - u8 num_txqs; - af_packet_if_mode_t mode; - af_packet_if_flags_t flags; - - /* return */ - u32 sw_if_index; -} af_packet_create_if_arg_t; - -extern af_packet_main_t af_packet_main; -extern vnet_device_class_t af_packet_device_class; -extern vlib_node_registration_t af_packet_input_node; - -int af_packet_create_if (af_packet_create_if_arg_t *arg); -int af_packet_delete_if (u8 *host_if_name); -int af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set); -int af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs); - -format_function_t format_af_packet_device_name; - -#define MIN(x,y) (((x)<(y))?(x):(y)) - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/af_packet/af_packet_api.c b/src/vnet/devices/af_packet/af_packet_api.c deleted file mode 100644 index 21f2c381809..00000000000 --- a/src/vnet/devices/af_packet/af_packet_api.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - *------------------------------------------------------------------ - * 
af_packet_api.c - af-packet api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <vnet/vnet.h> -#include <vlibmemory/api.h> - -#include <vnet/interface.h> -#include <vnet/api_errno.h> -#include <vnet/devices/af_packet/af_packet.h> - -#include <vnet/format_fns.h> -#include <vnet/devices/af_packet/af_packet.api_enum.h> -#include <vnet/devices/af_packet/af_packet.api_types.h> - -#define REPLY_MSG_ID_BASE msg_id_base -#include <vlibapi/api_helper_macros.h> - -static u16 msg_id_base; - -static void -vl_api_af_packet_create_t_handler (vl_api_af_packet_create_t * mp) -{ - af_packet_create_if_arg_t _arg, *arg = &_arg; - vl_api_af_packet_create_reply_t *rmp; - int rv = 0; - - clib_memset (arg, 0, sizeof (*arg)); - - arg->host_if_name = format (0, "%s", mp->host_if_name); - vec_add1 (arg->host_if_name, 0); - - arg->hw_addr = mp->use_random_hw_addr ? 
0 : mp->hw_addr; - arg->mode = AF_PACKET_IF_MODE_ETHERNET; - // Default flags - arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO; - rv = af_packet_create_if (arg); - - vec_free (arg->host_if_name); - - REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_REPLY, ({ - rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index); - })); -} - -static void -vl_api_af_packet_create_v2_t_handler (vl_api_af_packet_create_v2_t *mp) -{ - af_packet_create_if_arg_t _arg, *arg = &_arg; - vl_api_af_packet_create_v2_reply_t *rmp; - int rv = 0; - - clib_memset (arg, 0, sizeof (*arg)); - - arg->host_if_name = format (0, "%s", mp->host_if_name); - vec_add1 (arg->host_if_name, 0); - - // Default number of rx/tx queue(s) - arg->num_rxqs = 1; - arg->num_txqs = 1; - arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size); - arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size); - arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block); - arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block); - arg->hw_addr = mp->use_random_hw_addr ? 
0 : mp->hw_addr; - arg->mode = AF_PACKET_IF_MODE_ETHERNET; - // Default flags - arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO; - - if (mp->num_rx_queues > 1) - arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues); - - rv = af_packet_create_if (arg); - - vec_free (arg->host_if_name); - REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V2_REPLY, ({ - rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index); - })); -} - -static void -vl_api_af_packet_create_v3_t_handler (vl_api_af_packet_create_v3_t *mp) -{ - af_packet_create_if_arg_t _arg, *arg = &_arg; - vl_api_af_packet_create_v3_reply_t *rmp; - int rv = 0; - - clib_memset (arg, 0, sizeof (*arg)); - - arg->host_if_name = format (0, "%s", mp->host_if_name); - vec_add1 (arg->host_if_name, 0); - - // Default number of rx/tx queue(s) - arg->num_rxqs = 1; - arg->num_txqs = 1; - arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size); - arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size); - arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block); - arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block); - arg->hw_addr = mp->use_random_hw_addr ? 
0 : mp->hw_addr; - - switch (clib_net_to_host_u32 (mp->mode)) - { - case AF_PACKET_API_MODE_ETHERNET: - arg->mode = AF_PACKET_IF_MODE_ETHERNET; - break; - case AF_PACKET_API_MODE_IP: - arg->mode = AF_PACKET_IF_MODE_IP; - break; - default: - arg->sw_if_index = ~0; - rv = VNET_ERR_INVALID_VALUE; - goto error; - } - - STATIC_ASSERT (((int) AF_PACKET_API_FLAG_QDISC_BYPASS == - (int) AF_PACKET_IF_FLAGS_QDISC_BYPASS), - "af-packet qdisc-bypass api flag mismatch"); - STATIC_ASSERT ( - ((int) AF_PACKET_API_FLAG_CKSUM_GSO == (int) AF_PACKET_IF_FLAGS_CKSUM_GSO), - "af-packet checksum/gso offload api flag mismatch"); - - // Default flags - arg->flags = clib_net_to_host_u32 (mp->flags); - - if (clib_net_to_host_u16 (mp->num_rx_queues) > 1) - arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues); - - if (clib_net_to_host_u16 (mp->num_tx_queues) > 1) - arg->num_txqs = clib_net_to_host_u16 (mp->num_tx_queues); - - rv = af_packet_create_if (arg); - -error: - vec_free (arg->host_if_name); - REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V3_REPLY, ({ - rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index); - })); -} - -static void -vl_api_af_packet_delete_t_handler (vl_api_af_packet_delete_t * mp) -{ - vl_api_af_packet_delete_reply_t *rmp; - int rv = 0; - u8 *host_if_name = NULL; - - host_if_name = format (0, "%s", mp->host_if_name); - vec_add1 (host_if_name, 0); - - rv = af_packet_delete_if (host_if_name); - - vec_free (host_if_name); - - REPLY_MACRO (VL_API_AF_PACKET_DELETE_REPLY); -} - -static void - vl_api_af_packet_set_l4_cksum_offload_t_handler - (vl_api_af_packet_set_l4_cksum_offload_t * mp) -{ - vl_api_af_packet_delete_reply_t *rmp; - int rv = 0; - - rv = af_packet_set_l4_cksum_offload (ntohl (mp->sw_if_index), mp->set); - REPLY_MACRO (VL_API_AF_PACKET_SET_L4_CKSUM_OFFLOAD_REPLY); -} - -static void -af_packet_send_details (vpe_api_main_t * am, - vl_api_registration_t * reg, - af_packet_if_detail_t * af_packet_if, u32 context) -{ - vl_api_af_packet_details_t *mp; - mp = 
vl_msg_api_alloc (sizeof (*mp)); - clib_memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = htons (REPLY_MSG_ID_BASE + VL_API_AF_PACKET_DETAILS); - mp->sw_if_index = htonl (af_packet_if->sw_if_index); - clib_memcpy (mp->host_if_name, af_packet_if->host_if_name, - MIN (ARRAY_LEN (mp->host_if_name) - 1, - strlen ((const char *) af_packet_if->host_if_name))); - - mp->context = context; - vl_api_send_msg (reg, (u8 *) mp); -} - - -static void -vl_api_af_packet_dump_t_handler (vl_api_af_packet_dump_t * mp) -{ - int rv; - vpe_api_main_t *am = &vpe_api_main; - vl_api_registration_t *reg; - af_packet_if_detail_t *out_af_packet_ifs = NULL; - af_packet_if_detail_t *af_packet_if = NULL; - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - rv = af_packet_dump_ifs (&out_af_packet_ifs); - if (rv) - return; - - vec_foreach (af_packet_if, out_af_packet_ifs) - { - af_packet_send_details (am, reg, af_packet_if, mp->context); - } - - vec_free (out_af_packet_ifs); -} - -#include <vnet/devices/af_packet/af_packet.api.c> -static clib_error_t * -af_packet_api_hookup (vlib_main_t * vm) -{ - /* - * Set up the (msg_name, crc, message-id) table - */ - REPLY_MSG_ID_BASE = setup_message_id_table (); - - return 0; -} - -VLIB_API_INIT_FUNCTION (af_packet_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c deleted file mode 100644 index e730659bfcd..00000000000 --- a/src/vnet/devices/af_packet/cli.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - *------------------------------------------------------------------ - * af_packet.c - linux kernel packet interface - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <fcntl.h> /* for open */ -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/uio.h> /* for iovec */ -#include <netinet/in.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ip/ip.h> -#include <vnet/ethernet/ethernet.h> - -#include <vnet/devices/af_packet/af_packet.h> - -/** - * @file - * @brief CLI for Host Interface Device Driver. - * - * This file contains the source code for CLI for the host interface. - */ - -static clib_error_t * -af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - af_packet_create_if_arg_t _arg, *arg = &_arg; - clib_error_t *error = NULL; - u8 hwaddr[6]; - int r; - - clib_memset (arg, 0, sizeof (*arg)); - - // Default mode - arg->mode = AF_PACKET_IF_MODE_ETHERNET; - - // Default number of rx/tx queue(s) - arg->num_rxqs = 1; - arg->num_txqs = 1; - - // Default flags - arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "name %s", &arg->host_if_name)) - ; - else if (unformat (line_input, "rx-size %u", &arg->rx_frame_size)) - ; - else if (unformat (line_input, "tx-size %u", &arg->tx_frame_size)) - ; - else if (unformat (line_input, "rx-per-block %u", - &arg->rx_frames_per_block)) - ; - else if (unformat (line_input, "tx-per-block %u", - &arg->tx_frames_per_block)) - ; - else if (unformat (line_input, "num-rx-queues %u", &arg->num_rxqs)) - ; - else if (unformat (line_input, "num-tx-queues %u", &arg->num_txqs)) - ; - else if (unformat (line_input, "qdisc-bypass-disable")) - arg->flags &= ~AF_PACKET_IF_FLAGS_QDISC_BYPASS; - else if (unformat (line_input, "cksum-gso-disable")) - arg->flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO; - else if (unformat (line_input, "mode ip")) - arg->mode = AF_PACKET_IF_MODE_IP; - else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, - hwaddr)) - arg->hw_addr = hwaddr; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (arg->host_if_name == NULL) - { - error = clib_error_return (0, "missing host interface name"); - goto done; - } - - r = af_packet_create_if (arg); - - if (r == VNET_API_ERROR_SYSCALL_ERROR_1) - { - error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); - goto done; - } - - if (r == VNET_API_ERROR_INVALID_INTERFACE) - { - error = clib_error_return (0, "Invalid interface name"); - goto done; - } - - if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - { - error = clib_error_return (0, "Interface already exists"); - goto done; - } - - vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), - arg->sw_if_index); - -done: - vec_free (arg->host_if_name); - unformat_free (line_input); - - return error; -} - -/*? 
- * Create a host interface that will attach to a linux AF_PACKET - * interface, one side of a veth pair. The veth pair must already - * exist. Once created, a new host interface will exist in VPP - * with the name '<em>host-<ifname></em>', where '<em><ifname></em>' - * is the name of the specified veth pair. Use the - * '<em>show interface</em>' command to display host interface details. - * - * This command has the following optional parameters: - * - * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either - * X:X:X:X:X:X unix or X.X.X cisco format. - * - * @cliexpar - * Example of how to create a host interface tied to one side of an - * existing linux veth pair named vpp1: - * @cliexstart{create host-interface name vpp1} - * host-vpp1 - * @cliexend - * Once the host interface is created, enable the interface using: - * @cliexcmd{set interface state host-vpp1 up} -?*/ -VLIB_CLI_COMMAND (af_packet_create_command, static) = { - .path = "create host-interface", - .short_help = "create host-interface name <ifname> [num-rx-queues <n>] " - "[num-tx-queues <n>] [hw-addr <mac-addr>] [mode ip] " - "[qdisc-bypass-disable] [cksum-gso-disable]", - .function = af_packet_create_command_fn, -}; - -static clib_error_t * -af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u8 *host_if_name = NULL; - clib_error_t *error = NULL; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "name %s", &host_if_name)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (host_if_name == NULL) - { - error = clib_error_return (0, "missing host interface name"); - goto done; - } - - af_packet_delete_if (host_if_name); - -done: - vec_free (host_if_name); - unformat_free (line_input); - - return error; -} - -/*? - * Delete a host interface. Use the linux interface name to identify - * the host interface to be deleted. In VPP, host interfaces are - * named as '<em>host-<ifname></em>', where '<em><ifname></em>' - * is the name of the linux interface. - * - * @cliexpar - * Example of how to delete a host interface named host-vpp1: - * @cliexcmd{delete host-interface name vpp1} -?*/ -VLIB_CLI_COMMAND (af_packet_delete_command, static) = { - .path = "delete host-interface", - .short_help = "delete host-interface name <ifname>", - .function = af_packet_delete_command_fn, -}; - -static clib_error_t * -af_packet_set_l4_cksum_offload_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u8 set = 0; - clib_error_t *error = NULL; - vnet_main_t *vnm = vnet_get_main (); - u32 sw_if_index; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm, - &sw_if_index)) - ; - else if (unformat (line_input, "on")) - set = 1; - else if (unformat (line_input, "off")) - set = 0; - else - { - error = clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (af_packet_set_l4_cksum_offload (sw_if_index, set) < 0) - error 
= clib_error_return (0, "not an af_packet interface"); - -done: - unformat_free (line_input); - return error; -} - -/*? - * Set TCP/UDP offload checksum calculation. Use interface - * name to identify the interface to set TCP/UDP offload checksum - * calculation. - * - * @cliexpar - * Example of how to set TCP/UDP offload checksum calculation on host-vpp0: - * @cliexcmd{set host-interface l4-cksum-offload host-vpp0 off} - * @cliexcmd{set host-interface l4-cksum-offload host-vpp0 on} -?*/ -VLIB_CLI_COMMAND (af_packet_set_l4_cksum_offload_command, static) = { - .path = "set host-interface l4-cksum-offload", - .short_help = "set host-interface l4-cksum-offload <host-if-name> <on|off>", - .function = af_packet_set_l4_cksum_offload_command_fn, -}; - -clib_error_t * -af_packet_cli_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (af_packet_cli_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c deleted file mode 100644 index 74bc1c8c42c..00000000000 --- a/src/vnet/devices/af_packet/device.c +++ /dev/null @@ -1,690 +0,0 @@ -/* - *------------------------------------------------------------------ - * af_packet.c - linux kernel packet interface - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include <linux/if_packet.h> -#include <sys/socket.h> -#include <sys/ioctl.h> -#include <net/if.h> -#include <net/if_arp.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ip/ip.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/ip/ip4_packet.h> -#include <vnet/ip/ip6_packet.h> -#include <vnet/ip/ip_psh_cksum.h> -#include <vnet/tcp/tcp_packet.h> -#include <vnet/udp/udp_packet.h> - -#include <vnet/devices/af_packet/af_packet.h> -#include <vnet/devices/virtio/virtio_std.h> - -#define foreach_af_packet_tx_func_error \ -_(FRAME_NOT_READY, "tx frame not ready") \ -_(TXRING_EAGAIN, "tx sendto temporary failure") \ -_(TXRING_FATAL, "tx sendto fatal failure") \ -_(TXRING_OVERRUN, "tx ring overrun") - -typedef enum -{ -#define _(f,s) AF_PACKET_TX_ERROR_##f, - foreach_af_packet_tx_func_error -#undef _ - AF_PACKET_TX_N_ERROR, -} af_packet_tx_func_error_t; - -static char *af_packet_tx_func_error_strings[] = { -#define _(n,s) s, - foreach_af_packet_tx_func_error -#undef _ -}; - -typedef struct -{ - u32 buffer_index; - u32 hw_if_index; - u16 queue_id; - tpacket3_hdr_t tph; - vnet_virtio_net_hdr_t vnet_hdr; - vlib_buffer_t buffer; -} af_packet_tx_trace_t; - -#ifndef CLIB_MARCH_VARIANT -u8 * -format_af_packet_device_name (u8 * s, va_list * args) -{ - u32 i = va_arg (*args, u32); - af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, i); - - s = format (s, "host-%s", apif->host_if_name); - return s; -} -#endif /* CLIB_MARCH_VARIANT */ - -static u8 * -format_af_packet_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - u32 indent = format_get_indent (s); - int __clib_unused verbose = va_arg (*args, int); - - af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, dev_instance); - af_packet_queue_t *rx_queue = 0; - af_packet_queue_t *tx_queue = 0; - - s = 
format (s, "Linux PACKET socket interface"); - s = format (s, "\n%UFEATURES:", format_white_space, indent); - if (apif->is_qdisc_bypass_enabled) - s = format (s, "\n%Uqdisc-bpass-enabled", format_white_space, indent + 2); - if (apif->is_cksum_gso_enabled) - s = format (s, "\n%Ucksum-gso-enabled", format_white_space, indent + 2); - if (apif->is_fanout_enabled) - s = format (s, "\n%Ufanout-enabled", format_white_space, indent + 2); - - vec_foreach (rx_queue, apif->rx_queues) - { - u32 rx_block_size = rx_queue->rx_req->tp_block_size; - u32 rx_frame_size = rx_queue->rx_req->tp_frame_size; - u32 rx_frame_nr = rx_queue->rx_req->tp_frame_nr; - u32 rx_block_nr = rx_queue->rx_req->tp_block_nr; - - s = format (s, "\n%URX Queue %u:", format_white_space, indent, - rx_queue->queue_id); - s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d", - format_white_space, indent + 2, rx_block_size, rx_block_nr, - rx_frame_size, rx_frame_nr); - s = format (s, " next block:%d", rx_queue->next_rx_block); - if (rx_queue->is_rx_pending) - { - s = format ( - s, "\n%UPending Request: num-rx-pkts:%d next-frame-offset:%d", - format_white_space, indent + 2, rx_queue->num_rx_pkts, - rx_queue->rx_frame_offset); - } - } - - vec_foreach (tx_queue, apif->tx_queues) - { - clib_spinlock_lock (&tx_queue->lockp); - u32 tx_block_sz = tx_queue->tx_req->tp_block_size; - u32 tx_frame_sz = tx_queue->tx_req->tp_frame_size; - u32 tx_frame_nr = tx_queue->tx_req->tp_frame_nr; - u32 tx_block_nr = tx_queue->tx_req->tp_block_nr; - int block = 0; - int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0; - u8 *tx_block_start = tx_queue->tx_ring[block]; - u32 tx_frame = tx_queue->next_tx_frame; - tpacket3_hdr_t *tph; - - s = format (s, "\n%UTX Queue %u:", format_white_space, indent, - tx_queue->queue_id); - s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d", - format_white_space, indent + 2, tx_block_sz, tx_block_nr, - tx_frame_sz, tx_frame_nr); - s = format (s, " next frame:%d", 
tx_queue->next_tx_frame); - - do - { - tph = (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); - tx_frame = (tx_frame + 1) % tx_frame_nr; - if (tph->tp_status == 0) - n_avail++; - else if (tph->tp_status & TP_STATUS_SEND_REQUEST) - n_send_req++; - else if (tph->tp_status & TP_STATUS_SENDING) - n_sending++; - else - n_wrong++; - n_tot++; - } - while (tx_frame != tx_queue->next_tx_frame); - s = - format (s, "\n%Uavailable:%d request:%d sending:%d wrong:%d total:%d", - format_white_space, indent + 2, n_avail, n_send_req, n_sending, - n_wrong, n_tot); - clib_spinlock_unlock (&tx_queue->lockp); - } - return s; -} - -static u8 * -format_af_packet_tx_trace (u8 *s, va_list *va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - af_packet_tx_trace_t *t = va_arg (*va, af_packet_tx_trace_t *); - u32 indent = format_get_indent (s); - - s = format (s, "af_packet: hw_if_index %u tx-queue %u", t->hw_if_index, - t->queue_id); - - s = - format (s, - "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" - "\n%Usec 0x%x nsec 0x%x vlan %U" -#ifdef TP_STATUS_VLAN_TPID_VALID - " vlan_tpid %u" -#endif - , - format_white_space, indent + 2, format_white_space, indent + 4, - t->tph.tp_status, t->tph.tp_len, t->tph.tp_snaplen, t->tph.tp_mac, - t->tph.tp_net, format_white_space, indent + 4, t->tph.tp_sec, - t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.hv1.tp_vlan_tci -#ifdef TP_STATUS_VLAN_TPID_VALID - , - t->tph.hv1.tp_vlan_tpid -#endif - ); - - s = format (s, - "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u" - "\n%Ugso_size %u csum_start %u csum_offset %u", - format_white_space, indent + 2, format_white_space, indent + 4, - t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len, - format_white_space, indent + 4, t->vnet_hdr.gso_size, - t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset); - - s = format (s, "\n%Ubuffer 0x%x:\n%U%U", format_white_space, indent + 2, - 
t->buffer_index, format_white_space, indent + 4, - format_vnet_buffer_no_chain, &t->buffer); - s = format (s, "\n%U%U", format_white_space, indent + 2, - format_ethernet_header_with_length, t->buffer.pre_data, - sizeof (t->buffer.pre_data)); - return s; -} - -static void -af_packet_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_buffer_t *b0, u32 bi, tpacket3_hdr_t *tph, - vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index, - u16 queue_id) -{ - af_packet_tx_trace_t *t; - t = vlib_add_trace (vm, node, b0, sizeof (t[0])); - t->hw_if_index = hw_if_index; - t->queue_id = queue_id; - t->buffer_index = bi; - - clib_memcpy_fast (&t->tph, tph, sizeof (*tph)); - clib_memcpy_fast (&t->vnet_hdr, vnet_hdr, sizeof (*vnet_hdr)); - clib_memcpy_fast (&t->buffer, b0, sizeof (*b0) - sizeof (b0->pre_data)); - clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0), - sizeof (t->buffer.pre_data)); -} - -static_always_inline void -fill_gso_offload (vlib_buffer_t *b0, vnet_virtio_net_hdr_t *vnet_hdr) -{ - vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags; - if (b0->flags & VNET_BUFFER_F_IS_IP4) - { - ip4_header_t *ip4; - vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; - vnet_hdr->gso_size = vnet_buffer2 (b0)->gso_size; - vnet_hdr->hdr_len = - vnet_buffer (b0)->l4_hdr_offset + vnet_buffer2 (b0)->gso_l4_hdr_sz; - vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet_hdr->csum_start = vnet_buffer (b0)->l4_hdr_offset; // 0x22; - vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum); - ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset); - if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) - ip4->checksum = ip4_header_checksum (ip4); - } - else if (b0->flags & VNET_BUFFER_F_IS_IP6) - { - vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - vnet_hdr->gso_size = vnet_buffer2 (b0)->gso_size; - vnet_hdr->hdr_len = - vnet_buffer (b0)->l4_hdr_offset + vnet_buffer2 (b0)->gso_l4_hdr_sz; - vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - 
vnet_hdr->csum_start = vnet_buffer (b0)->l4_hdr_offset; // 0x36; - vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum); - } -} - -static_always_inline void -fill_cksum_offload (vlib_buffer_t *b0, vnet_virtio_net_hdr_t *vnet_hdr) -{ - vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags; - if (b0->flags & VNET_BUFFER_F_IS_IP4) - { - ip4_header_t *ip4; - ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset); - if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) - ip4->checksum = ip4_header_checksum (ip4); - vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet_hdr->csum_start = 0x22; - if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) - { - tcp_header_t *tcp = - (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset); - tcp->checksum = ip4_pseudo_header_cksum (ip4); - vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum); - } - else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM) - { - udp_header_t *udp = - (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset); - udp->checksum = ip4_pseudo_header_cksum (ip4); - vnet_hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum); - } - } - else if (b0->flags & VNET_BUFFER_F_IS_IP6) - { - ip6_header_t *ip6; - vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet_hdr->csum_start = 0x36; - ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset); - if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) - { - tcp_header_t *tcp = - (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset); - tcp->checksum = ip6_pseudo_header_cksum (ip6); - vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum); - } - else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM) - { - udp_header_t *udp = - (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset); - udp->checksum = ip6_pseudo_header_cksum (ip6); - vnet_hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum); - } - } -} - -VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, - vlib_node_runtime_t * node, 
- vlib_frame_t * frame) -{ - af_packet_main_t *apm = &af_packet_main; - vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame); - u32 *buffers = vlib_frame_vector_args (frame); - u32 n_left = frame->n_vectors; - u32 n_sent = 0; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - af_packet_if_t *apif = - pool_elt_at_index (apm->interfaces, rd->dev_instance); - u16 queue_id = tf->queue_id; - af_packet_queue_t *tx_queue = vec_elt_at_index (apif->tx_queues, queue_id); - u32 block = 0, frame_size = 0, frame_num = 0, tx_frame = 0; - u8 *block_start = 0; - tpacket3_hdr_t *tph = 0; - u32 frame_not_ready = 0; - u8 is_cksum_gso_enabled = (apif->is_cksum_gso_enabled == 1) ? 1 : 0; - - if (tf->shared_queue) - clib_spinlock_lock (&tx_queue->lockp); - - frame_size = tx_queue->tx_req->tp_frame_size; - frame_num = tx_queue->tx_req->tp_frame_nr; - block_start = tx_queue->tx_ring[block]; - tx_frame = tx_queue->next_tx_frame; - - while (n_left) - { - u32 len; - vnet_virtio_net_hdr_t *vnet_hdr = 0; - u32 offset = 0; - vlib_buffer_t *b0 = 0, *b0_first = 0; - u32 bi, bi_first; - - bi = bi_first = buffers[0]; - n_left--; - buffers++; - - tph = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size); - if (PREDICT_FALSE (tph->tp_status & - (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))) - { - frame_not_ready++; - goto next; - } - - b0_first = b0 = vlib_get_buffer (vm, bi); - - if (PREDICT_TRUE (is_cksum_gso_enabled)) - { - vnet_hdr = - (vnet_virtio_net_hdr_t *) ((u8 *) tph + TPACKET_ALIGN (sizeof ( - tpacket3_hdr_t))); - - clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t)); - offset = sizeof (vnet_virtio_net_hdr_t); - - if (b0->flags & VNET_BUFFER_F_GSO) - fill_gso_offload (b0, vnet_hdr); - else if (b0->flags & VNET_BUFFER_F_OFFLOAD) - fill_cksum_offload (b0, vnet_hdr); - } - - len = b0->current_length; - clib_memcpy_fast ((u8 *) tph + TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) + - offset, - vlib_buffer_get_current (b0), len); - offset += len; - - while 
(b0->flags & VLIB_BUFFER_NEXT_PRESENT) - { - b0 = vlib_get_buffer (vm, b0->next_buffer); - len = b0->current_length; - clib_memcpy_fast ((u8 *) tph + - TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) + offset, - vlib_buffer_get_current (b0), len); - offset += len; - } - - tph->tp_len = tph->tp_snaplen = offset; - tph->tp_status = TP_STATUS_SEND_REQUEST; - n_sent++; - - if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED)) - { - if (PREDICT_TRUE (is_cksum_gso_enabled)) - af_packet_tx_trace (vm, node, b0_first, bi_first, tph, vnet_hdr, - apif->hw_if_index, queue_id); - else - { - vnet_virtio_net_hdr_t vnet_hdr2 = {}; - af_packet_tx_trace (vm, node, b0_first, bi_first, tph, - &vnet_hdr2, apif->hw_if_index, queue_id); - } - } - tx_frame = (tx_frame + 1) % frame_num; - - next: - /* check if we've exhausted the ring */ - if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) - break; - } - - CLIB_MEMORY_BARRIER (); - - if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending)) - { - tx_queue->next_tx_frame = tx_frame; - tx_queue->is_tx_pending = 0; - - if (PREDICT_FALSE ( - sendto (tx_queue->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1)) - { - /* Uh-oh, drop & move on, but count whether it was fatal or not. - * Note that we have no reliable way to properly determine the - * disposition of the packets we just enqueued for delivery. - */ - uword counter; - - if (unix_error_is_fatal (errno)) - { - counter = AF_PACKET_TX_ERROR_TXRING_FATAL; - } - else - { - counter = AF_PACKET_TX_ERROR_TXRING_EAGAIN; - /* non-fatal error: kick again next time - * note that you could still end up in a deadlock: if you do not - * try to send new packets (ie reschedule this tx node), eg. - * because your peer is waiting for the unsent packets to reply - * to you but your waiting for its reply etc., you are not going - * to kick again, and everybody is waiting for the other to talk - * 1st... 
*/ - tx_queue->is_tx_pending = 1; - } - - vlib_error_count (vm, node->node_index, counter, 1); - } - } - - if (tf->shared_queue) - clib_spinlock_unlock (&tx_queue->lockp); - - if (PREDICT_FALSE (frame_not_ready)) - vlib_error_count (vm, node->node_index, - AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready); - - if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) - vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_TXRING_OVERRUN, - n_left); - - vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); - return frame->n_vectors; -} - -static void -af_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - af_packet_main_t *apm = &af_packet_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - af_packet_if_t *apif = - pool_elt_at_index (apm->interfaces, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - apif->per_interface_next_index = node_index; - return; - } - - apif->per_interface_next_index = - vlib_node_add_next (vlib_get_main (), af_packet_input_node.index, - node_index); -} - -static void -af_packet_clear_hw_interface_counters (u32 instance) -{ - /* Nothing for now */ -} - -static clib_error_t * -af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, - u32 flags) -{ - af_packet_main_t *apm = &af_packet_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - af_packet_if_t *apif = - pool_elt_at_index (apm->interfaces, hw->dev_instance); - u32 hw_flags; - int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0); - struct ifreq ifr; - - if (0 > fd) - { - vlib_log_warn (apm->log_class, "af_packet_%s could not open socket", - apif->host_if_name); - return 0; - } - - /* if interface is a bridge ignore */ - if (apif->host_if_index < 0) - goto error; /* no error */ - - /* use host_if_index in case host name has changed */ - ifr.ifr_ifindex = apif->host_if_index; - if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0) - { - 
vlib_log_warn (apm->log_class, - "af_packet_%s ioctl could not retrieve eth name", - apif->host_if_name); - goto error; - } - - apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - - if ((rv = ioctl (fd, SIOCGIFFLAGS, &ifr)) < 0) - { - vlib_log_warn (apm->log_class, "af_packet_%s error: %d", - apif->is_admin_up ? "up" : "down", rv); - goto error; - } - - if (apif->is_admin_up) - { - hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; - ifr.ifr_flags |= IFF_UP; - } - else - { - hw_flags = 0; - ifr.ifr_flags &= ~IFF_UP; - } - - if ((rv = ioctl (fd, SIOCSIFFLAGS, &ifr)) < 0) - { - vlib_log_warn (apm->log_class, "af_packet_%s error: %d", - apif->is_admin_up ? "up" : "down", rv); - goto error; - } - - vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); - -error: - if (0 <= fd) - close (fd); - - return 0; /* no error */ -} - -static clib_error_t * -af_packet_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - /* Nothing for now */ - return 0; -} - -static clib_error_t *af_packet_set_mac_address_function - (struct vnet_hw_interface_t *hi, const u8 * old_address, const u8 * address) -{ - af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif = - pool_elt_at_index (apm->interfaces, hi->dev_instance); - int rv, fd; - struct ifreq ifr; - - if (apif->mode == AF_PACKET_IF_MODE_IP) - { - vlib_log_warn (apm->log_class, "af_packet_%s interface is in IP mode", - apif->host_if_name); - return clib_error_return (0, - " MAC update failed, interface is in IP mode"); - } - - fd = socket (AF_UNIX, SOCK_DGRAM, 0); - if (0 > fd) - { - vlib_log_warn (apm->log_class, "af_packet_%s could not open socket", - apif->host_if_name); - return 0; - } - - /* if interface is a bridge ignore */ - if (apif->host_if_index < 0) - goto error; /* no error */ - - /* use host_if_index in case host name has changed */ - ifr.ifr_ifindex = apif->host_if_index; - if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0) - { - vlib_log_warn - 
(apm->log_class, - "af_packet_%s ioctl could not retrieve eth name, error: %d", - apif->host_if_name, rv); - goto error; - } - - clib_memcpy (ifr.ifr_hwaddr.sa_data, address, 6); - ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; - - if ((rv = ioctl (fd, SIOCSIFHWADDR, &ifr)) < 0) - { - vlib_log_warn (apm->log_class, - "af_packet_%s ioctl could not set mac, error: %d", - apif->host_if_name, rv); - goto error; - } - -error: - - if (0 <= fd) - close (fd); - - return 0; /* no error */ -} - -static clib_error_t * -af_packet_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid, - vnet_hw_if_rx_mode mode) -{ - af_packet_main_t *apm = &af_packet_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - af_packet_if_t *apif; - - apif = vec_elt_at_index (apm->interfaces, hw->dev_instance); - - if (mode == VNET_HW_IF_RX_MODE_ADAPTIVE) - { - vlib_log_err (apm->log_class, - "af_packet_%s adaptive mode is not supported", - apif->host_if_name); - return clib_error_return ( - 0, "af_packet_%s adaptive mode is not supported", apif->host_if_name); - } - - af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, qid); - - if (rx_queue->mode != mode) - { - rx_queue->mode = mode; - - if (mode == VNET_HW_IF_RX_MODE_POLLING) - apm->polling_count++; - else if (mode == VNET_HW_IF_RX_MODE_INTERRUPT && apm->polling_count > 0) - apm->polling_count--; - } - - return 0; -} - -VNET_DEVICE_CLASS (af_packet_device_class) = { - .name = "af-packet", - .format_device_name = format_af_packet_device_name, - .format_device = format_af_packet_device, - .format_tx_trace = format_af_packet_tx_trace, - .tx_function_n_errors = AF_PACKET_TX_N_ERROR, - .tx_function_error_strings = af_packet_tx_func_error_strings, - .rx_redirect_to_node = af_packet_set_interface_next_node, - .clear_counters = af_packet_clear_hw_interface_counters, - .admin_up_down_function = af_packet_interface_admin_up_down, - .subif_add_del_function = af_packet_subif_add_del_function, - 
.mac_addr_change_function = af_packet_set_mac_address_function, - .rx_mode_change_function = af_packet_interface_rx_mode_change, -}; - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/af_packet/dir.dox b/src/vnet/devices/af_packet/dir.dox deleted file mode 100644 index 78991c6d97f..00000000000 --- a/src/vnet/devices/af_packet/dir.dox +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Doxygen directory documentation */ - -/** -@dir -@brief Host Interface Implementation. - -This directory contains the source code for Host Interface driver. The -Host Interface driver leverages the DPDK AF_PACKET driver. - - -*/ -/*? %%clicmd:group_label Host Interface %% ?*/ -/*? %%syscfg:group_label Host Interface %% ?*/ diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c deleted file mode 100644 index 8c72afb2456..00000000000 --- a/src/vnet/devices/af_packet/node.c +++ /dev/null @@ -1,574 +0,0 @@ -/* - *------------------------------------------------------------------ - * af_packet.c - linux kernel packet interface - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <linux/if_packet.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ip/ip.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/interface/rx_queue_funcs.h> -#include <vnet/feature/feature.h> -#include <vnet/ethernet/packet.h> - -#include <vnet/devices/af_packet/af_packet.h> -#include <vnet/devices/virtio/virtio_std.h> - -#define foreach_af_packet_input_error \ - _ (PARTIAL_PKT, "partial packet") \ - _ (TIMEDOUT_BLK, "timed out block") \ - _ (TOTAL_RECV_BLK, "total received block") -typedef enum -{ -#define _(f,s) AF_PACKET_INPUT_ERROR_##f, - foreach_af_packet_input_error -#undef _ - AF_PACKET_INPUT_N_ERROR, -} af_packet_input_error_t; - -static char *af_packet_input_error_strings[] = { -#define _(n,s) s, - foreach_af_packet_input_error -#undef _ -}; - -typedef struct -{ - u32 next_index; - u32 hw_if_index; - u16 queue_id; - int block; - u32 pkt_num; - void *block_start; - block_desc_t bd; - tpacket3_hdr_t tph; - vnet_virtio_net_hdr_t vnet_hdr; -} af_packet_input_trace_t; - -static u8 * -format_af_packet_input_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *); - u32 indent = format_get_indent (s); - - s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d", - t->hw_if_index, t->queue_id, t->next_index); - - s = format ( - s, "\n%Ublock 
%u:\n%Uaddress %p version %u seq_num %lu pkt_num %u", - format_white_space, indent + 2, t->block, format_white_space, indent + 4, - t->block_start, t->bd.version, t->bd.hdr.bh1.seq_num, t->pkt_num); - s = - format (s, - "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" - "\n%Usec 0x%x nsec 0x%x vlan %U" -#ifdef TP_STATUS_VLAN_TPID_VALID - " vlan_tpid %u" -#endif - , - format_white_space, indent + 2, format_white_space, indent + 4, - t->tph.tp_status, t->tph.tp_len, t->tph.tp_snaplen, t->tph.tp_mac, - t->tph.tp_net, format_white_space, indent + 4, t->tph.tp_sec, - t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.hv1.tp_vlan_tci -#ifdef TP_STATUS_VLAN_TPID_VALID - , - t->tph.hv1.tp_vlan_tpid -#endif - ); - - s = format (s, - "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u" - "\n%Ugso_size %u csum_start %u csum_offset %u", - format_white_space, indent + 2, format_white_space, indent + 4, - t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len, - format_white_space, indent + 4, t->vnet_hdr.gso_size, - t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset); - return s; -} - -always_inline void -buffer_add_to_chain (vlib_buffer_t *b, vlib_buffer_t *first_b, - vlib_buffer_t *prev_b, u32 bi) -{ - /* update first buffer */ - first_b->total_length_not_including_first_buffer += b->current_length; - - /* update previous buffer */ - prev_b->next_buffer = bi; - prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; - - /* update current buffer */ - b->next_buffer = ~0; -} - -static_always_inline void -fill_gso_offload (vlib_buffer_t *b, u32 gso_size, u8 l4_hdr_sz) -{ - b->flags |= VNET_BUFFER_F_GSO; - vnet_buffer2 (b)->gso_size = gso_size; - vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_sz; -} - -static_always_inline void -fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip) -{ - vnet_buffer_oflags_t oflags = 0; - u16 l2hdr_sz = 0; - u16 ethertype = 0; - u8 l4_proto = 0; - - if (is_ip) - { - switch (b->data[0] & 0xf0) - { - case 0x40: - ethertype = 
ETHERNET_TYPE_IP4; - break; - case 0x60: - ethertype = ETHERNET_TYPE_IP6; - break; - } - } - else - { - ethernet_header_t *eth = (ethernet_header_t *) b->data; - ethertype = clib_net_to_host_u16 (eth->type); - l2hdr_sz = sizeof (ethernet_header_t); - if (ethernet_frame_is_tagged (ethertype)) - { - ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eth + 1); - - ethertype = clib_net_to_host_u16 (vlan->type); - l2hdr_sz += sizeof (*vlan); - if (ethertype == ETHERNET_TYPE_VLAN) - { - vlan++; - ethertype = clib_net_to_host_u16 (vlan->type); - l2hdr_sz += sizeof (*vlan); - } - } - } - - vnet_buffer (b)->l2_hdr_offset = 0; - vnet_buffer (b)->l3_hdr_offset = l2hdr_sz; - - if (ethertype == ETHERNET_TYPE_IP4) - { - ip4_header_t *ip4 = (ip4_header_t *) (b->data + l2hdr_sz); - vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4); - b->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID | - VNET_BUFFER_F_L3_HDR_OFFSET_VALID | - VNET_BUFFER_F_L4_HDR_OFFSET_VALID); - - l4_proto = ip4->protocol; - } - else if (ethertype == ETHERNET_TYPE_IP6) - { - ip6_header_t *ip6 = (ip6_header_t *) (b->data + l2hdr_sz); - b->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID | - VNET_BUFFER_F_L3_HDR_OFFSET_VALID | - VNET_BUFFER_F_L4_HDR_OFFSET_VALID); - u16 ip6_hdr_len = sizeof (ip6_header_t); - - if (ip6_ext_hdr (ip6->protocol)) - { - ip6_ext_header_t *p = (void *) (ip6 + 1); - ip6_hdr_len += ip6_ext_header_len (p); - while (ip6_ext_hdr (p->next_hdr)) - { - ip6_hdr_len += ip6_ext_header_len (p); - p = ip6_ext_next_header (p); - } - l4_proto = p->next_hdr; - } - else - l4_proto = ip6->protocol; - vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip6_hdr_len; - } - - if (l4_proto == IP_PROTOCOL_TCP) - { - oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM; - tcp_header_t *tcp = - (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset); - *l4_hdr_sz = tcp_header_bytes (tcp); - } - else if (l4_proto == IP_PROTOCOL_UDP) - { - oflags |= 
VNET_BUFFER_OFFLOAD_F_UDP_CKSUM; - *l4_hdr_sz = sizeof (udp_header_t); - } - - if (oflags) - vnet_buffer_offload_flags_set (b, oflags); -} - -always_inline uword -af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, af_packet_if_t *apif, - u16 queue_id, u8 is_cksum_gso_enabled) -{ - af_packet_main_t *apm = &af_packet_main; - af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id); - tpacket3_hdr_t *tph; - u32 next_index; - u32 n_free_bufs; - u32 n_rx_packets = 0; - u32 n_rx_bytes = 0; - u32 timedout_blk = 0; - u32 total = 0; - u32 *to_next = 0; - u32 block = rx_queue->next_rx_block; - u32 block_nr = rx_queue->rx_req->tp_block_nr; - u8 *block_start = 0; - uword n_trace = vlib_get_trace_count (vm, node); - u32 thread_index = vm->thread_index; - u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); - u32 min_bufs = rx_queue->rx_req->tp_frame_size / n_buffer_bytes; - u32 num_pkts = 0; - u32 rx_frame_offset = 0; - block_desc_t *bd = 0; - vlib_buffer_t bt = {}; - u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP); - - if (is_ip) - next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; - else - { - next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - if (PREDICT_FALSE (apif->per_interface_next_index != ~0)) - next_index = apif->per_interface_next_index; - - /* redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt); - } - - if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block])) - ->hdr.bh1.block_status & - TP_STATUS_USER) != 0) - { - u32 n_required = 0; - bd = (block_desc_t *) block_start; - - if (PREDICT_FALSE (rx_queue->is_rx_pending)) - { - num_pkts = rx_queue->num_rx_pkts; - rx_frame_offset = rx_queue->rx_frame_offset; - rx_queue->is_rx_pending = 0; - } - else - { - num_pkts = bd->hdr.bh1.num_pkts; - rx_frame_offset = sizeof (block_desc_t); - total++; - - if (TP_STATUS_BLK_TMO & bd->hdr.bh1.block_status) - timedout_blk++; - } - - n_required = 
clib_max (num_pkts, VLIB_FRAME_SIZE); - n_free_bufs = vec_len (apm->rx_buffers[thread_index]); - if (PREDICT_FALSE (n_free_bufs < n_required)) - { - vec_validate (apm->rx_buffers[thread_index], - n_required + n_free_bufs - 1); - n_free_bufs += vlib_buffer_alloc ( - vm, &apm->rx_buffers[thread_index][n_free_bufs], n_required); - vec_set_len (apm->rx_buffers[thread_index], n_free_bufs); - } - - while (num_pkts && (n_free_bufs >= min_bufs)) - { - u32 next0 = next_index; - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (num_pkts && n_left_to_next && (n_free_bufs >= min_bufs)) - { - tph = (tpacket3_hdr_t *) (block_start + rx_frame_offset); - - if (num_pkts > 1) - CLIB_PREFETCH (block_start + rx_frame_offset + - tph->tp_next_offset, - 2 * CLIB_CACHE_LINE_BYTES, LOAD); - - vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0; - vnet_virtio_net_hdr_t *vnet_hdr = 0; - u32 data_len = tph->tp_snaplen; - u32 offset = 0; - u32 bi0 = ~0, first_bi0 = ~0; - u8 l4_hdr_sz = 0; - - if (is_cksum_gso_enabled) - vnet_hdr = - (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac - - sizeof (vnet_virtio_net_hdr_t)); - - // save current state and return - if (PREDICT_FALSE (((data_len / n_buffer_bytes) + 1) > - vec_len (apm->rx_buffers[thread_index]))) - { - rx_queue->rx_frame_offset = rx_frame_offset; - rx_queue->num_rx_pkts = num_pkts; - rx_queue->is_rx_pending = 1; - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - goto done; - } - - while (data_len) - { - /* grab free buffer */ - u32 last_empty_buffer = - vec_len (apm->rx_buffers[thread_index]) - 1; - bi0 = apm->rx_buffers[thread_index][last_empty_buffer]; - vec_set_len (apm->rx_buffers[thread_index], - last_empty_buffer); - n_free_bufs--; - - /* copy data */ - u32 bytes_to_copy = - data_len > n_buffer_bytes ? 
n_buffer_bytes : data_len; - u32 vlan_len = 0; - u32 bytes_copied = 0; - - b0 = vlib_get_buffer (vm, bi0); - b0->current_data = 0; - - /* Kernel removes VLAN headers, so reconstruct VLAN */ - if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID)) - { - if (PREDICT_TRUE (offset == 0)) - { - clib_memcpy_fast (vlib_buffer_get_current (b0), - (u8 *) tph + tph->tp_mac, - sizeof (ethernet_header_t)); - ethernet_header_t *eth = - vlib_buffer_get_current (b0); - ethernet_vlan_header_t *vlan = - (ethernet_vlan_header_t *) (eth + 1); - vlan->priority_cfi_and_id = - clib_host_to_net_u16 (tph->hv1.tp_vlan_tci); - vlan->type = eth->type; - eth->type = - clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); - vlan_len = sizeof (ethernet_vlan_header_t); - bytes_copied = sizeof (ethernet_header_t); - } - } - clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) + - bytes_copied + vlan_len, - (u8 *) tph + tph->tp_mac + offset + - bytes_copied, - (bytes_to_copy - bytes_copied)); - - /* fill buffer header */ - b0->current_length = bytes_to_copy + vlan_len; - - if (offset == 0) - { - b0->total_length_not_including_first_buffer = 0; - b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; - vnet_buffer (b0)->sw_if_index[VLIB_RX] = - apif->sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0; - first_b0 = b0; - first_bi0 = bi0; - if (is_cksum_gso_enabled) - { - if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) - fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip); - if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 | - VIRTIO_NET_HDR_GSO_TCPV6)) - fill_gso_offload (first_b0, vnet_hdr->gso_size, - l4_hdr_sz); - } - } - else - buffer_add_to_chain (b0, first_b0, prev_b0, bi0); - - prev_b0 = b0; - offset += bytes_to_copy; - data_len -= bytes_to_copy; - } - n_rx_packets++; - n_rx_bytes += tph->tp_snaplen; - to_next[0] = first_bi0; - to_next += 1; - n_left_to_next--; - - /* drop partial packets */ - if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen)) - { - next0 = VNET_DEVICE_INPUT_NEXT_DROP; - 
first_b0->error = - node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT]; - } - else - { - if (PREDICT_FALSE (apif->mode == AF_PACKET_IF_MODE_IP)) - { - switch (first_b0->data[0] & 0xf0) - { - case 0x40: - next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; - break; - case 0x60: - next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - break; - default: - next0 = VNET_DEVICE_INPUT_NEXT_DROP; - break; - } - if (PREDICT_FALSE (apif->per_interface_next_index != ~0)) - next0 = apif->per_interface_next_index; - } - else - { - /* copy feature arc data from template */ - first_b0->current_config_index = bt.current_config_index; - vnet_buffer (first_b0)->feature_arc_index = - vnet_buffer (&bt)->feature_arc_index; - } - } - - /* trace */ - if (PREDICT_FALSE (n_trace > 0 && - vlib_trace_buffer (vm, node, next0, first_b0, - /* follow_chain */ 0))) - { - af_packet_input_trace_t *tr; - vlib_set_trace_count (vm, node, --n_trace); - tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); - tr->next_index = next0; - tr->hw_if_index = apif->hw_if_index; - tr->queue_id = queue_id; - tr->block = block; - tr->block_start = bd; - tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts; - clib_memcpy_fast (&tr->bd, bd, sizeof (block_desc_t)); - clib_memcpy_fast (&tr->tph, tph, sizeof (tpacket3_hdr_t)); - if (is_cksum_gso_enabled) - clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr, - sizeof (vnet_virtio_net_hdr_t)); - else - clib_memset_u8 (&tr->vnet_hdr, 0, - sizeof (vnet_virtio_net_hdr_t)); - } - - /* enque and take next packet */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, first_bi0, - next0); - - /* next packet */ - num_pkts--; - rx_frame_offset += tph->tp_next_offset; - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - if (PREDICT_TRUE (num_pkts == 0)) - { - bd->hdr.bh1.block_status = TP_STATUS_KERNEL; - block = (block + 1) % block_nr; - } - else - { - rx_queue->rx_frame_offset = rx_frame_offset; - rx_queue->num_rx_pkts = num_pkts; - rx_queue->is_rx_pending = 1; 
- } - } - - rx_queue->next_rx_block = block; - -done: - - if (apm->polling_count == 0) - { - if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block])) - ->hdr.bh1.block_status & - TP_STATUS_USER) != 0) - vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_POLLING); - else - vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_INTERRUPT); - } - - vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TOTAL_RECV_BLK, - total); - vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TIMEDOUT_BLK, - timedout_blk); - - vlib_increment_combined_counter - (vnet_get_main ()->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes); - - vnet_device_increment_rx_packets (thread_index, n_rx_packets); - return n_rx_packets; -} - -VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 n_rx_packets = 0; - af_packet_main_t *apm = &af_packet_main; - vnet_hw_if_rxq_poll_vector_t *pv; - pv = vnet_hw_if_get_rxq_poll_vector (vm, node); - for (int i = 0; i < vec_len (pv); i++) - { - af_packet_if_t *apif; - apif = vec_elt_at_index (apm->interfaces, pv[i].dev_instance); - if (apif->is_admin_up) - { - if (apif->is_cksum_gso_enabled) - n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif, - pv[i].queue_id, 1); - else - n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif, - pv[i].queue_id, 0); - } - } - return n_rx_packets; -} - -VLIB_REGISTER_NODE (af_packet_input_node) = { - .name = "af-packet-input", - .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, - .sibling_of = "device-input", - .format_trace = format_af_packet_input_trace, - .type = VLIB_NODE_TYPE_INPUT, - .state = VLIB_NODE_STATE_INTERRUPT, - .n_errors = AF_PACKET_INPUT_N_ERROR, - .error_strings = af_packet_input_error_strings, -}; - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style 
"gnu") - * End: - */ diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index 1a4f02df6a8..ee380bebbde 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -29,7 +29,6 @@ device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (device_input_node) = { .function = device_input_fn, .name = "device-input", @@ -40,29 +39,6 @@ VLIB_REGISTER_NODE (device_input_node) = { .next_nodes = VNET_DEVICE_INPUT_NEXT_NODES, }; -/* Table defines how much we need to advance current data pointer - in the buffer if we shortcut to l3 nodes */ - -const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES))) -device_input_next_node_advance[((VNET_DEVICE_INPUT_N_NEXT_NODES / - CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] = -{ - [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = sizeof (ethernet_header_t), - [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = sizeof (ethernet_header_t), - [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = sizeof (ethernet_header_t), - [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = sizeof (ethernet_header_t), -}; - -const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES))) -device_input_next_node_flags[((VNET_DEVICE_INPUT_N_NEXT_NODES / - CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] = -{ - [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID, - [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID, - [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID, - [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID, -}; - VNET_FEATURE_ARC_INIT (device_input, static) = { .arc_name = "device-input", @@ -100,7 +76,6 @@ VNET_FEATURE_INIT (ethernet_input, static) = { .node_name = "ethernet-input", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ static void input_rate_collector_fn (vlib_stats_collector_data_t *d) diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index 
e54c7a29130..cadf1f857a6 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -67,8 +67,6 @@ typedef struct extern vnet_device_main_t vnet_device_main; extern vlib_node_registration_t device_input_node; -extern const u32 device_input_next_node_advance[]; -extern const u32 device_input_next_node_flags[]; static inline u64 vnet_get_aggregate_rx_packets (void) diff --git a/src/vnet/devices/netlink.c b/src/vnet/devices/netlink.c index da21e9adea1..3fd3e13bf77 100644 --- a/src/vnet/devices/netlink.c +++ b/src/vnet/devices/netlink.c @@ -20,8 +20,13 @@ #include <fcntl.h> #include <net/if.h> +#ifdef __linux__ #include <linux/netlink.h> #include <linux/rtnetlink.h> +#elif __FreeBSD__ +#include <netlink/netlink.h> +#include <netlink/netlink_route.h> +#endif #include <vlib/vlib.h> #include <vlib/unix/unix.h> diff --git a/src/vnet/devices/pipe/pipe.c b/src/vnet/devices/pipe/pipe.c index 26b01970b6c..9caee2a55cb 100644 --- a/src/vnet/devices/pipe/pipe.c +++ b/src/vnet/devices/pipe/pipe.c @@ -83,13 +83,11 @@ pipe_build_rewrite (vnet_main_t * vnm, return (rewrite); } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (pipe_hw_interface_class) = { .name = "Pipe", .build_rewrite = pipe_build_rewrite, .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; -/* *INDENT-ON* */ pipe_t * pipe_get (u32 sw_if_index) @@ -131,7 +129,7 @@ pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, n_left_to_next, n_copy, *from, *to_next; u32 next_index = VNET_PIPE_TX_NEXT_ETHERNET_INPUT; - u32 i, sw_if_index = 0, n_pkts = 0; + u32 i, sw_if_index = 0; vlib_buffer_t *b; pipe_t *pipe; @@ -159,7 +157,6 @@ pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; i++; - n_pkts++; } from += n_copy; @@ -186,25 +183,21 @@ pipe_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) VNET_HW_INTERFACE_FLAG_LINK_UP : 0); vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); - /* 
*INDENT-OFF* */ hi = vnet_get_hw_interface (vnm, hw_if_index); hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id, ({ vnet_sw_interface_set_flags (vnm, sw_if_index, flags); })); - /* *INDENT-ON* */ return (NULL); } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (pipe_device_class) = { .name = "Pipe", .format_device_name = format_pipe_name, .tx_function = pipe_tx, .admin_up_down_function = pipe_admin_up_down, }; -/* *INDENT-ON* */ #define foreach_pipe_rx_next \ _ (DROP, "error-drop") @@ -433,7 +426,6 @@ pipe_rx (vlib_main_t * vm, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (pipe_rx_node) = { .function = pipe_rx, .name = "pipe-rx", @@ -443,7 +435,6 @@ VLIB_REGISTER_NODE (pipe_rx_node) = { .sibling_of = "ethernet-input", }; -/* *INDENT-ON* */ /* * Maintain a bitmap of allocated pipe instance numbers. @@ -626,13 +617,11 @@ pipe_hw_walk (vnet_main_t * vnm, u32 hw_if_index, void *args) { u32 pipe_sw_if_index[2], id, sw_if_index; - /* *INDENT-OFF* */ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id, ({ ASSERT(id < 2); pipe_sw_if_index[id] = sw_if_index; })); - /* *INDENT-ON* */ ctx->cb (hi->sw_if_index, pipe_sw_if_index, hi->dev_instance, ctx->ctx); } @@ -691,13 +680,11 @@ create_pipe_interfaces (vlib_main_t * vm, * Example of how to create a pipe interface: * @cliexcmd{pipe create} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (pipe_create_interface_command, static) = { .path = "pipe create", .short_help = "pipe create [instance <instance>]", .function = create_pipe_interfaces, }; -/* *INDENT-ON* */ int vnet_delete_pipe_interface (u32 sw_if_index) @@ -721,13 +708,11 @@ vnet_delete_pipe_interface (u32 sw_if_index) return VNET_API_ERROR_INVALID_SW_IF_INDEX; } - /* *INDENT-OFF* */ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id, ({ vnet_delete_sub_interface(sw_if_index); pipe_main.pipes[sw_if_index] = PIPE_INVALID; })); - /* *INDENT-ON* */ ethernet_delete_interface (vnm, hw_if_index); @@ -771,13 +756,11 @@ 
delete_pipe_interfaces (vlib_main_t * vm, * Example of how to delete a pipe interface: * @cliexcmd{pipe delete-interface intfc loop0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (pipe_delete_interface_command, static) = { .path = "pipe delete", .short_help = "pipe delete <interface>", .function = delete_pipe_interfaces, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/devices/pipe/pipe_api.c b/src/vnet/devices/pipe/pipe_api.c index 1f0faef7c1e..79a4377de83 100644 --- a/src/vnet/devices/pipe/pipe_api.c +++ b/src/vnet/devices/pipe/pipe_api.c @@ -42,14 +42,12 @@ vl_api_pipe_create_t_handler (vl_api_pipe_create_t * mp) rv = vnet_create_pipe_interface (is_specified, user_instance, &parent_sw_if_index, pipe_sw_if_index); - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_PIPE_CREATE_REPLY, ({ rmp->sw_if_index = ntohl (parent_sw_if_index); rmp->pipe_sw_if_index[0] = ntohl (pipe_sw_if_index[0]); rmp->pipe_sw_if_index[1] = ntohl (pipe_sw_if_index[1]); })); - /* *INDENT-ON* */ } static void diff --git a/src/vnet/devices/tap/FEATURE.yaml b/src/vnet/devices/tap/FEATURE.yaml index 35ee4885b02..1a774fb0e74 100644 --- a/src/vnet/devices/tap/FEATURE.yaml +++ b/src/vnet/devices/tap/FEATURE.yaml @@ -1,6 +1,6 @@ --- name: Tap Device -maintainer: damarion@cisco.com sluong@cisco.com sykazmi@cisco.com +maintainer: damarion@cisco.com sluong@cisco.com mohsin.kazmi14@gmail.com features: - Virtio - Persistence diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c index 096a2c46970..5c676d32d60 100644 --- a/src/vnet/devices/tap/cli.c +++ b/src/vnet/devices/tap/cli.c @@ -136,7 +136,6 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tap_create_command, static) = { .path = "create tap", .short_help = @@ -150,7 +149,6 @@ VLIB_CLI_COMMAND (tap_create_command, static) = { "[persist] [attach] [tun] [packed] [in-order]", .function = tap_create_command_fn, }; -/* *INDENT-ON* */ static clib_error_t 
* tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -191,14 +189,12 @@ tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tap_delete__command, static) = { .path = "delete tap", .short_help = "delete tap {<interface> | sw_if_index <sw_idx>}", .function = tap_delete_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * tap_offload_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -261,7 +257,6 @@ tap_offload_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tap_offload_command, static) = { .path = "set tap offload", @@ -270,7 +265,6 @@ VLIB_CLI_COMMAND (tap_offload_command, static) = "csum-offload-disable>", .function = tap_offload_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -300,10 +294,8 @@ tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input, if (vec_len (hw_if_indices) == 0) { - /* *INDENT-OFF* */ pool_foreach (vif, mm->interfaces) vec_add1 (hw_if_indices, vif->hw_if_index); - /* *INDENT-ON* */ } virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TAP); @@ -313,13 +305,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tap_show_command, static) = { .path = "show tap", .short_help = "show tap {<interface>] [descriptors]", .function = tap_show_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * tun_show_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -349,10 +339,8 @@ tun_show_command_fn (vlib_main_t * vm, unformat_input_t * input, if (vec_len (hw_if_indices) == 0) { - /* *INDENT-OFF* */ pool_foreach (vif, mm->interfaces) vec_add1 (hw_if_indices, vif->hw_if_index); - /* *INDENT-ON* */ } virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TUN); @@ -362,13 +350,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tun_show_command, static) = { .path = "show tun", 
.short_help = "show tun {<interface>] [descriptors]", .function = tun_show_command_fn, }; -/* *INDENT-ON* */ clib_error_t * tap_cli_init (vlib_main_t * vm) diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c index 360c001b168..1e2ee87041d 100644 --- a/src/vnet/devices/tap/tap.c +++ b/src/vnet/devices/tap/tap.c @@ -97,14 +97,12 @@ tap_free (vlib_main_t * vm, virtio_if_t * vif) virtio_pre_input_node_disable (vm, vif); - /* *INDENT-OFF* */ vec_foreach_index (i, vif->vhost_fds) if (vif->vhost_fds[i] != -1) close (vif->vhost_fds[i]); vec_foreach_index (i, vif->rxq_vrings) virtio_vring_free_rx (vm, vif, RX_QUEUE (i)); vec_foreach_index (i, vif->txq_vrings) virtio_vring_free_tx (vm, vif, TX_QUEUE (i)); - /* *INDENT-ON* */ if (vif->tap_fds) { @@ -885,7 +883,6 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) tap_interface_details_t *r_tapids = NULL; tap_interface_details_t *tapid = NULL; - /* *INDENT-OFF* */ pool_foreach (vif, mm->interfaces) { if ((vif->type != VIRTIO_IF_TYPE_TAP) && (vif->type != VIRTIO_IF_TYPE_TUN)) @@ -929,7 +926,6 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) tapid->host_ip6_prefix_len = vif->host_ip6_prefix_len; tapid->host_mtu_size = vif->host_mtu_size; } - /* *INDENT-ON* */ *out_tapids = r_tapids; diff --git a/src/vnet/devices/tap/tapv2.api b/src/vnet/devices/tap/tapv2.api index 0ee14511529..bf53d1bc6fe 100644 --- a/src/vnet/devices/tap/tapv2.api +++ b/src/vnet/devices/tap/tapv2.api @@ -144,6 +144,8 @@ autoendian define tap_create_v3_reply */ define tap_create_v2 { + option deprecated; + u32 client_index; u32 context; u32 id [default=0xffffffff]; @@ -181,6 +183,8 @@ define tap_create_v2 */ define tap_create_v2_reply { + option deprecated; + u32 context; i32 retval; vl_api_interface_index_t sw_if_index; diff --git a/src/vnet/devices/virtio/FEATURE.yaml b/src/vnet/devices/virtio/FEATURE.yaml index 7b2fb59e1ad..446a45b61a3 100644 --- a/src/vnet/devices/virtio/FEATURE.yaml +++ b/src/vnet/devices/virtio/FEATURE.yaml @@ 
-1,6 +1,6 @@ --- name: Virtio PCI Device -maintainer: sykazmi@cisco.com sluong@cisco.com +maintainer: mohsin.kazmi14@gmail.com sluong@cisco.com features: - Driver mode to emulate PCI interface presented to VPP from the host interface. @@ -11,6 +11,8 @@ features: - Support multi-queue, GSO, checksum offload, indirect descriptor, jumbo frame, and packed ring. - Support virtio 1.1 packed ring in vhost + - Support for tx queue size configuration (tested on host kernel 5.15 + and qemu version 6.2.0) description: "Virtio implementation" missing: - API dump filtering by sw_if_index diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c index a78336997e2..c1b6c8be065 100644 --- a/src/vnet/devices/virtio/cli.c +++ b/src/vnet/devices/virtio/cli.c @@ -31,6 +31,7 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, virtio_pci_create_if_args_t args; u64 feature_mask = (u64) ~ (0ULL); u32 buffering_size = 0; + u32 txq_size = 0; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -43,6 +44,8 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, ; else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask)) args.features = feature_mask; + else if (unformat (line_input, "tx-queue-size %u", &txq_size)) + args.tx_queue_size = txq_size; else if (unformat (line_input, "gso-enabled")) args.gso_enabled = 1; else if (unformat (line_input, "csum-enabled")) @@ -55,6 +58,10 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, } else if (unformat (line_input, "packed")) args.virtio_flags |= VIRTIO_FLAG_PACKED; + else if (unformat (line_input, "bind force")) + args.bind = VIRTIO_BIND_FORCE; + else if (unformat (line_input, "bind")) + args.bind = VIRTIO_BIND_DEFAULT; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -66,15 +73,14 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, return 
args.error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (virtio_pci_create_command, static) = { .path = "create interface virtio", .short_help = "create interface virtio <pci-address> " - "[feature-mask <hex-mask>] [gso-enabled] [csum-enabled] " - "[buffering [size <buffering-szie>]] [packed]", + "[feature-mask <hex-mask>] [tx-queue-size <size>] " + "[gso-enabled] [csum-enabled] " + "[buffering [size <buffering-szie>]] [packed] [bind [force]]", .function = virtio_pci_create_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -120,14 +126,12 @@ virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (virtio_pci_delete_command, static) = { .path = "delete interface virtio", .short_help = "delete interface virtio " "{<interface> | sw_if_index <sw_idx>}", .function = virtio_pci_delete_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * virtio_pci_enable_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -182,14 +186,12 @@ virtio_pci_enable_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (virtio_pci_enable_command, static) = { .path = "set virtio pci", .short_help = "set virtio pci {<interface> | sw_if_index <sw_idx>}" " [gso-enabled | csum-offload-enabled | offloads-disabled]", .function = virtio_pci_enable_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_virtio_pci_fn (vlib_main_t * vm, unformat_input_t * input, @@ -248,13 +250,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_virtio_pci_command, static) = { .path = "show virtio pci", .short_help = "show virtio pci [<interface>] [descriptors | desc] [debug-device]", .function = show_virtio_pci_fn, }; -/* *INDENT-ON* */ clib_error_t * virtio_pci_cli_init (vlib_main_t * vm) diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c index 
ec5cdbd7bdd..112f77e7065 100644 --- a/src/vnet/devices/virtio/device.c +++ b/src/vnet/devices/virtio/device.c @@ -309,12 +309,12 @@ set_checksum_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr, const int is_l2) { vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags; - + i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset - b->current_data; if (b->flags & VNET_BUFFER_F_IS_IP4) { ip4_header_t *ip4; hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x22; + hdr->csum_start = l4_hdr_offset; // 0x22; /* * virtio devices do not support IP4 checksum offload. So driver takes @@ -347,7 +347,7 @@ set_checksum_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr, { ip6_header_t *ip6; hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x36; + hdr->csum_start = l4_hdr_offset; // 0x36; ip6 = (ip6_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset); /* @@ -376,17 +376,16 @@ set_gso_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr, const int is_l2) { vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags; + i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset - b->current_data; if (b->flags & VNET_BUFFER_F_IS_IP4) { ip4_header_t *ip4; hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; hdr->gso_size = vnet_buffer2 (b)->gso_size; - hdr->hdr_len = vnet_buffer (b)->l4_hdr_offset - - vnet_buffer (b)->l2_hdr_offset + - vnet_buffer2 (b)->gso_l4_hdr_sz; + hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b)->gso_l4_hdr_sz; hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x22; + hdr->csum_start = l4_hdr_offset; // 0x22; hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum); ip4 = (ip4_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset); /* @@ -400,11 +399,9 @@ set_gso_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr, { hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; hdr->gso_size = vnet_buffer2 (b)->gso_size; - hdr->hdr_len = 
vnet_buffer (b)->l4_hdr_offset - - vnet_buffer (b)->l2_hdr_offset + - vnet_buffer2 (b)->gso_l4_hdr_sz; + hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b)->gso_l4_hdr_sz; hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x36; + hdr->csum_start = l4_hdr_offset; // 0x36; hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum); } } @@ -1205,16 +1202,6 @@ virtio_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) return 0; } -static clib_error_t * -virtio_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - /* Nothing for now */ - return 0; -} - -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (virtio_device_class) = { .name = "virtio", .format_device_name = format_virtio_device_name, @@ -1225,11 +1212,9 @@ VNET_DEVICE_CLASS (virtio_device_class) = { .rx_redirect_to_node = virtio_set_interface_next_node, .clear_counters = virtio_clear_hw_interface_counters, .admin_up_down_function = virtio_interface_admin_up_down, - .subif_add_del_function = virtio_subif_add_del_function, .rx_mode_change_function = virtio_interface_rx_mode_change, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index a022ee5eacc..8c837575cf8 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -19,7 +19,11 @@ #include <sys/stat.h> #include <fcntl.h> #include <net/if.h> +#ifdef __linux__ #include <linux/if_tun.h> +#elif __FreeBSD__ +#include <net/if_tun.h> +#endif /* __linux */ #include <sys/ioctl.h> #include <sys/eventfd.h> @@ -202,6 +206,19 @@ virtio_get_len (vnet_virtio_vring_t *vring, const int packed, const int hdr_sz, return vring->used->ring[last & mask].len - hdr_sz; } +#define virtio_packed_check_n_left(vring, last) \ + do \ + { \ + vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last]; \ + u16 flags = d->flags; \ + if ((flags & 
VRING_DESC_F_AVAIL) != (vring->used_wrap_counter << 7) || \ + (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15)) \ + { \ + n_left = 0; \ + } \ + } \ + while (0) + #define increment_last(last, packed, vring) \ do \ { \ @@ -214,6 +231,29 @@ virtio_get_len (vnet_virtio_vring_t *vring, const int packed, const int hdr_sz, } \ while (0) +static_always_inline void +virtio_device_input_ethernet (vlib_main_t *vm, vlib_node_runtime_t *node, + const u32 next_index, const u32 sw_if_index, + const u32 hw_if_index) +{ + vlib_next_frame_t *nf; + vlib_frame_t *f; + ethernet_input_frame_t *ef; + + if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index)) + return; + + nf = vlib_node_runtime_get_next_frame ( + vm, node, VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT); + f = vlib_get_frame (vm, nf->frame); + f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; + + ef = vlib_frame_scalar_args (f); + ef->sw_if_index = sw_if_index; + ef->hw_if_index = hw_if_index; + vlib_frame_no_append (f); +} + static_always_inline uword virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, virtio_if_t *vif, @@ -234,6 +274,11 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u16 n_left = virtio_n_left_to_process (vring, packed); vlib_buffer_t bt = {}; + if (packed) + { + virtio_packed_check_n_left (vring, last); + } + if (n_left == 0) return 0; @@ -248,7 +293,7 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node, next_index = vif->per_interface_next_index; /* only for l2, redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (vif->sw_if_index, &next_index, &bt); + vnet_feature_start_device_input (vif->sw_if_index, &next_index, &bt); } while (n_left) @@ -256,7 +301,7 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 n_left_to_next; u32 next0 = next_index; - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + 
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left && n_left_to_next) { @@ -386,6 +431,8 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node, n_rx_packets++; n_rx_bytes += len; } + virtio_device_input_ethernet (vm, node, next_index, vif->sw_if_index, + vif->hw_if_index); vlib_put_next_frame (vm, node, next_index, n_left_to_next); } vring->last_used_idx = last; @@ -477,7 +524,6 @@ VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm, return n_rx; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (virtio_input_node) = { .name = "virtio-input", .sibling_of = "device-input", @@ -488,7 +534,6 @@ VLIB_REGISTER_NODE (virtio_input_node) = { .n_errors = VIRTIO_INPUT_N_ERROR, .error_strings = virtio_input_error_strings, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c index f678c3960bf..6234f64fcfb 100644 --- a/src/vnet/devices/virtio/pci.c +++ b/src/vnet/devices/virtio/pci.c @@ -721,8 +721,8 @@ virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif, } clib_error_t * -virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif, - u16 queue_num) +virtio_pci_vring_split_init (vlib_main_t *vm, virtio_if_t *vif, u16 queue_num, + u16 txq_size) { clib_error_t *error = 0; u16 queue_size = 0; @@ -745,6 +745,16 @@ virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif, if (queue_num % 2) { + if (txq_size) + { + virtio_log_debug (vif, "tx-queue: number %u, default-size %u", + queue_num, queue_size); + vif->virtio_pci_func->set_queue_size (vm, vif, queue_num, txq_size); + queue_size = + vif->virtio_pci_func->get_queue_size (vm, vif, queue_num); + virtio_log_debug (vif, "tx-queue: number %u, new size %u", queue_num, + queue_size); + } vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num), CLIB_CACHE_LINE_BYTES); vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num)); @@ -886,12 +896,13 
@@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif, } clib_error_t * -virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num) +virtio_pci_vring_init (vlib_main_t *vm, virtio_if_t *vif, u16 queue_num, + u16 txq_size) { if (vif->is_packed) return virtio_pci_vring_packed_init (vm, vif, queue_num); else - return virtio_pci_vring_split_init (vm, vif, queue_num); + return virtio_pci_vring_split_init (vm, vif, queue_num, txq_size); } static void @@ -1229,7 +1240,7 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif, for (int i = 0; i < vif->max_queue_pairs; i++) { - if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i)))) + if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i), 0))) { args->rv = VNET_API_ERROR_INIT_FAILED; virtio_log_error (vif, "%s (%u) %s", "error in rxq-queue", @@ -1244,7 +1255,8 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif, vif->num_rxqs++; } - if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i)))) + if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i), + args->tx_queue_size))) { args->rv = VNET_API_ERROR_INIT_FAILED; virtio_log_error (vif, "%s (%u) %s", "error in txq-queue", @@ -1328,7 +1340,6 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) clib_error_t *error = 0; u32 interrupt_count = 0; - /* *INDENT-OFF* */ pool_foreach (vif, vim->interfaces) { if (vif->pci_addr.as_u32 == args->addr) { @@ -1341,7 +1352,24 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) return; } } - /* *INDENT-ON* */ + + if (args->bind) + { + vlib_pci_addr_t pci = { .as_u32 = args->addr }; + error = vlib_pci_bind_to_uio (vm, &pci, (char *) "auto", + VIRTIO_BIND_FORCE == args->bind); + if (error) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = + clib_error_return (error, "%U: %s", format_vlib_pci_addr, &pci, + "error encountered on binding pci device"); + vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s", + 
format_vlib_pci_addr, &pci, + "error encountered on binding pci devicee"); + return; + } + } pool_get (vim->interfaces, vif); vif->dev_instance = vif - vim->interfaces; @@ -1480,9 +1508,17 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) "error encountered during packet buffering init"); goto error; } + /* + * packet buffering flag needs to be set 1 before calling the + * virtio_pre_input_node_enable but after the successful initialization + * of buffering queues above. + * Packet buffering flag set to 0 if there will be any error during + * buffering initialization. + */ + vif->packet_buffering = 1; + virtio_pre_input_node_enable (vm, vif); } - virtio_pre_input_node_enable (vm, vif); virtio_vring_set_rx_queues (vm, vif); virtio_vring_set_tx_queues (vm, vif); @@ -1524,17 +1560,19 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) vlib_pci_intr_disable (vm, vif->pci_dev_handle); - for (i = 0; i < vif->max_queue_pairs; i++) + if (vif->virtio_pci_func) { - vif->virtio_pci_func->del_queue (vm, vif, RX_QUEUE (i)); - vif->virtio_pci_func->del_queue (vm, vif, TX_QUEUE (i)); - } + for (i = 0; i < vif->max_queue_pairs; i++) + { + vif->virtio_pci_func->del_queue (vm, vif, RX_QUEUE (i)); + vif->virtio_pci_func->del_queue (vm, vif, TX_QUEUE (i)); + } - if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) - vif->virtio_pci_func->del_queue (vm, vif, vif->max_queue_pairs * 2); + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) + vif->virtio_pci_func->del_queue (vm, vif, vif->max_queue_pairs * 2); - if (vif->virtio_pci_func) - vif->virtio_pci_func->device_reset (vm, vif); + vif->virtio_pci_func->device_reset (vm, vif); + } if (vif->hw_if_index) { @@ -1555,7 +1593,8 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) vlib_physmem_free (vm, vring->desc); } - virtio_pre_input_node_disable (vm, vif); + if (vif->packet_buffering) + virtio_pre_input_node_disable (vm, vif); vec_foreach_index (i, vif->txq_vrings) { diff 
--git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h index db20537bc3f..5eb80f823be 100644 --- a/src/vnet/devices/virtio/pci.h +++ b/src/vnet/devices/virtio/pci.h @@ -154,13 +154,11 @@ typedef struct * and an ack/status response in the last entry. Data for the * command goes in between. */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u8 class; u8 cmd; }) virtio_net_ctrl_hdr_t; -/* *INDENT-ON* */ typedef u8 virtio_net_ctrl_ack_t; @@ -227,6 +225,13 @@ typedef enum #undef _ } virtio_flag_t; +typedef enum +{ + VIRTIO_BIND_NONE = 0, + VIRTIO_BIND_DEFAULT = 1, + VIRTIO_BIND_FORCE = 2, +} __clib_packed virtio_bind_t; + typedef struct { u32 addr; @@ -238,6 +243,8 @@ typedef struct u64 features; u8 gso_enabled; u8 checksum_offload_enabled; + u32 tx_queue_size; + virtio_bind_t bind; u32 buffering_size; u32 virtio_flags; clib_error_t *error; diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api deleted file mode 100644 index b026ba768a9..00000000000 --- a/src/vnet/devices/virtio/vhost_user.api +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -option version = "4.1.1"; - -import "vnet/interface_types.api"; -import "vnet/ethernet/ethernet_types.api"; -import "vnet/devices/virtio/virtio_types.api"; - -/** \brief vhost-user interface create request - @param client_index - opaque cookie to identify the sender - @param is_server - our side is socket server - @param sock_filename - unix socket filename, used to speak with frontend - @param use_custom_mac - enable or disable the use of the provided hardware address - @param disable_mrg_rxbuf - disable the use of merge receive buffers - @param disable_indirect_desc - disable the use of indirect descriptors which driver can use - @param enable_gso - enable gso support (default 0) - @param enable_packed - enable packed ring support (default 0) - @param mac_address - hardware address to use if 'use_custom_mac' is set -*/ -define create_vhost_user_if -{ - option deprecated; - u32 client_index; - u32 context; - bool is_server; - string sock_filename[256]; - bool renumber; - bool disable_mrg_rxbuf; - bool disable_indirect_desc; - bool enable_gso; - bool enable_packed; - u32 custom_dev_instance; - bool use_custom_mac; - vl_api_mac_address_t mac_address; - string tag[64]; -}; - -/** \brief vhost-user interface create response - @param context - sender context, to match reply w/ request - @param retval - return code for the request - @param sw_if_index - interface the operation is applied to -*/ -define create_vhost_user_if_reply -{ - option deprecated; - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; - -/** \brief vhost-user interface modify request - @param client_index - opaque cookie to identify the sender - @param is_server - our side is socket server - @param sock_filename - unix socket filename, used to speak with frontend - @param enable_gso - enable gso support (default 0) - @param enable_packed - enable packed ring support (default 0) -*/ -autoreply define modify_vhost_user_if -{ - option deprecated; - u32 client_index; - u32 
context; - vl_api_interface_index_t sw_if_index; - bool is_server; - string sock_filename[256]; - bool renumber; - bool enable_gso; - bool enable_packed; - u32 custom_dev_instance; -}; - -/** \brief vhost-user interface create request - @param client_index - opaque cookie to identify the sender - @param is_server - our side is socket server - @param sock_filename - unix socket filename, used to speak with frontend - @param use_custom_mac - enable or disable the use of the provided hardware address - @param disable_mrg_rxbuf - disable the use of merge receive buffers - @param disable_indirect_desc - disable the use of indirect descriptors which driver can use - @param enable_gso - enable gso support (default 0) - @param enable_packed - enable packed ring support (default 0) - @param enable_event_idx - enable event_idx support (default 0) - @param mac_address - hardware address to use if 'use_custom_mac' is set - @param renumber - if true, use custom_dev_instance is valid - @param custom_dev_instance - custom device instance number -*/ -define create_vhost_user_if_v2 -{ - u32 client_index; - u32 context; - bool is_server; - string sock_filename[256]; - bool renumber; - bool disable_mrg_rxbuf; - bool disable_indirect_desc; - bool enable_gso; - bool enable_packed; - bool enable_event_idx; - u32 custom_dev_instance; - bool use_custom_mac; - vl_api_mac_address_t mac_address; - string tag[64]; -}; - -/** \brief vhost-user interface create response - @param context - sender context, to match reply w/ request - @param retval - return code for the request - @param sw_if_index - interface the operation is applied to -*/ -define create_vhost_user_if_v2_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; - -/** \brief vhost-user interface modify request - @param client_index - opaque cookie to identify the sender - @param is_server - our side is socket server - @param sock_filename - unix socket filename, used to speak with frontend - @param 
enable_gso - enable gso support (default 0) - @param enable_packed - enable packed ring support (default 0) - @param enable_event_idx - enable event idx support (default 0) - @param renumber - if true, use custom_dev_instance is valid - @param custom_dev_instance - custom device instance number -*/ -autoreply define modify_vhost_user_if_v2 -{ - u32 client_index; - u32 context; - vl_api_interface_index_t sw_if_index; - bool is_server; - string sock_filename[256]; - bool renumber; - bool enable_gso; - bool enable_packed; - bool enable_event_idx; - u32 custom_dev_instance; -}; - -/** \brief vhost-user interface delete request - @param client_index - opaque cookie to identify the sender -*/ -autoreply define delete_vhost_user_if -{ - u32 client_index; - u32 context; - vl_api_interface_index_t sw_if_index; -}; - -/** \brief Vhost-user interface details structure (fix this) - @param sw_if_index - index of the interface - @param interface_name - name of interface - @param virtio_net_hdr_sz - net header size - @param features_first_32 - interface features, first 32 bits - @param features_last_32 - interface features, last 32 bits - @param is_server - vhost-user server socket - @param sock_filename - socket filename - @param num_regions - number of used memory regions - @param sock_errno - socket errno -*/ -define sw_interface_vhost_user_details -{ - u32 context; - vl_api_interface_index_t sw_if_index; - string interface_name[64]; - u32 virtio_net_hdr_sz; - vl_api_virtio_net_features_first_32_t features_first_32; - vl_api_virtio_net_features_last_32_t features_last_32; - bool is_server; - string sock_filename[256]; - u32 num_regions; - i32 sock_errno; -}; - -/** \brief Vhost-user interface dump request - @param sw_if_index - filter by sw_if_index -*/ -define sw_interface_vhost_user_dump -{ - u32 client_index; - u32 context; - vl_api_interface_index_t sw_if_index [default=0xffffffff]; -}; -/* - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git 
a/src/vnet/devices/virtio/vhost_user.c b/src/vnet/devices/virtio/vhost_user.c deleted file mode 100644 index b6e0806db90..00000000000 --- a/src/vnet/devices/virtio/vhost_user.c +++ /dev/null @@ -1,2613 +0,0 @@ -/* - *------------------------------------------------------------------ - * vhost.c - vhost-user - * - * Copyright (c) 2014-2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <fcntl.h> /* for open */ -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/uio.h> /* for iovec */ -#include <netinet/in.h> -#include <sys/vfs.h> - -#include <linux/if_arp.h> -#include <linux/if_tun.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> - -#include <vnet/ethernet/ethernet.h> -#include <vnet/devices/devices.h> -#include <vnet/feature/feature.h> -#include <vnet/interface/rx_queue_funcs.h> -#include <vnet/interface/tx_queue_funcs.h> - -#include <vnet/devices/virtio/vhost_user.h> -#include <vnet/devices/virtio/vhost_user_inline.h> - -/** - * @file - * @brief vHost User Device Driver. - * - * This file contains the source code for vHost User interface. 
- */ - - -vlib_node_registration_t vhost_user_send_interrupt_node; - -/* *INDENT-OFF* */ -vhost_user_main_t vhost_user_main = { - .mtu_bytes = 1518, -}; - -VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = { - .name = "vhost-user", -}; -/* *INDENT-ON* */ - -static long -get_huge_page_size (int fd) -{ - struct statfs s; - fstatfs (fd, &s); - return s.f_bsize; -} - -static void -unmap_all_mem_regions (vhost_user_intf_t * vui) -{ - int i, r, q; - vhost_user_vring_t *vq; - - for (i = 0; i < vui->nregions; i++) - { - if (vui->region_mmap_addr[i] != MAP_FAILED) - { - - long page_sz = get_huge_page_size (vui->region_mmap_fd[i]); - - ssize_t map_sz = (vui->regions[i].memory_size + - vui->regions[i].mmap_offset + - page_sz - 1) & ~(page_sz - 1); - - r = - munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset, - map_sz); - - vu_log_debug (vui, "unmap memory region %d addr 0x%lx len 0x%lx " - "page_sz 0x%x", i, vui->region_mmap_addr[i], map_sz, - page_sz); - - vui->region_mmap_addr[i] = MAP_FAILED; - - if (r == -1) - { - vu_log_err (vui, "failed to unmap memory region (errno %d)", - errno); - } - close (vui->region_mmap_fd[i]); - } - } - vui->nregions = 0; - - FOR_ALL_VHOST_RX_TXQ (q, vui) - { - vq = &vui->vrings[q]; - vq->avail = 0; - vq->used = 0; - vq->desc = 0; - } -} - -static_always_inline void -vhost_user_tx_thread_placement (vhost_user_intf_t *vui, u32 qid) -{ - vnet_main_t *vnm = vnet_get_main (); - vhost_user_vring_t *rxvq = &vui->vrings[qid]; - u32 q = qid >> 1, rxvq_count; - - ASSERT ((qid & 1) == 0); - if (!rxvq->started || !rxvq->enabled) - return; - - rxvq_count = (qid >> 1) + 1; - if (rxvq->queue_index == ~0) - { - rxvq->queue_index = - vnet_hw_if_register_tx_queue (vnm, vui->hw_if_index, q); - rxvq->qid = q; - } - - FOR_ALL_VHOST_RXQ (q, vui) - { - vhost_user_vring_t *rxvq = &vui->vrings[q]; - u32 qi = rxvq->queue_index; - - if (rxvq->queue_index == ~0) - break; - for (u32 i = 0; i < vlib_get_n_threads (); i++) - 
vnet_hw_if_tx_queue_unassign_thread (vnm, qi, i); - } - - for (u32 i = 0; i < vlib_get_n_threads (); i++) - { - vhost_user_vring_t *rxvq = - &vui->vrings[VHOST_VRING_IDX_RX (i % rxvq_count)]; - u32 qi = rxvq->queue_index; - - vnet_hw_if_tx_queue_assign_thread (vnm, qi, i); - } - - vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index); -} - -/** - * @brief Unassign existing interface/queue to thread mappings and re-assign - * new interface/queue to thread mappings - */ -static_always_inline void -vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid) -{ - vhost_user_vring_t *txvq = &vui->vrings[qid]; - vnet_main_t *vnm = vnet_get_main (); - int rv; - u32 q = qid >> 1; - vhost_user_main_t *vum = &vhost_user_main; - - ASSERT ((qid & 1) == 1); // should be odd - // Assign new queue mappings for the interface - if (txvq->queue_index != ~0) - return; - vnet_hw_if_set_input_node (vnm, vui->hw_if_index, - vhost_user_input_node.index); - txvq->queue_index = vnet_hw_if_register_rx_queue (vnm, vui->hw_if_index, q, - VNET_HW_IF_RXQ_THREAD_ANY); - txvq->thread_index = - vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index); - - if (txvq->mode == VNET_HW_IF_RX_MODE_UNKNOWN) - /* Set polling as the default */ - txvq->mode = VNET_HW_IF_RX_MODE_POLLING; - if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING) - { - vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index); - /* Keep a polling queue count for each thread */ - cpu->polling_q_count++; - } - txvq->qid = q; - rv = vnet_hw_if_set_rx_queue_mode (vnm, txvq->queue_index, txvq->mode); - if (rv) - vu_log_warn (vui, "unable to set rx mode for interface %d, " - "queue %d: rc=%d", vui->hw_if_index, q, rv); - vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index); -} - -/** @brief Returns whether at least one TX and one RX vring are enabled */ -static_always_inline int -vhost_user_intf_ready (vhost_user_intf_t * vui) -{ - int i, found[2] = { }; //RX + TX - - for (i = 0; i < vui->num_qid; i++) - if 
(vui->vrings[i].started && vui->vrings[i].enabled) - found[i & 1] = 1; - - return found[0] && found[1]; -} - -static_always_inline void -vhost_user_update_iface_state (vhost_user_intf_t * vui) -{ - /* if we have pointers to descriptor table, go up */ - int is_ready = vhost_user_intf_ready (vui); - if (is_ready != vui->is_ready) - { - vu_log_debug (vui, "interface %d %s", vui->sw_if_index, - is_ready ? "ready" : "down"); - if (vui->admin_up) - vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index, - is_ready ? VNET_HW_INTERFACE_FLAG_LINK_UP - : 0); - vui->is_ready = is_ready; - } -} - -static clib_error_t * -vhost_user_callfd_read_ready (clib_file_t * uf) -{ - __attribute__ ((unused)) int n; - u8 buff[8]; - - n = read (uf->file_descriptor, ((char *) &buff), 8); - - return 0; -} - -static_always_inline void -vhost_user_thread_placement (vhost_user_intf_t * vui, u32 qid) -{ - if (qid & 1) // RX is odd, TX is even - { - if (vui->vrings[qid].queue_index == ~0) - vhost_user_rx_thread_placement (vui, qid); - } - else - vhost_user_tx_thread_placement (vui, qid); -} - -static clib_error_t * -vhost_user_kickfd_read_ready (clib_file_t * uf) -{ - __attribute__ ((unused)) ssize_t n; - u8 buff[8]; - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui = - pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data >> 8); - u32 qid = uf->private_data & 0xff; - u32 is_txq = qid & 1; - vhost_user_vring_t *vq = &vui->vrings[qid]; - vnet_main_t *vnm = vnet_get_main (); - - n = read (uf->file_descriptor, buff, 8); - if (vq->started == 0) - { - vq->started = 1; - vhost_user_thread_placement (vui, qid); - vhost_user_update_iface_state (vui); - if (is_txq) - vnet_hw_if_set_rx_queue_file_index (vnm, vq->queue_index, - vq->kickfd_idx); - } - - if (is_txq && (vq->mode != VNET_HW_IF_RX_MODE_POLLING) && - vhost_user_intf_ready (vui)) - { - vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, vq->thread_index); - /* - * If the thread has more than 1 queue and the other 
queue is in polling - * mode, there is no need to trigger an interrupt - */ - if (cpu->polling_q_count == 0) - vnet_hw_if_rx_queue_set_int_pending (vnm, vq->queue_index); - } - - return 0; -} - -static_always_inline void -vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid) -{ - vhost_user_vring_t *vring = &vui->vrings[qid]; - - clib_memset (vring, 0, sizeof (*vring)); - vring->kickfd_idx = ~0; - vring->callfd_idx = ~0; - vring->errfd = -1; - vring->qid = -1; - vring->queue_index = ~0; - vring->thread_index = ~0; - vring->mode = VNET_HW_IF_RX_MODE_POLLING; - - clib_spinlock_init (&vring->vring_lock); - - /* - * We have a bug with some qemu 2.5, and this may be a fix. - * Feel like interpretation holy text, but this is from vhost-user.txt. - * " - * One queue pair is enabled initially. More queues are enabled - * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE. - * " - * Don't know who's right, but this is what DPDK does. - */ - if (qid == 0 || qid == 1) - vring->enabled = 1; -} - -static_always_inline void -vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) -{ - vhost_user_vring_t *vring = &vui->vrings[qid]; - - if (vring->kickfd_idx != ~0) - { - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vring->kickfd_idx); - clib_file_del (&file_main, uf); - vring->kickfd_idx = ~0; - } - if (vring->callfd_idx != ~0) - { - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vring->callfd_idx); - clib_file_del (&file_main, uf); - vring->callfd_idx = ~0; - } - if (vring->errfd != -1) - { - close (vring->errfd); - vring->errfd = -1; - } - - clib_spinlock_free (&vring->vring_lock); - - // save the needed information in vrings prior to being wiped out - u16 q = vui->vrings[qid].qid; - u32 queue_index = vui->vrings[qid].queue_index; - u32 mode = vui->vrings[qid].mode; - u32 thread_index = vui->vrings[qid].thread_index; - vhost_user_vring_init (vui, qid); - vui->vrings[qid].qid = q; - vui->vrings[qid].queue_index = queue_index; - 
vui->vrings[qid].mode = mode; - vui->vrings[qid].thread_index = thread_index; -} - -static_always_inline void -vhost_user_if_disconnect (vhost_user_intf_t * vui) -{ - vnet_main_t *vnm = vnet_get_main (); - int q; - - vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); - - if (vui->clib_file_index != ~0) - { - clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index); - vui->clib_file_index = ~0; - } - - vui->is_ready = 0; - - FOR_ALL_VHOST_RX_TXQ (q, vui) { vhost_user_vring_close (vui, q); } - - unmap_all_mem_regions (vui); - vu_log_debug (vui, "interface ifindex %d disconnected", vui->sw_if_index); -} - -void -vhost_user_set_operation_mode (vhost_user_intf_t *vui, - vhost_user_vring_t *txvq) -{ - if (vhost_user_is_packed_ring_supported (vui)) - { - if (txvq->used_event) - { - if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING) - txvq->used_event->flags = VRING_EVENT_F_DISABLE; - else - txvq->used_event->flags = 0; - } - } - else - { - if (txvq->used) - { - if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING) - txvq->used->flags = VRING_USED_F_NO_NOTIFY; - else - txvq->used->flags = 0; - } - } -} - -static clib_error_t * -vhost_user_socket_read (clib_file_t * uf) -{ - int n, i, j; - int fd, number_of_fds = 0; - int fds[VHOST_MEMORY_MAX_NREGIONS]; - vhost_user_msg_t msg; - struct msghdr mh; - struct iovec iov[1]; - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - struct cmsghdr *cmsg; - u8 q; - clib_file_t template = { 0 }; - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - - vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); - - char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))]; - - clib_memset (&mh, 0, sizeof (mh)); - clib_memset (control, 0, sizeof (control)); - - for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) - fds[i] = -1; - - /* set the payload */ - iov[0].iov_base = (void *) &msg; - iov[0].iov_len = VHOST_USER_MSG_HDR_SZ; - - mh.msg_iov = iov; - mh.msg_iovlen = 1; - 
mh.msg_control = control; - mh.msg_controllen = sizeof (control); - - n = recvmsg (uf->file_descriptor, &mh, 0); - - if (n != VHOST_USER_MSG_HDR_SZ) - { - if (n == -1) - { - vu_log_debug (vui, "recvmsg returned error %d %s", errno, - strerror (errno)); - } - else - { - vu_log_debug (vui, "n (%d) != VHOST_USER_MSG_HDR_SZ (%d)", - n, VHOST_USER_MSG_HDR_SZ); - } - goto close_socket; - } - - if (mh.msg_flags & MSG_CTRUNC) - { - vu_log_debug (vui, "MSG_CTRUNC is set"); - goto close_socket; - } - - cmsg = CMSG_FIRSTHDR (&mh); - - if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) && - (cmsg->cmsg_type == SCM_RIGHTS) && - (cmsg->cmsg_len - CMSG_LEN (0) <= - VHOST_MEMORY_MAX_NREGIONS * sizeof (int))) - { - number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int); - clib_memcpy_fast (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int)); - } - - /* version 1, no reply bit set */ - if ((msg.flags & 7) != 1) - { - vu_log_debug (vui, "malformed message received. closing socket"); - goto close_socket; - } - - { - int rv; - rv = - read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ, - msg.size); - if (rv < 0) - { - vu_log_debug (vui, "read failed %s", strerror (errno)); - goto close_socket; - } - else if (rv != msg.size) - { - vu_log_debug (vui, "message too short (read %dB should be %dB)", rv, - msg.size); - goto close_socket; - } - } - - switch (msg.request) - { - case VHOST_USER_GET_FEATURES: - msg.flags |= 4; - msg.u64 = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) | - VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ) | - VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) | - VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC) | - VIRTIO_FEATURE (VHOST_F_LOG_ALL) | - VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_ANNOUNCE) | - VIRTIO_FEATURE (VIRTIO_NET_F_MQ) | - VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES) | - VIRTIO_FEATURE (VIRTIO_F_VERSION_1); - msg.u64 &= vui->feature_mask; - - if (vui->enable_event_idx) - msg.u64 |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX); - if 
(vui->enable_gso) - msg.u64 |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS; - if (vui->enable_packed) - msg.u64 |= VIRTIO_FEATURE (VIRTIO_F_RING_PACKED); - - msg.size = sizeof (msg.u64); - vu_log_debug (vui, "if %d msg VHOST_USER_GET_FEATURES - reply " - "0x%016llx", vui->hw_if_index, msg.u64); - n = - send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); - if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) - { - vu_log_debug (vui, "could not send message response"); - goto close_socket; - } - break; - - case VHOST_USER_SET_FEATURES: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_FEATURES features " - "0x%016llx", vui->hw_if_index, msg.u64); - - vui->features = msg.u64; - - if (vui->features & - (VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) | - VIRTIO_FEATURE (VIRTIO_F_VERSION_1))) - vui->virtio_net_hdr_sz = 12; - else - vui->virtio_net_hdr_sz = 10; - - vui->is_any_layout = - (vui->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) ? 1 : 0; - - ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE); - if (vui->enable_gso && - ((vui->features & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS) - == FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)) - { - vnet_hw_if_set_caps (vnm, vui->hw_if_index, - VNET_HW_IF_CAP_TCP_GSO | - VNET_HW_IF_CAP_TX_TCP_CKSUM | - VNET_HW_IF_CAP_TX_UDP_CKSUM); - } - else - { - vnet_hw_if_unset_caps (vnm, vui->hw_if_index, - VNET_HW_IF_CAP_TCP_GSO | - VNET_HW_IF_CAP_L4_TX_CKSUM); - } - vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); - vui->is_ready = 0; - vhost_user_update_iface_state (vui); - break; - - case VHOST_USER_SET_MEM_TABLE: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_MEM_TABLE nregions %d", - vui->hw_if_index, msg.memory.nregions); - - if ((msg.memory.nregions < 1) || - (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS)) - { - vu_log_debug (vui, "number of mem regions must be between 1 and %i", - VHOST_MEMORY_MAX_NREGIONS); - goto close_socket; - } - - if (msg.memory.nregions != number_of_fds) - { - 
vu_log_debug (vui, "each memory region must have FD"); - goto close_socket; - } - - /* Do the mmap without barrier sync */ - void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS]; - for (i = 0; i < msg.memory.nregions; i++) - { - long page_sz = get_huge_page_size (fds[i]); - - /* align size to page */ - ssize_t map_sz = (msg.memory.regions[i].memory_size + - msg.memory.regions[i].mmap_offset + - page_sz - 1) & ~(page_sz - 1); - - region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE, - MAP_SHARED, fds[i], 0); - if (region_mmap_addr[i] == MAP_FAILED) - { - vu_log_err (vui, "failed to map memory. errno is %d", errno); - for (j = 0; j < i; j++) - munmap (region_mmap_addr[j], map_sz); - goto close_socket; - } - vu_log_debug (vui, "map memory region %d addr 0 len 0x%lx fd %d " - "mapped 0x%lx page_sz 0x%x", i, map_sz, fds[i], - region_mmap_addr[i], page_sz); - } - - vlib_worker_thread_barrier_sync (vm); - unmap_all_mem_regions (vui); - for (i = 0; i < msg.memory.nregions; i++) - { - clib_memcpy_fast (&(vui->regions[i]), &msg.memory.regions[i], - sizeof (vhost_user_memory_region_t)); - - vui->region_mmap_addr[i] = region_mmap_addr[i]; - vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr; - vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr + - vui->regions[i].memory_size; - - vui->region_mmap_addr[i] += vui->regions[i].mmap_offset; - vui->region_mmap_fd[i] = fds[i]; - - vui->nregions++; - } - - /* - * Re-compute desc, used, and avail descriptor table if vring address - * is set. 
- */ - FOR_ALL_VHOST_RX_TXQ (q, vui) - { - if (vui->vrings[q].desc_user_addr && vui->vrings[q].used_user_addr && - vui->vrings[q].avail_user_addr) - { - vui->vrings[q].desc = - map_user_mem (vui, vui->vrings[q].desc_user_addr); - vui->vrings[q].used = - map_user_mem (vui, vui->vrings[q].used_user_addr); - vui->vrings[q].avail = - map_user_mem (vui, vui->vrings[q].avail_user_addr); - } - } - vlib_worker_thread_barrier_release (vm); - break; - - case VHOST_USER_SET_VRING_NUM: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d", - vui->hw_if_index, msg.state.index, msg.state.num); - - if ((msg.state.num > 32768) || /* maximum ring size is 32768 */ - (msg.state.num == 0) || /* it cannot be zero */ - ((msg.state.num - 1) & msg.state.num) || /* must be power of 2 */ - (msg.state.index >= vui->num_qid)) - { - vu_log_debug (vui, "invalid VHOST_USER_SET_VRING_NUM: msg.state.num" - " %d, msg.state.index %d, curruent max q %d", - msg.state.num, msg.state.index, vui->num_qid); - goto close_socket; - } - vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1; - break; - - case VHOST_USER_SET_VRING_ADDR: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ADDR idx %d", - vui->hw_if_index, msg.state.index); - - if (msg.state.index >= vui->num_qid) - { - vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:" - " %u >= %u", msg.state.index, vui->num_qid); - goto close_socket; - } - - if (msg.size < sizeof (msg.addr)) - { - vu_log_debug (vui, "vhost message is too short (%d < %d)", - msg.size, sizeof (msg.addr)); - goto close_socket; - } - - vnet_virtio_vring_desc_t *desc = - map_user_mem (vui, msg.addr.desc_user_addr); - vnet_virtio_vring_used_t *used = - map_user_mem (vui, msg.addr.used_user_addr); - vnet_virtio_vring_avail_t *avail = - map_user_mem (vui, msg.addr.avail_user_addr); - - if ((desc == NULL) || (used == NULL) || (avail == NULL)) - { - vu_log_debug (vui, "failed to map user memory for hw_if_index %d", - vui->hw_if_index); - goto 
close_socket; - } - - vui->vrings[msg.state.index].desc_user_addr = msg.addr.desc_user_addr; - vui->vrings[msg.state.index].used_user_addr = msg.addr.used_user_addr; - vui->vrings[msg.state.index].avail_user_addr = msg.addr.avail_user_addr; - - vlib_worker_thread_barrier_sync (vm); - vui->vrings[msg.state.index].desc = desc; - vui->vrings[msg.state.index].used = used; - vui->vrings[msg.state.index].avail = avail; - - vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr; - vui->vrings[msg.state.index].log_used = - (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0; - - /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, - the ring is initialized in an enabled state. */ - if (!(vui->features & VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES))) - vui->vrings[msg.state.index].enabled = 1; - - vui->vrings[msg.state.index].last_used_idx = - vui->vrings[msg.state.index].last_avail_idx = - vui->vrings[msg.state.index].used->idx; - vui->vrings[msg.state.index].last_kick = - vui->vrings[msg.state.index].last_used_idx; - - /* tell driver that we want interrupts or not */ - vhost_user_set_operation_mode (vui, &vui->vrings[msg.state.index]); - vlib_worker_thread_barrier_release (vm); - vhost_user_update_iface_state (vui); - break; - - case VHOST_USER_SET_OWNER: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index); - break; - - case VHOST_USER_RESET_OWNER: - vu_log_debug (vui, "if %d msg VHOST_USER_RESET_OWNER", - vui->hw_if_index); - break; - - case VHOST_USER_SET_VRING_CALL: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_CALL %d", - vui->hw_if_index, msg.u64); - - q = (u8) (msg.u64 & 0xFF); - if (vui->num_qid > q) - { - /* if there is old fd, delete and close it */ - if (vui->vrings[q].callfd_idx != ~0) - { - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vui->vrings[q].callfd_idx); - clib_file_del (&file_main, uf); - vui->vrings[q].callfd_idx = ~0; - } - } - else if (vec_len (vui->vrings) > q) - { - 
/* grow vrings by pair (RX + TX) */ - vui->num_qid = (q & 1) ? (q + 1) : (q + 2); - } - else - { - u32 i, new_max_q, old_max_q = vec_len (vui->vrings); - - /* - * Double the array size if it is less than 64 entries. - * Slow down thereafter. - */ - if (vec_len (vui->vrings) < (VHOST_VRING_INIT_MQ_PAIR_SZ << 3)) - new_max_q = vec_len (vui->vrings) << 1; - else - new_max_q = vec_len (vui->vrings) + - (VHOST_VRING_INIT_MQ_PAIR_SZ << 2); - if (new_max_q > (VHOST_VRING_MAX_MQ_PAIR_SZ << 1)) - new_max_q = (VHOST_VRING_MAX_MQ_PAIR_SZ << 1); - - /* sync with the worker threads, vrings may move due to realloc */ - vlib_worker_thread_barrier_sync (vm); - vec_validate_aligned (vui->vrings, new_max_q - 1, - CLIB_CACHE_LINE_BYTES); - vlib_worker_thread_barrier_release (vm); - - for (i = old_max_q; i < vec_len (vui->vrings); i++) - vhost_user_vring_init (vui, i); - - /* grow vrings by pair (RX + TX) */ - vui->num_qid = (q & 1) ? (q + 1) : (q + 2); - } - - if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) - { - if (number_of_fds != 1) - { - vu_log_debug (vui, "More than one fd received !"); - goto close_socket; - } - - template.read_function = vhost_user_callfd_read_ready; - template.file_descriptor = fds[0]; - template.private_data = - ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q; - template.description = format (0, "vhost user"); - vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template); - } - else - vui->vrings[q].callfd_idx = ~0; - break; - - case VHOST_USER_SET_VRING_KICK: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_KICK %d", - vui->hw_if_index, msg.u64); - - q = (u8) (msg.u64 & 0xFF); - if (q >= vui->num_qid) - { - vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_KICK:" - " %u >= %u", q, vui->num_qid); - goto close_socket; - } - - if (vui->vrings[q].kickfd_idx != ~0) - { - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vui->vrings[q].kickfd_idx); - clib_file_del (&file_main, uf); - vui->vrings[q].kickfd_idx = ~0; - } - - if 
(!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) - { - if (number_of_fds != 1) - { - vu_log_debug (vui, "More than one fd received !"); - goto close_socket; - } - - template.read_function = vhost_user_kickfd_read_ready; - template.file_descriptor = fds[0]; - template.private_data = - (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) + - q; - vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template); - } - else - { - //When no kickfd is set, the queue is initialized as started - vui->vrings[q].kickfd_idx = ~0; - vui->vrings[q].started = 1; - vhost_user_thread_placement (vui, q); - } - vhost_user_update_iface_state (vui); - break; - - case VHOST_USER_SET_VRING_ERR: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ERR %d", - vui->hw_if_index, msg.u64); - - q = (u8) (msg.u64 & 0xFF); - if (q >= vui->num_qid) - { - vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ERR:" - " %u >= %u", q, vui->num_qid); - goto close_socket; - } - - if (vui->vrings[q].errfd != -1) - close (vui->vrings[q].errfd); - - if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) - { - if (number_of_fds != 1) - goto close_socket; - - vui->vrings[q].errfd = fds[0]; - } - else - vui->vrings[q].errfd = -1; - break; - - case VHOST_USER_SET_VRING_BASE: - vu_log_debug (vui, - "if %d msg VHOST_USER_SET_VRING_BASE idx %d num 0x%x", - vui->hw_if_index, msg.state.index, msg.state.num); - if (msg.state.index >= vui->num_qid) - { - vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:" - " %u >= %u", msg.state.index, vui->num_qid); - goto close_socket; - } - vlib_worker_thread_barrier_sync (vm); - vui->vrings[msg.state.index].last_avail_idx = msg.state.num; - if (vhost_user_is_packed_ring_supported (vui)) - { - /* - * 0 1 2 3 - * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | last avail idx | | last used idx | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * ^ 
^ - * | | - * avail wrap counter used wrap counter - */ - /* last avail idx at bit 0-14. */ - vui->vrings[msg.state.index].last_avail_idx = - msg.state.num & 0x7fff; - /* avail wrap counter at bit 15 */ - vui->vrings[msg.state.index].avail_wrap_counter = - ! !(msg.state.num & (1 << 15)); - - /* - * Although last_used_idx is passed in the upper 16 bits in qemu - * implementation, in practice, last_avail_idx and last_used_idx are - * usually the same. As a result, DPDK does not bother to pass us - * last_used_idx. The spec is not clear on thex coding. I figured it - * out by reading the qemu code. So let's just read last_avail_idx - * and set last_used_idx equals to last_avail_idx. - */ - vui->vrings[msg.state.index].last_used_idx = - vui->vrings[msg.state.index].last_avail_idx; - vui->vrings[msg.state.index].last_kick = - vui->vrings[msg.state.index].last_used_idx; - vui->vrings[msg.state.index].used_wrap_counter = - vui->vrings[msg.state.index].avail_wrap_counter; - - if (vui->vrings[msg.state.index].avail_wrap_counter == 1) - vui->vrings[msg.state.index].avail_wrap_counter = - VRING_DESC_F_AVAIL; - } - vlib_worker_thread_barrier_release (vm); - break; - - case VHOST_USER_GET_VRING_BASE: - if (msg.state.index >= vui->num_qid) - { - vu_log_debug (vui, "invalid vring index VHOST_USER_GET_VRING_BASE:" - " %u >= %u", msg.state.index, vui->num_qid); - goto close_socket; - } - - /* protection is needed to prevent rx/tx from changing last_avail_idx */ - vlib_worker_thread_barrier_sync (vm); - /* - * Copy last_avail_idx from the vring before closing it because - * closing the vring also initializes the vring last_avail_idx - */ - msg.state.num = vui->vrings[msg.state.index].last_avail_idx; - if (vhost_user_is_packed_ring_supported (vui)) - { - msg.state.num = - (vui->vrings[msg.state.index].last_avail_idx & 0x7fff) | - (! !vui->vrings[msg.state.index].avail_wrap_counter << 15); - msg.state.num |= - ((vui->vrings[msg.state.index].last_used_idx & 0x7fff) | - (! 
!vui->vrings[msg.state.index].used_wrap_counter << 15)) << 16; - } - msg.flags |= 4; - msg.size = sizeof (msg.state); - - /* - * Spec says: Client must [...] stop ring upon receiving - * VHOST_USER_GET_VRING_BASE - */ - vhost_user_vring_close (vui, msg.state.index); - vlib_worker_thread_barrier_release (vm); - vu_log_debug (vui, - "if %d msg VHOST_USER_GET_VRING_BASE idx %d num 0x%x", - vui->hw_if_index, msg.state.index, msg.state.num); - n = - send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); - if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) - { - vu_log_debug (vui, "could not send message response"); - goto close_socket; - } - vhost_user_update_iface_state (vui); - break; - - case VHOST_USER_NONE: - vu_log_debug (vui, "if %d msg VHOST_USER_NONE", vui->hw_if_index); - break; - - case VHOST_USER_SET_LOG_BASE: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_BASE", - vui->hw_if_index); - - if (msg.size != sizeof (msg.log)) - { - vu_log_debug (vui, "invalid msg size for VHOST_USER_SET_LOG_BASE:" - " %d instead of %d", msg.size, sizeof (msg.log)); - goto close_socket; - } - - if (!(vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD))) - { - vu_log_debug (vui, "VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but " - "VHOST_USER_SET_LOG_BASE received"); - goto close_socket; - } - - fd = fds[0]; - /* align size to page */ - long page_sz = get_huge_page_size (fd); - ssize_t map_sz = - (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1); - - void *log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - - vu_log_debug (vui, "map log region addr 0 len 0x%lx off 0x%lx fd %d " - "mapped 0x%lx", map_sz, msg.log.offset, fd, - log_base_addr); - - if (log_base_addr == MAP_FAILED) - { - vu_log_err (vui, "failed to map memory. 
errno is %d", errno); - goto close_socket; - } - - vlib_worker_thread_barrier_sync (vm); - vui->log_base_addr = log_base_addr; - vui->log_base_addr += msg.log.offset; - vui->log_size = msg.log.size; - vlib_worker_thread_barrier_release (vm); - - msg.flags |= 4; - msg.size = sizeof (msg.u64); - n = - send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); - if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) - { - vu_log_debug (vui, "could not send message response"); - goto close_socket; - } - break; - - case VHOST_USER_SET_LOG_FD: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index); - break; - - case VHOST_USER_GET_PROTOCOL_FEATURES: - msg.flags |= 4; - msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | - (1 << VHOST_USER_PROTOCOL_F_MQ); - msg.size = sizeof (msg.u64); - vu_log_debug (vui, "if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - " - "reply 0x%016llx", vui->hw_if_index, msg.u64); - n = - send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); - if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) - { - vu_log_debug (vui, "could not send message response"); - goto close_socket; - } - break; - - case VHOST_USER_SET_PROTOCOL_FEATURES: - vu_log_debug (vui, "if %d msg VHOST_USER_SET_PROTOCOL_FEATURES " - "features 0x%016llx", vui->hw_if_index, msg.u64); - vui->protocol_features = msg.u64; - break; - - case VHOST_USER_GET_QUEUE_NUM: - msg.flags |= 4; - msg.u64 = VHOST_VRING_MAX_MQ_PAIR_SZ; - msg.size = sizeof (msg.u64); - vu_log_debug (vui, "if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d", - vui->hw_if_index, msg.u64); - n = - send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); - if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) - { - vu_log_debug (vui, "could not send message response"); - goto close_socket; - } - break; - - case VHOST_USER_SET_VRING_ENABLE: - vu_log_debug (vui, "if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d", - vui->hw_if_index, msg.state.num ? 
"enable" : "disable", - msg.state.index); - if (msg.state.index >= vui->num_qid) - { - vu_log_debug (vui, "invalid vring idx VHOST_USER_SET_VRING_ENABLE:" - " %u >= %u", msg.state.index, vui->num_qid); - goto close_socket; - } - - vui->vrings[msg.state.index].enabled = msg.state.num; - vhost_user_thread_placement (vui, msg.state.index); - vhost_user_update_iface_state (vui); - break; - - default: - vu_log_debug (vui, "unknown vhost-user message %d received. " - "closing socket", msg.request); - goto close_socket; - } - - return 0; - -close_socket: - vlib_worker_thread_barrier_sync (vm); - vhost_user_if_disconnect (vui); - vlib_worker_thread_barrier_release (vm); - vhost_user_update_iface_state (vui); - return 0; -} - -static clib_error_t * -vhost_user_socket_error (clib_file_t * uf) -{ - vlib_main_t *vm = vlib_get_main (); - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui = - pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); - - vu_log_debug (vui, "socket error on if %d", vui->sw_if_index); - vlib_worker_thread_barrier_sync (vm); - vhost_user_if_disconnect (vui); - vlib_worker_thread_barrier_release (vm); - return 0; -} - -static clib_error_t * -vhost_user_socksvr_accept_ready (clib_file_t * uf) -{ - int client_fd, client_len; - struct sockaddr_un client; - clib_file_t template = { 0 }; - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - - vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); - - client_len = sizeof (client); - client_fd = accept (uf->file_descriptor, - (struct sockaddr *) &client, - (socklen_t *) & client_len); - - if (client_fd < 0) - return clib_error_return_unix (0, "accept"); - - if (vui->clib_file_index != ~0) - { - vu_log_debug (vui, "Close client socket for vhost interface %d, fd %d", - vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index)); - clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index); - } - - vu_log_debug (vui, "New client socket for 
vhost interface %d, fd %d", - vui->sw_if_index, client_fd); - template.read_function = vhost_user_socket_read; - template.error_function = vhost_user_socket_error; - template.file_descriptor = client_fd; - template.private_data = vui - vhost_user_main.vhost_user_interfaces; - template.description = format (0, "vhost interface %d", vui->sw_if_index); - vui->clib_file_index = clib_file_add (&file_main, &template); - vui->num_qid = 2; - return 0; -} - -static clib_error_t * -vhost_user_init (vlib_main_t * vm) -{ - vhost_user_main_t *vum = &vhost_user_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - vum->log_default = vlib_log_register_class ("vhost-user", 0); - - vum->coalesce_frames = 32; - vum->coalesce_time = 1e-3; - - vec_validate (vum->cpus, tm->n_vlib_mains - 1); - - vhost_cpu_t *cpu; - vec_foreach (cpu, vum->cpus) - { - /* This is actually not necessary as validate already zeroes it - * Just keeping the loop here for later because I am lazy. */ - cpu->rx_buffers_len = 0; - } - - vum->random = random_default_seed (); - - mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword)); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_INIT_FUNCTION (vhost_user_init) = -{ - .runs_after = VLIB_INITS("ip4_init"), -}; -/* *INDENT-ON* */ - -static uword -vhost_user_send_interrupt_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - vhost_user_intf_t *vui; - f64 timeout = 3153600000.0 /* 100 years */ ; - uword event_type, *event_data = 0; - vhost_user_main_t *vum = &vhost_user_main; - u16 qid; - f64 now, poll_time_remaining; - f64 next_timeout; - u8 stop_timer = 0; - - while (1) - { - poll_time_remaining = - vlib_process_wait_for_event_or_clock (vm, timeout); - event_type = vlib_process_get_events (vm, &event_data); - vec_reset_length (event_data); - - /* - * Use the remaining timeout if it is less than coalesce time to avoid - * resetting the existing timer in the middle of expiration - */ - timeout = poll_time_remaining; - if 
(vlib_process_suspend_time_is_zero (timeout) || - (timeout > vum->coalesce_time)) - timeout = vum->coalesce_time; - - now = vlib_time_now (vm); - switch (event_type) - { - case VHOST_USER_EVENT_STOP_TIMER: - stop_timer = 1; - break; - - case VHOST_USER_EVENT_START_TIMER: - stop_timer = 0; - timeout = 1e-3; - if (!vlib_process_suspend_time_is_zero (poll_time_remaining)) - break; - /* fall through */ - - case ~0: - /* *INDENT-OFF* */ - pool_foreach (vui, vum->vhost_user_interfaces) { - next_timeout = timeout; - FOR_ALL_VHOST_RX_TXQ (qid, vui) - { - vhost_user_vring_t *vq = &vui->vrings[qid]; - - if (vq->started == 0) - continue; - if (vq->n_since_last_int) - { - if (now >= vq->int_deadline) - vhost_user_send_call (vm, vui, vq); - else - next_timeout = vq->int_deadline - now; - } - - if ((next_timeout < timeout) && (next_timeout > 0.0)) - timeout = next_timeout; - } - } - /* *INDENT-ON* */ - break; - - default: - clib_warning ("BUG: unhandled event type %d", event_type); - break; - } - /* No less than 1 millisecond */ - if (timeout < 1e-3) - timeout = 1e-3; - if (stop_timer) - timeout = 3153600000.0; - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (vhost_user_send_interrupt_node) = { - .function = vhost_user_send_interrupt_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "vhost-user-send-interrupt-process", -}; -/* *INDENT-ON* */ - -static uword -vhost_user_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - struct sockaddr_un sun; - int sockfd; - clib_file_t template = { 0 }; - f64 timeout = 3153600000.0 /* 100 years */ ; - uword *event_data = 0; - - sockfd = -1; - sun.sun_family = AF_UNIX; - template.read_function = vhost_user_socket_read; - template.error_function = vhost_user_socket_error; - - while (1) - { - vlib_process_wait_for_event_or_clock (vm, timeout); - vlib_process_get_events (vm, &event_data); - vec_reset_length (event_data); - - timeout = 
3.0; - - /* *INDENT-OFF* */ - pool_foreach (vui, vum->vhost_user_interfaces) { - - if (vui->unix_server_index == ~0) { //Nothing to do for server sockets - if (vui->clib_file_index == ~0) - { - if ((sockfd < 0) && - ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)) - { - /* - * 1st time error or new error for this interface, - * spit out the message and record the error - */ - if (!vui->sock_errno || (vui->sock_errno != errno)) - { - clib_unix_warning - ("Error: Could not open unix socket for %s", - vui->sock_filename); - vui->sock_errno = errno; - } - continue; - } - - /* try to connect */ - strncpy (sun.sun_path, (char *) vui->sock_filename, - sizeof (sun.sun_path) - 1); - sun.sun_path[sizeof (sun.sun_path) - 1] = 0; - - /* Avoid hanging VPP if the other end does not accept */ - if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0) - clib_unix_warning ("fcntl"); - - if (connect (sockfd, (struct sockaddr *) &sun, - sizeof (struct sockaddr_un)) == 0) - { - /* Set the socket to blocking as it was before */ - if (fcntl(sockfd, F_SETFL, 0) < 0) - clib_unix_warning ("fcntl2"); - - vui->sock_errno = 0; - template.file_descriptor = sockfd; - template.private_data = - vui - vhost_user_main.vhost_user_interfaces; - template.description = format (0, "vhost user process"); - vui->clib_file_index = clib_file_add (&file_main, &template); - vui->num_qid = 2; - - /* This sockfd is considered consumed */ - sockfd = -1; - } - else - { - vui->sock_errno = errno; - } - } - else - { - /* check if socket is alive */ - int error = 0; - socklen_t len = sizeof (error); - int fd = UNIX_GET_FD(vui->clib_file_index); - int retval = - getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len); - - if (retval) - { - vu_log_debug (vui, "getsockopt returned %d", retval); - vhost_user_if_disconnect (vui); - } - } - } - } - /* *INDENT-ON* */ - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (vhost_user_process_node,static) = { - .function = vhost_user_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name 
= "vhost-user-process", -}; -/* *INDENT-ON* */ - -/** - * Disables and reset interface structure. - * It can then be either init again, or removed from used interfaces. - */ -static void -vhost_user_term_if (vhost_user_intf_t * vui) -{ - int q; - vhost_user_main_t *vum = &vhost_user_main; - - // disconnect interface sockets - vhost_user_if_disconnect (vui); - vhost_user_update_gso_interface_count (vui, 0 /* delete */ ); - vhost_user_update_iface_state (vui); - - for (q = 0; q < vec_len (vui->vrings); q++) - clib_spinlock_free (&vui->vrings[q].vring_lock); - - if (vui->unix_server_index != ~0) - { - //Close server socket - clib_file_t *uf = pool_elt_at_index (file_main.file_pool, - vui->unix_server_index); - clib_file_del (&file_main, uf); - vui->unix_server_index = ~0; - unlink (vui->sock_filename); - } - - mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename, - &vui->if_index); -} - -int -vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) -{ - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - int rv = 0; - vnet_hw_interface_t *hwif; - u16 qid; - - if (! 
- (hwif = - vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index)) - || hwif->dev_class_index != vhost_user_device_class.index) - return VNET_API_ERROR_INVALID_SW_IF_INDEX; - - vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); - - vu_log_debug (vui, "Deleting vhost-user interface %s (instance %d)", - hwif->name, hwif->dev_instance); - - FOR_ALL_VHOST_TXQ (qid, vui) - { - vhost_user_vring_t *txvq = &vui->vrings[qid]; - - if ((txvq->mode == VNET_HW_IF_RX_MODE_POLLING) && - (txvq->thread_index != ~0)) - { - vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index); - ASSERT (cpu->polling_q_count != 0); - cpu->polling_q_count--; - } - - if ((vum->ifq_count > 0) && - ((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) || - (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))) - { - vum->ifq_count--; - // Stop the timer if there is no more interrupt interface/queue - if (vum->ifq_count == 0) - { - vlib_process_signal_event (vm, - vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_STOP_TIMER, 0); - break; - } - } - } - - // Disable and reset interface - vhost_user_term_if (vui); - - // Reset renumbered iface - if (hwif->dev_instance < - vec_len (vum->show_dev_instance_by_real_dev_instance)) - vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0; - - // Delete ethernet interface - ethernet_delete_interface (vnm, vui->hw_if_index); - - // free vrings - vec_free (vui->vrings); - - // Back to pool - pool_put (vum->vhost_user_interfaces, vui); - - return rv; -} - -static clib_error_t * -vhost_user_exit (vlib_main_t * vm) -{ - vnet_main_t *vnm = vnet_get_main (); - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - - vlib_worker_thread_barrier_sync (vlib_get_main ()); - /* *INDENT-OFF* */ - pool_foreach (vui, vum->vhost_user_interfaces) { - vhost_user_delete_if (vnm, vm, vui->sw_if_index); - } - /* *INDENT-ON* */ - vlib_worker_thread_barrier_release (vlib_get_main ()); - return 0; -} - 
-VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit); - -/** - * Open server unix socket on specified sock_filename. - */ -static int -vhost_user_init_server_sock (const char *sock_filename, int *sock_fd) -{ - int rv = 0; - struct sockaddr_un un = { }; - int fd; - /* create listening socket */ - if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) - return VNET_API_ERROR_SYSCALL_ERROR_1; - - un.sun_family = AF_UNIX; - strncpy ((char *) un.sun_path, (char *) sock_filename, - sizeof (un.sun_path) - 1); - - /* remove if exists */ - unlink ((char *) sock_filename); - - if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1) - { - rv = VNET_API_ERROR_SYSCALL_ERROR_2; - goto error; - } - - if (listen (fd, 1) == -1) - { - rv = VNET_API_ERROR_SYSCALL_ERROR_3; - goto error; - } - - *sock_fd = fd; - return 0; - -error: - close (fd); - return rv; -} - -/** - * Create ethernet interface for vhost user interface. - */ -static void -vhost_user_create_ethernet (vnet_main_t *vnm, vlib_main_t *vm, - vhost_user_intf_t *vui, - vhost_user_create_if_args_t *args) -{ - vhost_user_main_t *vum = &vhost_user_main; - vnet_eth_interface_registration_t eir = {}; - u8 hwaddr[6]; - - /* create hw and sw interface */ - if (args->use_custom_mac) - { - clib_memcpy (hwaddr, args->hwaddr, 6); - } - else - { - random_u32 (&vum->random); - clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random)); - hwaddr[0] = 2; - hwaddr[1] = 0xfe; - } - - eir.dev_class_index = vhost_user_device_class.index; - eir.dev_instance = vui - vum->vhost_user_interfaces /* device instance */, - eir.address = hwaddr; - vui->hw_if_index = vnet_eth_register_interface (vnm, &eir); -} - -/* - * Initialize vui with specified attributes - */ -static void -vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui, - int server_sock_fd, vhost_user_create_if_args_t * args, - u32 * sw_if_index) -{ - vnet_sw_interface_t *sw; - int q; - vhost_user_main_t *vum = &vhost_user_main; - - sw = vnet_get_hw_sw_interface (vnm, 
vui->hw_if_index); - if (server_sock_fd != -1) - { - clib_file_t template = { 0 }; - template.read_function = vhost_user_socksvr_accept_ready; - template.file_descriptor = server_sock_fd; - template.private_data = vui - vum->vhost_user_interfaces; //hw index - template.description = format (0, "vhost user %d", sw); - vui->unix_server_index = clib_file_add (&file_main, &template); - } - else - { - vui->unix_server_index = ~0; - } - - vui->sw_if_index = sw->sw_if_index; - strncpy (vui->sock_filename, args->sock_filename, - ARRAY_LEN (vui->sock_filename) - 1); - vui->sock_errno = 0; - vui->is_ready = 0; - vui->feature_mask = args->feature_mask; - vui->clib_file_index = ~0; - vui->log_base_addr = 0; - vui->if_index = vui - vum->vhost_user_interfaces; - vui->enable_gso = args->enable_gso; - vui->enable_event_idx = args->enable_event_idx; - vui->enable_packed = args->enable_packed; - /* - * enable_gso takes precedence over configurable feature mask if there - * is a clash. - * if feature mask disables gso, but enable_gso is configured, - * then gso is enable - * if feature mask enables gso, but enable_gso is not configured, - * then gso is enable - * - * if gso is enable via feature mask, it must enable both host and guest - * gso feature mask, we don't support one sided GSO or partial GSO. 
- */ - if ((vui->enable_gso == 0) && - ((args->feature_mask & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS) - == (FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS))) - vui->enable_gso = 1; - vhost_user_update_gso_interface_count (vui, 1 /* add */ ); - mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename, - &vui->if_index, 0); - - vec_validate_aligned (vui->vrings, (VHOST_VRING_INIT_MQ_PAIR_SZ << 1) - 1, - CLIB_CACHE_LINE_BYTES); - vui->num_qid = 2; - for (q = 0; q < vec_len (vui->vrings); q++) - vhost_user_vring_init (vui, q); - - vnet_hw_if_set_caps (vnm, vui->hw_if_index, VNET_HW_IF_CAP_INT_MODE); - vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); - - if (sw_if_index) - *sw_if_index = vui->sw_if_index; -} - -int -vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, - vhost_user_create_if_args_t * args) -{ - vhost_user_intf_t *vui = NULL; - u32 sw_if_idx = ~0; - int rv = 0; - int server_sock_fd = -1; - vhost_user_main_t *vum = &vhost_user_main; - uword *if_index; - - if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0)) - { - return VNET_API_ERROR_INVALID_ARGUMENT; - } - - if_index = mhash_get (&vum->if_index_by_sock_name, - (void *) args->sock_filename); - if (if_index) - { - vui = &vum->vhost_user_interfaces[*if_index]; - args->sw_if_index = vui->sw_if_index; - return VNET_API_ERROR_IF_ALREADY_EXISTS; - } - - if (args->is_server) - { - if ((rv = - vhost_user_init_server_sock (args->sock_filename, - &server_sock_fd)) != 0) - { - return rv; - } - } - - /* Protect the uninitialized vui from being dispatched by rx/tx */ - vlib_worker_thread_barrier_sync (vm); - pool_get (vhost_user_main.vhost_user_interfaces, vui); - vhost_user_create_ethernet (vnm, vm, vui, args); - vlib_worker_thread_barrier_release (vm); - - vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx); - vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000); - vhost_user_rx_thread_placement (vui, 1); - - if (args->renumber) - 
vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance); - - args->sw_if_index = sw_if_idx; - - // Process node must connect - vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); - - return rv; -} - -int -vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, - vhost_user_create_if_args_t * args) -{ - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui = NULL; - u32 sw_if_idx = ~0; - int server_sock_fd = -1; - int rv = 0; - vnet_hw_interface_t *hwif; - uword *if_index; - - if (!(hwif = vnet_get_sup_hw_interface_api_visible_or_null (vnm, - args->sw_if_index)) - || hwif->dev_class_index != vhost_user_device_class.index) - return VNET_API_ERROR_INVALID_SW_IF_INDEX; - - if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0)) - return VNET_API_ERROR_INVALID_ARGUMENT; - - vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); - - /* - * Disallow changing the interface to have the same path name - * as other interface - */ - if_index = mhash_get (&vum->if_index_by_sock_name, - (void *) args->sock_filename); - if (if_index && (*if_index != vui->if_index)) - return VNET_API_ERROR_IF_ALREADY_EXISTS; - - // First try to open server socket - if (args->is_server) - if ((rv = vhost_user_init_server_sock (args->sock_filename, - &server_sock_fd)) != 0) - return rv; - - vhost_user_term_if (vui); - vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx); - - if (args->renumber) - vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance); - - // Process node must connect - vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); - - return rv; -} - -clib_error_t * -vhost_user_connect_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - unformat_input_t _line_input, *line_input = &_line_input; - clib_error_t *error = NULL; - vhost_user_create_if_args_t args = { 0 }; - int rv; - - /* Get a line of 
input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - args.feature_mask = (u64) ~ (0ULL); - args.custom_dev_instance = ~0; - /* GSO feature is disable by default */ - args.feature_mask &= ~FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS; - /* packed-ring feature is disable by default */ - args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_F_RING_PACKED); - /* event_idx feature is disable by default */ - args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX); - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "socket %s", &args.sock_filename)) - ; - else if (unformat (line_input, "server")) - args.is_server = 1; - else if (unformat (line_input, "gso")) - args.enable_gso = 1; - else if (unformat (line_input, "packed")) - args.enable_packed = 1; - else if (unformat (line_input, "event-idx")) - args.enable_event_idx = 1; - else if (unformat (line_input, "feature-mask 0x%llx", - &args.feature_mask)) - ; - else if (unformat (line_input, "hwaddr %U", unformat_ethernet_address, - args.hwaddr)) - args.use_custom_mac = 1; - else if (unformat (line_input, "renumber %d", - &args.custom_dev_instance)) - args.renumber = 1; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if ((rv = vhost_user_create_if (vnm, vm, &args))) - { - error = clib_error_return (0, "vhost_user_create_if returned %d", rv); - goto done; - } - - vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnm, - args.sw_if_index); - -done: - vec_free (args.sock_filename); - unformat_free (line_input); - - return error; -} - -clib_error_t * -vhost_user_delete_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u32 sw_if_index = ~0; - vnet_main_t *vnm = vnet_get_main (); - clib_error_t *error = NULL; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "sw_if_index %d", &sw_if_index)) - ; - else if (unformat - (line_input, "%U", unformat_vnet_sw_interface, vnm, - &sw_if_index)) - { - vnet_hw_interface_t *hwif = - vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index); - if (hwif == NULL || - vhost_user_device_class.index != hwif->dev_class_index) - { - error = clib_error_return (0, "Not a vhost interface"); - goto done; - } - } - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - vhost_user_delete_if (vnm, vm, sw_if_index); - -done: - unformat_free (line_input); - - return error; -} - -int -vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, - vhost_user_intf_details_t ** out_vuids) -{ - int rv = 0; - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - vhost_user_intf_details_t *r_vuids = NULL; - vhost_user_intf_details_t *vuid = NULL; - u32 *hw_if_indices = 0; - vnet_hw_interface_t *hi; - int i; - - if (!out_vuids) - return -1; - - pool_foreach (vui, vum->vhost_user_interfaces) - vec_add1 (hw_if_indices, vui->hw_if_index); - - for (i = 0; i < vec_len (hw_if_indices); i++) - { - hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); - vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); - - vec_add2 (r_vuids, vuid, 1); - vuid->sw_if_index = vui->sw_if_index; - vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz; - vuid->features = vui->features; - vuid->num_regions = vui->nregions; - vuid->is_server = vui->unix_server_index != ~0; - vuid->sock_errno = vui->sock_errno; - snprintf ((char *) vuid->sock_filename, sizeof (vuid->sock_filename), - "%s", vui->sock_filename); - memcpy_s (vuid->if_name, sizeof (vuid->if_name), hi->name, - clib_min (vec_len (hi->name), sizeof (vuid->if_name) - 1)); - vuid->if_name[sizeof 
(vuid->if_name) - 1] = 0; - } - - vec_free (hw_if_indices); - - *out_vuids = r_vuids; - - return rv; -} - -static u8 * -format_vhost_user_desc (u8 * s, va_list * args) -{ - char *fmt = va_arg (*args, char *); - vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *); - vnet_virtio_vring_desc_t *desc_table = - va_arg (*args, vnet_virtio_vring_desc_t *); - int idx = va_arg (*args, int); - u32 *mem_hint = va_arg (*args, u32 *); - - s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len, - desc_table[idx].flags, desc_table[idx].next, - pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr, - mem_hint))); - return s; -} - -static void -vhost_user_show_fds (vlib_main_t * vm, vhost_user_vring_t * vq) -{ - int kickfd = UNIX_GET_FD (vq->kickfd_idx); - int callfd = UNIX_GET_FD (vq->callfd_idx); - - vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n", kickfd, callfd, - vq->errfd); -} - -static void -vhost_user_show_desc (vlib_main_t * vm, vhost_user_intf_t * vui, int q, - int show_descr, int show_verbose) -{ - int j; - u32 mem_hint = 0; - u32 idx; - u32 n_entries; - vnet_virtio_vring_desc_t *desc_table; - vhost_user_vring_t *vq = &vui->vrings[q]; - - if (vq->avail && vq->used) - vlib_cli_output (vm, - " avail.flags %x avail event idx %u avail.idx %d " - "used.flags %x used event idx %u used.idx %d\n", - vq->avail->flags, vhost_user_avail_event_idx (vq), - vq->avail->idx, vq->used->flags, - vhost_user_used_event_idx (vq), vq->used->idx); - - vhost_user_show_fds (vm, vq); - - if (show_descr) - { - vlib_cli_output (vm, "\n descriptor table:\n"); - vlib_cli_output (vm, - " slot addr len flags next " - "user_addr\n"); - vlib_cli_output (vm, - " ===== ================== ===== ====== ===== " - "==================\n"); - for (j = 0; j < vq->qsz_mask + 1; j++) - { - desc_table = vq->desc; - vlib_cli_output (vm, "%U", format_vhost_user_desc, - " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", vui, - desc_table, j, &mem_hint); - if (show_verbose && 
(desc_table[j].flags & VRING_DESC_F_INDIRECT)) - { - n_entries = - desc_table[j].len / sizeof (vnet_virtio_vring_desc_t); - desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint); - if (desc_table) - { - for (idx = 0; idx < clib_min (20, n_entries); idx++) - { - vlib_cli_output - (vm, "%U", format_vhost_user_desc, - "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui, - desc_table, idx, &mem_hint); - } - if (n_entries >= 20) - vlib_cli_output (vm, "Skip displaying entries 20...%u\n", - n_entries); - } - } - } - } -} - -static u8 * -format_vhost_user_packed_desc (u8 * s, va_list * args) -{ - char *fmt = va_arg (*args, char *); - vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *); - vnet_virtio_vring_packed_desc_t *desc_table = - va_arg (*args, vnet_virtio_vring_packed_desc_t *); - int idx = va_arg (*args, int); - u32 *mem_hint = va_arg (*args, u32 *); - - s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len, - desc_table[idx].flags, desc_table[idx].id, - pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr, - mem_hint))); - return s; -} - -static u8 * -format_vhost_user_event_idx_flags (u8 * s, va_list * args) -{ - u32 flags = va_arg (*args, u32); - typedef struct - { - u8 value; - char *str; - } event_idx_flags; - static event_idx_flags event_idx_array[] = { -#define _(s,v) { .str = #s, .value = v, }, - foreach_virtio_event_idx_flags -#undef _ - }; - u32 num_entries = sizeof (event_idx_array) / sizeof (event_idx_flags); - - if (flags < num_entries) - s = format (s, "%s", event_idx_array[flags].str); - else - s = format (s, "%u", flags); - return s; -} - -static void -vhost_user_show_desc_packed (vlib_main_t * vm, vhost_user_intf_t * vui, int q, - int show_descr, int show_verbose) -{ - int j; - u32 mem_hint = 0; - u32 idx; - u32 n_entries; - vnet_virtio_vring_packed_desc_t *desc_table; - vhost_user_vring_t *vq = &vui->vrings[q]; - u16 off_wrap, event_idx; - - off_wrap = vq->avail_event->off_wrap; - event_idx = off_wrap & 0x7fff; 
- vlib_cli_output (vm, " avail_event.flags %U avail_event.off_wrap %u " - "avail event idx %u\n", format_vhost_user_event_idx_flags, - (u32) vq->avail_event->flags, off_wrap, event_idx); - - off_wrap = vq->used_event->off_wrap; - event_idx = off_wrap & 0x7fff; - vlib_cli_output (vm, " used_event.flags %U used_event.off_wrap %u " - "used event idx %u\n", format_vhost_user_event_idx_flags, - (u32) vq->used_event->flags, off_wrap, event_idx); - - vlib_cli_output (vm, " avail wrap counter %u, used wrap counter %u\n", - vq->avail_wrap_counter, vq->used_wrap_counter); - - vhost_user_show_fds (vm, vq); - - if (show_descr) - { - vlib_cli_output (vm, "\n descriptor table:\n"); - vlib_cli_output (vm, - " slot addr len flags id " - "user_addr\n"); - vlib_cli_output (vm, - " ===== ================== ===== ====== ===== " - "==================\n"); - for (j = 0; j < vq->qsz_mask + 1; j++) - { - desc_table = vq->packed_desc; - vlib_cli_output (vm, "%U", format_vhost_user_packed_desc, - " %-5u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui, - desc_table, j, &mem_hint); - if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT)) - { - n_entries = desc_table[j].len >> 4; - desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint); - if (desc_table) - { - for (idx = 0; idx < clib_min (20, n_entries); idx++) - { - vlib_cli_output - (vm, "%U", format_vhost_user_packed_desc, - "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui, - desc_table, idx, &mem_hint); - } - if (n_entries >= 20) - vlib_cli_output (vm, "Skip displaying entries 20...%u\n", - n_entries); - } - } - } - } -} - -clib_error_t * -show_vhost_user_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - clib_error_t *error = 0; - vnet_main_t *vnm = vnet_get_main (); - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - u32 hw_if_index, *hw_if_indices = 0; - vnet_hw_interface_t *hi; - u16 qid; - int i, j, q; - int show_descr = 0; - int show_verbose = 0; - struct 
feat_struct - { - u8 bit; - char *str; - }; - struct feat_struct *feat_entry; - - static struct feat_struct feat_array[] = { -#define _(s,b) { .str = #s, .bit = b, }, - foreach_virtio_net_features -#undef _ - {.str = NULL} - }; - -#define foreach_protocol_feature \ - _(VHOST_USER_PROTOCOL_F_MQ) \ - _(VHOST_USER_PROTOCOL_F_LOG_SHMFD) - - static struct feat_struct proto_feat_array[] = { -#define _(s) { .str = #s, .bit = s}, - foreach_protocol_feature -#undef _ - {.str = NULL} - }; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) - { - hi = vnet_get_hw_interface (vnm, hw_if_index); - if (vhost_user_device_class.index != hi->dev_class_index) - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - goto done; - } - vec_add1 (hw_if_indices, hw_if_index); - } - else if (unformat (input, "descriptors") || unformat (input, "desc")) - show_descr = 1; - else if (unformat (input, "verbose")) - show_verbose = 1; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - goto done; - } - } - if (vec_len (hw_if_indices) == 0) - { - pool_foreach (vui, vum->vhost_user_interfaces) - vec_add1 (hw_if_indices, vui->hw_if_index); - } - vlib_cli_output (vm, "Virtio vhost-user interfaces"); - vlib_cli_output (vm, "Global:\n coalesce frames %d time %e", - vum->coalesce_frames, vum->coalesce_time); - vlib_cli_output (vm, " Number of rx virtqueues in interrupt mode: %d", - vum->ifq_count); - vlib_cli_output (vm, " Number of GSO interfaces: %d", vum->gso_count); - for (u32 tid = 0; tid <= vlib_num_workers (); tid++) - { - vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, tid); - vlib_cli_output (vm, " Thread %u: Polling queue count %u", tid, - cpu->polling_q_count); - } - - for (i = 0; i < vec_len (hw_if_indices); i++) - { - hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); - vui = pool_elt_at_index 
(vum->vhost_user_interfaces, hi->dev_instance); - vlib_cli_output (vm, "Interface: %U (ifindex %d)", - format_vnet_hw_if_index_name, vnm, hw_if_indices[i], - hw_if_indices[i]); - vlib_cli_output (vm, " Number of qids %u", vui->num_qid); - if (vui->enable_gso) - vlib_cli_output (vm, " GSO enable"); - if (vui->enable_packed) - vlib_cli_output (vm, " Packed ring enable"); - if (vui->enable_event_idx) - vlib_cli_output (vm, " Event index enable"); - - vlib_cli_output (vm, "virtio_net_hdr_sz %d\n" - " features mask (0x%llx): \n" - " features (0x%llx): \n", - vui->virtio_net_hdr_sz, vui->feature_mask, - vui->features); - - feat_entry = (struct feat_struct *) &feat_array; - while (feat_entry->str) - { - if (vui->features & (1ULL << feat_entry->bit)) - vlib_cli_output (vm, " %s (%d)", feat_entry->str, - feat_entry->bit); - feat_entry++; - } - - vlib_cli_output (vm, " protocol features (0x%llx)", - vui->protocol_features); - feat_entry = (struct feat_struct *) &proto_feat_array; - while (feat_entry->str) - { - if (vui->protocol_features & (1ULL << feat_entry->bit)) - vlib_cli_output (vm, " %s (%d)", feat_entry->str, - feat_entry->bit); - feat_entry++; - } - - vlib_cli_output (vm, "\n"); - - vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n", - vui->sock_filename, - (vui->unix_server_index != ~0) ? 
"server" : "client", - strerror (vui->sock_errno)); - - vlib_cli_output (vm, " rx placement: "); - - FOR_ALL_VHOST_TXQ (qid, vui) - { - vhost_user_vring_t *txvq = &vui->vrings[qid]; - - if (txvq->qid == -1) - continue; - vlib_cli_output (vm, " thread %d on vring %d, %U\n", - txvq->thread_index, qid, format_vnet_hw_if_rx_mode, - txvq->mode); - } - - vlib_cli_output (vm, " tx placement\n"); - - FOR_ALL_VHOST_RXQ (qid, vui) - { - vhost_user_vring_t *rxvq = &vui->vrings[qid]; - vnet_hw_if_tx_queue_t *txq; - - if (rxvq->queue_index == ~0) - continue; - txq = vnet_hw_if_get_tx_queue (vnm, rxvq->queue_index); - if (txq->threads) - vlib_cli_output (vm, " threads %U on vring %u: %s\n", - format_bitmap_list, txq->threads, qid, - txq->shared_queue ? "spin-lock" : "lock-free"); - } - - vlib_cli_output (vm, "\n"); - - vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions); - - if (vui->nregions) - { - vlib_cli_output (vm, - " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n"); - vlib_cli_output (vm, - " ====== ===== ================== ================== ================== ================== ==================\n"); - } - for (j = 0; j < vui->nregions; j++) - { - vlib_cli_output (vm, - " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", - j, vui->region_mmap_fd[j], - vui->regions[j].guest_phys_addr, - vui->regions[j].memory_size, - vui->regions[j].userspace_addr, - vui->regions[j].mmap_offset, - pointer_to_uword (vui->region_mmap_addr[j])); - } - FOR_ALL_VHOST_RX_TXQ (q, vui) - { - if (!vui->vrings[q].started) - continue; - - vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q, - (q & 1) ? "RX" : "TX", - vui->vrings[q].enabled ? "" : " disabled"); - vlib_cli_output (vm, " global %s queue index %u\n", - (q & 1) ? 
"RX" : "TX", vui->vrings[q].queue_index); - - vlib_cli_output ( - vm, - " qsz %d last_avail_idx %d last_used_idx %d" - " last_kick %u\n", - vui->vrings[q].qsz_mask + 1, vui->vrings[q].last_avail_idx, - vui->vrings[q].last_used_idx, vui->vrings[q].last_kick); - - if (vhost_user_is_packed_ring_supported (vui)) - vhost_user_show_desc_packed (vm, vui, q, show_descr, show_verbose); - else - vhost_user_show_desc (vm, vui, q, show_descr, show_verbose); - } - vlib_cli_output (vm, "\n"); - } -done: - vec_free (hw_if_indices); - return error; -} - -/* - * CLI functions - */ - -/*? - * Create a vHost User interface. Once created, a new virtual interface - * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>' - * is the next free index. - * - * There are several parameters associated with a vHost interface: - * - * - <b>socket <socket-filename></b> - Name of the linux socket used by - * hypervisor and VPP to manage the vHost interface. If in <em>server</em> - * mode, VPP will create the socket if it does not already exist. If in - * <em>client</em> mode, hypervisor will create the socket if it does not - * already exist. The VPP code is indifferent to the file location. However, - * if SELinux is enabled, then the socket needs to be created in - * <em>/var/run/vpp/</em>. - * - * - <b>server</b> - Optional flag to indicate that VPP should be the server - * for the linux socket. If not provided, VPP will be the client. In - * <em>server</em> mode, the VM can be reset without tearing down the vHost - * Interface. In <em>client</em> mode, VPP can be reset without bringing down - * the VM and tearing down the vHost Interface. - * - * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated - * at startup. <b>This is intended for degugging only.</b> It is recommended - * that this parameter not be used except by experienced users. By default, - * all supported features will be advertised. Otherwise, provide the set of - * features desired. 
- * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF - * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ - * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE - * - 0x000400000 (22) - VIRTIO_NET_F_MQ - * - 0x004000000 (26) - VHOST_F_LOG_ALL - * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT - * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC - * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES - * - 0x100000000 (32) - VIRTIO_F_VERSION_1 - * - * - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either - * X:X:X:X:X:X unix or X.X.X cisco format. - * - * - <b>renumber <dev_instance></b> - Optional parameter which allows the - * instance in the name to be specified. If instance already exists, name - * will be used anyway and multiple instances will have the same name. Use - * with caution. - * - * @cliexpar - * Example of how to create a vhost interface with VPP as the client and all - * features enabled: - * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock} - * VirtualEthernet0/0/0 - * @cliexend - * Example of how to create a vhost interface with VPP as the server and with - * just multiple queues enabled: - * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server - * feature-mask 0x40400000} - * VirtualEthernet0/0/1 - * @cliexend - * Once the vHost interface is created, enable the interface using: - * @cliexcmd{set interface state VirtualEthernet0/0/0 up} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (vhost_user_connect_command, static) = { - .path = "create vhost-user", - .short_help = "create vhost-user socket <socket-filename> [server] " - "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] [gso] " - "[packed] [event-idx]", - .function = vhost_user_connect_command_fn, - .is_mp_safe = 1, -}; -/* *INDENT-ON* */ - -/*? - * Delete a vHost User interface using the interface name or the - * software interface index. Use the '<em>show interface</em>' - * command to determine the software interface index. 
On deletion, - * the linux socket will not be deleted. - * - * @cliexpar - * Example of how to delete a vhost interface by name: - * @cliexcmd{delete vhost-user VirtualEthernet0/0/1} - * Example of how to delete a vhost interface by software interface index: - * @cliexcmd{delete vhost-user sw_if_index 1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (vhost_user_delete_command, static) = { - .path = "delete vhost-user", - .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}", - .function = vhost_user_delete_command_fn, -}; - -/*? - * Display the attributes of a single vHost User interface (provide interface - * name), multiple vHost User interfaces (provide a list of interface names - * separated by spaces) or all Vhost User interfaces (omit an interface name - * to display all vHost interfaces). - * - * @cliexpar - * @parblock - * Example of how to display a vhost interface: - * @cliexstart{show vhost-user VirtualEthernet0/0/0} - * Virtio vhost-user interfaces - * Global: - * coalesce frames 32 time 1e-3 - * Interface: VirtualEthernet0/0/0 (ifindex 1) - * virtio_net_hdr_sz 12 - * features mask (0xffffffffffffffff): - * features (0x50408000): - * VIRTIO_NET_F_MRG_RXBUF (15) - * VIRTIO_NET_F_MQ (22) - * VIRTIO_F_INDIRECT_DESC (28) - * VHOST_USER_F_PROTOCOL_FEATURES (30) - * protocol features (0x3) - * VHOST_USER_PROTOCOL_F_MQ (0) - * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1) - * - * socket filename /var/run/vpp/vhost1.sock type client errno "Success" - * - * rx placement: - * thread 1 on vring 1 - * thread 1 on vring 5 - * thread 2 on vring 3 - * thread 2 on vring 7 - * tx placement: spin-lock - * thread 0 on vring 0 - * thread 1 on vring 2 - * thread 2 on vring 0 - * - * Memory regions (total 2) - * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr - * ====== == =============== =========== ============== =========== ========== - * 0 60 0x00000000 0x000a0000 0xaac00000 0x00000000 0x2b400000 - * 1 61 0x000c0000 0x3ff40000 0xaacc0000 
0x000c0000 0xabcc0000 - * - * Virtqueue 0 (TX) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0 - * kickfd 62 callfd 64 errfd -1 - * - * Virtqueue 1 (RX) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 - * kickfd 65 callfd 66 errfd -1 - * - * Virtqueue 2 (TX) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0 - * kickfd 63 callfd 70 errfd -1 - * - * Virtqueue 3 (RX) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 - * kickfd 72 callfd 74 errfd -1 - * - * Virtqueue 4 (TX disabled) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 - * kickfd 76 callfd 78 errfd -1 - * - * Virtqueue 5 (RX disabled) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 - * kickfd 80 callfd 82 errfd -1 - * - * Virtqueue 6 (TX disabled) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 - * kickfd 84 callfd 86 errfd -1 - * - * Virtqueue 7 (RX disabled) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 - * kickfd 88 callfd 90 errfd -1 - * - * @cliexend - * - * The optional '<em>descriptors</em>' parameter will display the same output - * as the previous example but will include the descriptor table for each - * queue. 
- * The output is truncated below: - * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors} - * Virtio vhost-user interfaces - * Global: - * coalesce frames 32 time 1e-3 - * Interface: VirtualEthernet0/0/0 (ifindex 1) - * virtio_net_hdr_sz 12 - * features mask (0xffffffffffffffff): - * features (0x50408000): - * VIRTIO_NET_F_MRG_RXBUF (15) - * VIRTIO_NET_F_MQ (22) - * : - * Virtqueue 0 (TX) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0 - * kickfd 62 callfd 64 errfd -1 - * - * descriptor table: - * id addr len flags next user_addr - * ===== ================== ===== ====== ===== ================== - * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974 - * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034 - * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4 - * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4 - * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474 - * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34 - * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4 - * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4 - * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74 - * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634 - * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4 - * : - * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000 - * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000 - * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000 - * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000 - * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000 - * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000 - * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000 - * - * Virtqueue 1 (RX) - * qsz 256 last_avail_idx 0 last_used_idx 0 - * : - * @cliexend - * @endparblock -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_vhost_user_command, static) = { - .path = "show vhost-user", - .short_help = "show vhost-user [<interface> 
[<interface> [..]]] " - "[[descriptors] [verbose]]", - .function = show_vhost_user_command_fn, -}; -/* *INDENT-ON* */ - - -static clib_error_t * -vhost_user_config (vlib_main_t * vm, unformat_input_t * input) -{ - vhost_user_main_t *vum = &vhost_user_main; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames)) - ; - else if (unformat (input, "coalesce-time %f", &vum->coalesce_time)) - ; - else if (unformat (input, "dont-dump-memory")) - vum->dont_dump_vhost_user_memory = 1; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - - return 0; -} - -/* vhost-user { ... } configuration. */ -VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user"); - -void -vhost_user_unmap_all (void) -{ - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - - if (vum->dont_dump_vhost_user_memory) - { - pool_foreach (vui, vum->vhost_user_interfaces) - unmap_all_mem_regions (vui); - } -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/vhost_user.h b/src/vnet/devices/virtio/vhost_user.h deleted file mode 100644 index f44951e030a..00000000000 --- a/src/vnet/devices/virtio/vhost_user.h +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __VIRTIO_VHOST_USER_H__ -#define __VIRTIO_VHOST_USER_H__ - -#include <vnet/devices/virtio/virtio_std.h> -#include <vnet/devices/virtio/vhost_std.h> - -/* vhost-user data structures */ - -#define VHOST_MEMORY_MAX_NREGIONS 8 -#define VHOST_USER_MSG_HDR_SZ 12 -#define VHOST_VRING_INIT_MQ_PAIR_SZ 8 //8TX + 8RX - -/* - * qid is one byte in size in the spec. Please see VHOST_USER_SET_VRING_CALL, - * VHOST_USER_SET_VRING_KICK, and VHOST_USER_SET_VRING_ERR. - * The max number for q pair is naturally 128. - */ -#define VHOST_VRING_MAX_MQ_PAIR_SZ 128 -#define VHOST_VRING_IDX_RX(qid) (2 * (qid)) -#define VHOST_VRING_IDX_TX(qid) (2 * (qid) + 1) - -#define VHOST_USER_VRING_NOFD_MASK 0x100 - -#define VHOST_USER_PROTOCOL_F_MQ 0 -#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 -#define VHOST_VRING_F_LOG 0 - -#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ - (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD)) - -#define vu_log_debug(dev, f, ...) \ -{ \ - vlib_log(VLIB_LOG_LEVEL_DEBUG, vhost_user_main.log_default, "%U: " f, \ - format_vnet_hw_if_index_name, vnet_get_main(), \ - dev->hw_if_index, ##__VA_ARGS__); \ -}; - -#define vu_log_warn(dev, f, ...) \ -{ \ - vlib_log(VLIB_LOG_LEVEL_WARNING, vhost_user_main.log_default, "%U: " f, \ - format_vnet_hw_if_index_name, vnet_get_main(), \ - dev->hw_if_index, ##__VA_ARGS__); \ -}; -#define vu_log_err(dev, f, ...) \ -{ \ - vlib_log(VLIB_LOG_LEVEL_ERR, vhost_user_main.log_default, "%U: " f, \ - format_vnet_hw_if_index_name, vnet_get_main(), \ - dev->hw_if_index, ##__VA_ARGS__); \ -}; - -#define UNIX_GET_FD(unixfd_idx) ({ \ - typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \ - (__unixfd_idx != ~0) ? 
\ - pool_elt_at_index (file_main.file_pool, \ - __unixfd_idx)->file_descriptor : -1; }) - -#define foreach_virtio_trace_flags \ - _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \ - _ (SINGLE_DESC, 1, "Single descriptor packet") \ - _ (INDIRECT, 2, "Indirect descriptor") \ - _ (MAP_ERROR, 4, "Memory mapping error") - -typedef enum -{ -#define _(n,i,s) VIRTIO_TRACE_F_##n, - foreach_virtio_trace_flags -#undef _ -} virtio_trace_flag_t; - -#define FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS \ - (VIRTIO_FEATURE (VIRTIO_NET_F_CSUM) | \ - VIRTIO_FEATURE (VIRTIO_NET_F_HOST_UFO) | \ - VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) | \ - VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6)) - -#define FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS \ - (VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) | \ - VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO) | \ - VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) | \ - VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)) - -#define FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS \ - (FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS | \ - FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS) - - -typedef struct -{ - char *sock_filename; - u64 feature_mask; - u32 custom_dev_instance; - u8 hwaddr[6]; - u8 renumber; - u8 is_server; - u8 enable_gso; - u8 enable_packed; - u8 enable_event_idx; - u8 use_custom_mac; - - /* return */ - u32 sw_if_index; -} vhost_user_create_if_args_t; - -int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, - vhost_user_create_if_args_t * args); -int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, - vhost_user_create_if_args_t * args); -int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, - u32 sw_if_index); - -/* *INDENT-OFF* */ -typedef struct vhost_user_memory_region -{ - u64 guest_phys_addr; - u64 memory_size; - u64 userspace_addr; - u64 mmap_offset; -} __attribute ((packed)) vhost_user_memory_region_t; - -typedef struct vhost_user_memory -{ - u32 nregions; - u32 padding; - vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]; -} 
__attribute ((packed)) vhost_user_memory_t; - -typedef enum vhost_user_req -{ - VHOST_USER_NONE = 0, - VHOST_USER_GET_FEATURES = 1, - VHOST_USER_SET_FEATURES = 2, - VHOST_USER_SET_OWNER = 3, - VHOST_USER_RESET_OWNER = 4, - VHOST_USER_SET_MEM_TABLE = 5, - VHOST_USER_SET_LOG_BASE = 6, - VHOST_USER_SET_LOG_FD = 7, - VHOST_USER_SET_VRING_NUM = 8, - VHOST_USER_SET_VRING_ADDR = 9, - VHOST_USER_SET_VRING_BASE = 10, - VHOST_USER_GET_VRING_BASE = 11, - VHOST_USER_SET_VRING_KICK = 12, - VHOST_USER_SET_VRING_CALL = 13, - VHOST_USER_SET_VRING_ERR = 14, - VHOST_USER_GET_PROTOCOL_FEATURES = 15, - VHOST_USER_SET_PROTOCOL_FEATURES = 16, - VHOST_USER_GET_QUEUE_NUM = 17, - VHOST_USER_SET_VRING_ENABLE = 18, - VHOST_USER_MAX -} vhost_user_req_t; - -typedef struct vhost_user_msg { - vhost_user_req_t request; - u32 flags; - u32 size; - union - { - u64 u64; - vhost_vring_state_t state; - vhost_vring_addr_t addr; - vhost_user_memory_t memory; - vhost_user_log_t log; - }; -} __attribute ((packed)) vhost_user_msg_t; -/* *INDENT-ON* */ - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u16 qsz_mask; - u16 last_avail_idx; - u16 last_used_idx; - u16 n_since_last_int; - union - { - vnet_virtio_vring_desc_t *desc; - vnet_virtio_vring_packed_desc_t *packed_desc; - }; - union - { - vnet_virtio_vring_avail_t *avail; - vnet_virtio_vring_desc_event_t *avail_event; - }; - union - { - vnet_virtio_vring_used_t *used; - vnet_virtio_vring_desc_event_t *used_event; - }; - uword desc_user_addr; - uword used_user_addr; - uword avail_user_addr; - f64 int_deadline; - u8 started; - u8 enabled; - u8 log_used; - clib_spinlock_t vring_lock; - - //Put non-runtime in a different cache line - CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - int errfd; - u32 callfd_idx; - u32 kickfd_idx; - u64 log_guest_addr; - - /* The rx queue policy (interrupt/adaptive/polling) for this queue */ - u32 mode; - - /* - * It contains the device queue number. -1 if it does not. 
The idea is - * to not invoke vnet_hw_interface_assign_rx_thread and - * vnet_hw_interface_unassign_rx_thread more than once for the duration of - * the interface even if it is disconnected and reconnected. - */ - i16 qid; - - u16 used_wrap_counter; - u16 avail_wrap_counter; - u16 last_kick; - u8 first_kick; - u32 queue_index; - u32 thread_index; -} vhost_user_vring_t; - -#define VHOST_USER_EVENT_START_TIMER 1 -#define VHOST_USER_EVENT_STOP_TIMER 2 - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 is_ready; - u32 admin_up; - u32 unix_server_index; - u32 clib_file_index; - char sock_filename[256]; - int sock_errno; - uword if_index; - u32 hw_if_index, sw_if_index; - - //Feature negotiation - u64 features; - u64 feature_mask; - u64 protocol_features; - - //Memory region information - u32 nregions; - vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]; - void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS]; - u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS]; - u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS]; - u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS]; - - //Virtual rings - vhost_user_vring_t *vrings; - - /* - * vrings is a dynamic array. It may have more elements than it is - * currently used. num_qid indicates the current total qid's in the - * vrings. For example, vec_len(vrings) = 64, num_qid = 60, so the - * current valid/used qid is (0, 59) in the vrings array. 
- */ - u32 num_qid; - - int virtio_net_hdr_sz; - int is_any_layout; - - void *log_base_addr; - u64 log_size; - - u8 enable_gso; - - /* Packed ring configured */ - u8 enable_packed; - - u8 enable_event_idx; -} vhost_user_intf_t; - -#define FOR_ALL_VHOST_TXQ(qid, vui) for (qid = 1; qid < vui->num_qid; qid += 2) - -#define FOR_ALL_VHOST_RXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid += 2) - -#define FOR_ALL_VHOST_RX_TXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid++) - -typedef struct -{ - uword dst; - uword src; - u32 len; -} vhost_copy_t; - -typedef struct -{ - u16 qid; /** The interface queue index (Not the virtio vring idx) */ - u16 device_index; /** The device index */ - u32 virtio_ring_flags; /** Runtime queue flags **/ - u16 first_desc_len; /** Length of the first data descriptor **/ - vnet_virtio_net_hdr_mrg_rxbuf_t hdr; /** Virtio header **/ -} vhost_trace_t; - -#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2) -#define VHOST_USER_COPY_ARRAY_N (4 * VLIB_FRAME_SIZE) - -typedef struct -{ - u32 rx_buffers_len; - u32 rx_buffers[VHOST_USER_RX_BUFFERS_N]; - - vnet_virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE]; - vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N]; - - /* This is here so it doesn't end-up - * using stack or registers. 
*/ - vhost_trace_t *current_trace; - - u32 *to_next_list; - vlib_buffer_t **rx_buffers_pdesc; - u32 polling_q_count; -} vhost_cpu_t; - -typedef struct -{ - mhash_t if_index_by_sock_name; - u32 mtu_bytes; - vhost_user_intf_t *vhost_user_interfaces; - u32 *show_dev_instance_by_real_dev_instance; - u32 coalesce_frames; - f64 coalesce_time; - int dont_dump_vhost_user_memory; - - /** Per-CPU data for vhost-user */ - vhost_cpu_t *cpus; - - /** Pseudo random iterator */ - u32 random; - - /* The number of rx interface/queue pairs in interrupt mode */ - u32 ifq_count; - - /* logging */ - vlib_log_class_t log_default; - - /* gso interface count */ - u32 gso_count; -} vhost_user_main_t; - -typedef struct -{ - u8 if_name[64]; - u32 sw_if_index; - u32 virtio_net_hdr_sz; - u64 features; - u8 is_server; - u8 sock_filename[256]; - u32 num_regions; - int sock_errno; -} vhost_user_intf_details_t; - -int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, - vhost_user_intf_details_t ** out_vuids); -void vhost_user_set_operation_mode (vhost_user_intf_t *vui, - vhost_user_vring_t *txvq); - -extern vlib_node_registration_t vhost_user_send_interrupt_node; -extern vnet_device_class_t vhost_user_device_class; -extern vlib_node_registration_t vhost_user_input_node; -extern vhost_user_main_t vhost_user_main; - -#endif - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c deleted file mode 100644 index cc1896b108a..00000000000 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ /dev/null @@ -1,352 +0,0 @@ -/* - *------------------------------------------------------------------ - * vhost-user_api.c - vhost-user api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <vnet/vnet.h> -#include <vlibmemory/api.h> - -#include <vnet/interface.h> -#include <vnet/api_errno.h> -#include <vnet/devices/virtio/vhost_user.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/ethernet/ethernet_types_api.h> -#include <vnet/devices/virtio/virtio_types_api.h> - -#include <vnet/format_fns.h> -#include <vnet/devices/virtio/vhost_user.api_enum.h> -#include <vnet/devices/virtio/vhost_user.api_types.h> - -#define REPLY_MSG_ID_BASE msg_id_base -#include <vlibapi/api_helper_macros.h> - -static u16 msg_id_base; - -static void -vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp) -{ - int rv = 0; - vl_api_create_vhost_user_if_reply_t *rmp; - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - u64 disabled_features = (u64) (0ULL); - vhost_user_create_if_args_t args = { 0 }; - - args.sw_if_index = (u32) ~ 0; - args.feature_mask = (u64) ~ (0ULL); - if (mp->disable_mrg_rxbuf) - disabled_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF); - - if (mp->disable_indirect_desc) - disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC); - - /* - * GSO and PACKED are not supported by feature mask via binary API. We - * disable GSO and PACKED feature in the feature mask. 
They may be enabled - * explicitly via enable_gso and enable_packed argument - */ - disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS | - VIRTIO_FEATURE (VIRTIO_F_RING_PACKED); - - /* EVENT_IDX is disabled by default */ - disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX); - args.feature_mask &= ~disabled_features; - - if (mp->use_custom_mac) - mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr); - - args.use_custom_mac = mp->use_custom_mac; - args.is_server = mp->is_server; - args.sock_filename = (char *) mp->sock_filename; - args.renumber = mp->renumber; - args.custom_dev_instance = ntohl (mp->custom_dev_instance); - args.enable_gso = mp->enable_gso; - args.enable_packed = mp->enable_packed; - rv = vhost_user_create_if (vnm, vm, &args); - - /* Remember an interface tag for the new interface */ - if (rv == 0) - { - /* If a tag was supplied... */ - if (mp->tag[0]) - { - /* Make sure it's a proper C-string */ - mp->tag[ARRAY_LEN (mp->tag) - 1] = 0; - u8 *tag = format (0, "%s%c", mp->tag, 0); - vnet_set_sw_interface_tag (vnm, tag, args.sw_if_index); - } - } - - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_REPLY, - ({ - rmp->sw_if_index = ntohl (args.sw_if_index); - })); - /* *INDENT-ON* */ -} - -static void -vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp) -{ - int rv = 0; - vl_api_modify_vhost_user_if_reply_t *rmp; - u64 disabled_features = (u64) (0ULL); - vhost_user_create_if_args_t args = { 0 }; - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - - args.feature_mask = (u64) ~ (0ULL); - /* - * GSO and PACKED are not supported by feature mask via binary API. We - * disable GSO and PACKED feature in the feature mask. 
They may be enabled - * explicitly via enable_gso and enable_packed argument - */ - disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS | - VIRTIO_FEATURE (VIRTIO_F_RING_PACKED); - - /* EVENT_IDX is disabled by default */ - disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX); - args.feature_mask &= ~disabled_features; - - args.sw_if_index = ntohl (mp->sw_if_index); - args.sock_filename = (char *) mp->sock_filename; - args.is_server = mp->is_server; - args.renumber = mp->renumber; - args.custom_dev_instance = ntohl (mp->custom_dev_instance); - args.enable_gso = mp->enable_gso; - args.enable_packed = mp->enable_packed; - rv = vhost_user_modify_if (vnm, vm, &args); - - REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY); -} - -static void -vl_api_create_vhost_user_if_v2_t_handler (vl_api_create_vhost_user_if_v2_t * - mp) -{ - int rv = 0; - vl_api_create_vhost_user_if_v2_reply_t *rmp; - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - u64 disabled_features = (u64) (0ULL); - vhost_user_create_if_args_t args = { 0 }; - - args.sw_if_index = (u32) ~ 0; - args.feature_mask = (u64) ~ (0ULL); - if (mp->disable_mrg_rxbuf) - disabled_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF); - - if (mp->disable_indirect_desc) - disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC); - - /* - * GSO and PACKED are not supported by feature mask via binary API. We - * disable GSO and PACKED feature in the feature mask. 
They may be enabled - * explicitly via enable_gso and enable_packed argument - */ - disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS | - VIRTIO_FEATURE (VIRTIO_F_RING_PACKED); - - /* EVENT_IDX is disabled by default */ - disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX); - args.feature_mask &= ~disabled_features; - - if (mp->use_custom_mac) - mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr); - - args.use_custom_mac = mp->use_custom_mac; - args.is_server = mp->is_server; - args.sock_filename = (char *) mp->sock_filename; - args.renumber = mp->renumber; - args.custom_dev_instance = ntohl (mp->custom_dev_instance); - args.enable_gso = mp->enable_gso; - args.enable_packed = mp->enable_packed; - args.enable_event_idx = mp->enable_event_idx; - rv = vhost_user_create_if (vnm, vm, &args); - - /* Remember an interface tag for the new interface */ - if (rv == 0) - { - /* If a tag was supplied... */ - if (mp->tag[0]) - { - /* Make sure it's a proper C-string */ - mp->tag[ARRAY_LEN (mp->tag) - 1] = 0; - u8 *tag = format (0, "%s%c", mp->tag, 0); - vnet_set_sw_interface_tag (vnm, tag, args.sw_if_index); - } - } - - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_V2_REPLY, - ({ - rmp->sw_if_index = ntohl (args.sw_if_index); - })); - /* *INDENT-ON* */ -} - -static void -vl_api_modify_vhost_user_if_v2_t_handler (vl_api_modify_vhost_user_if_v2_t * - mp) -{ - int rv = 0; - vl_api_modify_vhost_user_if_v2_reply_t *rmp; - u64 disabled_features = (u64) (0ULL); - vhost_user_create_if_args_t args = { 0 }; - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - - args.feature_mask = (u64) ~ (0ULL); - /* - * GSO and PACKED are not supported by feature mask via binary API. We - * disable GSO and PACKED feature in the feature mask. 
They may be enabled - * explicitly via enable_gso and enable_packed argument - */ - disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS | - VIRTIO_FEATURE (VIRTIO_F_RING_PACKED); - - /* EVENT_IDX is disabled by default */ - disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX); - args.feature_mask &= ~disabled_features; - - args.sw_if_index = ntohl (mp->sw_if_index); - args.sock_filename = (char *) mp->sock_filename; - args.is_server = mp->is_server; - args.renumber = mp->renumber; - args.custom_dev_instance = ntohl (mp->custom_dev_instance); - args.enable_gso = mp->enable_gso; - args.enable_packed = mp->enable_packed; - args.enable_event_idx = mp->enable_event_idx; - rv = vhost_user_modify_if (vnm, vm, &args); - - REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_V2_REPLY); -} - -static void -vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp) -{ - int rv = 0; - vl_api_delete_vhost_user_if_reply_t *rmp; - u32 sw_if_index = ntohl (mp->sw_if_index); - vl_api_registration_t *reg; - - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - - rv = vhost_user_delete_if (vnm, vm, sw_if_index); - - REPLY_MACRO (VL_API_DELETE_VHOST_USER_IF_REPLY); - if (!rv) - { - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - vnet_clear_sw_interface_tag (vnm, sw_if_index); - } -} - -static void -send_sw_interface_vhost_user_details (vpe_api_main_t * am, - vl_api_registration_t * reg, - vhost_user_intf_details_t * vui, - u32 context) -{ - vl_api_sw_interface_vhost_user_details_t *mp; - - mp = vl_msg_api_alloc (sizeof (*mp)); - clib_memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = - ntohs (REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_VHOST_USER_DETAILS); - mp->sw_if_index = ntohl (vui->sw_if_index); - mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz); - virtio_features_encode (vui->features, (u32 *) & mp->features_first_32, - (u32 *) & mp->features_last_32); - mp->is_server = 
vui->is_server; - mp->num_regions = ntohl (vui->num_regions); - mp->sock_errno = ntohl (vui->sock_errno); - mp->context = context; - - strncpy ((char *) mp->sock_filename, - (char *) vui->sock_filename, ARRAY_LEN (mp->sock_filename) - 1); - strncpy ((char *) mp->interface_name, - (char *) vui->if_name, ARRAY_LEN (mp->interface_name) - 1); - - vl_api_send_msg (reg, (u8 *) mp); -} - -static void - vl_api_sw_interface_vhost_user_dump_t_handler - (vl_api_sw_interface_vhost_user_dump_t * mp) -{ - int rv = 0; - vpe_api_main_t *am = &vpe_api_main; - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - vhost_user_intf_details_t *ifaces = NULL; - vhost_user_intf_details_t *vuid = NULL; - vl_api_registration_t *reg; - u32 filter_sw_if_index; - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - filter_sw_if_index = htonl (mp->sw_if_index); - if (filter_sw_if_index != ~0) - VALIDATE_SW_IF_INDEX (mp); - - rv = vhost_user_dump_ifs (vnm, vm, &ifaces); - if (rv) - return; - - vec_foreach (vuid, ifaces) - { - if ((filter_sw_if_index == ~0) || - (vuid->sw_if_index == filter_sw_if_index)) - send_sw_interface_vhost_user_details (am, reg, vuid, mp->context); - } - BAD_SW_IF_INDEX_LABEL; - vec_free (ifaces); -} - -#include <vnet/devices/virtio/vhost_user.api.c> -static clib_error_t * -vhost_user_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = vlibapi_get_main (); - /* Mark CREATE_VHOST_USER_IF as mp safe */ - vl_api_set_msg_thread_safe (am, VL_API_CREATE_VHOST_USER_IF, 1); - vl_api_set_msg_thread_safe (am, VL_API_CREATE_VHOST_USER_IF_V2, 1); - - /* - * Set up the (msg_name, crc, message-id) table - */ - REPLY_MSG_ID_BASE = setup_message_id_table (); - - return 0; -} - -VLIB_API_INIT_FUNCTION (vhost_user_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/vhost_user_inline.h 
b/src/vnet/devices/virtio/vhost_user_inline.h deleted file mode 100644 index 8bdff3733a7..00000000000 --- a/src/vnet/devices/virtio/vhost_user_inline.h +++ /dev/null @@ -1,496 +0,0 @@ -/* - * Copyright (c) 2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __VIRTIO_VHOST_USER_INLINE_H__ -#define __VIRTIO_VHOST_USER_INLINE_H__ -/* vhost-user inline functions */ -#include <vppinfra/elog.h> - -static_always_inline void * -map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint) -{ - int i = *hint; - if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) && - ((vui->regions[i].guest_phys_addr + - vui->regions[i].memory_size) > addr))) - { - return (void *) (vui->region_mmap_addr[i] + addr - - vui->regions[i].guest_phys_addr); - } -#if __SSE4_2__ - __m128i rl, rh, al, ah, r; - al = _mm_set1_epi64x (addr + 1); - ah = _mm_set1_epi64x (addr); - - rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]); - rl = _mm_cmpgt_epi64 (al, rl); - rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]); - rh = _mm_cmpgt_epi64 (rh, ah); - r = _mm_and_si128 (rl, rh); - - rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]); - rl = _mm_cmpgt_epi64 (al, rl); - rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]); - rh = _mm_cmpgt_epi64 (rh, ah); - r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22); - - rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]); - rl = 
_mm_cmpgt_epi64 (al, rl); - rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]); - rh = _mm_cmpgt_epi64 (rh, ah); - r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44); - - rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]); - rl = _mm_cmpgt_epi64 (al, rl); - rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]); - rh = _mm_cmpgt_epi64 (rh, ah); - r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88); - - r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800)); - i = count_trailing_zeros (_mm_movemask_epi8 (r) | - (1 << VHOST_MEMORY_MAX_NREGIONS)); - - if (i < vui->nregions) - { - *hint = i; - return (void *) (vui->region_mmap_addr[i] + addr - - vui->regions[i].guest_phys_addr); - } -#elif __aarch64__ && __ARM_NEON - uint64x2_t al, ah, rl, rh, r; - uint32_t u32 = 0; - - al = vdupq_n_u64 (addr + 1); - ah = vdupq_n_u64 (addr); - - /*First Iteration */ - rl = vld1q_u64 (&vui->region_guest_addr_lo[0]); - rl = vcgtq_u64 (al, rl); - rh = vld1q_u64 (&vui->region_guest_addr_hi[0]); - rh = vcgtq_u64 (rh, ah); - r = vandq_u64 (rl, rh); - u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1); - u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1); - - if (u32) - { - i = count_trailing_zeros (u32); - goto vhost_map_guest_mem_done; - } - - /*Second Iteration */ - rl = vld1q_u64 (&vui->region_guest_addr_lo[2]); - rl = vcgtq_u64 (al, rl); - rh = vld1q_u64 (&vui->region_guest_addr_hi[2]); - rh = vcgtq_u64 (rh, ah); - r = vandq_u64 (rl, rh); - u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2); - u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3); - - if (u32) - { - i = count_trailing_zeros (u32); - goto vhost_map_guest_mem_done; - } - - /*Third Iteration */ - rl = vld1q_u64 (&vui->region_guest_addr_lo[4]); - rl = vcgtq_u64 (al, rl); - rh = vld1q_u64 (&vui->region_guest_addr_hi[4]); - rh = vcgtq_u64 (rh, ah); - r = vandq_u64 (rl, rh); - u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 
(r), 0) & 0x1) << 6); - u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7); - - i = count_trailing_zeros (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS)); - -vhost_map_guest_mem_done: - if (i < vui->nregions) - { - *hint = i; - return (void *) (vui->region_mmap_addr[i] + addr - - vui->regions[i].guest_phys_addr); - } -#else - for (i = 0; i < vui->nregions; i++) - { - if ((vui->regions[i].guest_phys_addr <= addr) && - ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) > - addr)) - { - *hint = i; - return (void *) (vui->region_mmap_addr[i] + addr - - vui->regions[i].guest_phys_addr); - } - } -#endif - /* *INDENT-OFF* */ - ELOG_TYPE_DECLARE (el) = - { - .format = "failed to map guest mem addr %lx", - .format_args = "i8", - }; - /* *INDENT-ON* */ - struct - { - uword addr; - } *ed; - ed = ELOG_DATA (&vlib_global_main.elog_main, el); - ed->addr = addr; - *hint = 0; - return 0; -} - -static_always_inline void * -map_user_mem (vhost_user_intf_t * vui, uword addr) -{ - int i; - for (i = 0; i < vui->nregions; i++) - { - if ((vui->regions[i].userspace_addr <= addr) && - ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) > - addr)) - { - return (void *) (vui->region_mmap_addr[i] + addr - - vui->regions[i].userspace_addr); - } - } - return 0; -} - -#define VHOST_LOG_PAGE 0x1000 - -static_always_inline void -vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui, - u64 addr, u64 len, u8 is_host_address) -{ - if (PREDICT_TRUE (vui->log_base_addr == 0 - || !(vui->features & VIRTIO_FEATURE (VHOST_F_LOG_ALL)))) - { - return; - } - if (is_host_address) - { - addr = pointer_to_uword (map_user_mem (vui, (uword) addr)); - } - if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size)) - { - vu_log_debug (vui, "vhost_user_log_dirty_pages(): out of range\n"); - return; - } - - CLIB_MEMORY_BARRIER (); - u64 page = addr / VHOST_LOG_PAGE; - while (page * VHOST_LOG_PAGE < addr + len) - { - ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 
8; - page++; - } -} - -#define vhost_user_log_dirty_ring(vui, vq, member) \ - if (PREDICT_FALSE (vq->log_used)) \ - { \ - vhost_user_log_dirty_pages_2 ( \ - vui, \ - vq->log_guest_addr + \ - STRUCT_OFFSET_OF (vnet_virtio_vring_used_t, member), \ - sizeof (vq->used->member), 0); \ - } - -static_always_inline u8 * -format_vhost_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); - vhost_user_main_t *vum = &vhost_user_main; - vhost_trace_t *t = va_arg (*va, vhost_trace_t *); - vhost_user_intf_t *vui = vum->vhost_user_interfaces + t->device_index; - vnet_sw_interface_t *sw; - u32 indent; - - if (pool_is_free (vum->vhost_user_interfaces, vui)) - { - s = format (s, "vhost-user interface is deleted"); - return s; - } - sw = vnet_get_sw_interface (vnm, vui->sw_if_index); - indent = format_get_indent (s); - s = format (s, "%U %U queue %d\n", format_white_space, indent, - format_vnet_sw_interface_name, vnm, sw, t->qid); - - s = format (s, "%U virtio flags:\n", format_white_space, indent); -#define _(n,i,st) \ - if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \ - s = format (s, "%U %s %s\n", format_white_space, indent, #n, st); - foreach_virtio_trace_flags -#undef _ - s = format (s, "%U virtio_net_hdr first_desc_len %u\n", - format_white_space, indent, t->first_desc_len); - - s = format (s, "%U flags 0x%02x gso_type %u\n", - format_white_space, indent, - t->hdr.hdr.flags, t->hdr.hdr.gso_type); - - if (vui->virtio_net_hdr_sz == 12) - s = format (s, "%U num_buff %u", - format_white_space, indent, t->hdr.num_buffers); - - return s; -} - -static_always_inline u64 -vhost_user_is_packed_ring_supported (vhost_user_intf_t * vui) -{ - return (vui->features & VIRTIO_FEATURE (VIRTIO_F_RING_PACKED)); -} - -static_always_inline u64 -vhost_user_is_event_idx_supported (vhost_user_intf_t * vui) -{ - return (vui->features & 
VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX)); -} - -static_always_inline void -vhost_user_kick (vlib_main_t * vm, vhost_user_vring_t * vq) -{ - vhost_user_main_t *vum = &vhost_user_main; - u64 x = 1; - int fd = UNIX_GET_FD (vq->callfd_idx); - int rv; - - rv = write (fd, &x, sizeof (x)); - if (PREDICT_FALSE (rv <= 0)) - { - clib_unix_warning - ("Error: Could not write to unix socket for callfd %d", fd); - return; - } - - vq->n_since_last_int = 0; - vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time; -} - -static_always_inline u16 -vhost_user_avail_event_idx (vhost_user_vring_t * vq) -{ - volatile u16 *event_idx = (u16 *) & (vq->used->ring[vq->qsz_mask + 1]); - - return *event_idx; -} - -static_always_inline u16 -vhost_user_used_event_idx (vhost_user_vring_t * vq) -{ - volatile u16 *event_idx = (u16 *) & (vq->avail->ring[vq->qsz_mask + 1]); - - return *event_idx; -} - -static_always_inline u16 -vhost_user_need_event (u16 event_idx, u16 new_idx, u16 old_idx) -{ - return ((u16) (new_idx - event_idx - 1) < (u16) (new_idx - old_idx)); -} - -static_always_inline void -vhost_user_send_call_event_idx (vlib_main_t * vm, vhost_user_vring_t * vq) -{ - vhost_user_main_t *vum = &vhost_user_main; - u8 first_kick = vq->first_kick; - u16 event_idx = vhost_user_used_event_idx (vq); - - vq->first_kick = 1; - if (vhost_user_need_event (event_idx, vq->last_used_idx, vq->last_kick) || - PREDICT_FALSE (!first_kick)) - { - vhost_user_kick (vm, vq); - vq->last_kick = event_idx; - } - else - { - vq->n_since_last_int = 0; - vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time; - } -} - -static_always_inline void -vhost_user_send_call_event_idx_packed (vlib_main_t * vm, - vhost_user_vring_t * vq) -{ - vhost_user_main_t *vum = &vhost_user_main; - u8 first_kick = vq->first_kick; - u16 off_wrap; - u16 event_idx; - u16 new_idx = vq->last_used_idx; - u16 old_idx = vq->last_kick; - - if (PREDICT_TRUE (vq->avail_event->flags == VRING_EVENT_F_DESC)) - { - CLIB_COMPILER_BARRIER (); - 
off_wrap = vq->avail_event->off_wrap; - event_idx = off_wrap & 0x7fff; - if (vq->used_wrap_counter != (off_wrap >> 15)) - event_idx -= (vq->qsz_mask + 1); - - if (new_idx <= old_idx) - old_idx -= (vq->qsz_mask + 1); - - vq->first_kick = 1; - vq->last_kick = event_idx; - if (vhost_user_need_event (event_idx, new_idx, old_idx) || - PREDICT_FALSE (!first_kick)) - vhost_user_kick (vm, vq); - else - { - vq->n_since_last_int = 0; - vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time; - } - } - else - vhost_user_kick (vm, vq); -} - -static_always_inline void -vhost_user_send_call (vlib_main_t * vm, vhost_user_intf_t * vui, - vhost_user_vring_t * vq) -{ - if (vhost_user_is_event_idx_supported (vui)) - { - if (vhost_user_is_packed_ring_supported (vui)) - vhost_user_send_call_event_idx_packed (vm, vq); - else - vhost_user_send_call_event_idx (vm, vq); - } - else - vhost_user_kick (vm, vq); -} - -static_always_inline u8 -vui_is_link_up (vhost_user_intf_t * vui) -{ - return vui->admin_up && vui->is_ready; -} - -static_always_inline void -vhost_user_update_gso_interface_count (vhost_user_intf_t * vui, u8 add) -{ - vhost_user_main_t *vum = &vhost_user_main; - - if (vui->enable_gso) - { - if (add) - { - vum->gso_count++; - } - else - { - ASSERT (vum->gso_count > 0); - vum->gso_count--; - } - } -} - -static_always_inline u8 -vhost_user_packed_desc_available (vhost_user_vring_t * vring, u16 idx) -{ - return (((vring->packed_desc[idx].flags & VRING_DESC_F_AVAIL) == - vring->avail_wrap_counter)); -} - -static_always_inline void -vhost_user_advance_last_avail_idx (vhost_user_vring_t * vring) -{ - vring->last_avail_idx++; - if (PREDICT_FALSE ((vring->last_avail_idx & vring->qsz_mask) == 0)) - { - vring->avail_wrap_counter ^= VRING_DESC_F_AVAIL; - vring->last_avail_idx = 0; - } -} - -static_always_inline void -vhost_user_advance_last_avail_table_idx (vhost_user_intf_t * vui, - vhost_user_vring_t * vring, - u8 chained) -{ - if (chained) - { - vnet_virtio_vring_packed_desc_t 
*desc_table = vring->packed_desc; - - /* pick up the slot of the next avail idx */ - while (desc_table[vring->last_avail_idx & vring->qsz_mask].flags & - VRING_DESC_F_NEXT) - vhost_user_advance_last_avail_idx (vring); - } - - vhost_user_advance_last_avail_idx (vring); -} - -static_always_inline void -vhost_user_undo_advanced_last_avail_idx (vhost_user_vring_t * vring) -{ - if (PREDICT_FALSE ((vring->last_avail_idx & vring->qsz_mask) == 0)) - vring->avail_wrap_counter ^= VRING_DESC_F_AVAIL; - - if (PREDICT_FALSE (vring->last_avail_idx == 0)) - vring->last_avail_idx = vring->qsz_mask; - else - vring->last_avail_idx--; -} - -static_always_inline void -vhost_user_dequeue_descs (vhost_user_vring_t *rxvq, - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr, - u16 *n_descs_processed) -{ - u16 i; - - *n_descs_processed -= (hdr->num_buffers - 1); - for (i = 0; i < hdr->num_buffers - 1; i++) - vhost_user_undo_advanced_last_avail_idx (rxvq); -} - -static_always_inline void -vhost_user_dequeue_chained_descs (vhost_user_vring_t * rxvq, - u16 * n_descs_processed) -{ - while (*n_descs_processed) - { - vhost_user_undo_advanced_last_avail_idx (rxvq); - (*n_descs_processed)--; - } -} - -static_always_inline void -vhost_user_advance_last_used_idx (vhost_user_vring_t * vring) -{ - vring->last_used_idx++; - if (PREDICT_FALSE ((vring->last_used_idx & vring->qsz_mask) == 0)) - { - vring->used_wrap_counter ^= 1; - vring->last_used_idx = 0; - } -} - -#endif - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c deleted file mode 100644 index 841a9798212..00000000000 --- a/src/vnet/devices/virtio/vhost_user_input.c +++ /dev/null @@ -1,1474 +0,0 @@ -/* - *------------------------------------------------------------------ - * vhost-user-input - * - * Copyright (c) 2014-2018 Cisco and/or its affiliates. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <fcntl.h> /* for open */ -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/uio.h> /* for iovec */ -#include <netinet/in.h> -#include <sys/vfs.h> - -#include <linux/if_arp.h> -#include <linux/if_tun.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> - -#include <vnet/ethernet/ethernet.h> -#include <vnet/devices/devices.h> -#include <vnet/feature/feature.h> -#include <vnet/udp/udp_packet.h> -#include <vnet/tcp/tcp_packet.h> -#include <vnet/interface/rx_queue_funcs.h> - -#include <vnet/devices/virtio/vhost_user.h> -#include <vnet/devices/virtio/vhost_user_inline.h> - -#include <vnet/ip/ip4_packet.h> -#include <vnet/ip/ip6_packet.h> - -/* - * When an RX queue is down but active, received packets - * must be discarded. This value controls up to how many - * packets will be discarded during each round. - */ -#define VHOST_USER_DOWN_DISCARD_COUNT 256 - -/* - * When the number of available buffers gets under this threshold, - * RX node will start discarding packets. - */ -#define VHOST_USER_RX_BUFFER_STARVATION 32 - -/* - * On the receive side, the host should free descriptors as soon - * as possible in order to avoid TX drop in the VM. 
- * This value controls the number of copy operations that are stacked - * before copy is done for all and descriptors are given back to - * the guest. - * The value 64 was obtained by testing (48 and 128 were not as good). - */ -#define VHOST_USER_RX_COPY_THRESHOLD 64 - -extern vlib_node_registration_t vhost_user_input_node; - -#define foreach_vhost_user_input_func_error \ - _(NO_ERROR, "no error") \ - _(NO_BUFFER, "no available buffer") \ - _(MMAP_FAIL, "mmap failure") \ - _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \ - _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \ - _(NOT_READY, "vhost interface not ready or down") \ - _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)") - -typedef enum -{ -#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f, - foreach_vhost_user_input_func_error -#undef _ - VHOST_USER_INPUT_FUNC_N_ERROR, -} vhost_user_input_func_error_t; - -static __clib_unused char *vhost_user_input_func_error_strings[] = { -#define _(n,s) s, - foreach_vhost_user_input_func_error -#undef _ -}; - -static_always_inline void -vhost_user_rx_trace (vhost_trace_t * t, - vhost_user_intf_t * vui, u16 qid, - vlib_buffer_t * b, vhost_user_vring_t * txvq, - u16 last_avail_idx) -{ - vhost_user_main_t *vum = &vhost_user_main; - u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask]; - vnet_virtio_vring_desc_t *hdr_desc = 0; - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr; - u32 hint = 0; - - clib_memset (t, 0, sizeof (*t)); - t->device_index = vui - vum->vhost_user_interfaces; - t->qid = qid; - - hdr_desc = &txvq->desc[desc_current]; - if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT; - /* Header is the first here */ - hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint); - } - if (txvq->desc[desc_current].flags & VRING_DESC_F_NEXT) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED; - } - if 
(!(txvq->desc[desc_current].flags & VRING_DESC_F_NEXT) && - !(txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC; - } - - t->first_desc_len = hdr_desc ? hdr_desc->len : 0; - - if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint))) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR; - } - else - { - u32 len = vui->virtio_net_hdr_sz; - memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len); - } -} - -static_always_inline u32 -vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy, - u16 copy_len, u32 * map_hint) -{ - void *src0, *src1, *src2, *src3; - if (PREDICT_TRUE (copy_len >= 4)) - { - if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint)))) - return 1; - if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint)))) - return 1; - - while (PREDICT_TRUE (copy_len >= 4)) - { - src0 = src2; - src1 = src3; - - if (PREDICT_FALSE - (!(src2 = map_guest_mem (vui, cpy[2].src, map_hint)))) - return 1; - if (PREDICT_FALSE - (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint)))) - return 1; - - clib_prefetch_load (src2); - clib_prefetch_load (src3); - - clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len); - clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len); - copy_len -= 2; - cpy += 2; - } - } - while (copy_len) - { - if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint)))) - return 1; - clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len); - copy_len -= 1; - cpy += 1; - } - return 0; -} - -/** - * Try to discard packets from the tx ring (VPP RX path). - * Returns the number of discarded packets. - */ -static_always_inline u32 -vhost_user_rx_discard_packet (vlib_main_t * vm, - vhost_user_intf_t * vui, - vhost_user_vring_t * txvq, u32 discard_max) -{ - /* - * On the RX side, each packet corresponds to one descriptor - * (it is the same whether it is a shallow descriptor, chained, or indirect). 
- * Therefore, discarding a packet is like discarding a descriptor. - */ - u32 discarded_packets = 0; - u32 avail_idx = txvq->avail->idx; - u16 mask = txvq->qsz_mask; - u16 last_avail_idx = txvq->last_avail_idx; - u16 last_used_idx = txvq->last_used_idx; - while (discarded_packets != discard_max) - { - if (avail_idx == last_avail_idx) - goto out; - - u16 desc_chain_head = txvq->avail->ring[last_avail_idx & mask]; - last_avail_idx++; - txvq->used->ring[last_used_idx & mask].id = desc_chain_head; - txvq->used->ring[last_used_idx & mask].len = 0; - vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]); - last_used_idx++; - discarded_packets++; - } - -out: - txvq->last_avail_idx = last_avail_idx; - txvq->last_used_idx = last_used_idx; - CLIB_MEMORY_STORE_BARRIER (); - txvq->used->idx = txvq->last_used_idx; - vhost_user_log_dirty_ring (vui, txvq, idx); - return discarded_packets; -} - -/* - * In case of overflow, we need to rewind the array of allocated buffers. - */ -static_always_inline void -vhost_user_input_rewind_buffers (vlib_main_t * vm, - vhost_cpu_t * cpu, vlib_buffer_t * b_head) -{ - u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len]; - vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current); - b_current->current_length = 0; - b_current->flags = 0; - while (b_current != b_head) - { - cpu->rx_buffers_len++; - bi_current = cpu->rx_buffers[cpu->rx_buffers_len]; - b_current = vlib_get_buffer (vm, bi_current); - b_current->current_length = 0; - b_current->flags = 0; - } - cpu->rx_buffers_len++; -} - -static_always_inline void -vhost_user_handle_rx_offload (vlib_buffer_t *b0, u8 *b0_data, - vnet_virtio_net_hdr_t *hdr) -{ - u8 l4_hdr_sz = 0; - u8 l4_proto = 0; - ethernet_header_t *eh = (ethernet_header_t *) b0_data; - u16 ethertype = clib_net_to_host_u16 (eh->type); - u16 l2hdr_sz = sizeof (ethernet_header_t); - vnet_buffer_oflags_t oflags = 0; - - if (ethernet_frame_is_tagged (ethertype)) - { - ethernet_vlan_header_t *vlan = 
(ethernet_vlan_header_t *) (eh + 1); - - ethertype = clib_net_to_host_u16 (vlan->type); - l2hdr_sz += sizeof (*vlan); - if (ethertype == ETHERNET_TYPE_VLAN) - { - vlan++; - ethertype = clib_net_to_host_u16 (vlan->type); - l2hdr_sz += sizeof (*vlan); - } - } - vnet_buffer (b0)->l2_hdr_offset = 0; - vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz; - vnet_buffer (b0)->l4_hdr_offset = hdr->csum_start; - b0->flags |= (VNET_BUFFER_F_L2_HDR_OFFSET_VALID | - VNET_BUFFER_F_L3_HDR_OFFSET_VALID | - VNET_BUFFER_F_L4_HDR_OFFSET_VALID); - - if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4)) - { - ip4_header_t *ip4 = (ip4_header_t *) (b0_data + l2hdr_sz); - l4_proto = ip4->protocol; - b0->flags |= VNET_BUFFER_F_IS_IP4; - oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM; - } - else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6)) - { - ip6_header_t *ip6 = (ip6_header_t *) (b0_data + l2hdr_sz); - l4_proto = ip6->protocol; - b0->flags |= VNET_BUFFER_F_IS_IP6; - } - - if (l4_proto == IP_PROTOCOL_TCP) - { - tcp_header_t *tcp = (tcp_header_t *) - (b0_data + vnet_buffer (b0)->l4_hdr_offset); - l4_hdr_sz = tcp_header_bytes (tcp); - oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM; - } - else if (l4_proto == IP_PROTOCOL_UDP) - { - l4_hdr_sz = sizeof (udp_header_t); - oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM; - } - - if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP) - { - vnet_buffer2 (b0)->gso_size = hdr->gso_size; - vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz; - b0->flags |= VNET_BUFFER_F_GSO; - } - else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4) - { - vnet_buffer2 (b0)->gso_size = hdr->gso_size; - vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz; - b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4); - } - else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6) - { - vnet_buffer2 (b0)->gso_size = hdr->gso_size; - vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz; - b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6); - } - - if (oflags) - vnet_buffer_offload_flags_set (b0, oflags); -} - -static_always_inline 
void -vhost_user_input_do_interrupt (vlib_main_t * vm, vhost_user_intf_t * vui, - vhost_user_vring_t * txvq, - vhost_user_vring_t * rxvq) -{ - f64 now = vlib_time_now (vm); - - if ((txvq->n_since_last_int) && (txvq->int_deadline < now)) - vhost_user_send_call (vm, vui, txvq); - - if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now)) - vhost_user_send_call (vm, vui, rxvq); -} - -static_always_inline void -vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node, - vhost_user_intf_t * vui, - u32 * current_config_index, u32 * next_index, - u32 ** to_next, u32 * n_left_to_next) -{ - vnet_feature_main_t *fm = &feature_main; - u8 feature_arc_idx = fm->device_input_feature_arc_index; - - if (PREDICT_FALSE (vnet_have_features (feature_arc_idx, vui->sw_if_index))) - { - vnet_feature_config_main_t *cm; - cm = &fm->feature_config_mains[feature_arc_idx]; - *current_config_index = vec_elt (cm->config_index_by_sw_if_index, - vui->sw_if_index); - vnet_get_config_data (&cm->config_main, current_config_index, - next_index, 0); - } - - vlib_get_new_next_frame (vm, node, *next_index, *to_next, *n_left_to_next); - - if (*next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT) - { - /* give some hints to ethernet-input */ - vlib_next_frame_t *nf; - vlib_frame_t *f; - ethernet_input_frame_t *ef; - nf = vlib_node_runtime_get_next_frame (vm, node, *next_index); - f = vlib_get_frame (vm, nf->frame); - f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; - - ef = vlib_frame_scalar_args (f); - ef->sw_if_index = vui->sw_if_index; - ef->hw_if_index = vui->hw_if_index; - vlib_frame_no_append (f); - } -} - -static_always_inline u32 -vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum, - vhost_user_intf_t *vui, u16 qid, - vlib_node_runtime_t *node, u8 enable_csum) -{ - vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; - vnet_feature_main_t *fm = &feature_main; - u16 n_rx_packets = 0; - u32 n_rx_bytes = 0; - u16 n_left; - u32 n_left_to_next, *to_next; - u32 
next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - u32 n_trace = vlib_get_trace_count (vm, node); - u32 buffer_data_size = vlib_buffer_get_default_data_size (vm); - u32 map_hint = 0; - vhost_cpu_t *cpu = &vum->cpus[vm->thread_index]; - u16 copy_len = 0; - u8 feature_arc_idx = fm->device_input_feature_arc_index; - u32 current_config_index = ~(u32) 0; - u16 mask = txvq->qsz_mask; - - /* The descriptor table is not ready yet */ - if (PREDICT_FALSE (txvq->avail == 0)) - goto done; - - { - /* do we have pending interrupts ? */ - vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)]; - vhost_user_input_do_interrupt (vm, vui, txvq, rxvq); - } - - /* - * For adaptive mode, it is optimized to reduce interrupts. - * If the scheduler switches the input node to polling due - * to burst of traffic, we tell the driver no interrupt. - * When the traffic subsides, the scheduler switches the node back to - * interrupt mode. We must tell the driver we want interrupt. - */ - if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) - { - if ((node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) || - !(node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) - /* Tell driver we want notification */ - txvq->used->flags = 0; - else - /* Tell driver we don't want notification */ - txvq->used->flags = VRING_USED_F_NO_NOTIFY; - } - - if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE)) - goto done; - - n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx); - - /* nothing to do */ - if (PREDICT_FALSE (n_left == 0)) - goto done; - - if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled))) - { - /* - * Discard input packet if interface is admin down or vring is not - * enabled. - * "For example, for a networking device, in the disabled state - * client must not supply any new RX packets, but must process - * and discard any TX packets." 
- */ - vhost_user_rx_discard_packet (vm, vui, txvq, - VHOST_USER_DOWN_DISCARD_COUNT); - goto done; - } - - if (PREDICT_FALSE (n_left == (mask + 1))) - { - /* - * Informational error logging when VPP is not - * receiving packets fast enough. - */ - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1); - } - - if (n_left > VLIB_FRAME_SIZE) - n_left = VLIB_FRAME_SIZE; - - /* - * For small packets (<2kB), we will not need more than one vlib buffer - * per packet. In case packets are bigger, we will just yield at some point - * in the loop and come back later. This is not an issue as for big packet, - * processing cost really comes from the memory copy. - * The assumption is that big packets will fit in 40 buffers. - */ - if (PREDICT_FALSE (cpu->rx_buffers_len < n_left + 1 || - cpu->rx_buffers_len < 40)) - { - u32 curr_len = cpu->rx_buffers_len; - cpu->rx_buffers_len += - vlib_buffer_alloc (vm, cpu->rx_buffers + curr_len, - VHOST_USER_RX_BUFFERS_N - curr_len); - - if (PREDICT_FALSE - (cpu->rx_buffers_len < VHOST_USER_RX_BUFFER_STARVATION)) - { - /* In case of buffer starvation, discard some packets from the queue - * and log the event. - * We keep doing best effort for the remaining packets. */ - u32 flush = (n_left + 1 > cpu->rx_buffers_len) ? - n_left + 1 - cpu->rx_buffers_len : 1; - flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush); - - n_left -= flush; - vlib_increment_simple_counter (vnet_main. 
- interface_main.sw_if_counters + - VNET_INTERFACE_COUNTER_DROP, - vm->thread_index, vui->sw_if_index, - flush); - - vlib_error_count (vm, vhost_user_input_node.index, - VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush); - } - } - - vhost_user_input_setup_frame (vm, node, vui, ¤t_config_index, - &next_index, &to_next, &n_left_to_next); - - u16 last_avail_idx = txvq->last_avail_idx; - u16 last_used_idx = txvq->last_used_idx; - - while (n_left > 0) - { - vlib_buffer_t *b_head, *b_current; - u32 bi_current; - u16 desc_current; - u32 desc_data_offset; - vnet_virtio_vring_desc_t *desc_table = txvq->desc; - - if (PREDICT_FALSE (cpu->rx_buffers_len <= 1)) - { - /* Not enough rx_buffers - * Note: We yeld on 1 so we don't need to do an additional - * check for the next buffer prefetch. - */ - n_left = 0; - break; - } - - desc_current = txvq->avail->ring[last_avail_idx & mask]; - cpu->rx_buffers_len--; - bi_current = cpu->rx_buffers[cpu->rx_buffers_len]; - b_head = b_current = vlib_get_buffer (vm, bi_current); - to_next[0] = bi_current; //We do that now so we can forget about bi_current - to_next++; - n_left_to_next--; - - vlib_prefetch_buffer_with_index - (vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD); - - /* Just preset the used descriptor id and length for later */ - txvq->used->ring[last_used_idx & mask].id = desc_current; - txvq->used->ring[last_used_idx & mask].len = 0; - vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]); - - /* The buffer should already be initialized */ - b_head->total_length_not_including_first_buffer = 0; - b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; - - if (PREDICT_FALSE - (n_trace > 0 && vlib_trace_buffer (vm, node, next_index, b_head, - /* follow_chain */ 0))) - { - vhost_trace_t *t0 = - vlib_add_trace (vm, node, b_head, sizeof (t0[0])); - vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx); - n_trace--; - vlib_set_trace_count (vm, node, n_trace); - } - - /* This depends on the setup but is very consistent - * 
So I think the CPU branch predictor will make a pretty good job - * at optimizing the decision. */ - if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT) - { - desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr, - &map_hint); - desc_current = 0; - if (PREDICT_FALSE (desc_table == 0)) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); - goto out; - } - } - - desc_data_offset = vui->virtio_net_hdr_sz; - - if (enable_csum) - { - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr; - u8 *b_data; - u16 current; - - hdr = map_guest_mem (vui, desc_table[desc_current].addr, &map_hint); - if (PREDICT_FALSE (hdr == 0)) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); - goto out; - } - if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) - { - if ((desc_data_offset == desc_table[desc_current].len) && - (desc_table[desc_current].flags & VRING_DESC_F_NEXT)) - { - current = desc_table[desc_current].next; - b_data = map_guest_mem (vui, desc_table[current].addr, - &map_hint); - if (PREDICT_FALSE (b_data == 0)) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, - 1); - goto out; - } - } - else - b_data = (u8 *) hdr + desc_data_offset; - - vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr); - } - } - - while (1) - { - /* Get more input if necessary. Or end of packet. */ - if (desc_data_offset == desc_table[desc_current].len) - { - if (PREDICT_FALSE (desc_table[desc_current].flags & - VRING_DESC_F_NEXT)) - { - desc_current = desc_table[desc_current].next; - desc_data_offset = 0; - } - else - { - goto out; - } - } - - /* Get more output if necessary. Or end of packet. */ - if (PREDICT_FALSE (b_current->current_length == buffer_data_size)) - { - if (PREDICT_FALSE (cpu->rx_buffers_len == 0)) - { - /* Cancel speculation */ - to_next--; - n_left_to_next++; - - /* - * Checking if there are some left buffers. - * If not, just rewind the used buffers and stop. 
- * Note: Scheduled copies are not cancelled. This is - * not an issue as they would still be valid. Useless, - * but valid. - */ - vhost_user_input_rewind_buffers (vm, cpu, b_head); - n_left = 0; - goto stop; - } - - /* Get next output */ - cpu->rx_buffers_len--; - u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len]; - b_current->next_buffer = bi_next; - b_current->flags |= VLIB_BUFFER_NEXT_PRESENT; - bi_current = bi_next; - b_current = vlib_get_buffer (vm, bi_current); - } - - /* Prepare a copy order executed later for the data */ - ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N); - vhost_copy_t *cpy = &cpu->copy[copy_len]; - copy_len++; - u32 desc_data_l = desc_table[desc_current].len - desc_data_offset; - cpy->len = buffer_data_size - b_current->current_length; - cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len; - cpy->dst = (uword) (vlib_buffer_get_current (b_current) + - b_current->current_length); - cpy->src = desc_table[desc_current].addr + desc_data_offset; - - desc_data_offset += cpy->len; - - b_current->current_length += cpy->len; - b_head->total_length_not_including_first_buffer += cpy->len; - } - - out: - - n_rx_bytes += b_head->total_length_not_including_first_buffer; - n_rx_packets++; - - b_head->total_length_not_including_first_buffer -= - b_head->current_length; - - /* consume the descriptor and return it as used */ - last_avail_idx++; - last_used_idx++; - - vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index; - vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0; - b_head->error = 0; - - if (current_config_index != ~(u32) 0) - { - b_head->current_config_index = current_config_index; - vnet_buffer (b_head)->feature_arc_index = feature_arc_idx; - } - - n_left--; - - /* - * Although separating memory copies from virtio ring parsing - * is beneficial, we can offer to perform the copies from time - * to time in order to free some space in the ring. 
- */ - if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD)) - { - if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, - copy_len, &map_hint))) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); - } - copy_len = 0; - - /* give buffers back to driver */ - CLIB_MEMORY_STORE_BARRIER (); - txvq->used->idx = last_used_idx; - vhost_user_log_dirty_ring (vui, txvq, idx); - } - } -stop: - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - - txvq->last_used_idx = last_used_idx; - txvq->last_avail_idx = last_avail_idx; - - /* Do the memory copies */ - if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, copy_len, - &map_hint))) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); - } - - /* give buffers back to driver */ - CLIB_MEMORY_STORE_BARRIER (); - txvq->used->idx = txvq->last_used_idx; - vhost_user_log_dirty_ring (vui, txvq, idx); - - /* interrupt (call) handling */ - if ((txvq->callfd_idx != ~0) && - !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - { - txvq->n_since_last_int += n_rx_packets; - - if (txvq->n_since_last_int > vum->coalesce_frames) - vhost_user_send_call (vm, vui, txvq); - } - - /* increase rx counters */ - vlib_increment_combined_counter - (vnet_main.interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index, - n_rx_packets, n_rx_bytes); - - vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets); - -done: - return n_rx_packets; -} - -static_always_inline void -vhost_user_mark_desc_consumed (vhost_user_intf_t * vui, - vhost_user_vring_t * txvq, u16 desc_head, - u16 n_descs_processed) -{ - vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc; - u16 desc_idx; - u16 mask = txvq->qsz_mask; - - for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++) - { - if (txvq->used_wrap_counter) - desc_table[(desc_head + desc_idx) & mask].flags |= - (VRING_DESC_F_AVAIL | 
VRING_DESC_F_USED); - else - desc_table[(desc_head + desc_idx) & mask].flags &= - ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED); - vhost_user_advance_last_used_idx (txvq); - } -} - -static_always_inline void -vhost_user_rx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui, - u16 qid, vhost_user_vring_t * txvq, - u16 desc_current) -{ - vhost_user_main_t *vum = &vhost_user_main; - vnet_virtio_vring_packed_desc_t *hdr_desc; - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr; - u32 hint = 0; - - clib_memset (t, 0, sizeof (*t)); - t->device_index = vui - vum->vhost_user_interfaces; - t->qid = qid; - - hdr_desc = &txvq->packed_desc[desc_current]; - if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT; - /* Header is the first here */ - hdr_desc = map_guest_mem (vui, txvq->packed_desc[desc_current].addr, - &hint); - } - if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED; - - if (!(txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) && - !(txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)) - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC; - - t->first_desc_len = hdr_desc ? hdr_desc->len : 0; - - if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint))) - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR; - else - { - u32 len = vui->virtio_net_hdr_sz; - clib_memcpy_fast (&t->hdr, hdr, - len > hdr_desc->len ? hdr_desc->len : len); - } -} - -static_always_inline u32 -vhost_user_rx_discard_packet_packed (vlib_main_t * vm, - vhost_user_intf_t * vui, - vhost_user_vring_t * txvq, - u32 discard_max) -{ - u32 discarded_packets = 0; - u16 mask = txvq->qsz_mask; - u16 desc_current, desc_head; - - desc_head = desc_current = txvq->last_used_idx & mask; - - /* - * On the RX side, each packet corresponds to one descriptor - * (it is the same whether it is a shallow descriptor, chained, or indirect). 
- * Therefore, discarding a packet is like discarding a descriptor. - */ - while ((discarded_packets != discard_max) && - vhost_user_packed_desc_available (txvq, desc_current)) - { - vhost_user_advance_last_avail_idx (txvq); - discarded_packets++; - desc_current = (desc_current + 1) & mask; - } - - if (PREDICT_TRUE (discarded_packets)) - vhost_user_mark_desc_consumed (vui, txvq, desc_head, discarded_packets); - return (discarded_packets); -} - -static_always_inline u32 -vhost_user_input_copy_packed (vhost_user_intf_t * vui, vhost_copy_t * cpy, - u16 copy_len, u32 * map_hint) -{ - void *src0, *src1, *src2, *src3, *src4, *src5, *src6, *src7; - u8 bad; - u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR; - - if (PREDICT_TRUE (copy_len >= 8)) - { - src4 = map_guest_mem (vui, cpy[0].src, map_hint); - src5 = map_guest_mem (vui, cpy[1].src, map_hint); - src6 = map_guest_mem (vui, cpy[2].src, map_hint); - src7 = map_guest_mem (vui, cpy[3].src, map_hint); - bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0); - if (PREDICT_FALSE (bad)) - goto one_by_one; - clib_prefetch_load (src4); - clib_prefetch_load (src5); - clib_prefetch_load (src6); - clib_prefetch_load (src7); - - while (PREDICT_TRUE (copy_len >= 8)) - { - src0 = src4; - src1 = src5; - src2 = src6; - src3 = src7; - - src4 = map_guest_mem (vui, cpy[4].src, map_hint); - src5 = map_guest_mem (vui, cpy[5].src, map_hint); - src6 = map_guest_mem (vui, cpy[6].src, map_hint); - src7 = map_guest_mem (vui, cpy[7].src, map_hint); - bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0); - if (PREDICT_FALSE (bad)) - break; - - clib_prefetch_load (src4); - clib_prefetch_load (src5); - clib_prefetch_load (src6); - clib_prefetch_load (src7); - - clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len); - clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len); - clib_memcpy_fast ((void *) cpy[2].dst, src2, cpy[2].len); - clib_memcpy_fast ((void *) cpy[3].dst, src3, cpy[3].len); - copy_len -= 4; - cpy += 4; - } - } - 
-one_by_one: - while (copy_len) - { - if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint)))) - { - rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL; - break; - } - clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len); - copy_len -= 1; - cpy += 1; - } - return rc; -} - -static_always_inline u32 -vhost_user_do_offload (vhost_user_intf_t *vui, - vnet_virtio_vring_packed_desc_t *desc_table, - u16 desc_current, u16 mask, vlib_buffer_t *b_head, - u32 *map_hint) -{ - u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR; - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr; - u8 *b_data; - u32 desc_data_offset = vui->virtio_net_hdr_sz; - - hdr = map_guest_mem (vui, desc_table[desc_current].addr, map_hint); - if (PREDICT_FALSE (hdr == 0)) - rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL; - else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) - { - if (desc_data_offset == desc_table[desc_current].len) - { - desc_current = (desc_current + 1) & mask; - b_data = - map_guest_mem (vui, desc_table[desc_current].addr, map_hint); - if (PREDICT_FALSE (b_data == 0)) - rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL; - else - vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr); - } - else - { - b_data = (u8 *) hdr + desc_data_offset; - vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr); - } - } - - return rc; -} - -static_always_inline u32 -vhost_user_compute_buffers_required (u32 desc_len, u32 buffer_data_size) -{ - div_t result; - u32 buffers_required; - - if (PREDICT_TRUE (buffer_data_size == 2048)) - { - buffers_required = desc_len >> 11; - if ((desc_len & 2047) != 0) - buffers_required++; - return (buffers_required); - } - - if (desc_len < buffer_data_size) - return 1; - - result = div (desc_len, buffer_data_size); - if (result.rem) - buffers_required = result.quot + 1; - else - buffers_required = result.quot; - - return (buffers_required); -} - -static_always_inline u32 -vhost_user_compute_indirect_desc_len (vhost_user_intf_t * vui, - vhost_user_vring_t * txvq, - u32 buffer_data_size, 
u16 desc_current, - u32 * map_hint) -{ - vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc; - u32 desc_len = 0; - u16 desc_data_offset = vui->virtio_net_hdr_sz; - u16 desc_idx = desc_current; - u32 n_descs; - - n_descs = desc_table[desc_idx].len >> 4; - desc_table = map_guest_mem (vui, desc_table[desc_idx].addr, map_hint); - if (PREDICT_FALSE (desc_table == 0)) - return 0; - - for (desc_idx = 0; desc_idx < n_descs; desc_idx++) - desc_len += desc_table[desc_idx].len; - - if (PREDICT_TRUE (desc_len > desc_data_offset)) - desc_len -= desc_data_offset; - - return vhost_user_compute_buffers_required (desc_len, buffer_data_size); -} - -static_always_inline u32 -vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui, - vhost_user_vring_t * txvq, - u32 buffer_data_size, u16 * current, - u16 * n_left) -{ - vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc; - u32 desc_len = 0; - u16 mask = txvq->qsz_mask; - - while (desc_table[*current].flags & VRING_DESC_F_NEXT) - { - desc_len += desc_table[*current].len; - (*n_left)++; - *current = (*current + 1) & mask; - vhost_user_advance_last_avail_idx (txvq); - } - desc_len += desc_table[*current].len; - (*n_left)++; - *current = (*current + 1) & mask; - vhost_user_advance_last_avail_idx (txvq); - - if (PREDICT_TRUE (desc_len > vui->virtio_net_hdr_sz)) - desc_len -= vui->virtio_net_hdr_sz; - - return vhost_user_compute_buffers_required (desc_len, buffer_data_size); -} - -static_always_inline void -vhost_user_assemble_packet (vnet_virtio_vring_packed_desc_t *desc_table, - u16 *desc_idx, vlib_buffer_t *b_head, - vlib_buffer_t **b_current, u32 **next, - vlib_buffer_t ***b, u32 *bi_current, - vhost_cpu_t *cpu, u16 *copy_len, u32 *buffers_used, - u32 buffers_required, u32 *desc_data_offset, - u32 buffer_data_size, u16 mask) -{ - u32 desc_data_l; - - while (*desc_data_offset < desc_table[*desc_idx].len) - { - /* Get more output if necessary. Or end of packet. 
*/ - if (PREDICT_FALSE ((*b_current)->current_length == buffer_data_size)) - { - /* Get next output */ - u32 bi_next = **next; - (*next)++; - (*b_current)->next_buffer = bi_next; - (*b_current)->flags |= VLIB_BUFFER_NEXT_PRESENT; - *bi_current = bi_next; - *b_current = **b; - (*b)++; - (*buffers_used)++; - ASSERT (*buffers_used <= buffers_required); - } - - /* Prepare a copy order executed later for the data */ - ASSERT (*copy_len < VHOST_USER_COPY_ARRAY_N); - vhost_copy_t *cpy = &cpu->copy[*copy_len]; - (*copy_len)++; - desc_data_l = desc_table[*desc_idx].len - *desc_data_offset; - cpy->len = buffer_data_size - (*b_current)->current_length; - cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len; - cpy->dst = (uword) (vlib_buffer_get_current (*b_current) + - (*b_current)->current_length); - cpy->src = desc_table[*desc_idx].addr + *desc_data_offset; - - *desc_data_offset += cpy->len; - - (*b_current)->current_length += cpy->len; - b_head->total_length_not_including_first_buffer += cpy->len; - } - *desc_idx = (*desc_idx + 1) & mask;; - *desc_data_offset = 0; -} - -static_always_inline u32 -vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum, - vhost_user_intf_t *vui, u16 qid, - vlib_node_runtime_t *node, u8 enable_csum) -{ - vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; - vnet_feature_main_t *fm = &feature_main; - u8 feature_arc_idx = fm->device_input_feature_arc_index; - u16 n_rx_packets = 0; - u32 n_rx_bytes = 0; - u16 n_left = 0; - u32 buffers_required = 0; - u32 n_left_to_next, *to_next; - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - u32 n_trace = vlib_get_trace_count (vm, node); - u32 buffer_data_size = vlib_buffer_get_default_data_size (vm); - u32 map_hint = 0; - vhost_cpu_t *cpu = &vum->cpus[vm->thread_index]; - u16 copy_len = 0; - u32 current_config_index = ~0; - u16 mask = txvq->qsz_mask; - u16 desc_current, desc_head, last_used_idx; - vnet_virtio_vring_packed_desc_t *desc_table = 0; - u32 
n_descs_processed = 0; - u32 rv; - vlib_buffer_t **b; - u32 *next; - u32 buffers_used = 0; - u16 current, n_descs_to_process; - - /* The descriptor table is not ready yet */ - if (PREDICT_FALSE (txvq->packed_desc == 0)) - goto done; - - /* do we have pending interrupts ? */ - vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)]; - vhost_user_input_do_interrupt (vm, vui, txvq, rxvq); - - /* - * For adaptive mode, it is optimized to reduce interrupts. - * If the scheduler switches the input node to polling due - * to burst of traffic, we tell the driver no interrupt. - * When the traffic subsides, the scheduler switches the node back to - * interrupt mode. We must tell the driver we want interrupt. - */ - if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) - { - if ((node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) || - !(node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) - /* Tell driver we want notification */ - txvq->used_event->flags = 0; - else - /* Tell driver we don't want notification */ - txvq->used_event->flags = VRING_EVENT_F_DISABLE; - } - - last_used_idx = txvq->last_used_idx & mask; - desc_head = desc_current = last_used_idx; - - if (vhost_user_packed_desc_available (txvq, desc_current) == 0) - goto done; - - if (PREDICT_FALSE (!vui->admin_up || !vui->is_ready || !(txvq->enabled))) - { - /* - * Discard input packet if interface is admin down or vring is not - * enabled. - * "For example, for a networking device, in the disabled state - * client must not supply any new RX packets, but must process - * and discard any TX packets." 
- */ - rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, - VHOST_USER_DOWN_DISCARD_COUNT); - vlib_error_count (vm, vhost_user_input_node.index, - VHOST_USER_INPUT_FUNC_ERROR_NOT_READY, rv); - goto done; - } - - vhost_user_input_setup_frame (vm, node, vui, ¤t_config_index, - &next_index, &to_next, &n_left_to_next); - - /* - * Compute n_left and total buffers needed - */ - desc_table = txvq->packed_desc; - current = desc_current; - while (vhost_user_packed_desc_available (txvq, current) && - (n_left < VLIB_FRAME_SIZE)) - { - if (desc_table[current].flags & VRING_DESC_F_INDIRECT) - { - buffers_required += - vhost_user_compute_indirect_desc_len (vui, txvq, buffer_data_size, - current, &map_hint); - n_left++; - current = (current + 1) & mask; - vhost_user_advance_last_avail_idx (txvq); - } - else - { - buffers_required += - vhost_user_compute_chained_desc_len (vui, txvq, buffer_data_size, - ¤t, &n_left); - } - } - - /* Something is broken if we need more than 10000 buffers */ - if (PREDICT_FALSE ((buffers_required == 0) || (buffers_required > 10000))) - { - rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, n_left); - vlib_error_count (vm, vhost_user_input_node.index, - VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv); - goto done; - } - - vec_validate (cpu->to_next_list, buffers_required); - rv = vlib_buffer_alloc (vm, cpu->to_next_list, buffers_required); - if (PREDICT_FALSE (rv != buffers_required)) - { - vlib_buffer_free (vm, cpu->to_next_list, rv); - rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, n_left); - vlib_error_count (vm, vhost_user_input_node.index, - VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv); - goto done; - } - - next = cpu->to_next_list; - vec_validate (cpu->rx_buffers_pdesc, buffers_required); - vlib_get_buffers (vm, next, cpu->rx_buffers_pdesc, buffers_required); - b = cpu->rx_buffers_pdesc; - n_descs_processed = n_left; - - while (n_left) - { - vlib_buffer_t *b_head, *b_current; - u32 bi_current; - u32 desc_data_offset; - u16 desc_idx 
= desc_current; - u32 n_descs; - - desc_table = txvq->packed_desc; - to_next[0] = bi_current = next[0]; - b_head = b_current = b[0]; - b++; - buffers_used++; - ASSERT (buffers_used <= buffers_required); - to_next++; - next++; - n_left_to_next--; - - /* The buffer should already be initialized */ - b_head->total_length_not_including_first_buffer = 0; - b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; - desc_data_offset = vui->virtio_net_hdr_sz; - n_descs_to_process = 1; - - if (desc_table[desc_idx].flags & VRING_DESC_F_INDIRECT) - { - n_descs = desc_table[desc_idx].len >> 4; - desc_table = map_guest_mem (vui, desc_table[desc_idx].addr, - &map_hint); - desc_idx = 0; - if (PREDICT_FALSE (desc_table == 0) || - (enable_csum && - (PREDICT_FALSE - (vhost_user_do_offload - (vui, desc_table, desc_idx, mask, b_head, - &map_hint) != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR)))) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); - to_next--; - next--; - n_left_to_next++; - buffers_used--; - b--; - goto out; - } - while (n_descs) - { - vhost_user_assemble_packet (desc_table, &desc_idx, b_head, - &b_current, &next, &b, &bi_current, - cpu, ©_len, &buffers_used, - buffers_required, &desc_data_offset, - buffer_data_size, mask); - n_descs--; - } - } - else - { - if (enable_csum) - { - rv = vhost_user_do_offload (vui, desc_table, desc_idx, mask, - b_head, &map_hint); - if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR)) - { - vlib_error_count (vm, node->node_index, rv, 1); - to_next--; - next--; - n_left_to_next++; - buffers_used--; - b--; - goto out; - } - } - /* - * For chained descriptor, we process all chains in a single while - * loop. So count how many descriptors in the chain. 
- */ - n_descs_to_process = 1; - while (desc_table[desc_idx].flags & VRING_DESC_F_NEXT) - { - vhost_user_assemble_packet (desc_table, &desc_idx, b_head, - &b_current, &next, &b, &bi_current, - cpu, ©_len, &buffers_used, - buffers_required, &desc_data_offset, - buffer_data_size, mask); - n_descs_to_process++; - } - vhost_user_assemble_packet (desc_table, &desc_idx, b_head, - &b_current, &next, &b, &bi_current, - cpu, ©_len, &buffers_used, - buffers_required, &desc_data_offset, - buffer_data_size, mask); - } - - n_rx_bytes += b_head->total_length_not_including_first_buffer; - n_rx_packets++; - - b_head->total_length_not_including_first_buffer -= - b_head->current_length; - - vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index; - vnet_buffer (b_head)->sw_if_index[VLIB_TX] = ~0; - b_head->error = 0; - - if (current_config_index != ~0) - { - b_head->current_config_index = current_config_index; - vnet_buffer (b_head)->feature_arc_index = feature_arc_idx; - } - - out: - ASSERT (n_left >= n_descs_to_process); - n_left -= n_descs_to_process; - - /* advance to next descrptor */ - desc_current = (desc_current + n_descs_to_process) & mask; - - /* - * Although separating memory copies from virtio ring parsing - * is beneficial, we can offer to perform the copies from time - * to time in order to free some space in the ring. 
- */ - if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD)) - { - rv = vhost_user_input_copy_packed (vui, cpu->copy, copy_len, - &map_hint); - if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR)) - vlib_error_count (vm, node->node_index, rv, 1); - copy_len = 0; - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - - /* Do the memory copies */ - rv = vhost_user_input_copy_packed (vui, cpu->copy, copy_len, &map_hint); - if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR)) - vlib_error_count (vm, node->node_index, rv, 1); - - /* Must do the tracing before giving buffers back to driver */ - if (PREDICT_FALSE (n_trace)) - { - u32 left = n_rx_packets; - - b = cpu->rx_buffers_pdesc; - while (n_trace && left) - { - if (PREDICT_TRUE - (vlib_trace_buffer - (vm, node, next_index, b[0], /* follow_chain */ 0))) - { - vhost_trace_t *t0; - t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0])); - vhost_user_rx_trace_packed (t0, vui, qid, txvq, last_used_idx); - last_used_idx = (last_used_idx + 1) & mask; - n_trace--; - vlib_set_trace_count (vm, node, n_trace); - } - left--; - b++; - } - } - - /* - * Give buffers back to driver. 
- */ - vhost_user_mark_desc_consumed (vui, txvq, desc_head, n_descs_processed); - - /* interrupt (call) handling */ - if ((txvq->callfd_idx != ~0) && - (txvq->avail_event->flags != VRING_EVENT_F_DISABLE)) - { - txvq->n_since_last_int += n_rx_packets; - if (txvq->n_since_last_int > vum->coalesce_frames) - vhost_user_send_call (vm, vui, txvq); - } - - /* increase rx counters */ - vlib_increment_combined_counter - (vnet_main.interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index, - n_rx_packets, n_rx_bytes); - - vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets); - - if (PREDICT_FALSE (buffers_used < buffers_required)) - vlib_buffer_free (vm, next, buffers_required - buffers_used); - -done: - return n_rx_packets; -} - -VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - vhost_user_main_t *vum = &vhost_user_main; - uword n_rx_packets = 0; - vhost_user_intf_t *vui; - vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node); - vnet_hw_if_rxq_poll_vector_t *pve; - - vec_foreach (pve, pv) - { - vui = pool_elt_at_index (vum->vhost_user_interfaces, pve->dev_instance); - if (vhost_user_is_packed_ring_supported (vui)) - { - if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM)) - n_rx_packets += vhost_user_if_input_packed ( - vm, vum, vui, pve->queue_id, node, 1); - else - n_rx_packets += vhost_user_if_input_packed ( - vm, vum, vui, pve->queue_id, node, 0); - } - else - { - if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM)) - n_rx_packets += - vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 1); - else - n_rx_packets += - vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 0); - } - } - - return n_rx_packets; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (vhost_user_input_node) = { - .type = VLIB_NODE_TYPE_INPUT, - .name = "vhost-user-input", - .sibling_of = "device-input", - .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, 
- - /* Will be enabled if/when hardware is detected. */ - .state = VLIB_NODE_STATE_DISABLED, - - .format_buffer = format_ethernet_header_with_length, - .format_trace = format_vhost_trace, - - .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR, - .error_strings = vhost_user_input_func_error_strings, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/vhost_user_output.c b/src/vnet/devices/virtio/vhost_user_output.c deleted file mode 100644 index 3b7bf97c3f8..00000000000 --- a/src/vnet/devices/virtio/vhost_user_output.c +++ /dev/null @@ -1,1145 +0,0 @@ -/* - *------------------------------------------------------------------ - * vhost-user-output - * - * Copyright (c) 2014-2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include <stddef.h> -#include <fcntl.h> /* for open */ -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/uio.h> /* for iovec */ -#include <netinet/in.h> -#include <sys/vfs.h> - -#include <linux/if_arp.h> -#include <linux/if_tun.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> - -#include <vnet/ethernet/ethernet.h> -#include <vnet/devices/devices.h> -#include <vnet/feature/feature.h> -#include <vnet/ip/ip_psh_cksum.h> - -#include <vnet/devices/virtio/vhost_user.h> -#include <vnet/devices/virtio/vhost_user_inline.h> - -#include <vnet/gso/hdr_offset_parser.h> -/* - * On the transmit side, we keep processing the buffers from vlib in the while - * loop and prepare the copy order to be executed later. However, the static - * array which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N - * entries. In order to not corrupt memory, we have to do the copy when the - * static array reaches the copy threshold. We subtract 40 in case the code - * goes into the inner loop for a maximum of 64k frames which may require - * more array entries. We subtract 200 because our default buffer size is - * 2048 and the default desc len is likely 1536. While it takes less than 40 - * vlib buffers for the jumbo frame, it may take twice as much descriptors - * for the same jumbo frame. Use 200 for the extra head room. 
- */ -#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200) - -extern vnet_device_class_t vhost_user_device_class; - -#define foreach_vhost_user_tx_func_error \ - _(NONE, "no error") \ - _(NOT_READY, "vhost vring not ready") \ - _(DOWN, "vhost interface is down") \ - _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \ - _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \ - _(MMAP_FAIL, "mmap failure") \ - _(INDIRECT_OVERFLOW, "indirect descriptor table overflow") - -typedef enum -{ -#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f, - foreach_vhost_user_tx_func_error -#undef _ - VHOST_USER_TX_FUNC_N_ERROR, -} vhost_user_tx_func_error_t; - -static __clib_unused char *vhost_user_tx_func_error_strings[] = { -#define _(n,s) s, - foreach_vhost_user_tx_func_error -#undef _ -}; - -static __clib_unused u8 * -format_vhost_user_interface_name (u8 * s, va_list * args) -{ - u32 i = va_arg (*args, u32); - u32 show_dev_instance = ~0; - vhost_user_main_t *vum = &vhost_user_main; - - if (i < vec_len (vum->show_dev_instance_by_real_dev_instance)) - show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i]; - - if (show_dev_instance != ~0) - i = show_dev_instance; - - s = format (s, "VirtualEthernet0/0/%d", i); - return s; -} - -static __clib_unused int -vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance) -{ - // FIXME: check if the new dev instance is already used - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces, - hi->dev_instance); - - vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance, - hi->dev_instance, ~0); - - vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] = - new_dev_instance; - - vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d", - hi->dev_instance, new_dev_instance); - - return 0; -} - -static_always_inline void -vhost_user_tx_trace (vhost_trace_t * t, - vhost_user_intf_t * vui, 
u16 qid, - vlib_buffer_t * b, vhost_user_vring_t * rxvq) -{ - vhost_user_main_t *vum = &vhost_user_main; - u32 last_avail_idx = rxvq->last_avail_idx; - u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask]; - vnet_virtio_vring_desc_t *hdr_desc = 0; - u32 hint = 0; - - clib_memset (t, 0, sizeof (*t)); - t->device_index = vui - vum->vhost_user_interfaces; - t->qid = qid; - - hdr_desc = &rxvq->desc[desc_current]; - if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT; - /* Header is the first here */ - hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint); - } - if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED; - } - if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) && - !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC; - } - - t->first_desc_len = hdr_desc ? 
hdr_desc->len : 0; -} - -static_always_inline u32 -vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy, - u16 copy_len, u32 * map_hint) -{ - void *dst0, *dst1, *dst2, *dst3; - if (PREDICT_TRUE (copy_len >= 4)) - { - if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint)))) - return 1; - if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint)))) - return 1; - while (PREDICT_TRUE (copy_len >= 4)) - { - dst0 = dst2; - dst1 = dst3; - - if (PREDICT_FALSE - (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint)))) - return 1; - if (PREDICT_FALSE - (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint)))) - return 1; - - clib_prefetch_load ((void *) cpy[2].src); - clib_prefetch_load ((void *) cpy[3].src); - - clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len); - clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len); - - vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1); - vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1); - copy_len -= 2; - cpy += 2; - } - } - while (copy_len) - { - if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint)))) - return 1; - clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len); - vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1); - copy_len -= 1; - cpy += 1; - } - return 0; -} - -static_always_inline void -vhost_user_handle_tx_offload (vhost_user_intf_t *vui, vlib_buffer_t *b, - vnet_virtio_net_hdr_t *hdr) -{ - generic_header_offset_t gho = { 0 }; - int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4; - int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6; - vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags; - u16 psh_cksum = 0; - ip4_header_t *ip4 = 0; - ip6_header_t *ip6 = 0; - - ASSERT (!(is_ip4 && is_ip6)); - vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6); - if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) - { - ip4 = - (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset); - ip4->checksum = ip4_header_checksum (ip4); - 
psh_cksum = ip4_pseudo_header_cksum (ip4); - } - else - { - ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset); - psh_cksum = ip6_pseudo_header_cksum (ip6); - } - - /* checksum offload */ - if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM) - { - udp_header_t *udp = - (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset); - udp->checksum = psh_cksum; - hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = gho.l4_hdr_offset; - hdr->csum_offset = offsetof (udp_header_t, checksum); - } - else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) - { - tcp_header_t *tcp = - (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset); - tcp->checksum = psh_cksum; - hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = gho.l4_hdr_offset; - hdr->csum_offset = offsetof (tcp_header_t, checksum); - } - - /* GSO offload */ - if (b->flags & VNET_BUFFER_F_GSO) - { - if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) - { - if (is_ip4 && - (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4))) - { - hdr->gso_size = vnet_buffer2 (b)->gso_size; - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; - } - else if (is_ip6 && - (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6))) - { - hdr->gso_size = vnet_buffer2 (b)->gso_size; - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - } - } - else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) && - (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)) - { - hdr->gso_size = vnet_buffer2 (b)->gso_size; - hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; - } - } -} - -static_always_inline void -vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui, - vhost_user_vring_t * rxvq, - u16 * n_descs_processed, u8 chained, - vlib_frame_t * frame, u32 n_left) -{ - u16 desc_idx, flags; - vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc; - u16 last_used_idx = rxvq->last_used_idx; - - if (PREDICT_FALSE (*n_descs_processed == 0)) - return; - - if (rxvq->used_wrap_counter) - flags = 
desc_table[last_used_idx & rxvq->qsz_mask].flags | - (VRING_DESC_F_AVAIL | VRING_DESC_F_USED); - else - flags = desc_table[last_used_idx & rxvq->qsz_mask].flags & - ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED); - - vhost_user_advance_last_used_idx (rxvq); - - for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++) - { - if (rxvq->used_wrap_counter) - desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |= - (VRING_DESC_F_AVAIL | VRING_DESC_F_USED); - else - desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &= - ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED); - vhost_user_advance_last_used_idx (rxvq); - } - - desc_table[last_used_idx & rxvq->qsz_mask].flags = flags; - - *n_descs_processed = 0; - - if (chained) - { - vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc; - - while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags & - VRING_DESC_F_NEXT) - vhost_user_advance_last_used_idx (rxvq); - - /* Advance past the current chained table entries */ - vhost_user_advance_last_used_idx (rxvq); - } - - /* interrupt (call) handling */ - if ((rxvq->callfd_idx != ~0) && - (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE)) - { - vhost_user_main_t *vum = &vhost_user_main; - - rxvq->n_since_last_int += frame->n_vectors - n_left; - if (rxvq->n_since_last_int > vum->coalesce_frames) - vhost_user_send_call (vm, vui, rxvq); - } -} - -static_always_inline void -vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui, - u16 qid, vlib_buffer_t * b, - vhost_user_vring_t * rxvq) -{ - vhost_user_main_t *vum = &vhost_user_main; - u32 last_avail_idx = rxvq->last_avail_idx; - u32 desc_current = last_avail_idx & rxvq->qsz_mask; - vnet_virtio_vring_packed_desc_t *hdr_desc = 0; - u32 hint = 0; - - clib_memset (t, 0, sizeof (*t)); - t->device_index = vui - vum->vhost_user_interfaces; - t->qid = qid; - - hdr_desc = &rxvq->packed_desc[desc_current]; - if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT) - { - t->virtio_ring_flags |= 1 << 
VIRTIO_TRACE_F_INDIRECT; - /* Header is the first here */ - hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr, - &hint); - } - if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED; - } - if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) && - !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)) - { - t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC; - } - - t->first_desc_len = hdr_desc ? hdr_desc->len : 0; -} - -static_always_inline uword -vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, vhost_user_intf_t *vui, - vhost_user_vring_t *rxvq) -{ - u32 *buffers = vlib_frame_vector_args (frame); - u32 n_left = frame->n_vectors; - vhost_user_main_t *vum = &vhost_user_main; - u32 qid = rxvq->qid; - u8 error; - u32 thread_index = vm->thread_index; - vhost_cpu_t *cpu = &vum->cpus[thread_index]; - u32 map_hint = 0; - u8 retry = 8; - u16 copy_len; - u16 tx_headers_len; - vnet_virtio_vring_packed_desc_t *desc_table; - u32 or_flags; - u16 desc_head, desc_index, desc_len; - u16 n_descs_processed; - u8 indirect, chained; - -retry: - error = VHOST_USER_TX_FUNC_ERROR_NONE; - tx_headers_len = 0; - copy_len = 0; - n_descs_processed = 0; - - while (n_left > 0) - { - vlib_buffer_t *b0, *current_b0; - uword buffer_map_addr; - u32 buffer_len; - u16 bytes_left; - u32 total_desc_len = 0; - u16 n_entries = 0; - - indirect = 0; - chained = 0; - if (PREDICT_TRUE (n_left > 1)) - vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD); - - b0 = vlib_get_buffer (vm, buffers[0]); - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - cpu->current_trace = vlib_add_trace (vm, node, b0, - sizeof (*cpu->current_trace)); - vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0, - rxvq); - } - - desc_table = rxvq->packed_desc; - desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask; - if (PREDICT_FALSE 
(!vhost_user_packed_desc_available (rxvq, desc_head))) - { - error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF; - goto done; - } - /* - * Go deeper in case of indirect descriptor. - * To test it, turn off mrg_rxbuf. - */ - if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT) - { - indirect = 1; - if (PREDICT_FALSE (desc_table[desc_head].len < - sizeof (vnet_virtio_vring_packed_desc_t))) - { - error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW; - goto done; - } - n_entries = desc_table[desc_head].len >> 4; - desc_table = map_guest_mem (vui, desc_table[desc_index].addr, - &map_hint); - if (PREDICT_FALSE (desc_table == 0)) - { - error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL; - goto done; - } - desc_index = 0; - } - else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT) - chained = 1; - - desc_len = vui->virtio_net_hdr_sz; - buffer_map_addr = desc_table[desc_index].addr; - buffer_len = desc_table[desc_index].len; - - /* Get a header from the header array */ - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len]; - tx_headers_len++; - hdr->hdr.flags = 0; - hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; - hdr->num_buffers = 1; - - or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD); - - /* Guest supports csum offload and buffer requires checksum offload? 
*/ - if (or_flags && - (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM))) - vhost_user_handle_tx_offload (vui, b0, &hdr->hdr); - - /* Prepare a copy order executed later for the header */ - ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N); - vhost_copy_t *cpy = &cpu->copy[copy_len]; - copy_len++; - cpy->len = vui->virtio_net_hdr_sz; - cpy->dst = buffer_map_addr; - cpy->src = (uword) hdr; - - buffer_map_addr += vui->virtio_net_hdr_sz; - buffer_len -= vui->virtio_net_hdr_sz; - bytes_left = b0->current_length; - current_b0 = b0; - while (1) - { - if (buffer_len == 0) - { - /* Get new output */ - if (chained) - { - /* - * Next one is chained - * Test it with both indirect and mrg_rxbuf off - */ - if (PREDICT_FALSE (!(desc_table[desc_index].flags & - VRING_DESC_F_NEXT))) - { - /* - * Last descriptor in chain. - * Dequeue queued descriptors for this packet - */ - vhost_user_dequeue_chained_descs (rxvq, - &n_descs_processed); - error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF; - goto done; - } - vhost_user_advance_last_avail_idx (rxvq); - desc_index = rxvq->last_avail_idx & rxvq->qsz_mask; - n_descs_processed++; - buffer_map_addr = desc_table[desc_index].addr; - buffer_len = desc_table[desc_index].len; - total_desc_len += desc_len; - desc_len = 0; - } - else if (indirect) - { - /* - * Indirect table - * Test it with mrg_rxnuf off - */ - if (PREDICT_TRUE (n_entries > 0)) - n_entries--; - else - { - /* Dequeue queued descriptors for this packet */ - vhost_user_dequeue_chained_descs (rxvq, - &n_descs_processed); - error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW; - goto done; - } - total_desc_len += desc_len; - desc_index = (desc_index + 1) & rxvq->qsz_mask; - buffer_map_addr = desc_table[desc_index].addr; - buffer_len = desc_table[desc_index].len; - desc_len = 0; - } - else if (vui->virtio_net_hdr_sz == 12) - { - /* - * MRG is available - * This is the default setting for the guest VM - */ - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = - &cpu->tx_headers[tx_headers_len - 1]; 
- - desc_table[desc_index].len = desc_len; - vhost_user_advance_last_avail_idx (rxvq); - desc_head = desc_index = - rxvq->last_avail_idx & rxvq->qsz_mask; - hdr->num_buffers++; - n_descs_processed++; - desc_len = 0; - - if (PREDICT_FALSE (!vhost_user_packed_desc_available - (rxvq, desc_index))) - { - /* Dequeue queued descriptors for this packet */ - vhost_user_dequeue_descs (rxvq, hdr, - &n_descs_processed); - error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF; - goto done; - } - - buffer_map_addr = desc_table[desc_index].addr; - buffer_len = desc_table[desc_index].len; - } - else - { - error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG; - goto done; - } - } - - ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N); - vhost_copy_t *cpy = &cpu->copy[copy_len]; - copy_len++; - cpy->len = bytes_left; - cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len; - cpy->dst = buffer_map_addr; - cpy->src = (uword) vlib_buffer_get_current (current_b0) + - current_b0->current_length - bytes_left; - - bytes_left -= cpy->len; - buffer_len -= cpy->len; - buffer_map_addr += cpy->len; - desc_len += cpy->len; - - clib_prefetch_load (&rxvq->packed_desc); - - /* Check if vlib buffer has more data. 
If not, get more or break */ - if (PREDICT_TRUE (!bytes_left)) - { - if (PREDICT_FALSE - (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - current_b0 = vlib_get_buffer (vm, current_b0->next_buffer); - bytes_left = current_b0->current_length; - } - else - { - /* End of packet */ - break; - } - } - } - - /* Move from available to used ring */ - total_desc_len += desc_len; - rxvq->packed_desc[desc_head].len = total_desc_len; - - vhost_user_advance_last_avail_table_idx (vui, rxvq, chained); - n_descs_processed++; - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1]; - - n_left--; - - /* - * Do the copy periodically to prevent - * cpu->copy array overflow and corrupt memory - */ - if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained) - { - if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len, - &map_hint))) - vlib_error_count (vm, node->node_index, - VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); - copy_len = 0; - - /* give buffers back to driver */ - vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed, - chained, frame, n_left); - } - - buffers++; - } - -done: - if (PREDICT_TRUE (copy_len)) - { - if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len, - &map_hint))) - vlib_error_count (vm, node->node_index, - VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); - - vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed, - chained, frame, n_left); - } - - /* - * When n_left is set, error is always set to something too. - * In case error is due to lack of remaining buffers, we go back up and - * retry. - * The idea is that it is better to waste some time on packets - * that have been processed already than dropping them and get - * more fresh packets with a good likelyhood that they will be dropped too. - * This technique also gives more time to VM driver to pick-up packets. 
- * In case the traffic flows from physical to virtual interfaces, this - * technique will end-up leveraging the physical NIC buffer in order to - * absorb the VM's CPU jitter. - */ - if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry) - { - retry--; - goto retry; - } - - clib_spinlock_unlock (&rxvq->vring_lock); - - if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE)) - { - vlib_error_count (vm, node->node_index, error, n_left); - vlib_increment_simple_counter - (vnet_main.interface_main.sw_if_counters + - VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left); - } - - vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); - return frame->n_vectors; -} - -VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm, - vlib_node_runtime_t * - node, vlib_frame_t * frame) -{ - u32 *buffers = vlib_frame_vector_args (frame); - u32 n_left = frame->n_vectors; - vhost_user_main_t *vum = &vhost_user_main; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - vhost_user_intf_t *vui = - pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance); - u32 qid; - vhost_user_vring_t *rxvq; - u8 error; - u32 thread_index = vm->thread_index; - vhost_cpu_t *cpu = &vum->cpus[thread_index]; - u32 map_hint = 0; - u8 retry = 8; - u16 copy_len; - u16 tx_headers_len; - u32 or_flags; - vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame); - - if (PREDICT_FALSE (!vui->admin_up)) - { - error = VHOST_USER_TX_FUNC_ERROR_DOWN; - goto done3; - } - - if (PREDICT_FALSE (!vui->is_ready)) - { - error = VHOST_USER_TX_FUNC_ERROR_NOT_READY; - goto done3; - } - - qid = VHOST_VRING_IDX_RX (tf->queue_id); - rxvq = &vui->vrings[qid]; - ASSERT (tf->queue_id == rxvq->qid); - - if (PREDICT_FALSE (rxvq->avail == 0)) - { - error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL; - goto done3; - } - if (tf->shared_queue) - clib_spinlock_lock (&rxvq->vring_lock); - - if (vhost_user_is_packed_ring_supported (vui)) - 
return (vhost_user_device_class_packed (vm, node, frame, vui, rxvq)); - -retry: - error = VHOST_USER_TX_FUNC_ERROR_NONE; - tx_headers_len = 0; - copy_len = 0; - while (n_left > 0) - { - vlib_buffer_t *b0, *current_b0; - u16 desc_head, desc_index, desc_len; - vnet_virtio_vring_desc_t *desc_table; - uword buffer_map_addr; - u32 buffer_len; - u16 bytes_left; - - if (PREDICT_TRUE (n_left > 1)) - vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD); - - b0 = vlib_get_buffer (vm, buffers[0]); - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - cpu->current_trace = vlib_add_trace (vm, node, b0, - sizeof (*cpu->current_trace)); - vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq); - } - - if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx)) - { - error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF; - goto done; - } - - desc_table = rxvq->desc; - desc_head = desc_index = - rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask]; - - /* Go deeper in case of indirect descriptor - * I don't know of any driver providing indirect for RX. 
*/ - if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT)) - { - if (PREDICT_FALSE (rxvq->desc[desc_head].len < - sizeof (vnet_virtio_vring_desc_t))) - { - error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW; - goto done; - } - if (PREDICT_FALSE - (!(desc_table = - map_guest_mem (vui, rxvq->desc[desc_index].addr, - &map_hint)))) - { - error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL; - goto done; - } - desc_index = 0; - } - - desc_len = vui->virtio_net_hdr_sz; - buffer_map_addr = desc_table[desc_index].addr; - buffer_len = desc_table[desc_index].len; - - { - // Get a header from the header array - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = - &cpu->tx_headers[tx_headers_len]; - tx_headers_len++; - hdr->hdr.flags = 0; - hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; - hdr->num_buffers = 1; //This is local, no need to check - - or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD); - - /* Guest supports csum offload and buffer requires checksum offload? */ - if (or_flags - && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM))) - vhost_user_handle_tx_offload (vui, b0, &hdr->hdr); - - // Prepare a copy order executed later for the header - ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N); - vhost_copy_t *cpy = &cpu->copy[copy_len]; - copy_len++; - cpy->len = vui->virtio_net_hdr_sz; - cpy->dst = buffer_map_addr; - cpy->src = (uword) hdr; - } - - buffer_map_addr += vui->virtio_net_hdr_sz; - buffer_len -= vui->virtio_net_hdr_sz; - bytes_left = b0->current_length; - current_b0 = b0; - while (1) - { - if (buffer_len == 0) - { //Get new output - if (desc_table[desc_index].flags & VRING_DESC_F_NEXT) - { - //Next one is chained - desc_index = desc_table[desc_index].next; - buffer_map_addr = desc_table[desc_index].addr; - buffer_len = desc_table[desc_index].len; - } - else if (vui->virtio_net_hdr_sz == 12) //MRG is available - { - vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = - &cpu->tx_headers[tx_headers_len - 1]; - - //Move from available to used buffer - 
rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = - desc_head; - rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = - desc_len; - vhost_user_log_dirty_ring (vui, rxvq, - ring[rxvq->last_used_idx & - rxvq->qsz_mask]); - - rxvq->last_avail_idx++; - rxvq->last_used_idx++; - hdr->num_buffers++; - desc_len = 0; - - if (PREDICT_FALSE - (rxvq->last_avail_idx == rxvq->avail->idx)) - { - //Dequeue queued descriptors for this packet - rxvq->last_used_idx -= hdr->num_buffers - 1; - rxvq->last_avail_idx -= hdr->num_buffers - 1; - error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF; - goto done; - } - - desc_table = rxvq->desc; - desc_head = desc_index = - rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask]; - if (PREDICT_FALSE - (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT)) - { - //It is seriously unlikely that a driver will put indirect descriptor - //after non-indirect descriptor. - if (PREDICT_FALSE (rxvq->desc[desc_head].len < - sizeof (vnet_virtio_vring_desc_t))) - { - error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW; - goto done; - } - if (PREDICT_FALSE - (!(desc_table = - map_guest_mem (vui, - rxvq->desc[desc_index].addr, - &map_hint)))) - { - error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL; - goto done; - } - desc_index = 0; - } - buffer_map_addr = desc_table[desc_index].addr; - buffer_len = desc_table[desc_index].len; - } - else - { - error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG; - goto done; - } - } - - { - ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N); - vhost_copy_t *cpy = &cpu->copy[copy_len]; - copy_len++; - cpy->len = bytes_left; - cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len; - cpy->dst = buffer_map_addr; - cpy->src = (uword) vlib_buffer_get_current (current_b0) + - current_b0->current_length - bytes_left; - - bytes_left -= cpy->len; - buffer_len -= cpy->len; - buffer_map_addr += cpy->len; - desc_len += cpy->len; - - clib_prefetch_load (&rxvq->desc); - } - - // Check if vlib buffer has more data. 
If not, get more or break. - if (PREDICT_TRUE (!bytes_left)) - { - if (PREDICT_FALSE - (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - current_b0 = vlib_get_buffer (vm, current_b0->next_buffer); - bytes_left = current_b0->current_length; - } - else - { - //End of packet - break; - } - } - } - - //Move from available to used ring - rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head; - rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len; - vhost_user_log_dirty_ring (vui, rxvq, - ring[rxvq->last_used_idx & rxvq->qsz_mask]); - rxvq->last_avail_idx++; - rxvq->last_used_idx++; - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1]; - } - - n_left--; //At the end for error counting when 'goto done' is invoked - - /* - * Do the copy periodically to prevent - * cpu->copy array overflow and corrupt memory - */ - if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD)) - { - if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len, - &map_hint))) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); - } - copy_len = 0; - - /* give buffers back to driver */ - CLIB_MEMORY_BARRIER (); - rxvq->used->idx = rxvq->last_used_idx; - vhost_user_log_dirty_ring (vui, rxvq, idx); - } - buffers++; - } - -done: - //Do the memory copies - if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len, - &map_hint))) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); - } - - CLIB_MEMORY_BARRIER (); - rxvq->used->idx = rxvq->last_used_idx; - vhost_user_log_dirty_ring (vui, rxvq, idx); - - /* - * When n_left is set, error is always set to something too. - * In case error is due to lack of remaining buffers, we go back up and - * retry. 
- * The idea is that it is better to waste some time on packets - * that have been processed already than dropping them and get - * more fresh packets with a good likelihood that they will be dropped too. - * This technique also gives more time to VM driver to pick-up packets. - * In case the traffic flows from physical to virtual interfaces, this - * technique will end-up leveraging the physical NIC buffer in order to - * absorb the VM's CPU jitter. - */ - if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry) - { - retry--; - goto retry; - } - - /* interrupt (call) handling */ - if ((rxvq->callfd_idx != ~0) && - !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - { - rxvq->n_since_last_int += frame->n_vectors - n_left; - - if (rxvq->n_since_last_int > vum->coalesce_frames) - vhost_user_send_call (vm, vui, rxvq); - } - - clib_spinlock_unlock (&rxvq->vring_lock); - -done3: - if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE)) - { - vlib_error_count (vm, node->node_index, error, n_left); - vlib_increment_simple_counter - (vnet_main.interface_main.sw_if_counters - + VNET_INTERFACE_COUNTER_DROP, - thread_index, vui->sw_if_index, n_left); - } - - vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); - return frame->n_vectors; -} - -static __clib_unused clib_error_t * -vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, - u32 qid, vnet_hw_if_rx_mode mode) -{ - vlib_main_t *vm = vnm->vlib_main; - vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui = - pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance); - vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; - vhost_cpu_t *cpu; - - if (mode == txvq->mode) - return 0; - - if ((mode != VNET_HW_IF_RX_MODE_POLLING) && - (mode != VNET_HW_IF_RX_MODE_ADAPTIVE) && - (mode != VNET_HW_IF_RX_MODE_INTERRUPT)) - { - vu_log_err (vui, "unhandled 
mode %d changed for if %d queue %d", mode, - hw_if_index, qid); - return clib_error_return (0, "unsupported"); - } - - if (txvq->thread_index == ~0) - return clib_error_return (0, "Queue initialization is not finished yet"); - - cpu = vec_elt_at_index (vum->cpus, txvq->thread_index); - if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) || - (mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) - { - if (txvq->kickfd_idx == ~0) - { - // We cannot support interrupt mode if the driver opts out - return clib_error_return (0, "Driver does not support interrupt"); - } - if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING) - { - ASSERT (cpu->polling_q_count != 0); - if (cpu->polling_q_count) - cpu->polling_q_count--; - vum->ifq_count++; - // Start the timer if this is the first encounter on interrupt - // interface/queue - if ((vum->ifq_count == 1) && - ((vum->coalesce_time > 0.0) || (vum->coalesce_frames > 0))) - vlib_process_signal_event (vm, - vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_START_TIMER, 0); - } - } - else if (mode == VNET_HW_IF_RX_MODE_POLLING) - { - if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) || - (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count) - { - cpu->polling_q_count++; - vum->ifq_count--; - // Stop the timer if there is no more interrupt interface/queue - if (vum->ifq_count == 0) - vlib_process_signal_event (vm, - vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_STOP_TIMER, 0); - } - } - - txvq->mode = mode; - vhost_user_set_operation_mode (vui, txvq); - - return 0; -} - -static __clib_unused clib_error_t * -vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, - u32 flags) -{ - vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui = - pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance); - u8 link_old, link_new; - - link_old = vui_is_link_up (vui); - - vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - - 
link_new = vui_is_link_up (vui); - - if (link_old != link_new) - vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ? - VNET_HW_INTERFACE_FLAG_LINK_UP : 0); - - return /* no error */ 0; -} - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (vhost_user_device_class) = { - .name = "vhost-user", - .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR, - .tx_function_error_strings = vhost_user_tx_func_error_strings, - .format_device_name = format_vhost_user_interface_name, - .name_renumber = vhost_user_name_renumber, - .admin_up_down_function = vhost_user_interface_admin_up_down, - .rx_mode_change_function = vhost_user_interface_rx_mode_change, - .format_tx_trace = format_vhost_trace, -}; - -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/virtio.api b/src/vnet/devices/virtio/virtio.api index bbe2341a001..a11492ec258 100644 --- a/src/vnet/devices/virtio/virtio.api +++ b/src/vnet/devices/virtio/virtio.api @@ -56,7 +56,7 @@ define virtio_pci_create_reply vl_api_interface_index_t sw_if_index; }; -enum virtio_flags { +enumflag virtio_flags { VIRTIO_API_FLAG_GSO = 1, /* enable gso on the interface */ VIRTIO_API_FLAG_CSUM_OFFLOAD = 2, /* enable checksum offload without gso on the interface */ VIRTIO_API_FLAG_GRO_COALESCE = 4, /* enable packet coalescing on tx side, provided gso enabled */ diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index 33af8b8c455..d2302fa1dc4 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -19,7 +19,11 @@ #include <sys/stat.h> #include <fcntl.h> #include <net/if.h> +#ifdef __linux__ #include <linux/if_tun.h> +#elif __FreeBSD__ +#include <net/if_tun.h> +#endif /* __linux__ */ #include <sys/ioctl.h> #include <sys/eventfd.h> @@ -207,7 +211,6 @@ virtio_set_packet_buffering (virtio_if_t * vif, u16 buffering_size) vnet_hw_interface_t *hw = vnet_get_hw_interface 
(vnm, vif->hw_if_index); vnet_virtio_vring_t *vring; clib_error_t *error = 0; - vif->packet_buffering = 1; vec_foreach (vring, vif->txq_vrings) { diff --git a/src/vnet/devices/virtio/virtio_pci_modern.c b/src/vnet/devices/virtio/virtio_pci_modern.c index f7313d84bbd..50a7b392367 100644 --- a/src/vnet/devices/virtio/virtio_pci_modern.c +++ b/src/vnet/devices/virtio/virtio_pci_modern.c @@ -164,9 +164,7 @@ virtio_pci_modern_set_queue_size (vlib_main_t * vm, virtio_if_t * vif, return; } - if (virtio_pci_modern_get_queue_size (vm, vif, queue_id) > queue_size) - virtio_pci_reg_write_u16 (vif, VIRTIO_QUEUE_SIZE_OFFSET (vif), - queue_size); + virtio_pci_reg_write_u16 (vif, VIRTIO_QUEUE_SIZE_OFFSET (vif), queue_size); } static u16 diff --git a/src/vnet/devices/virtio/virtio_pre_input.c b/src/vnet/devices/virtio/virtio_pre_input.c index eb208fd3a39..80cc8d6edb0 100644 --- a/src/vnet/devices/virtio/virtio_pre_input.c +++ b/src/vnet/devices/virtio/virtio_pre_input.c @@ -31,7 +31,7 @@ virtio_pre_input_inline (vlib_main_t *vm, vnet_virtio_vring_t *txq_vring, if (clib_spinlock_trylock (&txq_vring->lockp)) { if (virtio_txq_is_scheduled (txq_vring)) - return 0; + goto unlock; if (packet_coalesce) vnet_gro_flow_table_schedule_node_on_dispatcher ( vm, txq, txq_vring->flow_table); @@ -39,6 +39,7 @@ virtio_pre_input_inline (vlib_main_t *vm, vnet_virtio_vring_t *txq_vring, virtio_vring_buffering_schedule_node_on_dispatcher ( vm, txq, txq_vring->buffering); virtio_txq_set_scheduled (txq_vring); + unlock: clib_spinlock_unlock (&txq_vring->lockp); } } diff --git a/src/vnet/devices/virtio/virtio_process.c b/src/vnet/devices/virtio/virtio_process.c index 18b34e0aa62..13ba590659c 100644 --- a/src/vnet/devices/virtio/virtio_process.c +++ b/src/vnet/devices/virtio/virtio_process.c @@ -70,13 +70,11 @@ virtio_send_interrupt_process (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (virtio_send_interrupt_node) = { .function = virtio_send_interrupt_process, .type = 
VLIB_NODE_TYPE_PROCESS, .name = "virtio-send-interrupt-process", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/devices/virtio/virtio_std.h b/src/vnet/devices/virtio/virtio_std.h index 86984339bc2..ec988c08dbb 100644 --- a/src/vnet/devices/virtio/virtio_std.h +++ b/src/vnet/devices/virtio/virtio_std.h @@ -122,7 +122,6 @@ typedef struct /* u16 avail_event; */ } vnet_virtio_vring_used_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u64 addr; // packet data buffer address u32 len; // packet data buffer size @@ -170,7 +169,6 @@ typedef CLIB_PACKED (struct { u16 num_buffers; }) vnet_virtio_net_hdr_mrg_rxbuf_t; -/* *INDENT-ON* */ #endif /* diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c index d8342ff17ae..fc789ae0a7f 100644 --- a/src/vnet/dpo/dpo.c +++ b/src/vnet/dpo/dpo.c @@ -613,12 +613,10 @@ dpo_module_init (vlib_main_t * vm) return (NULL); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION(dpo_module_init) = { .runs_before = VLIB_INITS ("ip_main_init"), }; -/* *INDENT-ON* */ static clib_error_t * dpo_memory_show (vlib_main_t * vm, @@ -640,7 +638,6 @@ dpo_memory_show (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ /*? * The '<em>sh dpo memory </em>' command displays the memory usage for each * data-plane object type. 
@@ -662,6 +659,5 @@ VLIB_CLI_COMMAND (show_fib_memory, static) = { .function = dpo_memory_show, .short_help = "show dpo memory", }; -/* *INDENT-ON* */ // clang-format on diff --git a/src/vnet/dpo/dvr_dpo.c b/src/vnet/dpo/dvr_dpo.c index 5db9c803145..2b66467837c 100644 --- a/src/vnet/dpo/dvr_dpo.c +++ b/src/vnet/dpo/dvr_dpo.c @@ -206,12 +206,9 @@ format_dvr_dpo (u8* s, va_list *ap) vnet_main_t * vnm = vnet_get_main(); dvr_dpo_t *dd = dvr_dpo_get(index); - return (format(s, "%U-dvr-%U-dpo %U", - format_dpo_proto, dd->dd_proto, - format_vnet_sw_interface_name, - vnm, - vnet_get_sw_interface(vnm, dd->dd_sw_if_index), - format_dvr_reinject, dd->dd_reinject)); + return format (s, "%U-dvr-%U-dpo %U", format_dpo_proto, dd->dd_proto, + format_vnet_sw_if_index_name, vnm, dd->dd_sw_if_index, + format_dvr_reinject, dd->dd_reinject); } static void diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c index d3615d0ce76..5a519d344c1 100644 --- a/src/vnet/dpo/interface_rx_dpo.c +++ b/src/vnet/dpo/interface_rx_dpo.c @@ -160,11 +160,8 @@ format_interface_rx_dpo (u8* s, va_list *ap) vnet_main_t * vnm = vnet_get_main(); interface_rx_dpo_t *ido = interface_rx_dpo_get(index); - return (format(s, "%U-rx-dpo: %U", - format_vnet_sw_interface_name, - vnm, - vnet_get_sw_interface(vnm, ido->ido_sw_if_index), - format_dpo_proto, ido->ido_proto)); + return format (s, "%U-rx-dpo: %U", format_vnet_sw_if_index_name, vnm, + ido->ido_sw_if_index, format_dpo_proto, ido->ido_proto); } static void diff --git a/src/vnet/dpo/interface_tx_dpo.c b/src/vnet/dpo/interface_tx_dpo.c index 870579884a0..73f4e906268 100644 --- a/src/vnet/dpo/interface_tx_dpo.c +++ b/src/vnet/dpo/interface_tx_dpo.c @@ -50,10 +50,7 @@ format_interface_tx_dpo (u8* s, va_list *ap) CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); vnet_main_t * vnm = vnet_get_main(); - return (format(s, "%U-tx-dpo:", - format_vnet_sw_interface_name, - vnm, - vnet_get_sw_interface(vnm, index))); + return format (s, "%U-tx-dpo:", 
format_vnet_sw_if_index_name, vnm, index); } static void diff --git a/src/vnet/dpo/ip6_ll_dpo.c b/src/vnet/dpo/ip6_ll_dpo.c index deb67d88137..86908efbc04 100644 --- a/src/vnet/dpo/ip6_ll_dpo.c +++ b/src/vnet/dpo/ip6_ll_dpo.c @@ -191,7 +191,6 @@ static char *ip6_ll_dpo_error_strings[] = { /** * @brief */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_ll_dpo_node) = { .function = ip6_ll_dpo_switch, @@ -206,7 +205,6 @@ VLIB_REGISTER_NODE (ip6_ll_dpo_node) = [IP6_LL_NEXT_LOOKUP] = "ip6-lookup", }, }; -/* *INDENT-ON* */ void ip6_ll_dpo_module_init (void) diff --git a/src/vnet/dpo/l3_proxy_dpo.c b/src/vnet/dpo/l3_proxy_dpo.c index 41156301a0e..f89554d775f 100644 --- a/src/vnet/dpo/l3_proxy_dpo.c +++ b/src/vnet/dpo/l3_proxy_dpo.c @@ -116,9 +116,8 @@ format_l3_proxy_dpo (u8 *s, va_list *ap) if (~0 != l3p->l3p_sw_if_index) { - return (format(s, "dpo-l3_proxy: %U", - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface(vnm, l3p->l3p_sw_if_index))); + return (format (s, "dpo-l3_proxy: %U", format_vnet_sw_if_index_name, vnm, + l3p->l3p_sw_if_index)); } else { diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index ff46d56e3e2..8f2a0de6ea8 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -149,7 +149,13 @@ load_balance_format (index_t lbi, dpo_id_t *buckets; u32 i; - lb = load_balance_get(lbi); + lb = load_balance_get_or_null(lbi); + if (lb == NULL) + { + s = format(s, "DELETED lb:%u", lbi); + return (s); + } + vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to); vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via); buckets = load_balance_get_buckets(lb); @@ -244,6 +250,8 @@ load_balance_create_i (u32 num_buckets, { load_balance_t *lb; + ASSERT (num_buckets <= LB_MAX_BUCKETS); + lb = load_balance_alloc_i(); lb->lb_hash_config = fhc; lb->lb_n_buckets = num_buckets; @@ -455,8 +463,9 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops, /* Try larger and 
larger power of 2 sized adjacency blocks until we find one where traffic flows to within 1% of specified weights. */ - for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2) + for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2) { + ASSERT (n_adj <= LB_MAX_BUCKETS); error = 0; norm = n_adj / ((f64) sum_weight); @@ -487,12 +496,22 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops, nhs[0].path_weight += n_adj_left; - /* Less than 5% average error per adjacency with this size adjacency block? */ - if (error <= multipath_next_hop_error_tolerance*n_adj) + /* Less than 1% average error per adjacency with this size adjacency block, + * or did we reached the maximum number of buckets we support? */ + if (error <= multipath_next_hop_error_tolerance*n_adj || + n_adj >= LB_MAX_BUCKETS) { - /* Truncate any next hops with zero weight. */ - vec_set_len (nhs, i); - break; + if (i < n_nhs) + { + /* Truncate any next hops in excess */ + vlib_log_err(load_balance_logger, + "Too many paths for load-balance, truncating %d -> %d", + n_nhs, i); + for (int j = i; j < n_nhs; j++) + dpo_reset (&vec_elt(nhs, j).path_dpo); + } + vec_set_len (nhs, i); + break; } } @@ -622,6 +641,7 @@ static inline void load_balance_set_n_buckets (load_balance_t *lb, u32 n_buckets) { + ASSERT (n_buckets <= LB_MAX_BUCKETS); lb->lb_n_buckets = n_buckets; lb->lb_n_buckets_minus_1 = n_buckets-1; } @@ -651,8 +671,6 @@ load_balance_multipath_update (const dpo_id_t *dpo, &sum_of_weights, multipath_next_hop_error_tolerance); - ASSERT (n_buckets >= vec_len (raw_nhs)); - /* * Save the old load-balance map used, and get a new one if required. 
*/ diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h index 5428e20e981..eee073f5892 100644 --- a/src/vnet/dpo/load_balance.h +++ b/src/vnet/dpo/load_balance.h @@ -50,6 +50,12 @@ typedef struct load_balance_main_t_ extern load_balance_main_t load_balance_main; /** + * The maximum number of buckets that a load-balance object can have + * This must not overflow the lb_n_buckets field + */ +#define LB_MAX_BUCKETS 8192 + +/** * The number of buckets that a load-balance object can have and still * fit in one cache-line */ @@ -176,6 +182,10 @@ typedef struct load_balance_t_ { STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES, "A load_balance object size exceeds one cacheline"); +STATIC_ASSERT (LB_MAX_BUCKETS <= CLIB_U16_MAX, + "Too many buckets for load_balance object"); +STATIC_ASSERT (LB_MAX_BUCKETS && !(LB_MAX_BUCKETS & (LB_MAX_BUCKETS - 1)), + "LB_MAX_BUCKETS must be a power of 2"); /** * Flags controlling load-balance formatting/display @@ -222,6 +232,14 @@ load_balance_get (index_t lbi) return (pool_elt_at_index(load_balance_pool, lbi)); } +static inline load_balance_t * +load_balance_get_or_null (index_t lbi) +{ + if (pool_is_free_index (load_balance_pool, lbi)) + return 0; + return (pool_elt_at_index (load_balance_pool, lbi)); +} + #define LB_HAS_INLINE_BUCKETS(_lb) \ ((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS) diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index 7856f050cb0..872577dfbe1 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -84,6 +84,7 @@ mpls_label_dpo_create (fib_mpls_label_t *label_stack, mld = mpls_label_dpo_alloc(); mld->mld_flags = flags; + mld->mld_payload_proto = payload_proto; dtype = mpls_label_dpo_types[flags]; if (MPLS_LABEL_DPO_MAX_N_LABELS < vec_len(label_stack)) @@ -92,13 +93,12 @@ mpls_label_dpo_create (fib_mpls_label_t *label_stack, dpo_stack(dtype, mld->mld_payload_proto, &mld->mld_dpo, - drop_dpo_get(DPO_PROTO_MPLS)); + 
drop_dpo_get(mld->mld_payload_proto)); } else { mld->mld_n_labels = vec_len(label_stack); mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]); - mld->mld_payload_proto = payload_proto; /* * construct label rewrite headers for each value passed. @@ -398,22 +398,22 @@ mpls_label_imposition_inline (vlib_main_t * vm, /* Prefetch next iteration. */ { - vlib_buffer_t * p2, * p3, *p4, *p5; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - - vlib_prefetch_buffer_header (p2, STORE); - vlib_prefetch_buffer_header (p3, STORE); - vlib_prefetch_buffer_header (p4, STORE); - vlib_prefetch_buffer_header (p5, STORE); - - CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE); - CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE); - CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE); - CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE); + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, STORE); + vlib_prefetch_buffer_header (p5, STORE); + vlib_prefetch_buffer_header (p6, STORE); + vlib_prefetch_buffer_header (p7, STORE); + + CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p6->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p7->data, sizeof (hdr0[0]), STORE); } from += 4; diff --git a/src/vnet/dpo/receive_dpo.c b/src/vnet/dpo/receive_dpo.c index 0a97e1d373b..413c3ae5b47 100644 --- a/src/vnet/dpo/receive_dpo.c +++ b/src/vnet/dpo/receive_dpo.c @@ -122,10 +122,9 @@ format_receive_dpo (u8 *s, va_list *ap) if (~0 != rd->rd_sw_if_index) { - return (format(s, "dpo-receive: %U on %U", - format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY, - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface(vnm, rd->rd_sw_if_index))); + return 
(format (s, "dpo-receive: %U on %U", format_ip46_address, + &rd->rd_addr, IP46_TYPE_ANY, + format_vnet_sw_if_index_name, vnm, rd->rd_sw_if_index)); } else { diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index 5f88f12b910..0474fd82984 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -172,6 +172,8 @@ replicate_create_i (u32 num_buckets, { replicate_t *rep; + ASSERT (num_buckets <= REP_MAX_BUCKETS); + rep = replicate_alloc_i(); rep->rep_n_buckets = num_buckets; rep->rep_proto = rep_proto; @@ -311,7 +313,8 @@ static inline void replicate_set_n_buckets (replicate_t *rep, u32 n_buckets) { - rep->rep_n_buckets = n_buckets; + ASSERT (n_buckets <= REP_MAX_BUCKETS); + rep->rep_n_buckets = n_buckets; } void @@ -331,6 +334,17 @@ replicate_multipath_update (const dpo_id_t *dpo, rep->rep_proto); n_buckets = vec_len(nhs); + if (n_buckets > REP_MAX_BUCKETS) + { + vlib_log_err (replicate_logger, + "Too many paths for replicate, truncating %d -> %d", + n_buckets, REP_MAX_BUCKETS); + for (int i = REP_MAX_BUCKETS; i < n_buckets; i++) + dpo_reset (&vec_elt (nhs, i).path_dpo); + vec_set_len (nhs, REP_MAX_BUCKETS); + n_buckets = REP_MAX_BUCKETS; + } + if (0 == rep->rep_n_buckets) { /* diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h index 908c20c1d56..d21f52a4833 100644 --- a/src/vnet/dpo/replicate_dpo.h +++ b/src/vnet/dpo/replicate_dpo.h @@ -41,6 +41,12 @@ typedef struct replicate_main_t_ extern replicate_main_t replicate_main; /** + * The number of buckets that a replicate object can have + * This must not overflow the rep_n_buckets field + */ +#define REP_MAX_BUCKETS 1024 + +/** * The number of buckets that a load-balance object can have and still * fit in one cache-line */ @@ -108,6 +114,8 @@ typedef struct replicate_t_ { STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES, "A replicate object size exceeds one cacheline"); +STATIC_ASSERT (REP_MAX_BUCKETS <= CLIB_U16_MAX, + "Too many buckets for 
replicate object"); /** * Flags controlling load-balance formatting/display diff --git a/src/vnet/error.h b/src/vnet/error.h index 39a609bdb49..fa1337538c4 100644 --- a/src/vnet/error.h +++ b/src/vnet/error.h @@ -156,7 +156,10 @@ _ (EAGAIN, -165, "Retry stream call with cursor") \ _ (INVALID_VALUE_4, -166, "Invalid value #4") \ _ (BUSY, -167, "Busy") \ - _ (BUG, -168, "Bug") + _ (BUG, -168, "Bug") \ + _ (FEATURE_ALREADY_DISABLED, -169, "Feature already disabled") \ + _ (FEATURE_ALREADY_ENABLED, -170, "Feature already enabled") \ + _ (INVALID_PREFIX_LENGTH, -171, "Invalid prefix length") typedef enum { diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h index c406dade6e2..9a9df680853 100644 --- a/src/vnet/ethernet/arp_packet.h +++ b/src/vnet/ethernet/arp_packet.h @@ -110,12 +110,10 @@ typedef enum IP4_ARP_N_NEXT, } ip4_arp_next_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { mac_address_t mac; ip4_address_t ip4; }) ethernet_arp_ip4_over_ethernet_address_t; -/* *INDENT-ON* */ STATIC_ASSERT (sizeof (ethernet_arp_ip4_over_ethernet_address_t) == 10, "Packet ethernet address and IP4 address too big"); diff --git a/src/vnet/ethernet/init.c b/src/vnet/ethernet/init.c index f78b65c7cc0..3921e1ec0e6 100644 --- a/src/vnet/ethernet/init.c +++ b/src/vnet/ethernet/init.c @@ -62,7 +62,6 @@ add_type (ethernet_main_t * em, ethernet_type_t type, char *type_name) } /* Built-in ip4 tx feature path definition */ -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ethernet_output, static) = { .arc_name = "ethernet-output", @@ -77,7 +76,6 @@ VNET_FEATURE_INIT (ethernet_tx_drop, static) = .node_name = "error-drop", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ static clib_error_t * ethernet_init (vlib_main_t * vm) @@ -107,7 +105,6 @@ ethernet_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ethernet_init) = { /* @@ -119,7 +116,6 @@ VLIB_INIT_FUNCTION (ethernet_init) = "llc_init", "vnet_feature_init"), }; -/* 
*INDENT-ON* */ ethernet_main_t * ethernet_get_main (vlib_main_t * vm) diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 46d4203cda1..f1bb6b81070 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -303,8 +303,17 @@ ethernet_mac_change (vnet_hw_interface_t * hi, { ethernet_address_change_ctx_t *cb; + u32 id, sw_if_index; vec_foreach (cb, em->address_change_callbacks) - cb->function (em, hi->sw_if_index, cb->function_opaque); + { + cb->function (em, hi->sw_if_index, cb->function_opaque); + /* clang-format off */ + hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id, + ({ + cb->function (em, sw_if_index, cb->function_opaque); + })); + /* clang-format on */ + } } return (NULL); @@ -325,7 +334,6 @@ ethernet_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi, "underlying driver doesn't support changing Max Frame Size"); } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = { .name = "Ethernet", .tx_hash_fn_type = VNET_HASH_FN_TYPE_ETHERNET, @@ -338,7 +346,6 @@ VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = { .mac_addr_change_function = ethernet_mac_change, .set_max_frame_size = ethernet_set_max_frame_size, }; -/* *INDENT-ON* */ uword unformat_ethernet_interface (unformat_input_t * input, va_list * args) @@ -527,7 +534,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, while (n_left_from >= 4) { u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; - u32 not_all_match_config; + u32x4 xor_ifx4; /* Prefetch next iteration. 
*/ if (PREDICT_TRUE (n_left_from >= 8)) @@ -544,12 +551,11 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX]; sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX]; - not_all_match_config = (sw_if_index0 ^ sw_if_index1) - ^ (sw_if_index2 ^ sw_if_index3); - not_all_match_config += sw_if_index0 ^ new_rx_sw_if_index; + xor_ifx4 = u32x4_gather (&sw_if_index0, &sw_if_index1, &sw_if_index2, + &sw_if_index3); /* Speed path / expected case: all pkts on the same intfc */ - if (PREDICT_TRUE (not_all_match_config == 0)) + if (PREDICT_TRUE (u32x4_is_all_equal (xor_ifx4, new_rx_sw_if_index))) { next[0] = next_index; next[1] = next_index; @@ -752,7 +758,6 @@ simulated_ethernet_mac_change (vnet_hw_interface_t * hi, } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (ethernet_simulated_device_class) = { .name = "Loopback", .format_device_name = format_simulated_ethernet_name, @@ -760,7 +765,6 @@ VNET_DEVICE_CLASS (ethernet_simulated_device_class) = { .admin_up_down_function = simulated_ethernet_admin_up_down, .mac_addr_change_function = simulated_ethernet_mac_change, }; -/* *INDENT-ON* */ /* * Maintain a bitmap of allocated loopback instance numbers. @@ -949,13 +953,11 @@ create_simulated_ethernet_interfaces (vlib_main_t * vm, * Example of how to create a loopback interface: * @cliexcmd{loopback create-interface} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = { .path = "loopback create-interface", .short_help = "loopback create-interface [mac <mac-addr>] [instance <instance>]", .function = create_simulated_ethernet_interfaces, }; -/* *INDENT-ON* */ /*? * Create a loopback interface. 
Optionally, a MAC Address can be @@ -968,13 +970,11 @@ VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = { * Example of how to create a loopback interface: * @cliexcmd{create loopback interface} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_loopback_interface_command, static) = { .path = "create loopback interface", .short_help = "create loopback interface [mac <mac-addr>] [instance <instance>]", .function = create_simulated_ethernet_interfaces, }; -/* *INDENT-ON* */ ethernet_interface_t * ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index) @@ -1185,13 +1185,11 @@ delete_sub_interface (vlib_main_t * vm, * Example of how to delete a loopback interface: * @cliexcmd{loopback delete-interface intfc loop0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = { .path = "loopback delete-interface", .short_help = "loopback delete-interface intfc <interface>", .function = delete_simulated_ethernet_interfaces, }; -/* *INDENT-ON* */ /*? * Delete a loopback interface. @@ -1203,13 +1201,11 @@ VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = { * Example of how to delete a loopback interface: * @cliexcmd{delete loopback interface intfc loop0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = { .path = "delete loopback interface", .short_help = "delete loopback interface intfc <interface>", .function = delete_simulated_ethernet_interfaces, }; -/* *INDENT-ON* */ /*? * Delete a sub-interface. @@ -1218,13 +1214,11 @@ VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = { * Example of how to delete a sub-interface: * @cliexcmd{delete sub-interface GigabitEthernet0/8/0.200} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (delete_sub_interface_command, static) = { .path = "delete sub-interface", .short_help = "delete sub-interface <interface>", .function = delete_sub_interface, }; -/* *INDENT-ON* */ /* ethernet { ... } configuration. */ /*? 
diff --git a/src/vnet/ethernet/mac_address.c b/src/vnet/ethernet/mac_address.c index 2237c3772b8..098b3ce19c1 100644 --- a/src/vnet/ethernet/mac_address.c +++ b/src/vnet/ethernet/mac_address.c @@ -15,13 +15,11 @@ #include <vnet/ethernet/mac_address.h> -/* *INDENT-OFF* */ const mac_address_t ZERO_MAC_ADDRESS = { .bytes = { 0, 0, 0, 0, 0, 0, }, }; -/* *INDENT-ON* */ u8 * format_mac_address_t (u8 * s, va_list * args) diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index 4ef575a85fc..03cbdde1c2b 100644 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -982,8 +982,31 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node, else { for (int j = 0; j < 16; j++) - if (next[j] == 0) - slowpath_indices[n_slowpath++] = i + j; + { + if (next[j] == 0) + slowpath_indices[n_slowpath++] = i + j; + else if (dmac_check && main_is_l3 && dmacs_bad[i + j]) + { + next[j] = 0; + slowpath_indices[n_slowpath++] = i + j; + } + } + } + } + else + { + if (dmac_check && main_is_l3) + { + u8x16 dmac_bad = u8x16_load_unaligned (&dmacs_bad[i]); + if (!u8x16_is_all_zero (dmac_bad)) + { + for (int j = 0; j < 16; j++) + if (dmacs_bad[i + j]) + { + next[j] = 0; + slowpath_indices[n_slowpath++] = i + j; + } + } } } @@ -994,7 +1017,12 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node, continue; } #endif - if (main_is_l3 && etype[0] == et_ip4) + if (dmac_check && main_is_l3 && dmacs_bad[i]) + { + next[0] = 0; + slowpath_indices[n_slowpath++] = i; + } + else if (main_is_l3 && etype[0] == et_ip4) next[0] = next_ip4; else if (main_is_l3 && etype[0] == et_ip6) next[0] = next_ip6; @@ -1052,7 +1080,7 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - /* untagged packet with not well known etyertype */ + /* untagged packet with not well known ethertype */ if (last_unknown_etype != etype) { last_unknown_etype = etype; @@ -2098,7 +2126,6 @@ static char *ethernet_error_strings[] = { #undef ethernet_error }; 
-/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ethernet_input_node) = { .name = "ethernet-input", /* Takes a vector of packets. */ @@ -2140,7 +2167,6 @@ VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = { #undef _ }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT void diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c index ddf23901419..0ece84fd9cc 100644 --- a/src/vnet/ethernet/p2p_ethernet.c +++ b/src/vnet/ethernet/p2p_ethernet.c @@ -146,6 +146,8 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, vnet_feature_enable_disable ("device-input", "p2p-ethernet-input", parent_if_index, 1, 0, 0); + vnet_feature_enable_disable ("port-rx-eth", "p2p-ethernet-input", + parent_if_index, 1, 0, 0); /* Set promiscuous mode on the l2 interface */ ethernet_set_flags (vnm, parent_if_index, ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); @@ -153,7 +155,7 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, } p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]++; /* set the interface mode */ - set_int_l2_mode (vm, vnm, MODE_L3, p2pe_subif_id, 0, + set_int_l2_mode (vm, vnm, MODE_L3, p2pe_sw_if_index, 0, L2_BD_PORT_TYPE_NORMAL, 0, 0); return 0; } @@ -176,6 +178,9 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, vnet_feature_enable_disable ("device-input", "p2p-ethernet-input", parent_if_index, 0, 0, 0); + vnet_feature_enable_disable ("port-rx-eth", + "p2p-ethernet-input", + parent_if_index, 0, 0, 0); /* Disable promiscuous mode on the l2 interface */ ethernet_set_flags (vnm, parent_if_index, 0); } @@ -248,10 +253,11 @@ vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input, return 0; } -VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = -{ -.path = "p2p_ethernet ",.function = vnet_p2p_ethernet_add_del,.short_help = - "p2p_ethernet <intfc> <mac-address> [sub-id <id> | del]",}; +VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = { + .path = "p2p_ethernet", + .function = vnet_p2p_ethernet_add_del, + 
.short_help = "p2p_ethernet <intfc> <mac-address> [sub-id <id>|del]", +}; static clib_error_t * p2p_ethernet_init (vlib_main_t * vm) diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c index a9a8cc0a444..903678ce445 100644 --- a/src/vnet/ethernet/p2p_ethernet_api.c +++ b/src/vnet/ethernet/p2p_ethernet_api.c @@ -58,14 +58,12 @@ vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp) BAD_SW_IF_INDEX_LABEL; - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_P2P_ETHERNET_ADD_REPLY, ({ rmp->sw_if_index = htonl(p2pe_if_index); })); - /* *INDENT-ON* */ } void diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c index 3e9589e0e19..3d81e99cff2 100644 --- a/src/vnet/ethernet/p2p_ethernet_input.c +++ b/src/vnet/ethernet/p2p_ethernet_input.c @@ -235,7 +235,6 @@ VLIB_NODE_FN (p2p_ethernet_input_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (p2p_ethernet_input_node) = { .name = "p2p-ethernet-input", .vector_size = sizeof (u32), @@ -253,7 +252,6 @@ VLIB_REGISTER_NODE (p2p_ethernet_input_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ethernet/packet.h b/src/vnet/ethernet/packet.h index e1e42badd06..007f93596f3 100644 --- a/src/vnet/ethernet/packet.h +++ b/src/vnet/ethernet/packet.h @@ -184,7 +184,6 @@ typedef struct #define ETHERNET_N_PBB (1 << 24) } ethernet_pbb_header_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /* Backbone source/destination address. 
*/ @@ -201,7 +200,6 @@ typedef CLIB_PACKED (struct /* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */ u32 priority_dei_uca_res_sid; }) ethernet_pbb_header_packed_t; -/* *INDENT-ON* */ #endif /* included_ethernet_packet_h */ diff --git a/src/vnet/feature/feature.c b/src/vnet/feature/feature.c index 1750612783b..a7246fbb16a 100644 --- a/src/vnet/feature/feature.c +++ b/src/vnet/feature/feature.c @@ -533,13 +533,11 @@ show_features_command_fn (vlib_main_t * vm, * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_features_command, static) = { .path = "show features", .short_help = "show features [verbose]", .function = show_features_command_fn, }; -/* *INDENT-ON* */ /** Display the set of driver features configured on a specific interface * Called by "show interface" handler @@ -700,14 +698,12 @@ done: * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_feature_command, static) = { .path = "set interface feature", .short_help = "set interface feature <intfc> <feature_name> arc <arc_name> " "[disable]", .function = set_interface_features_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * vnet_feature_add_del_sw_interface (vnet_main_t * vnm, u32 sw_if_index, diff --git a/src/vnet/feature/feature.h b/src/vnet/feature/feature.h index a8235d3d9ee..b1917e8df13 100644 --- a/src/vnet/feature/feature.h +++ b/src/vnet/feature/feature.h @@ -344,8 +344,8 @@ vnet_device_input_have_features (u32 sw_if_index) } static_always_inline void -vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0, - vlib_buffer_t * b0) +vnet_feature_start_device_input (u32 sw_if_index, u32 *next0, + vlib_buffer_t *b0) { vnet_feature_main_t *fm = &feature_main; vnet_feature_config_main_t *cm; @@ -356,118 +356,11 @@ vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0, (clib_bitmap_get (fm->sw_if_index_has_features[feature_arc_index], sw_if_index))) { - /* - * Save next0 so that the last 
feature in the chain - * can skip ethernet-input if indicated... - */ - u16 adv; - - adv = device_input_next_node_advance[*next0]; - vlib_buffer_advance (b0, -adv); - - vnet_buffer (b0)->feature_arc_index = feature_arc_index; - b0->current_config_index = - vec_elt (cm->config_index_by_sw_if_index, sw_if_index); - vnet_get_config_data (&cm->config_main, &b0->current_config_index, - next0, /* # bytes of config data */ 0); - } -} - -static_always_inline void -vnet_feature_start_device_input_x2 (u32 sw_if_index, - u32 * next0, - u32 * next1, - vlib_buffer_t * b0, vlib_buffer_t * b1) -{ - vnet_feature_main_t *fm = &feature_main; - vnet_feature_config_main_t *cm; - u8 feature_arc_index = fm->device_input_feature_arc_index; - cm = &fm->feature_config_mains[feature_arc_index]; - - if (PREDICT_FALSE - (clib_bitmap_get - (fm->sw_if_index_has_features[feature_arc_index], sw_if_index))) - { - /* - * Save next0 so that the last feature in the chain - * can skip ethernet-input if indicated... - */ - u16 adv; - - adv = device_input_next_node_advance[*next0]; - vlib_buffer_advance (b0, -adv); - - adv = device_input_next_node_advance[*next1]; - vlib_buffer_advance (b1, -adv); - - vnet_buffer (b0)->feature_arc_index = feature_arc_index; - vnet_buffer (b1)->feature_arc_index = feature_arc_index; - b0->current_config_index = - vec_elt (cm->config_index_by_sw_if_index, sw_if_index); - b1->current_config_index = b0->current_config_index; - vnet_get_config_data (&cm->config_main, &b0->current_config_index, - next0, /* # bytes of config data */ 0); - vnet_get_config_data (&cm->config_main, &b1->current_config_index, - next1, /* # bytes of config data */ 0); - } -} - -static_always_inline void -vnet_feature_start_device_input_x4 (u32 sw_if_index, - u32 * next0, - u32 * next1, - u32 * next2, - u32 * next3, - vlib_buffer_t * b0, - vlib_buffer_t * b1, - vlib_buffer_t * b2, vlib_buffer_t * b3) -{ - vnet_feature_main_t *fm = &feature_main; - vnet_feature_config_main_t *cm; - u8 
feature_arc_index = fm->device_input_feature_arc_index; - cm = &fm->feature_config_mains[feature_arc_index]; - - if (PREDICT_FALSE - (clib_bitmap_get - (fm->sw_if_index_has_features[feature_arc_index], sw_if_index))) - { - /* - * Save next0 so that the last feature in the chain - * can skip ethernet-input if indicated... - */ - u16 adv; - - adv = device_input_next_node_advance[*next0]; - vlib_buffer_advance (b0, -adv); - - adv = device_input_next_node_advance[*next1]; - vlib_buffer_advance (b1, -adv); - - adv = device_input_next_node_advance[*next2]; - vlib_buffer_advance (b2, -adv); - - adv = device_input_next_node_advance[*next3]; - vlib_buffer_advance (b3, -adv); - vnet_buffer (b0)->feature_arc_index = feature_arc_index; - vnet_buffer (b1)->feature_arc_index = feature_arc_index; - vnet_buffer (b2)->feature_arc_index = feature_arc_index; - vnet_buffer (b3)->feature_arc_index = feature_arc_index; - b0->current_config_index = vec_elt (cm->config_index_by_sw_if_index, sw_if_index); - b1->current_config_index = b0->current_config_index; - b2->current_config_index = b0->current_config_index; - b3->current_config_index = b0->current_config_index; - vnet_get_config_data (&cm->config_main, &b0->current_config_index, next0, /* # bytes of config data */ 0); - vnet_get_config_data (&cm->config_main, &b1->current_config_index, - next1, /* # bytes of config data */ 0); - vnet_get_config_data (&cm->config_main, &b2->current_config_index, - next2, /* # bytes of config data */ 0); - vnet_get_config_data (&cm->config_main, &b3->current_config_index, - next3, /* # bytes of config data */ 0); } } diff --git a/src/vnet/feature/registration.c b/src/vnet/feature/registration.c index 537a4ada6e4..bc20412b9cf 100644 --- a/src/vnet/feature/registration.c +++ b/src/vnet/feature/registration.c @@ -351,12 +351,10 @@ again: *in_feature_nodes = feature_nodes; /* Finally, clean up all the shit we allocated */ - /* *INDENT-OFF* */ hash_foreach_pair (hp, index_by_name, ({ vec_add1 
(keys_to_delete, (u8 *)hp->key); })); - /* *INDENT-ON* */ hash_free (index_by_name); for (i = 0; i < vec_len (keys_to_delete); i++) vec_free (keys_to_delete[i]); diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c index ddfa830bb0f..cce03b4b49c 100644 --- a/src/vnet/fib/fib.c +++ b/src/vnet/fib/fib.c @@ -32,9 +32,7 @@ fib_module_init (vlib_main_t * vm) return (NULL); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (fib_module_init) = { .runs_after = VLIB_INITS("dpo_module_init", "adj_module_init"), }; -/* *INDENT-ON* */ diff --git a/src/vnet/fib/fib_api.c b/src/vnet/fib/fib_api.c index 75a17cfca02..07d6699d87a 100644 --- a/src/vnet/fib/fib_api.c +++ b/src/vnet/fib/fib_api.c @@ -69,7 +69,7 @@ fib_api_next_hop_decode (const vl_api_fib_path_t *in, *out = to_ip46 (FIB_API_PATH_NH_PROTO_IP6 == in->proto, (void *)&in->nh.address); } -static vl_api_fib_path_nh_proto_t +vl_api_fib_path_nh_proto_t fib_api_path_dpo_proto_to_nh (dpo_proto_t dproto) { switch (dproto) @@ -108,7 +108,7 @@ fib_api_next_hop_encode (const fib_route_path_t *rpath, sizeof (rpath->frp_addr.ip6)); } -static int +int fib_api_path_nh_proto_to_dpo (vl_api_fib_path_nh_proto_t pp, dpo_proto_t *dproto) { @@ -448,6 +448,9 @@ fib_api_route_add_del (u8 is_add, fib_entry_flag_t entry_flags, fib_route_path_t *rpaths) { + if (!fib_prefix_validate(prefix)) { + return (VNET_API_ERROR_INVALID_PREFIX_LENGTH); + } if (is_multipath) { if (vec_len(rpaths) == 0) diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h index 7fd7d16cb33..0c59531b438 100644 --- a/src/vnet/fib/fib_api.h +++ b/src/vnet/fib/fib_api.h @@ -29,6 +29,8 @@ struct _vl_api_fib_prefix; /** * Encode and decode functions from the API types to internal types */ +extern vl_api_fib_path_nh_proto_t fib_api_path_dpo_proto_to_nh (dpo_proto_t dproto); +extern int fib_api_path_nh_proto_to_dpo (vl_api_fib_path_nh_proto_t pp, dpo_proto_t *dproto); extern void fib_api_path_encode(const fib_route_path_t * api_rpath, vl_api_fib_path_t *out); extern int 
fib_api_path_decode(vl_api_fib_path_t *in, diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c index 206d10e7140..c6ba0575a04 100644 --- a/src/vnet/fib/fib_attached_export.c +++ b/src/vnet/fib/fib_attached_export.c @@ -378,6 +378,7 @@ fib_attached_export_purge (fib_entry_t *fib_entry) */ if (0 == --export->faee_locks) { + vec_free (export->faee_importers); pool_put(fib_ae_export_pool, export); fib_entry_delegate_remove(export_entry, FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); diff --git a/src/vnet/fib/fib_bfd.c b/src/vnet/fib/fib_bfd.c index b02fbc67a63..6bfd29ae2cc 100644 --- a/src/vnet/fib/fib_bfd.c +++ b/src/vnet/fib/fib_bfd.c @@ -188,9 +188,7 @@ fib_bfd_main_init (vlib_main_t * vm) return (NULL); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (fib_bfd_main_init) = { .runs_after = VLIB_INITS("bfd_main_init"), }; -/* *INDENT-ON* */ diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 4053ff65181..7331f803ec4 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -154,9 +154,13 @@ typedef enum fib_entry_src_attribute_t_ { */ FIB_ENTRY_SRC_ATTRIBUTE_INHERITED, /** + * the source is currently used as glean src address + */ + FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN, + /** * Marker. add new entries before this one. 
*/ - FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_INHERITED, + FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN, } fib_entry_src_attribute_t; @@ -166,6 +170,7 @@ typedef enum fib_entry_src_attribute_t_ { [FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE] = "active", \ [FIB_ENTRY_SRC_ATTRIBUTE_STALE] = "stale", \ [FIB_ENTRY_SRC_ATTRIBUTE_INHERITED] = "inherited", \ + [FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN] = "provides-glean", \ } #define FOR_EACH_FIB_SRC_ATTRIBUTE(_item) \ @@ -180,6 +185,7 @@ typedef enum fib_entry_src_flag_t_ { FIB_ENTRY_SRC_FLAG_ACTIVE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE), FIB_ENTRY_SRC_FLAG_STALE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_STALE), FIB_ENTRY_SRC_FLAG_INHERITED = (1 << FIB_ENTRY_SRC_ATTRIBUTE_INHERITED), + FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN = (1 << FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN), } __attribute__ ((packed)) fib_entry_src_flag_t; extern u8 * format_fib_entry_src_flags(u8 *s, va_list *args); @@ -421,6 +427,9 @@ extern const int fib_entry_get_dpo_for_source ( fib_node_index_t fib_entry_index, fib_source_t source, dpo_id_t *dpo); +extern fib_node_index_t fib_entry_get_path_list_for_source ( + fib_node_index_t fib_entry_index, + fib_source_t source); extern adj_index_t fib_entry_get_adj(fib_node_index_t fib_entry_index); diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c index 39e719e6a7a..c79b745b5b5 100644 --- a/src/vnet/fib/fib_entry_src.c +++ b/src/vnet/fib/fib_entry_src.c @@ -757,6 +757,7 @@ fib_entry_src_action_uninstall (fib_entry_t *fib_entry) &fib_entry->fe_prefix, &fib_entry->fe_lb); + vlib_worker_wait_one_loop(); dpo_reset(&fib_entry->fe_lb); } } @@ -1797,6 +1798,25 @@ fib_entry_get_dpo_for_source (fib_node_index_t fib_entry_index, return (0); } +fib_node_index_t +fib_entry_get_path_list_for_source (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + return FIB_NODE_INDEX_INVALID; + 
+ fib_entry = fib_entry_get(fib_entry_index); + esrc = fib_entry_src_find(fib_entry, source); + + if (esrc) + return esrc->fes_pl; + + return FIB_NODE_INDEX_INVALID; +} + u32 fib_entry_get_resolving_interface_for_source (fib_node_index_t entry_index, fib_source_t source) diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c index 402369d1dfc..c5028dc8798 100644 --- a/src/vnet/fib/fib_entry_src_interface.c +++ b/src/vnet/fib/fib_entry_src_interface.c @@ -87,8 +87,16 @@ fib_entry_src_interface_update_glean (fib_entry_t *cover, if (fib_prefix_is_cover(&adj->sub_type.glean.rx_pfx, &local->fe_prefix)) { - adj->sub_type.glean.rx_pfx.fp_addr = local->fe_prefix.fp_addr; - return (1); + fib_entry_src_t *local_src; + + local_src = fib_entry_src_find (local, FIB_SOURCE_INTERFACE); + if (local_src != NULL) + { + adj->sub_type.glean.rx_pfx.fp_addr = + local->fe_prefix.fp_addr; + local_src->fes_flags |= FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN; + return (1); + } } } } @@ -116,6 +124,52 @@ fib_entry_src_interface_path_swap (fib_entry_src_t *src, src->fes_pl = fib_path_list_create(pl_flags, paths); } +typedef struct fesi_find_glean_ctx_t_ { + fib_node_index_t glean_node_index; +} fesi_find_glean_ctx_t; + +static walk_rc_t +fib_entry_src_interface_find_glean_walk (fib_entry_t *cover, + fib_node_index_t covered, + void *ctx) +{ + fesi_find_glean_ctx_t *find_glean_ctx = ctx; + fib_entry_t *covered_entry; + fib_entry_src_t *covered_src; + + covered_entry = fib_entry_get (covered); + covered_src = fib_entry_src_find (covered_entry, FIB_SOURCE_INTERFACE); + if ((covered_src != NULL) && + (covered_src->fes_flags & FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN)) + { + find_glean_ctx->glean_node_index = covered; + return WALK_STOP; + } + + return WALK_CONTINUE; +} + +static fib_entry_t * +fib_entry_src_interface_find_glean (fib_entry_t *cover) +{ + fib_entry_src_t *src; + + src = fib_entry_src_find (cover, FIB_SOURCE_INTERFACE); + if (src == NULL) + /* the cover is not 
an interface source */ + return NULL; + + fesi_find_glean_ctx_t ctx = { + .glean_node_index = ~0, + }; + + fib_entry_cover_walk (cover, fib_entry_src_interface_find_glean_walk, + &ctx); + + return (ctx.glean_node_index == ~0) ? NULL : + fib_entry_get (ctx.glean_node_index); +} + /* * Source activate. * Called when the source is teh new longer best source on the entry @@ -128,6 +182,8 @@ fib_entry_src_interface_activate (fib_entry_src_t *src, if (FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags) { + u8 update_glean; + /* * Track the covering attached/connected cover. This is so that * during an attached export of the cover, this local prefix is @@ -141,10 +197,17 @@ fib_entry_src_interface_activate (fib_entry_src_t *src, cover = fib_entry_get(src->u.interface.fesi_cover); + /* + * Before adding as a child of the cover, check whether an existing + * child has already been used to populate the glean adjacency. If so, + * we don't need to update the adjacency. + */ + update_glean = (fib_entry_src_interface_find_glean (cover) == NULL); src->u.interface.fesi_sibling = fib_entry_cover_track(cover, fib_entry_get_index(fib_entry)); - fib_entry_src_interface_update_glean(cover, fib_entry); + if (update_glean) + fib_entry_src_interface_update_glean(cover, fib_entry); } return (!0); @@ -167,15 +230,19 @@ fib_entry_src_interface_deactivate (fib_entry_src_t *src, if (FIB_NODE_INDEX_INVALID != src->u.interface.fesi_cover) { cover = fib_entry_get(src->u.interface.fesi_cover); - fib_entry_cover_untrack(cover, src->u.interface.fesi_sibling); src->u.interface.fesi_cover = FIB_NODE_INDEX_INVALID; src->u.interface.fesi_sibling = ~0; - fib_entry_cover_walk(cover, - fib_entry_src_interface_update_glean_walk, - NULL); + /* If this was the glean address, find a new one */ + if (src->fes_flags & FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN) + { + fib_entry_cover_walk(cover, + fib_entry_src_interface_update_glean_walk, + NULL); + src->fes_flags &= ~FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN; + } } } diff --git 
a/src/vnet/fib/fib_node.c b/src/vnet/fib/fib_node.c index ff72bcfde40..e668c4fc51f 100644 --- a/src/vnet/fib/fib_node.c +++ b/src/vnet/fib/fib_node.c @@ -268,7 +268,6 @@ fib_memory_show (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ /*? * The '<em>sh fib memory </em>' command displays the memory usage for each * FIB object type. @@ -301,4 +300,3 @@ VLIB_CLI_COMMAND (show_fib_memory, static) = { .function = fib_memory_show, .short_help = "show fib memory", }; -/* *INDENT-ON* */ diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index db78587fb27..95e7cb6ba7d 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -501,11 +501,9 @@ format_fib_path (u8 * s, va_list * args) else { s = format (s, " %U", - format_vnet_sw_interface_name, + format_vnet_sw_if_index_name, vnm, - vnet_get_sw_interface( - vnm, - path->attached_next_hop.fp_interface)); + path->attached_next_hop.fp_interface); if (vnet_sw_interface_is_p2p(vnet_get_main(), path->attached_next_hop.fp_interface)) { @@ -532,11 +530,8 @@ format_fib_path (u8 * s, va_list * args) else { s = format (s, " %U", - format_vnet_sw_interface_name, - vnm, - vnet_get_sw_interface( - vnm, - path->attached.fp_interface)); + format_vnet_sw_if_index_name, + vnm, path->attached.fp_interface); } break; case FIB_PATH_TYPE_RECURSIVE: @@ -587,11 +582,8 @@ format_fib_path (u8 * s, va_list * args) break; case FIB_PATH_TYPE_DVR: s = format (s, " %U", - format_vnet_sw_interface_name, - vnm, - vnet_get_sw_interface( - vnm, - path->dvr.fp_interface)); + format_vnet_sw_if_index_name, + vnm, path->dvr.fp_interface); break; case FIB_PATH_TYPE_DEAG: s = format (s, " %sfib-index:%d", @@ -1365,7 +1357,8 @@ fib_path_create (fib_node_index_t pl_index, dpo_copy(&path->exclusive.fp_ex_dpo, &rpath->dpo); } else if ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_PROHIBIT) || - (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_UNREACH)) + (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_UNREACH) || + (path->fp_cfg_flags & 
FIB_PATH_CFG_FLAG_DROP)) { path->fp_type = FIB_PATH_TYPE_SPECIAL; } @@ -1998,7 +1991,11 @@ fib_path_resolve (fib_node_index_t path_index) } else { - fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx); + ASSERT(!ip46_address_is_zero(&path->recursive.fp_nh.fp_ip)); + + fib_protocol_t fp = (ip46_address_is_ip4(&path->recursive.fp_nh.fp_ip) ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); + fib_prefix_from_ip46_addr(fp, &path->recursive.fp_nh.fp_ip, &pfx); } fib_table_lock(path->recursive.fp_tbl_id, diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 3a46d226ebd..b2a32d0da56 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -25,6 +25,13 @@ const static char * fib_table_flags_strings[] = FIB_TABLE_ATTRIBUTES; +/* + * Default names for IP4, IP6, and MPLS FIB table index 0. + * Nominally like "ipv6-VRF:0", but this will override that name if set + * in a config section of the startup.conf file. + */ +char *fib_table_default_names[FIB_PROTOCOL_MAX]; + fib_table_t * fib_table_get (fib_node_index_t index, fib_protocol_t proto) @@ -534,7 +541,11 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix, else if (fib_route_path_is_attached(path)) { path->frp_flags |= FIB_ROUTE_PATH_GLEAN; - fib_prefix_normalize(prefix, &path->frp_connected); + /* + * attached prefixes are not suitable as the source of ARP requests + * so don't save the prefix in the glean adj + */ + clib_memset(&path->frp_connected, 0, sizeof(path->frp_connected)); } if (*eflags & FIB_ENTRY_FLAG_DROP) { @@ -1149,21 +1160,29 @@ fib_table_find_or_create_and_lock_i (fib_protocol_t proto, fib_table = fib_table_get(fi, proto); - if (NULL == fib_table->ft_desc) + if (fib_table->ft_desc) + return fi; + + if (name && name[0]) { - if (name && name[0]) - { - fib_table->ft_desc = format(NULL, "%s", name); - } - else - { - fib_table->ft_desc = format(NULL, "%U-VRF:%d", - format_fib_protocol, proto, - table_id); - } + fib_table->ft_desc = format(NULL, "%s", name); + return fi; } 
- return (fi); + if (table_id == 0) + { + char *default_name = fib_table_default_names[proto]; + if (default_name && default_name[0]) + { + fib_table->ft_desc = format(NULL, "%s", default_name); + return fi; + } + } + + fib_table->ft_desc = format(NULL, "%U-VRF:%d", + format_fib_protocol, proto, + table_id); + return fi; } u32 diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index 11137e173cf..0eaaa67eea2 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -122,6 +122,15 @@ typedef struct fib_table_t_ u8* ft_desc; } fib_table_t; + +/** + * @brief + * Default names for IP4, IP6, and MPLS FIB table index 0. + * Nominally like "ipv4-VRF:0", but this will override that name if set + * in a config section of the startup.conf file. + */ +extern char *fib_table_default_names[FIB_PROTOCOL_MAX]; + /** * @brief * Format the description/name of the table diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c index 7eeb79fffa5..c4472c7122d 100644 --- a/src/vnet/fib/fib_types.c +++ b/src/vnet/fib/fib_types.c @@ -78,16 +78,15 @@ format_fib_mpls_label (u8 *s, va_list *ap) } void -fib_prefix_from_ip46_addr (const ip46_address_t *addr, +fib_prefix_from_ip46_addr (fib_protocol_t fproto, + const ip46_address_t *addr, fib_prefix_t *pfx) { - ASSERT(!ip46_address_is_zero(addr)); + ASSERT(FIB_PROTOCOL_MPLS != fproto); - pfx->fp_proto = ((ip46_address_is_ip4(addr) ? - FIB_PROTOCOL_IP4 : - FIB_PROTOCOL_IP6)); - pfx->fp_len = ((ip46_address_is_ip4(addr) ? - 32 : 128)); + pfx->fp_proto = fproto; + pfx->fp_len = ((FIB_PROTOCOL_IP4 == fproto) ? 
+ 32 : 128); pfx->fp_addr = *addr; pfx->___fp___pad = 0; } @@ -709,6 +708,13 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args) rpath->frp_proto = DPO_PROTO_IP4; rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX; } + else if (unformat (input, "rx-ip6 %U", + unformat_vnet_sw_interface, vnm, + &rpath->frp_sw_if_index)) + { + rpath->frp_proto = DPO_PROTO_IP6; + rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX; + } else if (unformat (input, "local")) { clib_memset (&rpath->frp_addr, 0, sizeof (rpath->frp_addr)); @@ -776,6 +782,7 @@ fib_route_path_is_attached (const fib_route_path_t *rpath) * L3 game with these */ if (rpath->frp_flags & (FIB_ROUTE_PATH_DVR | + FIB_ROUTE_PATH_INTF_RX | FIB_ROUTE_PATH_UDP_ENCAP)) { return (0); diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h index dbd4e97e867..b9346c75108 100644 --- a/src/vnet/fib/fib_types.h +++ b/src/vnet/fib/fib_types.h @@ -276,8 +276,9 @@ extern void fib_prefix_normalize(const fib_prefix_t *p, /** * \brief Host prefix from ip */ -extern void fib_prefix_from_ip46_addr (const ip46_address_t *addr, - fib_prefix_t *pfx); +extern void fib_prefix_from_ip46_addr (fib_protocol_t fproto, + const ip46_address_t *addr, + fib_prefix_t *pfx); extern u8 * format_fib_prefix(u8 * s, va_list * args); extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args); @@ -632,7 +633,7 @@ extern int fib_route_path_is_attached (const fib_route_path_t *rpath); /** * A help string to list the FIB path options */ -#define FIB_ROUTE_PATH_HELP "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]" +#define FIB_ROUTE_PATH_HELP "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table 
<value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] [out-labels <value value value>]" /** * return code to control pat-hlist walk diff --git a/src/vnet/fib/fib_urpf_list.c b/src/vnet/fib/fib_urpf_list.c index b1bbe7399d1..67be6699a0e 100644 --- a/src/vnet/fib/fib_urpf_list.c +++ b/src/vnet/fib/fib_urpf_list.c @@ -228,7 +228,6 @@ show_fib_urpf_list_command (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ /*? * The '<em>sh fib uRPF [index] </em>' command displays the uRPF lists * @@ -246,4 +245,3 @@ VLIB_CLI_COMMAND (show_fib_urpf_list, static) = { .function = show_fib_urpf_list_command, .short_help = "show fib uRPF", }; -/* *INDENT-OFF* */ diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c index b3b2b1e7944..236607cb891 100644 --- a/src/vnet/fib/fib_walk.c +++ b/src/vnet/fib/fib_walk.c @@ -611,13 +611,11 @@ fib_walk_process (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (fib_walk_process_node,static) = { .function = fib_walk_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "fib-walk", }; -/* *INDENT-ON* */ /** * @brief Allocate a new walk object diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index 8e580a54716..0eff8d0d485 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -621,10 +621,29 @@ ip4_show_fib (vlib_main_t * vm, * 32 4 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { .path = "show ip fib", .short_help = "show ip fib [summary] [table <table-id>] [index <fib-id>] [<ip4-addr>[/<mask>]] [mtrie] [detail]", .function = ip4_show_fib, }; -/* *INDENT-ON* */ + +static clib_error_t * +ip_config (vlib_main_t * vm, unformat_input_t * input) +{ + char *default_name = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "default-table-name %s", &default_name)) + ; + else + return clib_error_return (0, "unknown input '%U'", + 
format_unformat_error, input); + } + + fib_table_default_names[FIB_PROTOCOL_IP4] = default_name; + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (ip_config, "ip"); diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 6c73d19d8e3..d37b77e08a4 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -862,19 +862,18 @@ ip6_show_fib (vlib_main_t * vm, * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { .path = "show ip6 fib", .short_help = "show ip6 fib [summary] [table <table-id>] [index <fib-id>] [<ip6-addr>[/<width>]] [detail]", .function = ip6_show_fib, }; -/* *INDENT-ON* */ static clib_error_t * ip6_config (vlib_main_t * vm, unformat_input_t * input) { uword heapsize = 0; u32 nbuckets = 0; + char *default_name = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -883,6 +882,8 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize)) ; + else if (unformat (input, "default-table-name %s", &default_name)) + ; else return clib_error_return (0, "unknown input '%U'", format_unformat_error, input); @@ -890,6 +891,7 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input) ip6_fib_table_nbuckets = nbuckets; ip6_fib_table_size = heapsize; + fib_table_default_names[FIB_PROTOCOL_IP6] = default_name; return 0; } diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index 5dcd70b4c53..767fc84c8a8 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -481,3 +481,24 @@ VLIB_CLI_COMMAND (mpls_fib_show_command, static) = { .short_help = "show mpls fib [summary] [table <n>]", .function = mpls_fib_show, }; + +static clib_error_t * +mpls_config (vlib_main_t * vm, unformat_input_t * input) +{ + char *default_name = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "default-table-name %s", &default_name)) + ; + else + return clib_error_return (0, 
"unknown input '%U'", + format_unformat_error, input); + } + + fib_table_default_names[FIB_PROTOCOL_MPLS] = default_name; + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (mpls_config, "mpls"); diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api index dff3eec370d..1e807b539d5 100644 --- a/src/vnet/flow/flow.api +++ b/src/vnet/flow/flow.api @@ -13,7 +13,7 @@ * limitations under the License. */ -option version = "0.0.3"; +option version = "1.0.3"; import "vnet/interface_types.api"; import "vnet/ip/ip_types.api"; @@ -26,6 +26,8 @@ import "vnet/flow/flow_types.api"; */ define flow_add { + option deprecated; + u32 client_index; u32 context; vl_api_flow_rule_t flow; @@ -52,6 +54,8 @@ define flow_add_v2 */ define flow_add_reply { + option deprecated; + u32 context; i32 retval; u32 flow_index; diff --git a/src/vnet/flow/flow.c b/src/vnet/flow/flow.c index 9b6a376af3e..eda15356958 100644 --- a/src/vnet/flow/flow.c +++ b/src/vnet/flow/flow.c @@ -74,12 +74,10 @@ vnet_flow_del (vnet_main_t * vnm, u32 flow_index) if (f == 0) return VNET_FLOW_ERROR_NO_SUCH_ENTRY; - /* *INDENT-OFF* */ hash_foreach (hw_if_index, private_data, f->private_data, ({ vnet_flow_disable (vnm, flow_index, hw_if_index); })); - /* *INDENT-ON* */ hash_free (f->private_data); clib_memset (f, 0, sizeof (*f)); diff --git a/src/vnet/flow/flow.h b/src/vnet/flow/flow.h index 194579b88d8..ada822257e3 100644 --- a/src/vnet/flow/flow.h +++ b/src/vnet/flow/flow.h @@ -45,7 +45,16 @@ _ (IP4_GTPC, ip4_gtpc, "ipv4-gtpc") \ _ (IP4_GTPU, ip4_gtpu, "ipv4-gtpu") \ /* generic flow */ \ - _ (GENERIC, generic, "generic") + _ (GENERIC, generic, "generic") \ + /* IP in IP */ \ + _ (IP6_IP6, ip6_ip6, "ipv6-ipv6") \ + _ (IP6_IP4, ip6_ip4, "ipv6-ipv4") \ + _ (IP4_IP6, ip4_ip6, "ipv4-ipv6") \ + _ (IP4_IP4, ip4_ip4, "ipv4-ipv4") \ + _ (IP6_IP6_N_TUPLE, ip6_ip6_n_tuple, "ipv6-ipv6-n-tuple") \ + _ (IP6_IP4_N_TUPLE, ip6_ip4_n_tuple, "ipv6-ipv4-n-tuple") \ + _ (IP4_IP6_N_TUPLE, ip4_ip6_n_tuple, "ipv4-ipv6-n-tuple") \ + _ 
(IP4_IP4_N_TUPLE, ip4_ip4_n_tuple, "ipv4-ipv4-n-tuple") #define foreach_flow_entry_ethernet \ _fe(ethernet_header_t, eth_hdr) @@ -106,6 +115,42 @@ foreach_flow_entry_ip4_n_tuple \ _fe(u32, teid) +#define foreach_flow_entry_ip6_ip6 \ + foreach_flow_entry_ip6 _fe (ip6_address_and_mask_t, in_src_addr) \ + _fe (ip6_address_and_mask_t, in_dst_addr) \ + _fe (ip_prot_and_mask_t, in_protocol) + +#define foreach_flow_entry_ip6_ip6_n_tuple \ + foreach_flow_entry_ip6_ip6 _fe (ip_port_and_mask_t, in_src_port) \ + _fe (ip_port_and_mask_t, in_dst_port) + +#define foreach_flow_entry_ip6_ip4 \ + foreach_flow_entry_ip6 _fe (ip4_address_and_mask_t, in_src_addr) \ + _fe (ip4_address_and_mask_t, in_dst_addr) \ + _fe (ip_prot_and_mask_t, in_protocol) + +#define foreach_flow_entry_ip6_ip4_n_tuple \ + foreach_flow_entry_ip6_ip4 _fe (ip_port_and_mask_t, in_src_port) \ + _fe (ip_port_and_mask_t, in_dst_port) + +#define foreach_flow_entry_ip4_ip6 \ + foreach_flow_entry_ip4 _fe (ip6_address_and_mask_t, in_src_addr) \ + _fe (ip6_address_and_mask_t, in_dst_addr) \ + _fe (ip_prot_and_mask_t, in_protocol) + +#define foreach_flow_entry_ip4_ip6_n_tuple \ + foreach_flow_entry_ip4_ip6 _fe (ip_port_and_mask_t, in_src_port) \ + _fe (ip_port_and_mask_t, in_dst_port) + +#define foreach_flow_entry_ip4_ip4 \ + foreach_flow_entry_ip4 _fe (ip4_address_and_mask_t, in_src_addr) \ + _fe (ip4_address_and_mask_t, in_dst_addr) \ + _fe (ip_prot_and_mask_t, in_protocol) + +#define foreach_flow_entry_ip4_ip4_n_tuple \ + foreach_flow_entry_ip4_ip4 _fe (ip_port_and_mask_t, in_src_port) \ + _fe (ip_port_and_mask_t, in_dst_port) + #define foreach_flow_entry_generic _fe (generic_pattern_t, pattern) #define foreach_flow_action \ @@ -155,6 +200,7 @@ typedef enum _ (19, NVGRE, "nvgre") \ _ (20, GTPU, "gtpu") \ _ (21, ESP, "esp") \ + _ (22, L2TPV3, "l2tpv3") \ _ (60, L4_DST_ONLY, "l4-dst-only") \ _ (61, L4_SRC_ONLY, "l4-src-only") \ _ (62, L3_DST_ONLY, "l3-dst-only") \ diff --git a/src/vnet/flow/flow_api.c 
b/src/vnet/flow/flow_api.c index 0e25fb3017b..bfe97ec2978 100644 --- a/src/vnet/flow/flow_api.c +++ b/src/vnet/flow/flow_api.c @@ -299,12 +299,10 @@ vl_api_flow_add_t_handler (vl_api_flow_add_t * mp) rv = vnet_flow_add (vnm, &flow, &flow_index); out: - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_FLOW_ADD_REPLY, ({ rmp->flow_index = ntohl (flow_index); })); - /* *INDENT-ON* */ } static void @@ -328,7 +326,7 @@ vl_api_flow_add_v2_t_handler (vl_api_flow_add_v2_t *mp) flow.buffer_advance = ntohl (f->buffer_advance); flow.queue_index = ntohl (f->queue_index); flow.queue_num = ntohl (f->queue_num); - flow.rss_types = ntohl (f->rss_types); + flow.rss_types = clib_net_to_host_u64 (f->rss_types); flow.rss_fun = ntohl (f->rss_fun); switch (flow.type) diff --git a/src/vnet/flow/flow_cli.c b/src/vnet/flow/flow_cli.c index 5f44a099f57..e4b73717241 100644 --- a/src/vnet/flow/flow_cli.c +++ b/src/vnet/flow/flow_cli.c @@ -138,13 +138,11 @@ format_flow_enabled_hw (u8 * s, va_list * args) u32 hw_if_index; uword private_data; vnet_main_t *vnm = vnet_get_main (); - /* *INDENT-OFF* */ hash_foreach (hw_if_index, private_data, f->private_data, ({ t = format (t, "%s%U", t ? 
", " : "", format_vnet_hw_if_index_name, vnm, hw_if_index); })); - /* *INDENT-ON* */ s = format (s, "%v", t); vec_free (t); return s; @@ -228,7 +226,6 @@ show_flow_entry (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%s: %s", "spec", f->generic.pattern.spec); vlib_cli_output (vm, "%s: %s", "mask", f->generic.pattern.mask); } - /* *INDENT-OFF* */ hash_foreach (hw_if_index, private_data, f->private_data, ({ hi = vnet_get_hw_interface (vnm, hw_if_index); @@ -239,12 +236,10 @@ show_flow_entry (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, " %U\n", dev_class->format_flow, hi->dev_instance, f->index, private_data); })); - /* *INDENT-ON* */ return 0; } no_args: - /* *INDENT-OFF* */ pool_foreach (f, fm->global_flow_pool) { vlib_cli_output (vm, "%U\n", format_flow, f); @@ -254,18 +249,15 @@ no_args: vlib_cli_output (vm, "%s: %s", "mask", f->generic.pattern.mask); } } - /* *INDENT-ON* */ return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_flow_entry_command, static) = { .path = "show flow entry", .short_help = "show flow entry [index <index>]", .function = show_flow_entry, }; -/* *INDENT-ON* */ static clib_error_t * show_flow_ranges (vlib_main_t * vm, unformat_input_t * input, @@ -276,22 +268,18 @@ show_flow_ranges (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%8s %8s %s", "Start", "Count", "Owner"); - /* *INDENT-OFF* */ vec_foreach (r, fm->ranges) { vlib_cli_output (vm, "%8u %8u %s", r->start, r->count, r->owner); }; - /* *INDENT-ON* */ return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_flow_ranges_command, static) = { .path = "show flow ranges", .short_help = "show flow ranges", .function = show_flow_ranges, }; -/* *INDENT-ON* */ static clib_error_t * show_flow_interface (vlib_main_t * vm, unformat_input_t * input, @@ -329,13 +317,11 @@ show_flow_interface (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_flow_interface_command, static) = { .path = "show 
flow interface", .short_help = "show flow interface <interface name>", .function = show_flow_interface, }; -/* *INDENT-ON* */ static clib_error_t * test_flow (vlib_main_t * vm, unformat_input_t * input, @@ -366,15 +352,16 @@ test_flow (vlib_main_t * vm, unformat_input_t * input, u32 vni = 0; u32 queue_start = 0, queue_end = 0; vnet_flow_type_t type = VNET_FLOW_TYPE_UNKNOWN; - ip4_address_and_mask_t ip4s = { }; - ip4_address_and_mask_t ip4d = { }; - ip6_address_and_mask_t ip6s = { }; - ip6_address_and_mask_t ip6d = { }; - ip_port_and_mask_t sport = { }; - ip_port_and_mask_t dport = { }; - ip_prot_and_mask_t protocol = { }; + ip4_address_and_mask_t ip4s = {}, in_ip4s = {}; + ip4_address_and_mask_t ip4d = {}, in_ip4d = {}; + ip6_address_and_mask_t ip6s = {}, in_ip6s = {}; + ip6_address_and_mask_t ip6d = {}, in_ip6d = {}; + ip_port_and_mask_t sport = {}, in_sport = {}; + ip_port_and_mask_t dport = {}, in_dport = {}; + ip_prot_and_mask_t protocol = {}, in_proto = {}; u16 eth_type; - bool tcp_udp_port_set = false; + bool inner_ip4_set = false, inner_ip6_set = false; + bool tcp_udp_port_set = false, inner_port_set = false; bool gtpc_set = false; bool gtpu_set = false; bool vni_set = false; @@ -415,12 +402,24 @@ test_flow (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "dst-ip %U", unformat_ip4_address_and_mask, &ip4d)) flow_class = FLOW_IPV4_CLASS; + else if (unformat (line_input, "in-src-ip %U", + unformat_ip4_address_and_mask, &in_ip4s)) + inner_ip4_set = true; + else if (unformat (line_input, "in-dst-ip %U", + unformat_ip4_address_and_mask, &in_ip4d)) + inner_ip4_set = true; else if (unformat (line_input, "ip6-src-ip %U", unformat_ip6_address_and_mask, &ip6s)) flow_class = FLOW_IPV6_CLASS; else if (unformat (line_input, "ip6-dst-ip %U", unformat_ip6_address_and_mask, &ip6d)) flow_class = FLOW_IPV6_CLASS; + else if (unformat (line_input, "in-ip6-src-ip %U", + unformat_ip6_address_and_mask, &in_ip6s)) + inner_ip6_set = true; + else if 
(unformat (line_input, "in-ip6-dst-ip %U", + unformat_ip6_address_and_mask, &in_ip6d)) + inner_ip6_set = true; else if (unformat (line_input, "src-port %U", unformat_ip_port_and_mask, &sport)) tcp_udp_port_set = true; @@ -432,6 +431,15 @@ test_flow (vlib_main_t * vm, unformat_input_t * input, (line_input, "proto %U", unformat_ip_protocol_and_mask, &protocol)) ; + else if (unformat (line_input, "in-src-port %U", + unformat_ip_port_and_mask, &in_sport)) + inner_port_set = true; + else if (unformat (line_input, "in-dst-port %U", + unformat_ip_port_and_mask, &in_dport)) + inner_port_set = true; + else if (unformat (line_input, "in-proto %U", + unformat_ip_protocol_and_mask, &in_proto)) + ; else if (unformat (line_input, "gtpc teid %u", &teid)) gtpc_set = true; else if (unformat (line_input, "gtpu teid %u", &teid)) @@ -592,6 +600,22 @@ test_flow (vlib_main_t * vm, unformat_input_t * input, type = VNET_FLOW_TYPE_IP4_IPSEC_AH; else if (tcp_udp_port_set) type = VNET_FLOW_TYPE_IP4_N_TUPLE; + else if (inner_ip4_set) + { + if (inner_port_set) + type = VNET_FLOW_TYPE_IP4_IP4_N_TUPLE; + else + type = VNET_FLOW_TYPE_IP4_IP4; + protocol.prot = IP_PROTOCOL_IP_IN_IP; + } + else if (inner_ip6_set) + { + if (inner_port_set) + type = VNET_FLOW_TYPE_IP4_IP6_N_TUPLE; + else + type = VNET_FLOW_TYPE_IP4_IP6; + protocol.prot = IP_PROTOCOL_IPV6; + } else type = VNET_FLOW_TYPE_IP4; break; @@ -600,6 +624,22 @@ test_flow (vlib_main_t * vm, unformat_input_t * input, type = VNET_FLOW_TYPE_IP6_N_TUPLE; else if (vni_set) type = VNET_FLOW_TYPE_IP6_VXLAN; + else if (inner_ip4_set) + { + if (inner_port_set) + type = VNET_FLOW_TYPE_IP6_IP4_N_TUPLE; + else + type = VNET_FLOW_TYPE_IP6_IP4; + protocol.prot = IP_PROTOCOL_IP_IN_IP; + } + else if (inner_ip6_set) + { + if (inner_port_set) + type = VNET_FLOW_TYPE_IP6_IP6_N_TUPLE; + else + type = VNET_FLOW_TYPE_IP6_IP6; + protocol.prot = IP_PROTOCOL_IPV6; + } else type = VNET_FLOW_TYPE_IP6; break; @@ -660,6 +700,30 @@ test_flow (vlib_main_t * vm, 
unformat_input_t * input, case IP_PROTOCOL_IPSEC_AH: flow.ip4_ipsec_esp.spi = spi; break; + case IP_PROTOCOL_IP_IN_IP: + clib_memcpy (&flow.ip4_ip4.in_src_addr, &in_ip4s, + sizeof (ip4_address_and_mask_t)); + clib_memcpy (&flow.ip4_ip4.in_dst_addr, &in_ip4d, + sizeof (ip4_address_and_mask_t)); + if (type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) + { + flow.ip4_ip4.in_protocol.prot = in_proto.prot; + flow.ip4_ip4_n_tuple.in_src_port = in_sport; + flow.ip4_ip4_n_tuple.in_dst_port = in_dport; + } + break; + case IP_PROTOCOL_IPV6: + clib_memcpy (&flow.ip4_ip6.in_src_addr, &in_ip6s, + sizeof (ip6_address_and_mask_t)); + clib_memcpy (&flow.ip4_ip6.in_dst_addr, &in_ip6d, + sizeof (ip6_address_and_mask_t)); + if (type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE) + { + flow.ip4_ip6.in_protocol.prot = in_proto.prot; + flow.ip4_ip6_n_tuple.in_src_port = in_sport; + flow.ip4_ip6_n_tuple.in_dst_port = in_dport; + } + break; default: break; } @@ -693,6 +757,30 @@ test_flow (vlib_main_t * vm, unformat_input_t * input, if (type == VNET_FLOW_TYPE_IP6_VXLAN) flow.ip6_vxlan.vni = vni; break; + case IP_PROTOCOL_IP_IN_IP: + clib_memcpy (&flow.ip6_ip4.in_src_addr, &in_ip4s, + sizeof (ip4_address_and_mask_t)); + clib_memcpy (&flow.ip6_ip4.in_dst_addr, &in_ip4d, + sizeof (ip4_address_and_mask_t)); + if (type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) + { + flow.ip6_ip4.in_protocol.prot = in_proto.prot; + flow.ip6_ip4_n_tuple.in_src_port = in_sport; + flow.ip6_ip4_n_tuple.in_dst_port = in_dport; + } + break; + case IP_PROTOCOL_IPV6: + clib_memcpy (&flow.ip6_ip6.in_src_addr, &in_ip6s, + sizeof (ip6_address_and_mask_t)); + clib_memcpy (&flow.ip6_ip6.in_dst_addr, &in_ip6d, + sizeof (ip6_address_and_mask_t)); + if (type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE) + { + flow.ip6_ip6.in_protocol.prot = in_proto.prot; + flow.ip6_ip6_n_tuple.in_src_port = in_sport; + flow.ip6_ip6_n_tuple.in_dst_port = in_dport; + } + break; default: break; } @@ -731,7 +819,6 @@ test_flow (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* 
*INDENT-OFF* */ VLIB_CLI_COMMAND (test_flow_command, static) = { .path = "test flow", .short_help = "test flow [add|del|enable|disable] [index <id>] " @@ -748,7 +835,6 @@ VLIB_CLI_COMMAND (test_flow_command, static) = { "[rss queues <queue_start> to <queue_end>]", .function = test_flow, }; -/* *INDENT-ON* */ static u8 * format_flow_match_element (u8 * s, va_list * args) diff --git a/src/vnet/gre/FEATURE.yaml b/src/vnet/gre/FEATURE.yaml deleted file mode 100644 index 4b35b870dc3..00000000000 --- a/src/vnet/gre/FEATURE.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -name: Generic Routing Encapsulation -maintainer: Neale Ranns <nranns@cisco.com> -features: - - L3 tunnels, all combinations of IPv4 and IPv6 - - Encap/Decap flags to control the copying of DSCP, ECN, DF from overlay to - underlay and vice-versa. - - L2 tunnels -missing: - - GRE keys -description: "An implementation of Generic Routing Encapsulation (GRE)" -state: production -properties: [API, CLI, MULTITHREAD] diff --git a/src/vnet/gre/error.def b/src/vnet/gre/error.def deleted file mode 100644 index 161ecc1d874..00000000000 --- a/src/vnet/gre/error.def +++ /dev/null @@ -1,23 +0,0 @@ -/* - * gre_error.def: gre errors - * - * Copyright (c) 2012 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -gre_error (NONE, "no error") -gre_error (UNKNOWN_PROTOCOL, "unknown protocol") -gre_error (UNSUPPORTED_VERSION, "unsupported version") -gre_error (PKTS_DECAP, "GRE input packets decapsulated") -gre_error (PKTS_ENCAP, "GRE output packets encapsulated") -gre_error (NO_SUCH_TUNNEL, "GRE input packets dropped due to missing tunnel") diff --git a/src/vnet/gre/gre.api b/src/vnet/gre/gre.api deleted file mode 100644 index 9c69ba4007d..00000000000 --- a/src/vnet/gre/gre.api +++ /dev/null @@ -1,110 +0,0 @@ -/* Hey Emacs use -*- mode: C -*- */ -/* - * Copyright (c) 2015-2020 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -option version = "2.1.1"; - -import "vnet/interface_types.api"; -import "vnet/tunnel/tunnel_types.api"; -import "vnet/ip/ip_types.api"; - -/** \brief A GRE tunnel type -*/ -enum gre_tunnel_type : u8 -{ - GRE_API_TUNNEL_TYPE_L3 = 0, - /* L2 Transparent Ethernet Bridge */ - GRE_API_TUNNEL_TYPE_TEB, - /* Encapsulated Remote Switched Port ANalyzer */ - GRE_API_TUNNEL_TYPE_ERSPAN, -}; - -/** \brief A composite type uniquely defining a GRE tunnel. - @param type - tunnel type (see enum definition), 0: L3, 1: TEB, 2: ERSPAN - @param mode - P2P or P2MP - @param flags - to control encap/decap behaviour - @param session_id - session for ERSPAN tunnel, range 0-1023 - @param instance - optional unique custom device instance, else ~0. 
- @param outer_table_id - Encap FIB table ID - @param sw_if_index - ignored on create/delete, present in details. - @param src - Source IP address - @param dst - Destination IP address, can be multicast -*/ -typedef gre_tunnel -{ - vl_api_gre_tunnel_type_t type; - vl_api_tunnel_mode_t mode; - vl_api_tunnel_encap_decap_flags_t flags; - u16 session_id; - u32 instance; - u32 outer_table_id; - vl_api_interface_index_t sw_if_index; - vl_api_address_t src; - vl_api_address_t dst; -}; - -/** \brief Add or delete a single GRE tunnel. - @param client_index - opaque cookie to identify the sender. - @param context - sender context, to match reply w/ request. - @param is_add - add if true, delete if false. - @param tunnel - tunnel definition to add or delete. -*/ -define gre_tunnel_add_del -{ - u32 client_index; - u32 context; - bool is_add; - vl_api_gre_tunnel_t tunnel; -}; - -/** \brief Add or delete a single GRE tunnel. - @param context - sender context, to match reply w/ request. - @param retval - return code for the request. - @param sw_if_index - the interface corresponding to the affected tunnel. -*/ -define gre_tunnel_add_del_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; - -/** \brief Dump details of all or just a single GRE tunnel. - @param client_index - opaque cookie to identify the sender. - @param context - sender context, to match reply w/ request. - @param sw_if_index - filter for tunnel of this interface index, ~0 for all. -*/ -define gre_tunnel_dump -{ - u32 client_index; - u32 context; - vl_api_interface_index_t sw_if_index; -}; - -/** \brief Details response for one of the requested GRE tunnels. - @param context - sender context, to match reply w/ request. - @param tunnel - definition of the dumped tunnel. 
-*/ -define gre_tunnel_details -{ - u32 context; - vl_api_gre_tunnel_t tunnel; -}; - -/* - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c deleted file mode 100644 index dc735e6a77b..00000000000 --- a/src/vnet/gre/gre.c +++ /dev/null @@ -1,867 +0,0 @@ -/* - * gre.c: gre - * - * Copyright (c) 2012 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <vnet/vnet.h> -#include <vnet/gre/gre.h> -#include <vnet/adj/adj_midchain.h> -#include <vnet/tunnel/tunnel_dp.h> - -extern gre_main_t gre_main; - -#ifndef CLIB_MARCH_VARIANT -gre_main_t gre_main; - -typedef struct -{ - union - { - ip4_and_gre_header_t ip4_and_gre; - u64 as_u64[3]; - }; -} ip4_and_gre_union_t; - -typedef struct -{ - union - { - ip6_and_gre_header_t ip6_and_gre; - u64 as_u64[3]; - }; -} ip6_and_gre_union_t; -#endif /* CLIB_MARCH_VARIANT */ - - -/* Packet trace structure */ -typedef struct -{ - /* Tunnel-id / index in tunnel vector */ - u32 tunnel_id; - - /* pkt length */ - u32 length; - - /* tunnel ip addresses */ - ip46_address_t src; - ip46_address_t dst; -} gre_tx_trace_t; - -extern u8 *format_gre_tx_trace (u8 * s, va_list * args); - -#ifndef CLIB_MARCH_VARIANT -u8 * -format_gre_tx_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - gre_tx_trace_t *t = va_arg (*args, 
gre_tx_trace_t *); - - s = format (s, "GRE: tunnel %d len %d src %U dst %U", - t->tunnel_id, t->length, - format_ip46_address, &t->src, IP46_TYPE_ANY, - format_ip46_address, &t->dst, IP46_TYPE_ANY); - return s; -} - -u8 * -format_gre_protocol (u8 * s, va_list * args) -{ - gre_protocol_t p = va_arg (*args, u32); - gre_main_t *gm = &gre_main; - gre_protocol_info_t *pi = gre_get_protocol_info (gm, p); - - if (pi) - s = format (s, "%s", pi->name); - else - s = format (s, "0x%04x", p); - - return s; -} - -u8 * -format_gre_header_with_length (u8 * s, va_list * args) -{ - gre_main_t *gm = &gre_main; - gre_header_t *h = va_arg (*args, gre_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - gre_protocol_t p = clib_net_to_host_u16 (h->protocol); - u32 indent, header_bytes; - - header_bytes = sizeof (h[0]); - if (max_header_bytes != 0 && header_bytes > max_header_bytes) - return format (s, "gre header truncated"); - - indent = format_get_indent (s); - - s = format (s, "GRE %U", format_gre_protocol, p); - - if (max_header_bytes != 0 && header_bytes < max_header_bytes) - { - gre_protocol_info_t *pi = gre_get_protocol_info (gm, p); - vlib_node_t *node = vlib_get_node (gm->vlib_main, pi->node_index); - if (node->format_buffer) - s = format (s, "\n%U%U", - format_white_space, indent, - node->format_buffer, (void *) (h + 1), - max_header_bytes - header_bytes); - } - - return s; -} - -u8 * -format_gre_header (u8 * s, va_list * args) -{ - gre_header_t *h = va_arg (*args, gre_header_t *); - return format (s, "%U", format_gre_header_with_length, h, 0); -} - -/* Returns gre protocol as an int in host byte order. */ -uword -unformat_gre_protocol_host_byte_order (unformat_input_t * input, - va_list * args) -{ - u16 *result = va_arg (*args, u16 *); - gre_main_t *gm = &gre_main; - int i; - - /* Named type. 
*/ - if (unformat_user (input, unformat_vlib_number_by_name, - gm->protocol_info_by_name, &i)) - { - gre_protocol_info_t *pi = vec_elt_at_index (gm->protocol_infos, i); - *result = pi->protocol; - return 1; - } - - return 0; -} - -uword -unformat_gre_protocol_net_byte_order (unformat_input_t * input, - va_list * args) -{ - u16 *result = va_arg (*args, u16 *); - if (!unformat_user (input, unformat_gre_protocol_host_byte_order, result)) - return 0; - *result = clib_host_to_net_u16 ((u16) * result); - return 1; -} - -uword -unformat_gre_header (unformat_input_t * input, va_list * args) -{ - u8 **result = va_arg (*args, u8 **); - gre_header_t _h, *h = &_h; - u16 p; - - if (!unformat (input, "%U", unformat_gre_protocol_host_byte_order, &p)) - return 0; - - h->protocol = clib_host_to_net_u16 (p); - - /* Add header to result. */ - { - void *p; - u32 n_bytes = sizeof (h[0]); - - vec_add2 (*result, p, n_bytes); - clib_memcpy (p, h, n_bytes); - } - - return 1; -} - -static int -gre_proto_from_vnet_link (vnet_link_t link) -{ - switch (link) - { - case VNET_LINK_IP4: - return (GRE_PROTOCOL_ip4); - case VNET_LINK_IP6: - return (GRE_PROTOCOL_ip6); - case VNET_LINK_MPLS: - return (GRE_PROTOCOL_mpls_unicast); - case VNET_LINK_ETHERNET: - return (GRE_PROTOCOL_teb); - case VNET_LINK_ARP: - return (GRE_PROTOCOL_arp); - case VNET_LINK_NSH: - ASSERT (0); - break; - } - ASSERT (0); - return (GRE_PROTOCOL_ip4); -} - -static u8 * -gre_build_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - vnet_link_t link_type, const void *dst_address) -{ - gre_main_t *gm = &gre_main; - const ip46_address_t *dst; - ip4_and_gre_header_t *h4; - ip6_and_gre_header_t *h6; - gre_header_t *gre; - u8 *rewrite = NULL; - gre_tunnel_t *t; - u32 ti; - u8 is_ipv6; - - dst = dst_address; - ti = gm->tunnel_index_by_sw_if_index[sw_if_index]; - - if (~0 == ti) - /* not one of ours */ - return (0); - - t = pool_elt_at_index (gm->tunnels, ti); - - is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 
1 : 0; - - if (!is_ipv6) - { - vec_validate (rewrite, sizeof (*h4) - 1); - h4 = (ip4_and_gre_header_t *) rewrite; - gre = &h4->gre; - h4->ip4.ip_version_and_header_length = 0x45; - h4->ip4.ttl = 254; - h4->ip4.protocol = IP_PROTOCOL_GRE; - /* fixup ip4 header length and checksum after-the-fact */ - h4->ip4.src_address.as_u32 = t->tunnel_src.ip4.as_u32; - h4->ip4.dst_address.as_u32 = dst->ip4.as_u32; - h4->ip4.checksum = ip4_header_checksum (&h4->ip4); - } - else - { - vec_validate (rewrite, sizeof (*h6) - 1); - h6 = (ip6_and_gre_header_t *) rewrite; - gre = &h6->gre; - h6->ip6.ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (6 << 28); - h6->ip6.hop_limit = 255; - h6->ip6.protocol = IP_PROTOCOL_GRE; - /* fixup ip6 header length and checksum after-the-fact */ - h6->ip6.src_address.as_u64[0] = t->tunnel_src.ip6.as_u64[0]; - h6->ip6.src_address.as_u64[1] = t->tunnel_src.ip6.as_u64[1]; - h6->ip6.dst_address.as_u64[0] = dst->ip6.as_u64[0]; - h6->ip6.dst_address.as_u64[1] = dst->ip6.as_u64[1]; - } - - if (PREDICT_FALSE (t->type == GRE_TUNNEL_TYPE_ERSPAN)) - { - gre->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_erspan); - gre->flags_and_version = clib_host_to_net_u16 (GRE_FLAGS_SEQUENCE); - } - else - gre->protocol = - clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type)); - - return (rewrite); -} - -static void -gre44_fixup (vlib_main_t * vm, - const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data) -{ - tunnel_encap_decap_flags_t flags; - ip4_and_gre_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - flags = pointer_to_uword (data); - - /* Fixup the checksum and len fields in the GRE tunnel encap - * that was applied at the midchain node */ - ip0->ip4.length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - tunnel_encap_fixup_4o4 (flags, (ip4_header_t *) (ip0 + 1), &ip0->ip4); - ip0->ip4.checksum = ip4_header_checksum (&ip0->ip4); -} - -static void -gre64_fixup (vlib_main_t * vm, - const ip_adjacency_t * adj, 
vlib_buffer_t * b0, const void *data) -{ - tunnel_encap_decap_flags_t flags; - ip4_and_gre_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - flags = pointer_to_uword (data); - - /* Fixup the checksum and len fields in the GRE tunnel encap - * that was applied at the midchain node */ - ip0->ip4.length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - tunnel_encap_fixup_6o4 (flags, (ip6_header_t *) (ip0 + 1), &ip0->ip4); - ip0->ip4.checksum = ip4_header_checksum (&ip0->ip4); -} - -static void -grex4_fixup (vlib_main_t * vm, - const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data) -{ - ip4_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Fixup the checksum and len fields in the GRE tunnel encap - * that was applied at the midchain node */ - ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - ip0->checksum = ip4_header_checksum (ip0); -} - -static void -gre46_fixup (vlib_main_t * vm, - const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data) -{ - tunnel_encap_decap_flags_t flags; - ip6_and_gre_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - flags = pointer_to_uword (data); - - /* Fixup the payload length field in the GRE tunnel encap that was applied - * at the midchain node */ - ip0->ip6.payload_length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (ip0->ip6)); - tunnel_encap_fixup_4o6 (flags, b0, (ip4_header_t *) (ip0 + 1), &ip0->ip6); -} - -static void -gre66_fixup (vlib_main_t * vm, - const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data) -{ - tunnel_encap_decap_flags_t flags; - ip6_and_gre_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - flags = pointer_to_uword (data); - - /* Fixup the payload length field in the GRE tunnel encap that was applied - * at the midchain node */ - ip0->ip6.payload_length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (ip0->ip6)); - tunnel_encap_fixup_6o6 (flags, 
(ip6_header_t *) (ip0 + 1), &ip0->ip6); -} - -static void -grex6_fixup (vlib_main_t * vm, - const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data) -{ - ip6_and_gre_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Fixup the payload length field in the GRE tunnel encap that was applied - * at the midchain node */ - ip0->ip6.payload_length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (ip0->ip6)); -} - -/** - * return the appropriate fixup function given the overlay (link-type) and - * underlay (fproto) combination - */ -static adj_midchain_fixup_t -gre_get_fixup (fib_protocol_t fproto, vnet_link_t lt) -{ - if (fproto == FIB_PROTOCOL_IP6 && lt == VNET_LINK_IP6) - return (gre66_fixup); - if (fproto == FIB_PROTOCOL_IP6 && lt == VNET_LINK_IP4) - return (gre46_fixup); - if (fproto == FIB_PROTOCOL_IP4 && lt == VNET_LINK_IP6) - return (gre64_fixup); - if (fproto == FIB_PROTOCOL_IP4 && lt == VNET_LINK_IP4) - return (gre44_fixup); - if (fproto == FIB_PROTOCOL_IP6) - return (grex6_fixup); - if (fproto == FIB_PROTOCOL_IP4) - return (grex4_fixup); - - ASSERT (0); - return (gre44_fixup); -} - -void -gre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) -{ - gre_main_t *gm = &gre_main; - gre_tunnel_t *t; - adj_flags_t af; - u32 ti; - - ti = gm->tunnel_index_by_sw_if_index[sw_if_index]; - t = pool_elt_at_index (gm->tunnels, ti); - af = ADJ_FLAG_NONE; - - /* - * the user has not requested that the load-balancing be based on - * a flow hash of the inner packet. so use the stacking to choose - * a path. 
- */ - if (!(t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH)) - af |= ADJ_FLAG_MIDCHAIN_IP_STACK; - - adj_nbr_midchain_update_rewrite - (ai, gre_get_fixup (t->tunnel_dst.fp_proto, - adj_get_link_type (ai)), - uword_to_pointer (t->flags, void *), af, - gre_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai), - &t->tunnel_dst.fp_addr)); - - gre_tunnel_stack (ai); -} - -adj_walk_rc_t -mgre_mk_complete_walk (adj_index_t ai, void *data) -{ - mgre_walk_ctx_t *ctx = data; - adj_flags_t af; - - af = ADJ_FLAG_NONE; - - /* - * the user has not requested that the load-balancing be based on - * a flow hash of the inner packet. so use the stacking to choose - * a path. - */ - if (!(ctx->t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH)) - af |= ADJ_FLAG_MIDCHAIN_IP_STACK; - - adj_nbr_midchain_update_rewrite - (ai, gre_get_fixup (ctx->t->tunnel_dst.fp_proto, - adj_get_link_type (ai)), - uword_to_pointer (ctx->t->flags, void *), - af, - gre_build_rewrite (vnet_get_main (), - ctx->t->sw_if_index, - adj_get_link_type (ai), - &teib_entry_get_nh (ctx->ne)->fp_addr)); - - teib_entry_adj_stack (ctx->ne, ai); - - return (ADJ_WALK_RC_CONTINUE); -} - -adj_walk_rc_t -mgre_mk_incomplete_walk (adj_index_t ai, void *data) -{ - gre_tunnel_t *t = data; - - adj_nbr_midchain_update_rewrite (ai, gre_get_fixup (t->tunnel_dst.fp_proto, - adj_get_link_type (ai)), - NULL, ADJ_FLAG_NONE, NULL); - - adj_midchain_delegate_unstack (ai); - - return (ADJ_WALK_RC_CONTINUE); -} - -void -mgre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) -{ - gre_main_t *gm = &gre_main; - ip_adjacency_t *adj; - teib_entry_t *ne; - gre_tunnel_t *t; - u32 ti; - - adj = adj_get (ai); - ti = gm->tunnel_index_by_sw_if_index[sw_if_index]; - t = pool_elt_at_index (gm->tunnels, ti); - - ne = teib_entry_find_46 (sw_if_index, - adj->ia_nh_proto, &adj->sub_type.nbr.next_hop); - - if (NULL == ne) - { - // no TEIB entry to provide the next-hop - adj_nbr_midchain_update_rewrite ( - ai, gre_get_fixup 
(t->tunnel_dst.fp_proto, adj_get_link_type (ai)), - uword_to_pointer (t->flags, void *), ADJ_FLAG_NONE, NULL); - return; - } - - mgre_walk_ctx_t ctx = { - .t = t, - .ne = ne - }; - adj_nbr_walk_nh (sw_if_index, - adj->ia_nh_proto, - &adj->sub_type.nbr.next_hop, mgre_mk_complete_walk, &ctx); -} -#endif /* CLIB_MARCH_VARIANT */ - -typedef enum -{ - GRE_ENCAP_NEXT_L2_MIDCHAIN, - GRE_ENCAP_N_NEXT, -} gre_encap_next_t; - -/** - * @brief TX function. Only called for L2 payload including TEB or ERSPAN. - * L3 traffic uses the adj-midchains. - */ -static_always_inline u32 -gre_encap_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, gre_tunnel_type_t type) -{ - gre_main_t *gm = &gre_main; - u32 *from, n_left_from; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - u32 sw_if_index[2] = { ~0, ~0 }; - const gre_tunnel_t *gt[2] = { 0 }; - adj_index_t adj_index[2] = { ADJ_INDEX_INVALID, ADJ_INDEX_INVALID }; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - vlib_get_buffers (vm, from, bufs, n_left_from); - - while (n_left_from >= 2) - { - - if (PREDICT_FALSE - (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX])) - { - const vnet_hw_interface_t *hi; - sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX]; - hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]); - gt[0] = &gm->tunnels[hi->dev_instance]; - adj_index[0] = gt[0]->l2_adj_index; - } - if (PREDICT_FALSE - (sw_if_index[1] != vnet_buffer (b[1])->sw_if_index[VLIB_TX])) - { - const vnet_hw_interface_t *hi; - sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_TX]; - hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[1]); - gt[1] = &gm->tunnels[hi->dev_instance]; - adj_index[1] = gt[1]->l2_adj_index; - } - - vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0]; - vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = adj_index[1]; - - if (type == GRE_TUNNEL_TYPE_ERSPAN) - { - /* Encap GRE seq# and ERSPAN type II header */ - erspan_t2_t *h0; 
- u32 seq_num; - u64 hdr; - vlib_buffer_advance (b[0], -sizeof (erspan_t2_t)); - h0 = vlib_buffer_get_current (b[0]); - seq_num = clib_atomic_fetch_add (>[0]->gre_sn->seq_num, 1); - hdr = clib_host_to_net_u64 (ERSPAN_HDR2); - h0->seq_num = clib_host_to_net_u32 (seq_num); - h0->t2_u64 = hdr; - h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id); - } - if (type == GRE_TUNNEL_TYPE_ERSPAN) - { - /* Encap GRE seq# and ERSPAN type II header */ - erspan_t2_t *h0; - u32 seq_num; - u64 hdr; - vlib_buffer_advance (b[1], -sizeof (erspan_t2_t)); - h0 = vlib_buffer_get_current (b[1]); - seq_num = clib_atomic_fetch_add (>[1]->gre_sn->seq_num, 1); - hdr = clib_host_to_net_u64 (ERSPAN_HDR2); - h0->seq_num = clib_host_to_net_u32 (seq_num); - h0->t2_u64 = hdr; - h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[1]->session_id); - } - - if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) - { - gre_tx_trace_t *tr = vlib_add_trace (vm, node, - b[0], sizeof (*tr)); - tr->tunnel_id = gt[0] - gm->tunnels; - tr->src = gt[0]->tunnel_src; - tr->dst = gt[0]->tunnel_dst.fp_addr; - tr->length = vlib_buffer_length_in_chain (vm, b[0]); - } - if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED)) - { - gre_tx_trace_t *tr = vlib_add_trace (vm, node, - b[1], sizeof (*tr)); - tr->tunnel_id = gt[1] - gm->tunnels; - tr->src = gt[1]->tunnel_src; - tr->dst = gt[1]->tunnel_dst.fp_addr; - tr->length = vlib_buffer_length_in_chain (vm, b[1]); - } - - b += 2; - n_left_from -= 2; - } - - while (n_left_from >= 1) - { - - if (PREDICT_FALSE - (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX])) - { - const vnet_hw_interface_t *hi; - sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX]; - hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]); - gt[0] = &gm->tunnels[hi->dev_instance]; - adj_index[0] = gt[0]->l2_adj_index; - } - - vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0]; - - if (type == GRE_TUNNEL_TYPE_ERSPAN) - { - /* Encap GRE seq# and ERSPAN type II 
header */ - erspan_t2_t *h0; - u32 seq_num; - u64 hdr; - ASSERT (gt[0]->type == GRE_TUNNEL_TYPE_ERSPAN); - vlib_buffer_advance (b[0], -sizeof (erspan_t2_t)); - h0 = vlib_buffer_get_current (b[0]); - seq_num = clib_atomic_fetch_add (>[0]->gre_sn->seq_num, 1); - hdr = clib_host_to_net_u64 (ERSPAN_HDR2); - h0->seq_num = clib_host_to_net_u32 (seq_num); - h0->t2_u64 = hdr; - h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id); - } - - if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) - { - gre_tx_trace_t *tr = vlib_add_trace (vm, node, - b[0], sizeof (*tr)); - tr->tunnel_id = gt[0] - gm->tunnels; - tr->src = gt[0]->tunnel_src; - tr->dst = gt[0]->tunnel_dst.fp_addr; - tr->length = vlib_buffer_length_in_chain (vm, b[0]); - } - - b += 1; - n_left_from -= 1; - } - - vlib_buffer_enqueue_to_single_next (vm, node, from, - GRE_ENCAP_NEXT_L2_MIDCHAIN, - frame->n_vectors); - - vlib_node_increment_counter (vm, node->node_index, - GRE_ERROR_PKTS_ENCAP, frame->n_vectors); - - return frame->n_vectors; -} - -static char *gre_error_strings[] = { -#define gre_error(n,s) s, -#include "error.def" -#undef gre_error -}; - -VLIB_NODE_FN (gre_teb_encap_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_TEB)); -} - -VLIB_NODE_FN (gre_erspan_encap_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_ERSPAN)); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (gre_teb_encap_node) = -{ - .name = "gre-teb-encap", - .vector_size = sizeof (u32), - .format_trace = format_gre_tx_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = GRE_N_ERROR, - .error_strings = gre_error_strings, - .n_next_nodes = GRE_ENCAP_N_NEXT, - .next_nodes = { - [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain", - }, -}; -VLIB_REGISTER_NODE (gre_erspan_encap_node) = -{ - .name = "gre-erspan-encap", - .vector_size = sizeof (u32), 
- .format_trace = format_gre_tx_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = GRE_N_ERROR, - .error_strings = gre_error_strings, - .n_next_nodes = GRE_ENCAP_N_NEXT, - .next_nodes = { - [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain", - }, -}; -/* *INDENT-ON* */ - -#ifndef CLIB_MARCH_VARIANT -static u8 * -format_gre_tunnel_name (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - gre_main_t *gm = &gre_main; - gre_tunnel_t *t; - - if (dev_instance >= vec_len (gm->tunnels)) - return format (s, "<improperly-referenced>"); - - t = pool_elt_at_index (gm->tunnels, dev_instance); - return format (s, "gre%d", t->user_instance); -} - -static u8 * -format_gre_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - CLIB_UNUSED (int verbose) = va_arg (*args, int); - - s = format (s, "GRE tunnel: id %d\n", dev_instance); - return s; -} - -static int -gre_tunnel_desc (u32 sw_if_index, - ip46_address_t * src, ip46_address_t * dst, u8 * is_l2) -{ - gre_main_t *gm = &gre_main; - gre_tunnel_t *t; - u32 ti; - - ti = gm->tunnel_index_by_sw_if_index[sw_if_index]; - - if (~0 == ti) - /* not one of ours */ - return -1; - - t = pool_elt_at_index (gm->tunnels, ti); - - *src = t->tunnel_src; - *dst = t->tunnel_dst.fp_addr; - *is_l2 = t->type == GRE_TUNNEL_TYPE_TEB; - - return (0); -} - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (gre_device_class) = { - .name = "GRE tunnel device", - .format_device_name = format_gre_tunnel_name, - .format_device = format_gre_device, - .format_tx_trace = format_gre_tx_trace, - .admin_up_down_function = gre_interface_admin_up_down, - .ip_tun_desc = gre_tunnel_desc, -#ifdef SOON - .clear counter = 0; -#endif -}; - -VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = { - .name = "GRE", - .format_header = format_gre_header_with_length, - .unformat_header = unformat_gre_header, - .build_rewrite = gre_build_rewrite, - .update_adjacency = gre_update_adj, - .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, -}; - 
-VNET_HW_INTERFACE_CLASS (mgre_hw_interface_class) = { - .name = "mGRE", - .format_header = format_gre_header_with_length, - .unformat_header = unformat_gre_header, - .build_rewrite = gre_build_rewrite, - .update_adjacency = mgre_update_adj, - .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA, -}; -/* *INDENT-ON* */ -#endif /* CLIB_MARCH_VARIANT */ - -static void -add_protocol (gre_main_t * gm, gre_protocol_t protocol, char *protocol_name) -{ - gre_protocol_info_t *pi; - u32 i; - - vec_add2 (gm->protocol_infos, pi, 1); - i = pi - gm->protocol_infos; - - pi->name = protocol_name; - pi->protocol = protocol; - pi->next_index = pi->node_index = ~0; - - hash_set (gm->protocol_info_by_protocol, protocol, i); - hash_set_mem (gm->protocol_info_by_name, pi->name, i); -} - -static clib_error_t * -gre_init (vlib_main_t * vm) -{ - gre_main_t *gm = &gre_main; - clib_error_t *error; - ip_main_t *im = &ip_main; - ip_protocol_info_t *pi; - - clib_memset (gm, 0, sizeof (gm[0])); - gm->vlib_main = vm; - gm->vnet_main = vnet_get_main (); - - if ((error = vlib_call_init_function (vm, ip_main_init))) - return error; - - if ((error = vlib_call_init_function (vm, ip4_lookup_init))) - return error; - - if ((error = vlib_call_init_function (vm, ip6_lookup_init))) - return error; - - /* Set up the ip packet generator */ - pi = ip_get_protocol_info (im, IP_PROTOCOL_GRE); - pi->format_header = format_gre_header; - pi->unformat_pg_edit = unformat_pg_gre_header; - - gm->protocol_info_by_name = hash_create_string (0, sizeof (uword)); - gm->protocol_info_by_protocol = hash_create (0, sizeof (uword)); - gm->tunnel_by_key4 = - hash_create_mem (0, sizeof (gre_tunnel_key4_t), sizeof (uword)); - gm->tunnel_by_key6 = - hash_create_mem (0, sizeof (gre_tunnel_key6_t), sizeof (uword)); - gm->seq_num_by_key = - hash_create_mem (0, sizeof (gre_sn_key_t), sizeof (uword)); - -#define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s); - foreach_gre_protocol -#undef _ - return vlib_call_init_function (vm, 
gre_input_init); -} - -VLIB_INIT_FUNCTION (gre_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/gre/gre.h b/src/vnet/gre/gre.h deleted file mode 100644 index ea085bf0fa1..00000000000 --- a/src/vnet/gre/gre.h +++ /dev/null @@ -1,443 +0,0 @@ -/* - * gre.h: types/functions for gre. - * - * Copyright (c) 2012 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_gre_h -#define included_gre_h - -#include <vnet/vnet.h> -#include <vnet/gre/packet.h> -#include <vnet/ip/ip.h> -#include <vnet/ip/format.h> -#include <vnet/adj/adj_types.h> -#include <vnet/tunnel/tunnel.h> -#include <vnet/teib/teib.h> - -extern vnet_hw_interface_class_t gre_hw_interface_class; -extern vnet_hw_interface_class_t mgre_hw_interface_class; - -typedef enum -{ -#define gre_error(n,s) GRE_ERROR_##n, -#include <vnet/gre/error.def> -#undef gre_error - GRE_N_ERROR, -} gre_error_t; - -/** - * L3: GRE (i.e. this tunnel is in L3 mode) - * TEB: Transparent Ethernet Bridging - the tunnel is in L2 mode - * ERSPAN: type 2 - the tunnel is for port mirror SPAN output. Each tunnel is - * associated with a session ID and expected to be used for encap - * and output of mirrored packet from a L2 network only. 
There is - * no support for receiving ERSPAN packets from a GRE ERSPAN tunnel - */ -#define foreach_gre_tunnel_type \ - _(L3, "L3") \ - _(TEB, "TEB") \ - _(ERSPAN, "ERSPAN") \ - -/** - * @brief The GRE tunnel type - */ -typedef enum gre_tunnel_type_t_ -{ -#define _(n, s) GRE_TUNNEL_TYPE_##n, - foreach_gre_tunnel_type -#undef _ -} __clib_packed gre_tunnel_type_t; - -extern u8 *format_gre_tunnel_type (u8 * s, va_list * args); - - -/** - * A GRE payload protocol registration - */ -typedef struct -{ - /** Name (a c string). */ - char *name; - - /** GRE protocol type in host byte order. */ - gre_protocol_t protocol; - - /** GRE tunnel type */ - gre_tunnel_type_t tunnel_type; - - /** Node which handles this type. */ - u32 node_index; - - /** Next index for this type. */ - u32 next_index; -} gre_protocol_info_t; - -/** - * Elements of the GRE key that are common for v6 and v6 addresses - */ -typedef struct gre_tunnel_key_common_t_ -{ - union - { - struct - { - u32 fib_index; - u16 session_id; - gre_tunnel_type_t type; - tunnel_mode_t mode; - }; - u64 as_u64; - }; -} gre_tunnel_key_common_t; - -STATIC_ASSERT_SIZEOF (gre_tunnel_key_common_t, sizeof (u64)); - -/** - * @brief Key for a IPv4 GRE Tunnel - */ -typedef struct gre_tunnel_key4_t_ -{ - /** - * Source and destination IP addresses - */ - union - { - struct - { - ip4_address_t gtk_src; - ip4_address_t gtk_dst; - }; - u64 gtk_as_u64; - }; - - /** address independent attributes */ - gre_tunnel_key_common_t gtk_common; -} __attribute__ ((packed)) gre_tunnel_key4_t; - -STATIC_ASSERT_SIZEOF (gre_tunnel_key4_t, 2 * sizeof (u64)); - -/** - * @brief Key for a IPv6 GRE Tunnel - * We use a different type so that the V4 key hash is as small as possible - */ -typedef struct gre_tunnel_key6_t_ -{ - /** - * Source and destination IP addresses - */ - ip6_address_t gtk_src; - ip6_address_t gtk_dst; - - /** address independent attributes */ - gre_tunnel_key_common_t gtk_common; -} __attribute__ ((packed)) gre_tunnel_key6_t; - 
-STATIC_ASSERT_SIZEOF (gre_tunnel_key6_t, 5 * sizeof (u64)); - -/** - * Union of the two possible key types - */ -typedef union gre_tunnel_key_t_ -{ - gre_tunnel_key4_t gtk_v4; - gre_tunnel_key6_t gtk_v6; -} gre_tunnel_key_t; - -/** - * The session ID is only a 10 bit value - */ -#define GTK_SESSION_ID_MAX (0x3ff) - -/** - * Used for GRE header seq number generation for ERSPAN encap - */ -typedef struct -{ - u32 seq_num; - u32 ref_count; -} gre_sn_t; - -/** - * Hash key for GRE header seq number generation for ERSPAN encap - */ -typedef struct -{ - ip46_address_t src; - ip46_address_t dst; - u32 fib_index; -} gre_sn_key_t; - -/** - * @brief A representation of a GRE tunnel - */ -typedef struct -{ - /** - * Required for pool_get_aligned - */ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /** - * The tunnel's source/local address - */ - ip46_address_t tunnel_src; - /** - * The tunnel's destination/remote address - */ - fib_prefix_t tunnel_dst; - /** - * The FIB in which the src.dst address are present - */ - u32 outer_fib_index; - u32 hw_if_index; - u32 sw_if_index; - gre_tunnel_type_t type; - tunnel_mode_t mode; - tunnel_encap_decap_flags_t flags; - - /** - * an L2 tunnel always rquires an L2 midchain. cache here for DP. - */ - adj_index_t l2_adj_index; - - /** - * ERSPAN type 2 session ID, least significant 10 bits of u16 - */ - u16 session_id; - - /** - * GRE header sequence number (SN) used for ERSPAN type 2 header, must be - * bumped automically to be thread safe. As multiple GRE tunnels are created - * for the same fib-idx/DIP/SIP with different ERSPAN session number, they all - * share the same SN which is kept per FIB/DIP/SIP, as specified by RFC2890. 
- */ - gre_sn_t *gre_sn; - - - u32 dev_instance; /* Real device instance in tunnel vector */ - u32 user_instance; /* Instance name being shown to user */ -} gre_tunnel_t; - -typedef struct -{ - u8 next_index; - u8 tunnel_type; -} next_info_t; - -/** - * @brief GRE related global data - */ -typedef struct -{ - /** - * pool of tunnel instances - */ - gre_tunnel_t *tunnels; - - /** - * GRE payload protocol registrations - */ - gre_protocol_info_t *protocol_infos; - - /** - * Hash tables mapping name/protocol to protocol info index. - */ - uword *protocol_info_by_name, *protocol_info_by_protocol; - - /** - * Hash mapping to tunnels with ipv4 src/dst addr - */ - uword *tunnel_by_key4; - - /** - * Hash mapping to tunnels with ipv6 src/dst addr - */ - uword *tunnel_by_key6; - - /** - * Hash mapping tunnel src/dst addr and fib-idx to sequence number - */ - uword *seq_num_by_key; - - /** - * Mapping from sw_if_index to tunnel index - */ - u32 *tunnel_index_by_sw_if_index; - - /* Sparse vector mapping gre protocol in network byte order - to next index. */ - next_info_t *next_by_protocol; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; - - /* Record used instances */ - uword *instance_used; - - u16 msg_id_base; -} gre_main_t; - -/** - * @brief IPv4 and GRE header. - */ -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { - ip4_header_t ip4; - gre_header_t gre; -}) ip4_and_gre_header_t; -/* *INDENT-ON* */ - -/** - * @brief IPv6 and GRE header. - */ -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { - ip6_header_t ip6; - gre_header_t gre; -}) ip6_and_gre_header_t; -/* *INDENT-ON* */ - -always_inline gre_protocol_info_t * -gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol) -{ - uword *p = hash_get (em->protocol_info_by_protocol, protocol); - return p ? 
vec_elt_at_index (em->protocol_infos, p[0]) : 0; -} - -extern gre_main_t gre_main; - -extern clib_error_t *gre_interface_admin_up_down (vnet_main_t * vnm, - u32 hw_if_index, u32 flags); - -extern void gre_tunnel_stack (adj_index_t ai); -extern void gre_update_adj (vnet_main_t * vnm, - u32 sw_if_index, adj_index_t ai); - -typedef struct mgre_walk_ctx_t_ -{ - const gre_tunnel_t *t; - const teib_entry_t *ne; -} mgre_walk_ctx_t; - -adj_walk_rc_t mgre_mk_complete_walk (adj_index_t ai, void *data); -adj_walk_rc_t mgre_mk_incomplete_walk (adj_index_t ai, void *data); - -format_function_t format_gre_protocol; -format_function_t format_gre_header; -format_function_t format_gre_header_with_length; - -extern vlib_node_registration_t gre4_input_node; -extern vlib_node_registration_t gre6_input_node; -extern vlib_node_registration_t gre_erspan_encap_node; -extern vlib_node_registration_t gre_teb_encap_node; -extern vnet_device_class_t gre_device_class; - -/* Parse gre protocol as 0xXXXX or protocol name. - In either host or network byte order. */ -unformat_function_t unformat_gre_protocol_host_byte_order; -unformat_function_t unformat_gre_protocol_net_byte_order; - -/* Parse gre header. 
*/ -unformat_function_t unformat_gre_header; -unformat_function_t unformat_pg_gre_header; - -void -gre_register_input_protocol (vlib_main_t * vm, gre_protocol_t protocol, - u32 node_index, gre_tunnel_type_t tunnel_type); - -/* manually added to the interface output node in gre.c */ -#define GRE_OUTPUT_NEXT_LOOKUP 1 - -typedef struct -{ - u8 is_add; - gre_tunnel_type_t type; - tunnel_mode_t mode; - u8 is_ipv6; - u32 instance; - ip46_address_t src, dst; - u32 outer_table_id; - u16 session_id; - tunnel_encap_decap_flags_t flags; -} vnet_gre_tunnel_add_del_args_t; - -extern int vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a, - u32 * sw_if_indexp); - -static inline void -gre_mk_key4 (ip4_address_t src, - ip4_address_t dst, - u32 fib_index, - gre_tunnel_type_t ttype, - tunnel_mode_t tmode, u16 session_id, gre_tunnel_key4_t * key) -{ - key->gtk_src = src; - key->gtk_dst = dst; - key->gtk_common.type = ttype; - key->gtk_common.mode = tmode; - key->gtk_common.fib_index = fib_index; - key->gtk_common.session_id = session_id; -} - -static inline int -gre_match_key4 (const gre_tunnel_key4_t * key1, - const gre_tunnel_key4_t * key2) -{ - return ((key1->gtk_as_u64 == key2->gtk_as_u64) && - (key1->gtk_common.as_u64 == key2->gtk_common.as_u64)); -} - -static inline void -gre_mk_key6 (const ip6_address_t * src, - const ip6_address_t * dst, - u32 fib_index, - gre_tunnel_type_t ttype, - tunnel_mode_t tmode, u16 session_id, gre_tunnel_key6_t * key) -{ - key->gtk_src = *src; - key->gtk_dst = *dst; - key->gtk_common.type = ttype; - key->gtk_common.mode = tmode; - key->gtk_common.fib_index = fib_index; - key->gtk_common.session_id = session_id; -} - -static inline int -gre_match_key6 (const gre_tunnel_key6_t * key1, - const gre_tunnel_key6_t * key2) -{ - return (ip6_address_is_equal (&key1->gtk_src, &key2->gtk_src) && - ip6_address_is_equal (&key1->gtk_dst, &key2->gtk_dst) && - (key1->gtk_common.as_u64 == key2->gtk_common.as_u64)); -} - -static inline void -gre_mk_sn_key 
(const gre_tunnel_t * gt, gre_sn_key_t * key) -{ - key->src = gt->tunnel_src; - key->dst = gt->tunnel_dst.fp_addr; - key->fib_index = gt->outer_fib_index; -} - -#endif /* included_gre_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/gre/gre_api.c b/src/vnet/gre/gre_api.c deleted file mode 100644 index 59a1d3d738d..00000000000 --- a/src/vnet/gre/gre_api.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - *------------------------------------------------------------------ - * gre_api.c - gre api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include <vnet/vnet.h> -#include <vlibmemory/api.h> - -#include <vnet/interface.h> -#include <vnet/api_errno.h> - -#include <vnet/gre/gre.h> -#include <vnet/fib/fib_table.h> -#include <vnet/tunnel/tunnel_types_api.h> -#include <vnet/ip/ip_types_api.h> - -#include <vnet/gre/gre.api_enum.h> -#include <vnet/gre/gre.api_types.h> - -#define REPLY_MSG_ID_BASE gre_main.msg_id_base -#include <vlibapi/api_helper_macros.h> - -static int -gre_tunnel_type_decode (vl_api_gre_tunnel_type_t in, gre_tunnel_type_t * out) -{ - switch (in) - { -#define _(n, v) \ - case GRE_API_TUNNEL_TYPE_##n: \ - *out = GRE_TUNNEL_TYPE_##n; \ - return (0); - foreach_gre_tunnel_type -#undef _ - } - - return (VNET_API_ERROR_INVALID_VALUE); -} - -static vl_api_gre_tunnel_type_t -gre_tunnel_type_encode (gre_tunnel_type_t in) -{ - vl_api_gre_tunnel_type_t out = GRE_API_TUNNEL_TYPE_L3; - - switch (in) - { -#define _(n, v) \ - case GRE_TUNNEL_TYPE_##n: \ - out = GRE_API_TUNNEL_TYPE_##n; \ - break; - foreach_gre_tunnel_type -#undef _ - } - - return (out); -} - -static void vl_api_gre_tunnel_add_del_t_handler - (vl_api_gre_tunnel_add_del_t * mp) -{ - vnet_gre_tunnel_add_del_args_t _a = { }, *a = &_a; - vl_api_gre_tunnel_add_del_reply_t *rmp; - tunnel_encap_decap_flags_t flags; - u32 sw_if_index = ~0; - ip46_type_t itype[2]; - int rv = 0; - - itype[0] = ip_address_decode (&mp->tunnel.src, &a->src); - itype[1] = ip_address_decode (&mp->tunnel.dst, &a->dst); - - if (itype[0] != itype[1]) - { - rv = VNET_API_ERROR_INVALID_PROTOCOL; - goto out; - } - - if (ip46_address_is_equal (&a->src, &a->dst)) - { - rv = VNET_API_ERROR_SAME_SRC_DST; - goto out; - } - - rv = gre_tunnel_type_decode (mp->tunnel.type, &a->type); - - if (rv) - goto out; - - rv = tunnel_mode_decode (mp->tunnel.mode, &a->mode); - - if (rv) - goto out; - - rv = tunnel_encap_decap_flags_decode (mp->tunnel.flags, &flags); - - if (rv) - goto out; - - a->is_add = mp->is_add; - 
a->is_ipv6 = (itype[0] == IP46_TYPE_IP6); - a->instance = ntohl (mp->tunnel.instance); - a->session_id = ntohs (mp->tunnel.session_id); - a->outer_table_id = ntohl (mp->tunnel.outer_table_id); - a->flags = flags; - - rv = vnet_gre_tunnel_add_del (a, &sw_if_index); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_GRE_TUNNEL_ADD_DEL_REPLY, - ({ - rmp->sw_if_index = ntohl (sw_if_index); - })); - /* *INDENT-ON* */ -} - -static void send_gre_tunnel_details - (gre_tunnel_t * t, vl_api_gre_tunnel_dump_t * mp) -{ - vl_api_gre_tunnel_details_t *rmp; - - /* *INDENT-OFF* */ - REPLY_MACRO_DETAILS2(VL_API_GRE_TUNNEL_DETAILS, - ({ - ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src); - ip_address_encode (&t->tunnel_dst.fp_addr, IP46_TYPE_ANY, &rmp->tunnel.dst); - - rmp->tunnel.outer_table_id = - htonl (fib_table_get_table_id - (t->outer_fib_index, t->tunnel_dst.fp_proto)); - - rmp->tunnel.type = gre_tunnel_type_encode (t->type); - rmp->tunnel.mode = tunnel_mode_encode (t->mode); - rmp->tunnel.flags = tunnel_encap_decap_flags_encode (t->flags); - rmp->tunnel.instance = htonl (t->user_instance); - rmp->tunnel.sw_if_index = htonl (t->sw_if_index); - rmp->tunnel.session_id = htons (t->session_id); - })); - /* *INDENT-ON* */ -} - -static void -vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t * mp) -{ - vl_api_registration_t *reg; - gre_main_t *gm = &gre_main; - gre_tunnel_t *t; - u32 sw_if_index; - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - /* *INDENT-OFF* */ - pool_foreach (t, gm->tunnels) - { - send_gre_tunnel_details(t, mp); - } - /* *INDENT-ON* */ - } - - else - { - if ((sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index)) || - (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &gm->tunnels[gm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_gre_tunnel_details (t, mp); - } -} - -/* - * gre_api_hookup 
- * Add vpe's API message handlers to the table. - * vlib has already mapped shared memory and - * added the client registration handlers. - * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() - */ -/* API definitions */ -#include <vnet/format_fns.h> -#include <vnet/gre/gre.api.c> - -static clib_error_t * -gre_api_hookup (vlib_main_t * vm) -{ - /* - * Set up the (msg_name, crc, message-id) table - */ - gre_main.msg_id_base = setup_message_id_table (); - - return 0; -} - -VLIB_API_INIT_FUNCTION (gre_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c deleted file mode 100644 index bb0be865664..00000000000 --- a/src/vnet/gre/interface.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * gre_interface.c: gre interfaces - * - * Copyright (c) 2012 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <vnet/vnet.h> -#include <vnet/gre/gre.h> -#include <vnet/ip/format.h> -#include <vnet/fib/fib_table.h> -#include <vnet/adj/adj_midchain.h> -#include <vnet/adj/adj_nbr.h> -#include <vnet/mpls/mpls.h> -#include <vnet/l2/l2_input.h> -#include <vnet/teib/teib.h> - -u8 * -format_gre_tunnel_type (u8 * s, va_list * args) -{ - gre_tunnel_type_t type = va_arg (*args, int); - - switch (type) - { -#define _(n, v) case GRE_TUNNEL_TYPE_##n: \ - s = format (s, "%s", v); \ - break; - foreach_gre_tunnel_type -#undef _ - } - - return (s); -} - -static u8 * -format_gre_tunnel (u8 * s, va_list * args) -{ - gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *); - - s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ", - t->dev_instance, t->user_instance, - format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY, - format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY, - t->outer_fib_index, t->sw_if_index); - - s = format (s, "payload %U ", format_gre_tunnel_type, t->type); - s = format (s, "%U ", format_tunnel_mode, t->mode); - - if (t->type == GRE_TUNNEL_TYPE_ERSPAN) - s = format (s, "session %d ", t->session_id); - - if (t->type != GRE_TUNNEL_TYPE_L3) - s = format (s, "l2-adj-idx %d ", t->l2_adj_index); - - return s; -} - -static gre_tunnel_t * -gre_tunnel_db_find (const vnet_gre_tunnel_add_del_args_t * a, - u32 outer_fib_index, gre_tunnel_key_t * key) -{ - gre_main_t *gm = &gre_main; - uword *p; - - if (!a->is_ipv6) - { - gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index, - a->type, a->mode, a->session_id, &key->gtk_v4); - p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4); - } - else - { - gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index, - a->type, a->mode, a->session_id, &key->gtk_v6); - p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6); - } - - if (NULL == p) - return (NULL); - - return (pool_elt_at_index (gm->tunnels, p[0])); -} - -static void -gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key) -{ - gre_main_t *gm = 
&gre_main; - - if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6) - { - hash_set_mem_alloc (&gm->tunnel_by_key6, &key->gtk_v6, t->dev_instance); - } - else - { - hash_set_mem_alloc (&gm->tunnel_by_key4, &key->gtk_v4, t->dev_instance); - } -} - -static void -gre_tunnel_db_remove (gre_tunnel_t * t, gre_tunnel_key_t * key) -{ - gre_main_t *gm = &gre_main; - - if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6) - { - hash_unset_mem_free (&gm->tunnel_by_key6, &key->gtk_v6); - } - else - { - hash_unset_mem_free (&gm->tunnel_by_key4, &key->gtk_v4); - } -} - -/** - * gre_tunnel_stack - * - * 'stack' (resolve the recursion for) the tunnel's midchain adjacency - */ -void -gre_tunnel_stack (adj_index_t ai) -{ - gre_main_t *gm = &gre_main; - ip_adjacency_t *adj; - gre_tunnel_t *gt; - u32 sw_if_index; - - adj = adj_get (ai); - sw_if_index = adj->rewrite_header.sw_if_index; - - if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) || - (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index])) - return; - - gt = pool_elt_at_index (gm->tunnels, - gm->tunnel_index_by_sw_if_index[sw_if_index]); - - if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) & - VNET_HW_INTERFACE_FLAG_LINK_UP) == 0) - { - adj_midchain_delegate_unstack (ai); - } - else - { - adj_midchain_delegate_stack (ai, gt->outer_fib_index, >->tunnel_dst); - } -} - -/** - * mgre_tunnel_stack - * - * 'stack' (resolve the recursion for) the tunnel's midchain adjacency - */ -static void -mgre_tunnel_stack (adj_index_t ai) -{ - gre_main_t *gm = &gre_main; - const ip_adjacency_t *adj; - const gre_tunnel_t *gt; - u32 sw_if_index; - - adj = adj_get (ai); - sw_if_index = adj->rewrite_header.sw_if_index; - - if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) || - (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index])) - return; - - gt = pool_elt_at_index (gm->tunnels, - gm->tunnel_index_by_sw_if_index[sw_if_index]); - - if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) & - 
VNET_HW_INTERFACE_FLAG_LINK_UP) == 0) - { - adj_midchain_delegate_unstack (ai); - } - else - { - const teib_entry_t *ne; - - ne = teib_entry_find_46 (sw_if_index, adj->ia_nh_proto, - &adj->sub_type.nbr.next_hop); - if (NULL != ne) - teib_entry_adj_stack (ne, ai); - } -} - -/** - * @brief Call back when restacking all adjacencies on a GRE interface - */ -static adj_walk_rc_t -gre_adj_walk_cb (adj_index_t ai, void *ctx) -{ - gre_tunnel_stack (ai); - - return (ADJ_WALK_RC_CONTINUE); -} -static adj_walk_rc_t -mgre_adj_walk_cb (adj_index_t ai, void *ctx) -{ - mgre_tunnel_stack (ai); - - return (ADJ_WALK_RC_CONTINUE); -} - -static void -gre_tunnel_restack (gre_tunnel_t * gt) -{ - fib_protocol_t proto; - - /* - * walk all the adjacencies on th GRE interface and restack them - */ - FOR_EACH_FIB_IP_PROTOCOL (proto) - { - switch (gt->mode) - { - case TUNNEL_MODE_P2P: - adj_nbr_walk (gt->sw_if_index, proto, gre_adj_walk_cb, NULL); - break; - case TUNNEL_MODE_MP: - adj_nbr_walk (gt->sw_if_index, proto, mgre_adj_walk_cb, NULL); - break; - } - } -} - -static void -gre_teib_mk_key (const gre_tunnel_t * t, - const teib_entry_t * ne, gre_tunnel_key_t * key) -{ - const fib_prefix_t *nh; - - nh = teib_entry_get_nh (ne); - - /* construct the key using mode P2P so it can be found in the DP */ - if (FIB_PROTOCOL_IP4 == nh->fp_proto) - gre_mk_key4 (t->tunnel_src.ip4, - nh->fp_addr.ip4, - teib_entry_get_fib_index (ne), - t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v4); - else - gre_mk_key6 (&t->tunnel_src.ip6, - &nh->fp_addr.ip6, - teib_entry_get_fib_index (ne), - t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v6); -} - -/** - * An TEIB entry has been added - */ -static void -gre_teib_entry_added (const teib_entry_t * ne) -{ - gre_main_t *gm = &gre_main; - const ip_address_t *nh; - gre_tunnel_key_t key; - gre_tunnel_t *t; - u32 sw_if_index; - u32 t_idx; - - sw_if_index = teib_entry_get_sw_if_index (ne); - if (vec_len (gm->tunnel_index_by_sw_if_index) < sw_if_index) - return; - - t_idx = 
gm->tunnel_index_by_sw_if_index[sw_if_index]; - - if (INDEX_INVALID == t_idx) - return; - - /* entry has been added on an interface for which there is a GRE tunnel */ - t = pool_elt_at_index (gm->tunnels, t_idx); - - if (t->mode != TUNNEL_MODE_MP) - return; - - /* the next-hop (underlay) of the NHRP entry will form part of the key for - * ingress lookup to match packets to this interface */ - gre_teib_mk_key (t, ne, &key); - gre_tunnel_db_add (t, &key); - - /* update the rewrites for each of the adjacencies for this peer (overlay) - * using the next-hop (underlay) */ - mgre_walk_ctx_t ctx = { - .t = t, - .ne = ne - }; - nh = teib_entry_get_peer (ne); - adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne), - (AF_IP4 == ip_addr_version (nh) ? - FIB_PROTOCOL_IP4 : - FIB_PROTOCOL_IP6), - &ip_addr_46 (nh), mgre_mk_complete_walk, &ctx); -} - -static void -gre_teib_entry_deleted (const teib_entry_t * ne) -{ - gre_main_t *gm = &gre_main; - const ip_address_t *nh; - gre_tunnel_key_t key; - gre_tunnel_t *t; - u32 sw_if_index; - u32 t_idx; - - sw_if_index = teib_entry_get_sw_if_index (ne); - if (vec_len (gm->tunnel_index_by_sw_if_index) < sw_if_index) - return; - - t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index]; - - if (INDEX_INVALID == t_idx) - return; - - t = pool_elt_at_index (gm->tunnels, t_idx); - - /* remove the next-hop as an ingress lookup key */ - gre_teib_mk_key (t, ne, &key); - gre_tunnel_db_remove (t, &key); - - nh = teib_entry_get_peer (ne); - - /* make all the adjacencies incomplete */ - adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne), - (AF_IP4 == ip_addr_version (nh) ? 
- FIB_PROTOCOL_IP4 : - FIB_PROTOCOL_IP6), - &ip_addr_46 (nh), mgre_mk_incomplete_walk, t); -} - -static walk_rc_t -gre_tunnel_delete_teib_walk (index_t nei, void *ctx) -{ - gre_tunnel_t *t = ctx; - gre_tunnel_key_t key; - - gre_teib_mk_key (t, teib_entry_get (nei), &key); - gre_tunnel_db_remove (t, &key); - - return (WALK_CONTINUE); -} - -static walk_rc_t -gre_tunnel_add_teib_walk (index_t nei, void *ctx) -{ - gre_tunnel_t *t = ctx; - gre_tunnel_key_t key = {}; - - gre_teib_mk_key (t, teib_entry_get (nei), &key); - gre_tunnel_db_add (t, &key); - - return (WALK_CONTINUE); -} - -static int -vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a, - u32 outer_fib_index, u32 * sw_if_indexp) -{ - gre_main_t *gm = &gre_main; - vnet_main_t *vnm = gm->vnet_main; - gre_tunnel_t *t; - vnet_hw_interface_t *hi; - u32 hw_if_index, sw_if_index; - u8 is_ipv6 = a->is_ipv6; - gre_tunnel_key_t key; - - t = gre_tunnel_db_find (a, outer_fib_index, &key); - if (NULL != t) - return VNET_API_ERROR_IF_ALREADY_EXISTS; - - pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES); - clib_memset (t, 0, sizeof (*t)); - - /* Reconcile the real dev_instance and a possible requested instance */ - u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */ - u32 u_idx = a->instance; /* user specified instance */ - if (u_idx == ~0) - u_idx = t_idx; - if (hash_get (gm->instance_used, u_idx)) - { - pool_put (gm->tunnels, t); - return VNET_API_ERROR_INSTANCE_IN_USE; - } - hash_set (gm->instance_used, u_idx, 1); - - t->dev_instance = t_idx; /* actual */ - t->user_instance = u_idx; /* name */ - - t->type = a->type; - t->mode = a->mode; - t->flags = a->flags; - if (t->type == GRE_TUNNEL_TYPE_ERSPAN) - t->session_id = a->session_id; - - if (t->type == GRE_TUNNEL_TYPE_L3) - { - if (t->mode == TUNNEL_MODE_P2P) - hw_if_index = - vnet_register_interface (vnm, gre_device_class.index, t_idx, - gre_hw_interface_class.index, t_idx); - else - hw_if_index = - vnet_register_interface (vnm, gre_device_class.index, 
t_idx, - mgre_hw_interface_class.index, t_idx); - } - else - { - vnet_eth_interface_registration_t eir = {}; - - /* Default MAC address (d00b:eed0:0000 + sw_if_index) */ - u8 address[6] = - { 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx }; - - eir.dev_class_index = gre_device_class.index; - eir.dev_instance = t_idx; - eir.address = address; - hw_if_index = vnet_eth_register_interface (vnm, &eir); - } - - /* Set GRE tunnel interface output node (not used for L3 payload) */ - if (GRE_TUNNEL_TYPE_ERSPAN == t->type) - vnet_set_interface_output_node (vnm, hw_if_index, - gre_erspan_encap_node.index); - else - vnet_set_interface_output_node (vnm, hw_if_index, - gre_teb_encap_node.index); - - hi = vnet_get_hw_interface (vnm, hw_if_index); - sw_if_index = hi->sw_if_index; - - t->hw_if_index = hw_if_index; - t->outer_fib_index = outer_fib_index; - t->sw_if_index = sw_if_index; - t->l2_adj_index = ADJ_INDEX_INVALID; - - vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0); - gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx; - - if (!is_ipv6) - { - hi->frame_overhead = sizeof (gre_header_t) + sizeof (ip4_header_t); - hi->min_frame_size = hi->frame_overhead + 64; - } - else - { - hi->frame_overhead = sizeof (gre_header_t) + sizeof (ip6_header_t); - hi->min_frame_size = hi->frame_overhead + 64; - } - - /* Standard default gre MTU. */ - vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000); - - /* - * source the FIB entry for the tunnel's destination - * and become a child thereof. The tunnel will then get poked - * when the forwarding for the entry updates, and the tunnel can - * re-stack accordingly - */ - - clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); - t->tunnel_dst.fp_len = !is_ipv6 ? 32 : 128; - t->tunnel_dst.fp_proto = !is_ipv6 ? 
FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; - t->tunnel_dst.fp_addr = a->dst; - - gre_tunnel_db_add (t, &key); - - if (t->mode == TUNNEL_MODE_MP) - teib_walk_itf (t->sw_if_index, gre_tunnel_add_teib_walk, t); - - if (t->type == GRE_TUNNEL_TYPE_ERSPAN) - { - gre_sn_key_t skey; - gre_sn_t *gre_sn; - - gre_mk_sn_key (t, &skey); - gre_sn = (gre_sn_t *) hash_get_mem (gm->seq_num_by_key, &skey); - if (gre_sn != NULL) - { - gre_sn->ref_count++; - t->gre_sn = gre_sn; - } - else - { - gre_sn = clib_mem_alloc (sizeof (gre_sn_t)); - gre_sn->seq_num = 0; - gre_sn->ref_count = 1; - t->gre_sn = gre_sn; - hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn); - } - } - - if (t->type != GRE_TUNNEL_TYPE_L3) - { - t->l2_adj_index = adj_nbr_add_or_lock - (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index); - vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index, - (u8 *) "tunnel-output-no-count"); - gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index); - } - else - { - vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index, - (u8 *) "tunnel-output"); - } - if (sw_if_indexp) - *sw_if_indexp = sw_if_index; - - /* register gre46-input nodes */ - ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index); - ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index); - - return 0; -} - -static int -vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a, - u32 outer_fib_index, u32 * sw_if_indexp) -{ - gre_main_t *gm = &gre_main; - vnet_main_t *vnm = gm->vnet_main; - gre_tunnel_t *t; - gre_tunnel_key_t key; - u32 sw_if_index; - - t = gre_tunnel_db_find (a, outer_fib_index, &key); - if (NULL == t) - return VNET_API_ERROR_NO_SUCH_ENTRY; - - if (t->mode == TUNNEL_MODE_MP) - teib_walk_itf (t->sw_if_index, gre_tunnel_delete_teib_walk, t); - - sw_if_index = t->sw_if_index; - vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ ); - - /* make sure tunnel is removed from l2 bd or xconnect */ - set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, 
sw_if_index, 0, - L2_BD_PORT_TYPE_NORMAL, 0, 0); - gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0; - - if (t->type == GRE_TUNNEL_TYPE_L3) - vnet_delete_hw_interface (vnm, t->hw_if_index); - else - ethernet_delete_interface (vnm, t->hw_if_index); - - if (t->l2_adj_index != ADJ_INDEX_INVALID) - { - adj_midchain_delegate_unstack (t->l2_adj_index); - adj_unlock (t->l2_adj_index); - } - - ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL)); - if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1)) - { - gre_sn_key_t skey; - gre_mk_sn_key (t, &skey); - hash_unset_mem_free (&gm->seq_num_by_key, &skey); - clib_mem_free (t->gre_sn); - } - - vnet_reset_interface_l3_output_node (gm->vlib_main, sw_if_index); - hash_unset (gm->instance_used, t->user_instance); - gre_tunnel_db_remove (t, &key); - pool_put (gm->tunnels, t); - - if (sw_if_indexp) - *sw_if_indexp = sw_if_index; - - return 0; -} - -int -vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a, - u32 * sw_if_indexp) -{ - u32 outer_fib_index; - - outer_fib_index = fib_table_find ((a->is_ipv6 ? 
- FIB_PROTOCOL_IP6 : - FIB_PROTOCOL_IP4), a->outer_table_id); - - if (~0 == outer_fib_index) - return VNET_API_ERROR_NO_SUCH_FIB; - - if (a->session_id > GTK_SESSION_ID_MAX) - return VNET_API_ERROR_INVALID_SESSION_ID; - - if (a->mode == TUNNEL_MODE_MP && !ip46_address_is_zero (&a->dst)) - return (VNET_API_ERROR_INVALID_DST_ADDRESS); - - if (a->is_add) - return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp)); - else - return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp)); -} - -clib_error_t * -gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - gre_main_t *gm = &gre_main; - vnet_hw_interface_t *hi; - gre_tunnel_t *t; - u32 ti; - - hi = vnet_get_hw_interface (vnm, hw_if_index); - - if (NULL == gm->tunnel_index_by_sw_if_index || - hi->sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index)) - return (NULL); - - ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index]; - - if (~0 == ti) - /* not one of ours */ - return (NULL); - - t = pool_elt_at_index (gm->tunnels, ti); - - if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) - vnet_hw_interface_set_flags (vnm, hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); - else - vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ ); - - gre_tunnel_restack (t); - - return /* no error */ 0; -} - -static clib_error_t * -create_gre_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vnet_gre_tunnel_add_del_args_t _a, *a = &_a; - ip46_address_t src = ip46_address_initializer, dst = - ip46_address_initializer; - u32 instance = ~0; - u32 outer_table_id = 0; - gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3; - tunnel_mode_t t_mode = TUNNEL_MODE_P2P; - tunnel_encap_decap_flags_t flags = TUNNEL_ENCAP_DECAP_FLAG_NONE; - u32 session_id = 0; - int rv; - u8 is_add = 1; - u32 sw_if_index; - clib_error_t *error = NULL; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "del")) - is_add = 0; - else if (unformat (line_input, "instance %d", &instance)) - ; - else if (unformat (line_input, "src %U", unformat_ip46_address, &src)) - ; - else if (unformat (line_input, "dst %U", unformat_ip46_address, &dst)) - ; - else if (unformat (line_input, "outer-table-id %d", &outer_table_id)) - ; - else if (unformat (line_input, "multipoint")) - t_mode = TUNNEL_MODE_MP; - else if (unformat (line_input, "teb")) - t_type = GRE_TUNNEL_TYPE_TEB; - else if (unformat (line_input, "erspan %d", &session_id)) - t_type = GRE_TUNNEL_TYPE_ERSPAN; - else - if (unformat - (line_input, "flags %U", unformat_tunnel_encap_decap_flags, - &flags)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (ip46_address_is_equal (&src, &dst)) - { - error = clib_error_return (0, "src and dst are identical"); - goto done; - } - - if (t_mode != TUNNEL_MODE_MP && ip46_address_is_zero (&dst)) - { - error = clib_error_return (0, "destination address not specified"); - goto done; - } - - if (ip46_address_is_zero (&src)) - { - error = clib_error_return (0, "source address not specified"); - goto done; - } - - if (ip46_address_is_ip4 (&src) != ip46_address_is_ip4 (&dst)) - { - error = - clib_error_return (0, "src and dst address must be the same AF"); - goto done; - } - - clib_memset (a, 0, sizeof (*a)); - a->is_add = is_add; - a->outer_table_id = outer_table_id; - a->type = t_type; - a->mode = t_mode; - a->session_id = session_id; - a->is_ipv6 = !ip46_address_is_ip4 (&src); - a->instance = instance; - a->flags = flags; - clib_memcpy (&a->src, &src, sizeof (a->src)); - clib_memcpy (&a->dst, &dst, sizeof (a->dst)); - - rv = vnet_gre_tunnel_add_del (a, &sw_if_index); - - switch (rv) - { - case 0: - vlib_cli_output (vm, "%U\n", 
format_vnet_sw_if_index_name, - vnet_get_main (), sw_if_index); - break; - case VNET_API_ERROR_IF_ALREADY_EXISTS: - error = clib_error_return (0, "GRE tunnel already exists..."); - goto done; - case VNET_API_ERROR_NO_SUCH_FIB: - error = clib_error_return (0, "outer table ID %d doesn't exist\n", - outer_table_id); - goto done; - case VNET_API_ERROR_NO_SUCH_ENTRY: - error = clib_error_return (0, "GRE tunnel doesn't exist"); - goto done; - case VNET_API_ERROR_INVALID_SESSION_ID: - error = clib_error_return (0, "session ID %d out of range\n", - session_id); - goto done; - case VNET_API_ERROR_INSTANCE_IN_USE: - error = clib_error_return (0, "Instance is in use"); - goto done; - default: - error = - clib_error_return (0, "vnet_gre_tunnel_add_del returned %d", rv); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = { - .path = "create gre tunnel", - .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] " - "[outer-fib-id <fib>] [teb | erspan <session-id>] [del] " - "[multipoint]", - .function = create_gre_tunnel_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_gre_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - gre_main_t *gm = &gre_main; - gre_tunnel_t *t; - u32 ti = ~0; - - if (pool_elts (gm->tunnels) == 0) - vlib_cli_output (vm, "No GRE tunnels configured..."); - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "%d", &ti)) - ; - else - break; - } - - if (~0 == ti) - { - /* *INDENT-OFF* */ - pool_foreach (t, gm->tunnels) - { - vlib_cli_output (vm, "%U", format_gre_tunnel, t); - } - /* *INDENT-ON* */ - } - else - { - t = pool_elt_at_index (gm->tunnels, ti); - - vlib_cli_output (vm, "%U", format_gre_tunnel, t); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = { - .path = "show gre tunnel", - 
.function = show_gre_tunnel_command_fn, -}; -/* *INDENT-ON* */ - -const static teib_vft_t gre_teib_vft = { - .nv_added = gre_teib_entry_added, - .nv_deleted = gre_teib_entry_deleted, -}; - -/* force inclusion from application's main.c */ -clib_error_t * -gre_interface_init (vlib_main_t * vm) -{ - teib_register (&gre_teib_vft); - - return (NULL); -} - -VLIB_INIT_FUNCTION (gre_interface_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c deleted file mode 100644 index fdd3118bf3c..00000000000 --- a/src/vnet/gre/node.c +++ /dev/null @@ -1,598 +0,0 @@ -/* - * node.c: gre packet processing - * - * Copyright (c) 2012 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <vlib/vlib.h> -#include <vnet/pg/pg.h> -#include <vnet/gre/gre.h> -#include <vnet/mpls/mpls.h> -#include <vppinfra/sparse_vec.h> - -#define foreach_gre_input_next \ -_(PUNT, "error-punt") \ -_(DROP, "error-drop") \ -_(ETHERNET_INPUT, "ethernet-input") \ -_(IP4_INPUT, "ip4-input") \ -_(IP6_INPUT, "ip6-input") \ -_(MPLS_INPUT, "mpls-input") - -typedef enum -{ -#define _(s,n) GRE_INPUT_NEXT_##s, - foreach_gre_input_next -#undef _ - GRE_INPUT_N_NEXT, -} gre_input_next_t; - -typedef struct -{ - u32 tunnel_id; - u32 length; - ip46_address_t src; - ip46_address_t dst; -} gre_rx_trace_t; - -extern u8 *format_gre_rx_trace (u8 * s, va_list * args); - -#ifndef CLIB_MARCH_VARIANT -u8 * -format_gre_rx_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - gre_rx_trace_t *t = va_arg (*args, gre_rx_trace_t *); - - s = format (s, "GRE: tunnel %d len %d src %U dst %U", - t->tunnel_id, clib_net_to_host_u16 (t->length), - format_ip46_address, &t->src, IP46_TYPE_ANY, - format_ip46_address, &t->dst, IP46_TYPE_ANY); - return s; -} -#endif /* CLIB_MARCH_VARIANT */ - -typedef struct -{ - /* Sparse vector mapping gre protocol in network byte order - to next index. 
*/ - u16 *next_by_protocol; -} gre_input_runtime_t; - -always_inline void -gre_trace (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b, - u32 tun_sw_if_index, const ip6_header_t * ip6, - const ip4_header_t * ip4, int is_ipv6) -{ - gre_rx_trace_t *tr = vlib_add_trace (vm, node, - b, sizeof (*tr)); - tr->tunnel_id = tun_sw_if_index; - if (is_ipv6) - { - tr->length = ip6->payload_length; - tr->src.ip6.as_u64[0] = ip6->src_address.as_u64[0]; - tr->src.ip6.as_u64[1] = ip6->src_address.as_u64[1]; - tr->dst.ip6.as_u64[0] = ip6->dst_address.as_u64[0]; - tr->dst.ip6.as_u64[1] = ip6->dst_address.as_u64[1]; - } - else - { - tr->length = ip4->length; - tr->src.as_u64[0] = tr->src.as_u64[1] = 0; - tr->dst.as_u64[0] = tr->dst.as_u64[1] = 0; - tr->src.ip4.as_u32 = ip4->src_address.as_u32; - tr->dst.ip4.as_u32 = ip4->dst_address.as_u32; - } -} - -always_inline void -gre_tunnel_get (const gre_main_t * gm, vlib_node_runtime_t * node, - vlib_buffer_t * b, u16 * next, const gre_tunnel_key_t * key, - gre_tunnel_key_t * cached_key, u32 * tun_sw_if_index, - u32 * cached_tun_sw_if_index, int is_ipv6) -{ - const uword *p; - p = is_ipv6 ? 
hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6) - : hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4); - if (PREDICT_FALSE (!p)) - { - *next = GRE_INPUT_NEXT_DROP; - b->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL]; - *tun_sw_if_index = ~0; - } - else - { - const gre_tunnel_t *tun; - tun = pool_elt_at_index (gm->tunnels, *p); - *cached_tun_sw_if_index = *tun_sw_if_index = tun->sw_if_index; - if (is_ipv6) - cached_key->gtk_v6 = key->gtk_v6; - else - cached_key->gtk_v4 = key->gtk_v4; - } -} - -always_inline uword -gre_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame, - const int is_ipv6) -{ - gre_main_t *gm = &gre_main; - u32 *from, n_left_from; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - u16 nexts[VLIB_FRAME_SIZE], *next = nexts; - u16 cached_protocol = ~0; - u32 cached_next_index = SPARSE_VEC_INVALID_INDEX; - u32 cached_tun_sw_if_index = ~0; - gre_tunnel_key_t cached_key; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - vlib_get_buffers (vm, from, bufs, n_left_from); - - if (is_ipv6) - clib_memset (&cached_key.gtk_v6, 0xff, sizeof (cached_key.gtk_v6)); - else - clib_memset (&cached_key.gtk_v4, 0xff, sizeof (cached_key.gtk_v4)); - - while (n_left_from >= 2) - { - const ip6_header_t *ip6[2]; - const ip4_header_t *ip4[2]; - const gre_header_t *gre[2]; - u32 nidx[2]; - next_info_t ni[2]; - u8 type[2]; - u16 version[2]; - u32 len[2]; - gre_tunnel_key_t key[2]; - u8 matched[2]; - u32 tun_sw_if_index[2]; - - if (PREDICT_TRUE (n_left_from >= 6)) - { - vlib_prefetch_buffer_data (b[2], LOAD); - vlib_prefetch_buffer_data (b[3], LOAD); - vlib_prefetch_buffer_header (b[4], STORE); - vlib_prefetch_buffer_header (b[5], STORE); - } - - if (is_ipv6) - { - /* ip6_local hands us the ip header, not the gre header */ - ip6[0] = vlib_buffer_get_current (b[0]); - ip6[1] = vlib_buffer_get_current (b[1]); - gre[0] = (void *) (ip6[0] + 1); - gre[1] = (void *) (ip6[1] + 1); - vlib_buffer_advance (b[0], sizeof (*ip6[0]) + 
sizeof (*gre[0])); - vlib_buffer_advance (b[1], sizeof (*ip6[0]) + sizeof (*gre[0])); - } - else - { - /* ip4_local hands us the ip header, not the gre header */ - ip4[0] = vlib_buffer_get_current (b[0]); - ip4[1] = vlib_buffer_get_current (b[1]); - gre[0] = (void *) (ip4[0] + 1); - gre[1] = (void *) (ip4[1] + 1); - vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0])); - vlib_buffer_advance (b[1], sizeof (*ip4[0]) + sizeof (*gre[0])); - } - - if (PREDICT_TRUE (cached_protocol == gre[0]->protocol)) - { - nidx[0] = cached_next_index; - } - else - { - cached_next_index = nidx[0] = - sparse_vec_index (gm->next_by_protocol, gre[0]->protocol); - cached_protocol = gre[0]->protocol; - } - if (PREDICT_TRUE (cached_protocol == gre[1]->protocol)) - { - nidx[1] = cached_next_index; - } - else - { - cached_next_index = nidx[1] = - sparse_vec_index (gm->next_by_protocol, gre[1]->protocol); - cached_protocol = gre[1]->protocol; - } - - ni[0] = vec_elt (gm->next_by_protocol, nidx[0]); - ni[1] = vec_elt (gm->next_by_protocol, nidx[1]); - next[0] = ni[0].next_index; - next[1] = ni[1].next_index; - type[0] = ni[0].tunnel_type; - type[1] = ni[1].tunnel_type; - - b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX - ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] - : node->errors[GRE_ERROR_NONE]; - b[1]->error = nidx[1] == SPARSE_VEC_INVALID_INDEX - ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] - : node->errors[GRE_ERROR_NONE]; - - version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version); - version[1] = clib_net_to_host_u16 (gre[1]->flags_and_version); - version[0] &= GRE_VERSION_MASK; - version[1] &= GRE_VERSION_MASK; - - b[0]->error = version[0] - ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error; - next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0]; - b[1]->error = version[1] - ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[1]->error; - next[1] = version[1] ? 
GRE_INPUT_NEXT_DROP : next[1]; - - len[0] = vlib_buffer_length_in_chain (vm, b[0]); - len[1] = vlib_buffer_length_in_chain (vm, b[1]); - - /* always search for P2P types in the DP */ - if (is_ipv6) - { - gre_mk_key6 (&ip6[0]->dst_address, - &ip6[0]->src_address, - vnet_buffer (b[0])->ip.fib_index, - type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v6); - gre_mk_key6 (&ip6[1]->dst_address, - &ip6[1]->src_address, - vnet_buffer (b[1])->ip.fib_index, - type[1], TUNNEL_MODE_P2P, 0, &key[1].gtk_v6); - matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6); - matched[1] = gre_match_key6 (&cached_key.gtk_v6, &key[1].gtk_v6); - } - else - { - gre_mk_key4 (ip4[0]->dst_address, - ip4[0]->src_address, - vnet_buffer (b[0])->ip.fib_index, - type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v4); - gre_mk_key4 (ip4[1]->dst_address, - ip4[1]->src_address, - vnet_buffer (b[1])->ip.fib_index, - type[1], TUNNEL_MODE_P2P, 0, &key[1].gtk_v4); - matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4); - matched[1] = gre_match_key4 (&cached_key.gtk_v4, &key[1].gtk_v4); - } - - tun_sw_if_index[0] = cached_tun_sw_if_index; - tun_sw_if_index[1] = cached_tun_sw_if_index; - if (PREDICT_FALSE (!matched[0])) - gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key, - &tun_sw_if_index[0], &cached_tun_sw_if_index, - is_ipv6); - if (PREDICT_FALSE (!matched[1])) - gre_tunnel_get (gm, node, b[1], &next[1], &key[1], &cached_key, - &tun_sw_if_index[1], &cached_tun_sw_if_index, - is_ipv6); - - if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP)) - { - vlib_increment_combined_counter (&gm->vnet_main-> - interface_main.combined_sw_if_counters - [VNET_INTERFACE_COUNTER_RX], - vm->thread_index, - tun_sw_if_index[0], - 1 /* packets */ , - len[0] /* bytes */ ); - vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0]; - } - if (PREDICT_TRUE (next[1] > GRE_INPUT_NEXT_DROP)) - { - vlib_increment_combined_counter (&gm->vnet_main-> - interface_main.combined_sw_if_counters - 
[VNET_INTERFACE_COUNTER_RX], - vm->thread_index, - tun_sw_if_index[1], - 1 /* packets */ , - len[1] /* bytes */ ); - vnet_buffer (b[1])->sw_if_index[VLIB_RX] = tun_sw_if_index[1]; - } - - vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0; - vnet_buffer (b[1])->sw_if_index[VLIB_TX] = (u32) ~0; - - if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) - gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0], - is_ipv6); - if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED)) - gre_trace (vm, node, b[1], tun_sw_if_index[1], ip6[1], ip4[1], - is_ipv6); - - b += 2; - next += 2; - n_left_from -= 2; - } - - while (n_left_from >= 1) - { - const ip6_header_t *ip6[1]; - const ip4_header_t *ip4[1]; - const gre_header_t *gre[1]; - u32 nidx[1]; - next_info_t ni[1]; - u8 type[1]; - u16 version[1]; - u32 len[1]; - gre_tunnel_key_t key[1]; - u8 matched[1]; - u32 tun_sw_if_index[1]; - - if (PREDICT_TRUE (n_left_from >= 3)) - { - vlib_prefetch_buffer_data (b[1], LOAD); - vlib_prefetch_buffer_header (b[2], STORE); - } - - if (is_ipv6) - { - /* ip6_local hands us the ip header, not the gre header */ - ip6[0] = vlib_buffer_get_current (b[0]); - gre[0] = (void *) (ip6[0] + 1); - vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0])); - } - else - { - /* ip4_local hands us the ip header, not the gre header */ - ip4[0] = vlib_buffer_get_current (b[0]); - gre[0] = (void *) (ip4[0] + 1); - vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0])); - } - - if (PREDICT_TRUE (cached_protocol == gre[0]->protocol)) - { - nidx[0] = cached_next_index; - } - else - { - cached_next_index = nidx[0] = - sparse_vec_index (gm->next_by_protocol, gre[0]->protocol); - cached_protocol = gre[0]->protocol; - } - - ni[0] = vec_elt (gm->next_by_protocol, nidx[0]); - next[0] = ni[0].next_index; - type[0] = ni[0].tunnel_type; - - b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX - ? 
node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] - : node->errors[GRE_ERROR_NONE]; - - version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version); - version[0] &= GRE_VERSION_MASK; - - b[0]->error = version[0] - ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error; - next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0]; - - len[0] = vlib_buffer_length_in_chain (vm, b[0]); - - if (is_ipv6) - { - gre_mk_key6 (&ip6[0]->dst_address, - &ip6[0]->src_address, - vnet_buffer (b[0])->ip.fib_index, - type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v6); - matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6); - } - else - { - gre_mk_key4 (ip4[0]->dst_address, - ip4[0]->src_address, - vnet_buffer (b[0])->ip.fib_index, - type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v4); - matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4); - } - - tun_sw_if_index[0] = cached_tun_sw_if_index; - if (PREDICT_FALSE (!matched[0])) - gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key, - &tun_sw_if_index[0], &cached_tun_sw_if_index, - is_ipv6); - - if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP)) - { - vlib_increment_combined_counter (&gm->vnet_main-> - interface_main.combined_sw_if_counters - [VNET_INTERFACE_COUNTER_RX], - vm->thread_index, - tun_sw_if_index[0], - 1 /* packets */ , - len[0] /* bytes */ ); - vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0]; - } - - vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0; - - if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) - gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0], - is_ipv6); - - b += 1; - next += 1; - n_left_from -= 1; - } - - vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); - - vlib_node_increment_counter (vm, - is_ipv6 ? 
gre6_input_node.index : - gre4_input_node.index, GRE_ERROR_PKTS_DECAP, - n_left_from); - - return frame->n_vectors; -} - -VLIB_NODE_FN (gre4_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - return gre_input (vm, node, from_frame, /* is_ip6 */ 0); -} - -VLIB_NODE_FN (gre6_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - return gre_input (vm, node, from_frame, /* is_ip6 */ 1); -} - -static char *gre_error_strings[] = { -#define gre_error(n,s) s, -#include "error.def" -#undef gre_error -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (gre4_input_node) = { - .name = "gre4-input", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .n_errors = GRE_N_ERROR, - .error_strings = gre_error_strings, - - .n_next_nodes = GRE_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [GRE_INPUT_NEXT_##s] = n, - foreach_gre_input_next -#undef _ - }, - - .format_buffer = format_gre_header_with_length, - .format_trace = format_gre_rx_trace, - .unformat_buffer = unformat_gre_header, -}; - -VLIB_REGISTER_NODE (gre6_input_node) = { - .name = "gre6-input", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (gre_input_runtime_t), - - .n_errors = GRE_N_ERROR, - .error_strings = gre_error_strings, - - .n_next_nodes = GRE_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [GRE_INPUT_NEXT_##s] = n, - foreach_gre_input_next -#undef _ - }, - - .format_buffer = format_gre_header_with_length, - .format_trace = format_gre_rx_trace, - .unformat_buffer = unformat_gre_header, -}; -/* *INDENT-ON* */ - -#ifndef CLIB_MARCH_VARIANT -void -gre_register_input_protocol (vlib_main_t * vm, - gre_protocol_t protocol, u32 node_index, - gre_tunnel_type_t tunnel_type) -{ - gre_main_t *em = &gre_main; - gre_protocol_info_t *pi; - next_info_t *n; - u32 i; - - { - clib_error_t *error = vlib_call_init_function (vm, gre_input_init); - if (error) - clib_error_report (error); - } - - pi = gre_get_protocol_info (em, protocol); - pi->node_index = node_index; - pi->tunnel_type = tunnel_type; - pi->next_index = vlib_node_add_next (vm, gre4_input_node.index, node_index); - i = vlib_node_add_next (vm, gre6_input_node.index, node_index); - ASSERT (i == pi->next_index); - - /* Setup gre protocol -> next index sparse vector mapping. 
*/ - n = sparse_vec_validate (em->next_by_protocol, - clib_host_to_net_u16 (protocol)); - n->next_index = pi->next_index; - n->tunnel_type = tunnel_type; -} - -static void -gre_setup_node (vlib_main_t * vm, u32 node_index) -{ - vlib_node_t *n = vlib_get_node (vm, node_index); - pg_node_t *pn = pg_get_node (node_index); - - n->format_buffer = format_gre_header_with_length; - n->unformat_buffer = unformat_gre_header; - pn->unformat_edit = unformat_pg_gre_header; -} - -static clib_error_t * -gre_input_init (vlib_main_t * vm) -{ - gre_main_t *gm = &gre_main; - vlib_node_t *ethernet_input, *ip4_input, *ip6_input, *mpls_unicast_input; - - { - clib_error_t *error; - error = vlib_call_init_function (vm, gre_init); - if (error) - clib_error_report (error); - } - - gre_setup_node (vm, gre4_input_node.index); - gre_setup_node (vm, gre6_input_node.index); - - gm->next_by_protocol = sparse_vec_new - ( /* elt bytes */ sizeof (gm->next_by_protocol[0]), - /* bits in index */ BITS (((gre_header_t *) 0)->protocol)); - - /* These could be moved to the supported protocol input node defn's */ - ethernet_input = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); - ASSERT (ethernet_input); - ip4_input = vlib_get_node_by_name (vm, (u8 *) "ip4-input"); - ASSERT (ip4_input); - ip6_input = vlib_get_node_by_name (vm, (u8 *) "ip6-input"); - ASSERT (ip6_input); - mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *) "mpls-input"); - ASSERT (mpls_unicast_input); - - gre_register_input_protocol (vm, GRE_PROTOCOL_teb, - ethernet_input->index, GRE_TUNNEL_TYPE_TEB); - - gre_register_input_protocol (vm, GRE_PROTOCOL_ip4, - ip4_input->index, GRE_TUNNEL_TYPE_L3); - - gre_register_input_protocol (vm, GRE_PROTOCOL_ip6, - ip6_input->index, GRE_TUNNEL_TYPE_L3); - - gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast, - mpls_unicast_input->index, GRE_TUNNEL_TYPE_L3); - - return 0; -} - -VLIB_INIT_FUNCTION (gre_input_init); - -#endif /* CLIB_MARCH_VARIANT */ -/* - * fd.io 
coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/gre/packet.h b/src/vnet/gre/packet.h index bbd67d565c5..bbda2df3f68 100644 --- a/src/vnet/gre/packet.h +++ b/src/vnet/gre/packet.h @@ -138,7 +138,6 @@ typedef struct This field is platform dependent. */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u32 seq_num; union @@ -158,7 +157,6 @@ typedef CLIB_PACKED (struct { erspan_t2_t erspan; }) erspan_t2_header_t; -/* *INDENT-ON* */ /* u64 template for ERSPAN type 2 header with both EN bits set */ #define ERSPAN_HDR2 0x1000180000000000ul diff --git a/src/vnet/gre/pg.c b/src/vnet/gre/pg.c deleted file mode 100644 index 38a3a07ebad..00000000000 --- a/src/vnet/gre/pg.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * hdlc_pg.c: packet generator gre interface - * - * Copyright (c) 2012 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <vlib/vlib.h> -#include <vnet/pg/pg.h> -#include <vnet/gre/gre.h> - -typedef struct -{ - pg_edit_t flags_and_version; - pg_edit_t protocol; -} pg_gre_header_t; - -static inline void -pg_gre_header_init (pg_gre_header_t * e) -{ - pg_edit_init (&e->flags_and_version, gre_header_t, flags_and_version); - pg_edit_init (&e->protocol, gre_header_t, protocol); -} - -uword -unformat_pg_gre_header (unformat_input_t * input, va_list * args) -{ - pg_stream_t *s = va_arg (*args, pg_stream_t *); - pg_gre_header_t *h; - u32 group_index, error; - - h = pg_create_edit_group (s, sizeof (h[0]), sizeof (gre_header_t), - &group_index); - pg_gre_header_init (h); - - pg_edit_set_fixed (&h->flags_and_version, 0); - - error = 1; - if (!unformat (input, "%U", - unformat_pg_edit, - unformat_gre_protocol_net_byte_order, &h->protocol)) - goto done; - - { - gre_main_t *pm = &gre_main; - gre_protocol_info_t *pi = 0; - pg_node_t *pg_node = 0; - - if (h->protocol.type == PG_EDIT_FIXED) - { - u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO]; - pi = gre_get_protocol_info (pm, clib_net_to_host_u16 (t)); - if (pi && pi->node_index != ~0) - pg_node = pg_get_node (pi->node_index); - } - - if (pg_node && pg_node->unformat_edit - && unformat_user (input, pg_node->unformat_edit, s)) - ; - } - - error = 0; -done: - if (error) - pg_free_edit_group (s); - return error == 0; -} - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/gso/FEATURE.yaml b/src/vnet/gso/FEATURE.yaml index d3db0cc23e3..5f6275caca2 100644 --- a/src/vnet/gso/FEATURE.yaml +++ b/src/vnet/gso/FEATURE.yaml @@ -1,6 +1,6 @@ --- name: VNET GSO -maintainer: ayourtch@gmail.com sykazmi@cisco.com +maintainer: ayourtch@gmail.com mohsin.kazmi14@gmail.com features: - Basic GSO support - GSO for VLAN tagged packets diff --git a/src/vnet/gso/cli.c b/src/vnet/gso/cli.c index 060ce812fad..11dbaad728f 100644 --- a/src/vnet/gso/cli.c +++ 
b/src/vnet/gso/cli.c @@ -76,13 +76,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_feature_gso_command, static) = { .path = "set interface feature gso", .short_help = "set interface feature gso <intfc> [enable | disable]", .function = set_interface_feature_gso_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/gso/gro_func.h b/src/vnet/gso/gro_func.h index c7649318c43..e2e4e93850b 100644 --- a/src/vnet/gso/gro_func.h +++ b/src/vnet/gso/gro_func.h @@ -384,6 +384,7 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2) 1 /* is_ip6 */ ); vnet_buffer2 (b0)->gso_size = b0->current_length - gho0.hdr_sz; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; if (gho0.gho_flags & GHO_F_IP4) { @@ -392,6 +393,7 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2) ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - gho0.l3_hdr_offset); + vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip4 - b0->data; b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4); vnet_buffer_offload_flags_set (b0, (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_IP_CKSUM)); @@ -403,12 +405,15 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2) ip6->payload_length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - gho0.l4_hdr_offset); + vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip6 - b0->data; b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6); vnet_buffer_offload_flags_set (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM); } tcp_header_t *tcp0 = (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset); + vnet_buffer (b0)->l4_hdr_offset = (u8 *) tcp0 - b0->data; + vnet_buffer2 (b0)->gso_l4_hdr_sz = tcp_header_bytes (tcp0); tcp0->ack_number = ack_number; b0->flags &= ~VLIB_BUFFER_IS_TRACED; } diff --git a/src/vnet/gso/gso.h b/src/vnet/gso/gso.h index 041fab3bcc4..dee5da5c70b 100644 --- 
a/src/vnet/gso/gso.h +++ b/src/vnet/gso/gso.h @@ -39,13 +39,13 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0, u32 flags, u16 n_bufs, u16 hdr_sz) { u32 i = n_bufs; - while (i >= 4) + while (i >= 6) { /* prefetches */ CLIB_PREFETCH (bufs[2], 2 * CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (bufs[3], 2 * CLIB_CACHE_LINE_BYTES, LOAD); - vlib_prefetch_buffer_data (bufs[2], LOAD); - vlib_prefetch_buffer_data (bufs[3], LOAD); + vlib_prefetch_buffer_data (bufs[4], LOAD); + vlib_prefetch_buffer_data (bufs[5], LOAD); /* copying objects from cacheline 0 */ bufs[0]->current_data = 0; @@ -70,10 +70,26 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0, bufs[0]->total_length_not_including_first_buffer = 0; bufs[1]->total_length_not_including_first_buffer = 0; + clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2)); + clib_memcpy_fast (&bufs[1]->opaque2, &b0->opaque2, sizeof (b0->opaque2)); + /* copying data */ clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz); clib_memcpy_fast (bufs[1]->data, vlib_buffer_get_current (b0), hdr_sz); + /* header offset fixup */ + vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data; + vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data; + vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data; + + vnet_buffer (bufs[1])->l2_hdr_offset -= b0->current_data; + vnet_buffer (bufs[1])->l3_hdr_offset -= b0->current_data; + vnet_buffer (bufs[1])->l4_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[1])->outer_l3_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[1])->outer_l4_hdr_offset -= b0->current_data; + bufs += 2; i -= 2; } @@ -92,10 +108,18 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0, /* copying objects from cacheline 1 */ bufs[0]->trace_handle = b0->trace_handle; 
bufs[0]->total_length_not_including_first_buffer = 0; + clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2)); /* copying data */ clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz); + /* header offset fixup */ + vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data; + vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data; + vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data; + bufs++; i--; } @@ -103,27 +127,41 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0, static_always_inline void gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq, - int is_l2, int is_ip6, generic_header_offset_t *gho, - clib_ip_csum_t *c, u8 tcp_flags) + int is_l2, u8 oflags, u16 hdr_sz, u16 l4_hdr_sz, + clib_ip_csum_t *c, u8 tcp_flags, u8 is_prefetch, + vlib_buffer_t *b1) { - ip4_header_t *ip4 = - (ip4_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset + - gho->outer_hdr_sz); - ip6_header_t *ip6 = - (ip6_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset + - gho->outer_hdr_sz); - tcp_header_t *tcp = - (tcp_header_t *) (vlib_buffer_get_current (b0) + gho->l4_hdr_offset + - gho->outer_hdr_sz); + i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset; + i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset; + + ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l3_hdr_offset); + ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l3_hdr_offset); + tcp_header_t *tcp = (tcp_header_t *) (b0->data + l4_hdr_offset); tcp->flags = tcp_flags; tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq); + c->odd = 0; - if (is_ip6) + if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) + { + ip4->length = + clib_host_to_net_u16 (b0->current_length - hdr_sz + + (l4_hdr_offset - l3_hdr_offset) + l4_hdr_sz); + ip4->checksum = 0; + ip4->checksum = ip4_header_checksum (ip4); 
+ vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)); + c->sum += clib_mem_unaligned (&ip4->src_address, u32); + c->sum += clib_mem_unaligned (&ip4->dst_address, u32); + c->sum += clib_host_to_net_u32 ( + (clib_net_to_host_u16 (ip4->length) - ip4_header_bytes (ip4)) + + (ip4->protocol << 16)); + } + else { - ip6->payload_length = clib_host_to_net_u16 ( - b0->current_length - gho->l4_hdr_offset - gho->outer_hdr_sz); + ip6->payload_length = + clib_host_to_net_u16 (b0->current_length - hdr_sz + l4_hdr_sz); vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM); ip6_psh_t psh = { 0 }; u32 *p = (u32 *) &psh; @@ -134,24 +172,15 @@ gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq, for (int i = 0; i < 10; i++) c->sum += p[i]; } - else - { - ip4->length = clib_host_to_net_u16 ( - b0->current_length - gho->l3_hdr_offset - gho->outer_hdr_sz); - if (gho->gho_flags & GHO_F_IP4) - ip4->checksum = ip4_header_checksum (ip4); - vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM | - VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)); - c->sum += clib_mem_unaligned (&ip4->src_address, u32); - c->sum += clib_mem_unaligned (&ip4->dst_address, u32); - c->sum += clib_host_to_net_u32 ( - (clib_net_to_host_u16 (ip4->length) - ip4_header_bytes (ip4)) + - (ip4->protocol << 16)); - } - clib_ip_csum_chunk (c, (u8 *) tcp, gho->l4_hdr_sz); + + if (is_prefetch) + CLIB_PREFETCH (vlib_buffer_get_current (b1) + hdr_sz, + CLIB_CACHE_LINE_BYTES, LOAD); + + clib_ip_csum_chunk (c, (u8 *) tcp, l4_hdr_sz); tcp->checksum = clib_ip_csum_fold (c); - if (!is_l2 && ((gho->gho_flags & GHO_F_TUNNEL) == 0)) + if (!is_l2 && ((oflags & VNET_BUFFER_OFFLOAD_F_TNL_MASK) == 0)) { u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; @@ -168,16 +197,20 @@ gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq, static_always_inline u32 gso_segment_buffer_inline (vlib_main_t *vm, 
vnet_interface_per_thread_data_t *ptd, - vlib_buffer_t *b, generic_header_offset_t *gho, - int is_l2, int is_ip6) + vlib_buffer_t *b, int is_l2) { vlib_buffer_t **bufs = 0; u32 n_tx_bytes = 0; + + u8 oflags = vnet_buffer (b)->oflags; + i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset; u16 gso_size = vnet_buffer2 (b)->gso_size; + u16 l4_hdr_sz = vnet_buffer2 (b)->gso_l4_hdr_sz; + u8 tcp_flags = 0, tcp_flags_no_fin_psh = 0; u32 default_bflags = b->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT); - u16 hdr_sz = gho->hdr_sz + gho->outer_hdr_sz; + u16 hdr_sz = (l4_hdr_offset - b->current_data) + l4_hdr_sz; u32 next_tcp_seq = 0, tcp_seq = 0; u32 data_size = vlib_buffer_length_in_chain (vm, b) - hdr_sz; u16 size = @@ -199,9 +232,8 @@ gso_segment_buffer_inline (vlib_main_t *vm, vec_validate (bufs, n_bufs - 1); vlib_get_buffers (vm, ptd->split_buffers, bufs, n_bufs); - tcp_header_t *tcp = - (tcp_header_t *) (vlib_buffer_get_current (b) + gho->l4_hdr_offset + - gho->outer_hdr_sz); + tcp_header_t *tcp = (tcp_header_t *) (b->data + l4_hdr_offset); + tcp_seq = next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number); /* store original flags for last packet and reset FIN and PSH */ tcp_flags = tcp->flags; @@ -246,11 +278,11 @@ gso_segment_buffer_inline (vlib_main_t *vm, if (0 == dst_left && data_size) { vlib_prefetch_buffer_header (bufs[i + 1], LOAD); - vlib_prefetch_buffer_data (bufs[i + 1], LOAD); n_tx_bytes += bufs[i]->current_length; - gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, is_ip6, gho, - &c, tcp_flags_no_fin_psh); + gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz, + l4_hdr_sz, &c, tcp_flags_no_fin_psh, 1, + bufs[i + 1]); i++; dst_left = size; dst_ptr = vlib_buffer_get_current (bufs[i]) + hdr_sz; @@ -263,8 +295,8 @@ gso_segment_buffer_inline (vlib_main_t *vm, ASSERT ((i + 1) == n_alloc); n_tx_bytes += bufs[i]->current_length; - gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, is_ip6, gho, &c, - tcp_flags); + 
gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz, + l4_hdr_sz, &c, tcp_flags, 0, NULL); vec_free (bufs); return n_tx_bytes; diff --git a/src/vnet/gso/gso.rst b/src/vnet/gso/gso.rst new file mode 100644 index 00000000000..78788f82216 --- /dev/null +++ b/src/vnet/gso/gso.rst @@ -0,0 +1,154 @@ +.. _gso_doc: + +Generic Segmentation Offload +============================ + +Overview +________ + +Modern physical NICs provide offload capabilities to software based network +stacks to transfer some type of the packet processing from CPU to physical +NICs. TCP Segmentation Offload (TSO) is one among many which is provided by +modern physical NICs. Software based network stack can offload big (up to 64KB) +TCP packets to NIC and NIC will segment them into Maximum Segment Size packets. +Hence network stack save CPU cycles by processing few big packets instead of +processing many small packets. + +GSO is software based analogous to TSO which is used by virtual interfaces +i.e. tap, virtio, af_packet, vhost-user etc. Typically, virtual interfaces +provide capability to offload big packets (64KB size). But in reality, they +just pass the packet as it is to the other end without segmenting it. Hence, it +is necessary to validate the support of GSO offloading in whole setup otherwise +packet will be dropped when it will be processed by virtual entity which does +not support GSO. + +The GSO Infrastructure +_______________________ + +Software based network stacks implements GSO packet segmentation in software +where egress interface (virtual or physical) does not support GSO or TSO +offload. VPP implements GSO stack to provide support for software based packet +chunking of GSO packets when egress interface does not support GSO or TSO +offload. + +It is implemented as a feature node on interface-output feature arc. It +implements support for basic GSO, GSO with VXLAN tunnel and GSO with IPIP +tunnel. GSO with Geneve and GSO with NVGRE are not supported today. 
But one can +enable GSO feature node on tunnel interfaces, e.g. IPSEC, to segment GSO +packets before they are tunneled. + +Virtual interfaces do not support GSO with tunnels. So, special care is +needed when the user configures tunnel(s) along with GSO in the setup. In such a case, +either enable the GSO feature node on the tunnel interface (i.e. chunk the GSO packets +before they are encapsulated in the tunnel) or disable the GSO offload on the +egress interface if it is enabled (this only works for VXLAN and IPIP tunnels). +Either approach should work fine. + +Similarly, many physical interfaces do not support GSO with tunnels either. Users +can apply the same configuration as described above for virtual +interfaces. + +Data structures +^^^^^^^^^^^^^^^ + +VPP ``vlib_buffer_t`` uses the ``VNET_BUFFER_F_GSO`` flag to mark a buffer carrying a GSO +packet and also contains metadata fields with respect to GSO: + +.. code:: c + + i16 l2_hdr_offset; + i16 l3_hdr_offset; + i16 l4_hdr_offset; + + u16 gso_size; + u16 gso_l4_hdr_sz; + i16 outer_l3_hdr_offset; + i16 outer_l4_hdr_offset; + +Packet header offsets are computed relative to the ``vlib_buffer_t`` data +pointer. + +``l2_hdr_offset``, ``l3_hdr_offset`` and ``l4_hdr_offset`` are set on input of checksum +offload or GSO enabled interfaces or features, e.g. the host stack. Appropriate +offload flags are also set in ``vnet_buffer_oflags_t`` to reflect the actual packet +offloads which will be used later at the egress interface tx node or +interface-output node or GSO node to process the packet appropriately. These +fields are present in the 1st cache line and do not incur extra cycles as most of +the VPP features fetch the ``vlib_buffer_t`` 1st cache line to access the ``current_data`` +or ``current_length`` fields of the packet. + +Please note that ``gso_size``, ``gso_l4_hdr_sz``, ``outer_l3_hdr_offset`` and +``outer_l4_hdr_offset`` are in the second cache line of ``vlib_buffer_t``.
Accessing them in +the data plane will incur some extra cycles, but the cost of these cycles will be +amortized over a large (up to 64KB) packet. + +The ``gso_size`` and ``gso_l4_hdr_sz`` are set on input of GSO enabled interfaces (tap, +virtio, af_packet etc) or features (vpp host stack), when we receive a GSO +packet (a chain of buffers with the first one having the ``VNET_BUFFER_F_GSO`` bit set), +and need to persist all the way to the interface-output, in case the egress +interface is not GSO-enabled - then we need to perform the segmentation, and use +these values to chunk the payload appropriately. + +``outer_l3_hdr_offset`` and ``outer_l4_hdr_offset`` are used in case of a tunneled packet +(e.g. VXLAN or IPIP). ``outer_l3_hdr_offset`` will point to the outer l3 header of the +tunnel headers and ``outer_l4_hdr_offset`` will point to the outer l4 header of the +tunnel headers, if any. + +Following are the helper functions used to set and clear the offload flags in the +``vlib_buffer_t`` metadata: + +.. code:: c + + static_always_inline void + vnet_buffer_offload_flags_set (vlib_buffer_t *b, vnet_buffer_oflags_t oflags) + { + if (b->flags & VNET_BUFFER_F_OFFLOAD) + { + /* add a flag to existing offload */ + vnet_buffer (b)->oflags |= oflags; + } + else + { + /* no offload yet: reset offload flags to new value */ + vnet_buffer (b)->oflags = oflags; + b->flags |= VNET_BUFFER_F_OFFLOAD; + } + } + + static_always_inline void + vnet_buffer_offload_flags_clear (vlib_buffer_t *b, vnet_buffer_oflags_t oflags) + { + vnet_buffer (b)->oflags &= ~oflags; + if (0 == vnet_buffer (b)->oflags) + b->flags &= ~VNET_BUFFER_F_OFFLOAD; + } + + +ENABLE GSO FEATURE NODE +----------------------- + +The GSO feature node is not enabled by default when the egress interface does not +support GSO. The user has to enable it explicitly using the api or cli. + +GSO API +^^^^^^^ + +This API message is used to enable the GSO feature node on an interface. + +..
code:: c + + autoreply define feature_gso_enable_disable + { + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; + bool enable_disable; + option vat_help = "<intfc> | sw_if_index <nn> [enable | disable]"; + }; + +GSO CLI +^^^^^^^ + +:: + + set interface feature gso <intfc> [enable | disable] diff --git a/src/vnet/gso/hdr_offset_parser.h b/src/vnet/gso/hdr_offset_parser.h index 999a27880af..08037f57ea0 100644 --- a/src/vnet/gso/hdr_offset_parser.h +++ b/src/vnet/gso/hdr_offset_parser.h @@ -23,7 +23,8 @@ #include <vnet/udp/udp_packet.h> #include <vnet/tcp/tcp_packet.h> #include <vnet/vnet.h> -#include <vnet/vxlan/vxlan_packet.h> + +#define VXLAN_HEADER_SIZE 8 #define foreach_gho_flag \ _( 0, IP4) \ @@ -437,7 +438,7 @@ vnet_generic_outer_header_parser_inline (vlib_buffer_t * b0, if (UDP_DST_PORT_vxlan == clib_net_to_host_u16 (udp->dst_port)) { gho->gho_flags |= GHO_F_VXLAN_TUNNEL; - gho->hdr_sz += sizeof (vxlan_header_t); + gho->hdr_sz += VXLAN_HEADER_SIZE; } else if (UDP_DST_PORT_geneve == clib_net_to_host_u16 (udp->dst_port)) { diff --git a/src/vnet/gso/node.c b/src/vnet/gso/node.c index d755784d0cb..c1d4459476e 100644 --- a/src/vnet/gso/node.c +++ b/src/vnet/gso/node.c @@ -80,113 +80,108 @@ format_gso_trace (u8 * s, va_list * args) return s; } -static_always_inline u16 -tso_segment_ipip_tunnel_fixup (vlib_main_t * vm, - vnet_interface_per_thread_data_t * ptd, - vlib_buffer_t * sb0, - generic_header_offset_t * gho) +static_always_inline void +tso_segment_ipip_tunnel_fixup (vlib_main_t *vm, + vnet_interface_per_thread_data_t *ptd, + vlib_buffer_t *sb0) { u16 n_tx_bufs = vec_len (ptd->split_buffers); - u16 i = 0, n_tx_bytes = 0; + u16 i = 0; while (i < n_tx_bufs) { vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]); + i16 outer_l3_hdr_offset = vnet_buffer2 (b0)->outer_l3_hdr_offset; + i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset; - ip4_header_t *ip4 = - (ip4_header_t *) (vlib_buffer_get_current (b0) + - 
gho->outer_l3_hdr_offset); - ip6_header_t *ip6 = - (ip6_header_t *) (vlib_buffer_get_current (b0) + - gho->outer_l3_hdr_offset); + ip4_header_t *ip4 = (ip4_header_t *) (b0->data + outer_l3_hdr_offset); + ip6_header_t *ip6 = (ip6_header_t *) (b0->data + outer_l3_hdr_offset); - if (gho->gho_flags & GHO_F_OUTER_IP4) + if (vnet_buffer (b0)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM) { - ip4->length = - clib_host_to_net_u16 (b0->current_length - - gho->outer_l3_hdr_offset); + ip4->length = clib_host_to_net_u16 ( + b0->current_length - (outer_l3_hdr_offset - b0->current_data)); ip4->checksum = ip4_header_checksum (ip4); + vnet_buffer_offload_flags_clear ( + b0, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_IPIP); } - else if (gho->gho_flags & GHO_F_OUTER_IP6) + else { - ip6->payload_length = - clib_host_to_net_u16 (b0->current_length - - gho->outer_l4_hdr_offset); + ip6->payload_length = clib_host_to_net_u16 ( + b0->current_length - (l3_hdr_offset - b0->current_data)); + vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TNL_IPIP); } - n_tx_bytes += gho->outer_hdr_sz; i++; } - return n_tx_bytes; } static_always_inline void -tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t * vm, vlib_buffer_t * b, - generic_header_offset_t * gho) +tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t *vm, vlib_buffer_t *b) { - u8 proto = 0; ip4_header_t *ip4 = 0; ip6_header_t *ip6 = 0; udp_header_t *udp = 0; + i16 outer_l3_hdr_offset = vnet_buffer2 (b)->outer_l3_hdr_offset; + i16 outer_l4_hdr_offset = vnet_buffer2 (b)->outer_l4_hdr_offset; - ip4 = - (ip4_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset); - ip6 = - (ip6_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset); - udp = - (udp_header_t *) (vlib_buffer_get_current (b) + gho->outer_l4_hdr_offset); + ip4 = (ip4_header_t *) (b->data + outer_l3_hdr_offset); + ip6 = (ip6_header_t *) (b->data + outer_l3_hdr_offset); + udp = (udp_header_t *) (b->data + 
outer_l4_hdr_offset); - if (gho->gho_flags & GHO_F_OUTER_IP4) + if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM) { - proto = ip4->protocol; - ip4->length = - clib_host_to_net_u16 (b->current_length - gho->outer_l3_hdr_offset); + ip4->length = clib_host_to_net_u16 ( + b->current_length - (outer_l3_hdr_offset - b->current_data)); ip4->checksum = ip4_header_checksum (ip4); + if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM) + { + udp->length = clib_host_to_net_u16 ( + b->current_length - (outer_l4_hdr_offset - b->current_data)); + // udp checksum is 0, in udp tunnel + udp->checksum = 0; + } + vnet_buffer_offload_flags_clear ( + b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_VXLAN); } - else if (gho->gho_flags & GHO_F_OUTER_IP6) - { - proto = ip6->protocol; - ip6->payload_length = - clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset); - } - if (proto == IP_PROTOCOL_UDP) + else { - int bogus; - udp->length = - clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset); - udp->checksum = 0; - if (gho->gho_flags & GHO_F_OUTER_IP6) + ip6->payload_length = clib_host_to_net_u16 ( + b->current_length - (outer_l4_hdr_offset - b->current_data)); + + if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM) { + int bogus; + udp->length = ip6->payload_length; + // udp checksum is 0, in udp tunnel + udp->checksum = 0; udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus); + vnet_buffer_offload_flags_clear ( + b, VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_VXLAN); } - else if (gho->gho_flags & GHO_F_OUTER_IP4) - { - udp->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4); - } - /* FIXME: it should be OUTER_UDP_CKSUM */ - vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); } } -static_always_inline u16 -tso_segment_vxlan_tunnel_fixup (vlib_main_t * vm, - vnet_interface_per_thread_data_t 
* ptd, - vlib_buffer_t * sb0, - generic_header_offset_t * gho) +static_always_inline void +tso_segment_vxlan_tunnel_fixup (vlib_main_t *vm, + vnet_interface_per_thread_data_t *ptd, + vlib_buffer_t *sb0) { u16 n_tx_bufs = vec_len (ptd->split_buffers); - u16 i = 0, n_tx_bytes = 0; + u16 i = 0; while (i < n_tx_bufs) { vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]); - tso_segment_vxlan_tunnel_headers_fixup (vm, b0, gho); - n_tx_bytes += gho->outer_hdr_sz; + tso_segment_vxlan_tunnel_headers_fixup (vm, b0); i++; } - return n_tx_bytes; } static_always_inline u16 @@ -682,32 +677,10 @@ vnet_gso_node_inline (vlib_main_t * vm, to_next -= 1; n_left_to_next += 1; /* undo the counting. */ - generic_header_offset_t gho = { 0 }; u32 n_tx_bytes = 0; - u32 inner_is_ip6 = is_ip6; - - vnet_generic_header_offset_parser (b[0], &gho, is_l2, - is_ip4, is_ip6); - - if (PREDICT_FALSE (gho.gho_flags & GHO_F_TUNNEL)) - { - if (PREDICT_FALSE - (gho.gho_flags & (GHO_F_GRE_TUNNEL | - GHO_F_GENEVE_TUNNEL))) - { - /* not supported yet */ - drop_one_buffer_and_count (vm, vnm, node, from - 1, - hi->sw_if_index, - GSO_ERROR_UNHANDLED_TYPE); - b += 1; - continue; - } - inner_is_ip6 = (gho.gho_flags & GHO_F_IP6) != 0; - } - - n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b[0], &gho, - is_l2, inner_is_ip6); + n_tx_bytes = + gso_segment_buffer_inline (vm, ptd, b[0], is_l2); if (PREDICT_FALSE (n_tx_bytes == 0)) { @@ -718,19 +691,15 @@ vnet_gso_node_inline (vlib_main_t * vm, continue; } - - if (PREDICT_FALSE (gho.gho_flags & GHO_F_VXLAN_TUNNEL)) + if (PREDICT_FALSE (vnet_buffer (b[0])->oflags & + VNET_BUFFER_OFFLOAD_F_TNL_VXLAN)) { - n_tx_bytes += - tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0], &gho); + tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0]); } - else - if (PREDICT_FALSE - (gho.gho_flags & (GHO_F_IPIP_TUNNEL | - GHO_F_IPIP6_TUNNEL))) + else if (PREDICT_FALSE (vnet_buffer (b[0])->oflags & + VNET_BUFFER_OFFLOAD_F_TNL_IPIP)) { - n_tx_bytes += - tso_segment_ipip_tunnel_fixup (vm, 
ptd, b[0], &gho); + tso_segment_ipip_tunnel_fixup (vm, ptd, b[0]); } u16 n_tx_bufs = vec_len (ptd->split_buffers); @@ -838,7 +807,6 @@ VLIB_NODE_FN (gso_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node, 1 /* ip6 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (gso_l2_ip4_node) = { .vector_size = sizeof (u32), diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c index 5d4ef6f5c1b..e9c3bb6de67 100644 --- a/src/vnet/handoff.c +++ b/src/vnet/handoff.c @@ -244,6 +244,8 @@ interface_handoff_enable_disable (vlib_main_t *vm, u32 sw_if_index, vnet_feature_enable_disable ("device-input", "worker-handoff", sw_if_index, enable_disable, 0, 0); + vnet_feature_enable_disable ("port-rx-eth", "worker-handoff", sw_if_index, + enable_disable, 0, 0); return rv; } @@ -310,14 +312,12 @@ set_interface_handoff_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_handoff_command, static) = { .path = "set interface handoff", .short_help = "set interface handoff <interface-name> workers <workers-list>" " [symmetrical|asymmetrical]", .function = set_interface_handoff_command_fn, }; -/* *INDENT-ON* */ clib_error_t * handoff_init (vlib_main_t * vm) diff --git a/src/vnet/hash/FEATURE.yaml b/src/vnet/hash/FEATURE.yaml index 1e3d23ea882..d5b9a069c27 100644 --- a/src/vnet/hash/FEATURE.yaml +++ b/src/vnet/hash/FEATURE.yaml @@ -1,6 +1,6 @@ --- name: Hash infrastructure -maintainer: Mohsin Kazmi <sykazmi@cisco.com>, Damjan Marion <damarion@cisco.com> +maintainer: Mohsin Kazmi <mohsin.kazmi14@gmail.com>, Damjan Marion <damarion@cisco.com> features: - Ethernet - IP diff --git a/src/vnet/hash/hash.rst b/src/vnet/hash/hash.rst new file mode 100644 index 00000000000..3db74e2f093 --- /dev/null +++ b/src/vnet/hash/hash.rst @@ -0,0 +1,90 @@ +.. _hash_doc: + +Hash Infra +========== + +Overview +________ + +Modern physical NICs uses packet flow hash for different purposes, i.e. Receive +Side Scaling, flow steering and interface bonding etc. 
NICs can also provide +the packet flow hash prepended to the data packet as metadata, which can be used by +applications without recomputing the packet flow hash. + +As more and more services are deployed in virtualized environments, they make use of +virtual interfaces to interconnect those services. + +The Hash Infrastructure +_______________________ + +VPP implements software based hashing functionality which can be used for different +purposes. It also provides users a centralized way to register custom hash functions +based on traffic profile to be used in different vpp features, e.g. Multi-TXQ, +software RSS or bonding driver. + +Data structures +^^^^^^^^^^^^^^^ + +Hashing infra provides two types of hashing functions: +``VNET_HASH_FN_TYPE_ETHERNET`` and ``VNET_HASH_FN_TYPE_IP`` for ethernet traffic and +IP traffic respectively. +Hashing infra provides a uniform signature for the functions to be implemented: + +.. code:: c + + void (*vnet_hash_fn_t) (void **p, u32 *h, u32 n_packets); + +Here ``**p`` is the array of pointers pointing to the beginning of packet headers +(either ethernet or ip). +``*h`` is an empty array of size n_packets. On return, it will contain hashes. +``n_packets`` is the number of packets passed to this function. + +Custom hashing functions can be registered through ``VNET_REGISTER_HASH_FUNCTION``. +Users need to provide a name, description, priority and hashing functions for +registration. + +The default hashing function is selected based on the highest priority among the registered +hashing functions. + +.. code:: c + + typedef struct vnet_hash_function_registration + { + const char *name; + const char *description; + int priority; + vnet_hash_fn_t function[VNET_HASH_FN_TYPE_N]; + + struct vnet_hash_function_registration *next; + } vnet_hash_function_registration_t; + +For example, ``crc32c_5tuple`` provides two hashing functions: for IP traffic and for +ethernet traffic. It uses the 5-tuple of the flow to compute the crc32 hash. + +..
code:: c + + void vnet_crc32c_5tuple_ip_func (void **p, u32 *hash, u32 n_packets); + void vnet_crc32c_5tuple_ethernet_func (void **p, u32 *hash, u32 n_packets); + + VNET_REGISTER_HASH_FUNCTION (crc32c_5tuple, static) = { + .name = "crc32c-5tuple", + .description = "IPv4/IPv6 header and TCP/UDP ports", + .priority = 50, + .function[VNET_HASH_FN_TYPE_ETHERNET] = vnet_crc32c_5tuple_ethernet_func, + .function[VNET_HASH_FN_TYPE_IP] = vnet_crc32c_5tuple_ip_func, + }; + + +Users can see all the registered hash functions along with priority and description. + +Hash API +^^^^^^^^ + +There is no Hash API at the moment. + +Hash CLI +^^^^^^^^ + +:: + + show hash diff --git a/src/vnet/hdlc/hdlc.c b/src/vnet/hdlc/hdlc.c index fa1e7cd5eaf..443a0396e9e 100644 --- a/src/vnet/hdlc/hdlc.c +++ b/src/vnet/hdlc/hdlc.c @@ -197,7 +197,6 @@ hdlc_build_rewrite (vnet_main_t * vnm, return (rewrite); } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = { .name = "HDLC", .format_header = format_hdlc_header_with_length, @@ -205,7 +204,6 @@ VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = { .build_rewrite = hdlc_build_rewrite, .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; -/* *INDENT-ON* */ static void add_protocol (hdlc_main_t * pm, hdlc_protocol_t protocol, char *protocol_name) diff --git a/src/vnet/hdlc/node.c b/src/vnet/hdlc/node.c index 8bb621231c7..48269a3b8d3 100644 --- a/src/vnet/hdlc/node.c +++ b/src/vnet/hdlc/node.c @@ -279,7 +279,6 @@ static char *hdlc_error_strings[] = { #undef hdlc_error }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (hdlc_input_node) = { .function = hdlc_input, .name = "hdlc-input", @@ -302,7 +301,6 @@ VLIB_REGISTER_NODE (hdlc_input_node) = { .format_trace = format_hdlc_input_trace, .unformat_buffer = unformat_hdlc_header, }; -/* *INDENT-ON* */ static clib_error_t * hdlc_input_runtime_init (vlib_main_t * vm) diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 172f6afb818..eea86aa1ac8 100644 --- a/src/vnet/interface.api +++ 
b/src/vnet/interface.api @@ -733,6 +733,61 @@ autoreply define collect_detailed_interface_stats bool enable_disable; }; +/** \brief pcap_set_filter_function + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param filter_function_name - the name of the filter function + to set for pcap capture +*/ +autoreply define pcap_set_filter_function +{ + u32 client_index; + u32 context; + + string filter_function_name[]; +}; + +/** \brief pcap_trace_on + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param capture_rx - capture received packets + @param capture_tx - capture transmitted packets + @param capture_drop - capture dropped packets + @param filter - is a filter is being used on this capture + @param preallocate_data - preallocate the data buffer + @param free_data - free the data buffer + @param max_packets - depth of local buffer + @param max_bytes_per_packet - maximum number of bytes to capture + for each packet + @param sw_if_index - specify a given interface, or 0 for any + @param error - filter packets based on a specific error. 
+ @param filename - output filename, will be placed in /tmp +*/ +autoreply define pcap_trace_on +{ + u32 client_index; + u32 context; + bool capture_rx; + bool capture_tx; + bool capture_drop; + bool filter; + bool preallocate_data; + bool free_data; + u32 max_packets [default=1000]; + u32 max_bytes_per_packet [default=512]; + vl_api_interface_index_t sw_if_index; + string error[128]; + string filename[64]; + + option vat_help = "pcap_trace_on [capture_rx] [capture_tx] [capture_drop] [max_packets <nn>] [sw_if_index <sw_if_index>|0 for any] [error <node>.<error>] [filename <name>] [max_bytes_per_packet <nnnn>] [filter] [preallocate_data] [free_data]"; +}; + +autoreply define pcap_trace_off +{ + u32 client_index; + u32 context; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/interface.c b/src/vnet/interface.c index dd4399864f7..5fb2ff65fa2 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -45,11 +45,9 @@ #include <vnet/interface/rx_queue_funcs.h> #include <vnet/interface/tx_queue_funcs.h> -/* *INDENT-OFF* */ VLIB_REGISTER_LOG_CLASS (if_default_log, static) = { .class_name = "interface", }; -/* *INDENT-ON* */ #define log_debug(fmt,...) vlib_log_debug(if_default_log.class, fmt, __VA_ARGS__) #define log_err(fmt,...) vlib_log_err(if_default_log.class, fmt, __VA_ARGS__) @@ -141,15 +139,12 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va) /* Serialize hardware interface classes since they may have changed. Must do this before sending up/down flags. */ - /* *INDENT-OFF* */ pool_foreach (hif, im->hw_interfaces) { vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hif->hw_class_index); serialize_cstring (m, hw_class->name); } - /* *INDENT-ON* */ /* Send sw/hw interface state when non-zero. 
*/ - /* *INDENT-OFF* */ pool_foreach (sif, im->sw_interfaces) { if (sif->flags != 0) { @@ -158,14 +153,12 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va) st->flags = sif->flags; } } - /* *INDENT-ON* */ vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state); if (sts) vec_set_len (sts, 0); - /* *INDENT-OFF* */ pool_foreach (hif, im->hw_interfaces) { if (hif->flags != 0) { @@ -174,7 +167,6 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va) st->flags = vnet_hw_interface_flags_to_sw(hif->flags); } } - /* *INDENT-ON* */ vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state); @@ -206,7 +198,6 @@ unserialize_vnet_interface_state (serialize_main_t * m, va_list * va) uword *p; clib_error_t *error; - /* *INDENT-OFF* */ pool_foreach (hif, im->hw_interfaces) { unserialize_cstring (m, &class_name); p = hash_get_mem (im->hw_interface_class_by_name, class_name); @@ -222,7 +213,6 @@ unserialize_vnet_interface_state (serialize_main_t * m, va_list * va) clib_error_report (error); vec_free (class_name); } - /* *INDENT-ON* */ } vec_unserialize (m, &sts, unserialize_vec_vnet_sw_hw_interface_state); @@ -655,6 +645,7 @@ vnet_create_sw_interface (vnet_main_t * vnm, vnet_sw_interface_t * template, /* undo the work done by vnet_create_sw_interface_no_callbacks() */ log_err ("create_sw_interface: set flags failed\n %U", format_clib_error, error); + call_sw_interface_add_del_callbacks (vnm, *sw_if_index, 0); vnet_sw_interface_t *sw = pool_elt_at_index (im->sw_interfaces, *sw_if_index); pool_put (im->sw_interfaces, sw); @@ -776,8 +767,7 @@ vnet_hw_interface_set_max_frame_size (vnet_main_t *vnm, u32 hw_if_index, vnet_hw_interface_class_t *hw_if_class = vnet_get_hw_interface_class (vnm, hi->hw_class_index); clib_error_t *err = 0; - - log_debug ("set_max_frame_size: interface %s, max_frame_size %u -> %u", + log_debug ("set_max_frame_size: interface %v, max_frame_size %u -> %u", hi->name, hi->max_frame_size, fs); if 
(hw_if_class->set_max_frame_size == 0) @@ -1116,7 +1106,6 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index) /* Delete any sub-interfaces. */ { u32 id, sw_if_index; - /* *INDENT-OFF* */ hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id, ({ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); @@ -1126,7 +1115,6 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index) vnet_delete_sw_interface (vnm, sw_if_index); })); hash_free (hw->sub_interface_sw_if_index_by_id); - /* *INDENT-ON* */ } /* Delete software interface corresponding to hardware interface. */ @@ -1177,14 +1165,12 @@ vnet_hw_interface_walk_sw (vnet_main_t * vnm, if (WALK_STOP == fn (vnm, hi->sw_if_index, ctx)) return; - /* *INDENT-OFF* */ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id, ({ if (WALK_STOP == fn (vnm, sw_if_index, ctx)) break; })); - /* *INDENT-ON* */ } void @@ -1196,13 +1182,11 @@ vnet_hw_interface_walk (vnet_main_t * vnm, im = &vnm->interface_main; - /* *INDENT-OFF* */ pool_foreach (hi, im->hw_interfaces) { if (WALK_STOP == fn(vnm, hi->hw_if_index, ctx)) break; } - /* *INDENT-ON* */ } void @@ -1214,13 +1198,11 @@ vnet_sw_interface_walk (vnet_main_t * vnm, im = &vnm->interface_main; - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { if (WALK_STOP == fn (vnm, si, ctx)) break; } - /* *INDENT-ON* */ } void @@ -1358,7 +1340,10 @@ vnet_hw_interface_compare (vnet_main_t * vnm, int vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index) { - vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + vnet_sw_interface_t *si = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (si == NULL) + return -1; + if ((si->type == VNET_SW_INTERFACE_TYPE_P2P) || (si->type == VNET_SW_INTERFACE_TYPE_PIPE)) return 1; @@ -1403,6 +1388,26 @@ vnet_sw_interface_supports_addressing (vnet_main_t *vnm, u32 sw_if_index) return NULL; } +u32 +vnet_register_device_class (vlib_main_t *vm, vnet_device_class_t *c) +{ + 
vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + c->index = vec_len (im->device_classes); + hash_set_mem (im->device_class_by_name, c->name, c->index); + + /* to avoid confusion, please remove ".tx_function" statement + from VNET_DEVICE_CLASS() if using function candidates */ + ASSERT (c->tx_fn_registrations == 0 || c->tx_function == 0); + + if (c->tx_fn_registrations) + c->tx_function = + vlib_node_get_preferred_node_fn_variant (vm, c->tx_fn_registrations); + + vec_add1 (im->device_classes, c[0]); + return c->index; +} + clib_error_t * vnet_interface_init (vlib_main_t * vm) { @@ -1449,28 +1454,10 @@ vnet_interface_init (vlib_main_t * vm) im->device_class_by_name = hash_create_string ( /* size */ 0, sizeof (uword)); - { - vnet_device_class_t *c; - - c = vnm->device_class_registrations; - - while (c) - { - c->index = vec_len (im->device_classes); - hash_set_mem (im->device_class_by_name, c->name, c->index); - /* to avoid confusion, please remove ".tx_function" statement - from VNET_DEVICE_CLASS() if using function candidates */ - ASSERT (c->tx_fn_registrations == 0 || c->tx_function == 0); - - if (c->tx_fn_registrations) - c->tx_function = vlib_node_get_preferred_node_fn_variant ( - vm, c->tx_fn_registrations); - - vec_add1 (im->device_classes, c[0]); - c = c->next_class_registration; - } - } + for (vnet_device_class_t *c = vnm->device_class_registrations; c; + c = c->next_class_registration) + vnet_register_device_class (vm, c); im->hw_interface_class_by_name = hash_create_string ( /* size */ 0, sizeof (uword)); @@ -1940,13 +1927,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (collect_detailed_interface_stats_command, static) = { .path = "interface collect detailed-stats", .short_help = "interface collect detailed-stats <enable|disable>", .function = collect_detailed_interface_stats_cli, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/interface.h 
b/src/vnet/interface.h index c8fbc61ec7b..f0cb540f979 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -292,6 +292,8 @@ typedef struct _vnet_device_class } vnet_device_class_t; +u32 vnet_register_device_class (vlib_main_t *, vnet_device_class_t *); + #ifndef CLIB_MARCH_VARIANT #define VNET_DEVICE_CLASS(x,...) \ __VA_ARGS__ vnet_device_class_t x; \ @@ -320,7 +322,8 @@ static __clib_unused vnet_device_class_t __clib_unused_##x #endif #define VNET_DEVICE_CLASS_TX_FN(devclass) \ - uword CLIB_MARCH_SFX (devclass##_tx_fn) (); \ + uword CLIB_MARCH_SFX (devclass##_tx_fn) ( \ + vlib_main_t *, vlib_node_runtime_t *, vlib_frame_t *); \ static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \ devclass##_tx_fn_registration) = { \ .function = &CLIB_MARCH_SFX (devclass##_tx_fn), \ diff --git a/src/vnet/interface/runtime.c b/src/vnet/interface/runtime.c index 5c215e88501..a88a23bd4c9 100644 --- a/src/vnet/interface/runtime.c +++ b/src/vnet/interface/runtime.c @@ -289,10 +289,9 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index) { void *in = rt->rxq_interrupts; int int_num = -1; - while ((int_num = clib_interrupt_get_next (in, int_num)) != - -1) + while ((int_num = clib_interrupt_get_next_and_clear ( + in, int_num)) != -1) { - clib_interrupt_clear (in, int_num); pending_int = clib_bitmap_set (pending_int, int_num, 1); last_int = clib_max (last_int, int_num); } diff --git a/src/vnet/interface/rx_queue.c b/src/vnet/interface/rx_queue.c index cec0296519c..b1fc82f38e9 100644 --- a/src/vnet/interface/rx_queue.c +++ b/src/vnet/interface/rx_queue.c @@ -124,7 +124,10 @@ vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index) vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); vnet_interface_main_t *im = &vnm->interface_main; vnet_hw_if_rx_queue_t *rxq; + vlib_main_t *vm; + vnet_hw_if_rx_node_runtime_t *rt; u64 key; + u32 queue_index; log_debug ("unregister_all: interface %v", hi->name); @@ -132,6 +135,15 @@ 
vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index) { rxq = vnet_hw_if_get_rx_queue (vnm, hi->rx_queue_indices[i]); key = rx_queue_key (rxq->hw_if_index, rxq->queue_id); + if (PREDICT_FALSE (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT || + rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) + { + vm = vlib_get_main_by_index (rxq->thread_index); + queue_index = vnet_hw_if_get_rx_queue_index_by_id (vnm, hw_if_index, + rxq->queue_id); + rt = vlib_node_get_runtime_data (vm, hi->input_node_index); + clib_interrupt_clear (rt->rxq_interrupts, queue_index); + } hash_unset_mem_free (&im->rxq_index_by_hw_if_index_and_queue_id, &key); pool_put_index (im->hw_if_rx_queues, hi->rx_queue_indices[i]); @@ -240,14 +252,12 @@ vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm, vec_reset_length (rt->rxq_vector_int); - while ((int_num = clib_interrupt_get_next (rt->rxq_interrupts, int_num)) != - -1) + while ((int_num = clib_interrupt_get_next_and_clear (rt->rxq_interrupts, + int_num)) != -1) { vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, int_num); vnet_hw_if_rxq_poll_vector_t *pv; - clib_interrupt_clear (rt->rxq_interrupts, int_num); - vec_add2 (rt->rxq_vector_int, pv, 1); pv->dev_instance = rxq->dev_instance; pv->queue_id = rxq->queue_id; diff --git a/src/vnet/interface/stats.c b/src/vnet/interface/stats.c index f58ffa32586..4f3213aafc3 100644 --- a/src/vnet/interface/stats.c +++ b/src/vnet/interface/stats.c @@ -25,6 +25,8 @@ static struct static clib_error_t * statseg_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add) { + u8 *name = 0; + if (if_names == 0) { if_names = vlib_stats_add_string_vector ("/if/names"); @@ -42,7 +44,6 @@ statseg_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add) { vnet_sw_interface_t *si, *si_sup; vnet_hw_interface_t *hi_sup; - u8 *name; si = vnet_get_sw_interface (vnm, sw_if_index); si_sup = vnet_get_sup_sw_interface (vnm, si->sw_if_index); @@ -63,16 +64,18 @@ statseg_sw_interface_add_del 
(vnet_main_t *vnm, u32 sw_if_index, u32 is_add) ASSERT (index != ~0); vec_add1 (dir_entry_indices[sw_if_index], index); } - - vec_free (name); } else { + name = format (0, "%s", "deleted"); + vlib_stats_set_string_vector (&if_names, sw_if_index, "%v", name); for (u32 i = 0; i < vec_len (dir_entry_indices[sw_if_index]); i++) vlib_stats_remove_entry (dir_entry_indices[sw_if_index][i]); vec_free (dir_entry_indices[sw_if_index]); } + vec_free (name); + vlib_stats_segment_unlock (); return 0; diff --git a/src/vnet/interface/tx_queue.rst b/src/vnet/interface/tx_queue.rst new file mode 100644 index 00000000000..e8f0e039b8e --- /dev/null +++ b/src/vnet/interface/tx_queue.rst @@ -0,0 +1,159 @@ +.. _TX_Queue_doc: + +Transmit Queues +=============== + +Overview +________ + +VPP implements a transmit queue infra to access and manage transmit queues. It provides +common registration functions to register or unregister interfaces’ transmit +queues. It also provides functions for queue placement on given thread(s). + +The TXQ Infrastructure +_______________________ + +Infra registers each queue using a unique key which is formed by concatenating +the hardware interface index ``hw_if_index`` and the unique queue identifier for +the given interface ``queue_id``. As a result of registering a queue, infra +returns a unique global ``queue_index`` which can be used by the driver to +access that queue later. + +The interface output node uses pre-computed ``output_node_thread_runtime`` data +which provides essential information related to queue placements on a given +thread of a given interface. Transmit queue infra implements an algorithm to +pre-compute this information. It also pre-computes the scalar arguments of frame +``vnet_hw_if_tx_frame_t``. It also pre-calculates a ``lookup_table`` for a +thread if multiple transmit queues are placed on that thread. +Interface drivers call ``vnet_hw_if_update_runtime_data()`` to execute that +algorithm after registering the transmit queues to TXQ infra.
+ +The algorithm makes a copy of the existing runtime data and iterates through it +for each vpp main and worker thread. In each iteration, the algorithm loops through +all the tx queues of the given interface to fill the information in the frame data +structure ``vnet_hw_if_tx_frame_t``. The algorithm also updates the information +related to the number of transmit queues of the given interface on the given vpp thread in +data structure ``output_node_thread_runtime``. As a consequence of any update +to the copy, the algorithm triggers the function to update the actual working copy by taking +the worker barrier and frees the old copy of ``output_node_thread_runtime``. + +Multi-TXQ infra +^^^^^^^^^^^^^^^ + +The interface output node uses a packet flow hash, computed using the hash infra, in case of multi-txq +on a given thread. Each hardware interface class contains the type of hash required +for interfaces of that hardware interface class, e.g. the ethernet interface hardware +class contains type ``VNET_HASH_FN_TYPE_ETHERNET``. The hash function +itself, though, is held in the hardware interface data structure of the given interface. The default +hashing function is selected upon interface creation based on priority. The user can +configure a different hash for an interface for the multi-txq use case. + +The interface output node uses the packet flow hash as an index into the pre-calculated lookup +table to get the queue identifier for the given transmit queue. The interface output node +enqueues the packets to the respective frame and also copies the ``vnet_hw_if_tx_frame_t`` +to the frame scalar arguments. Drivers use the scalar arguments ``vnet_hw_if_tx_frame_t`` +of the given frame to extract the information about the transmit queue to be used to +transmit the packets. Drivers may need to acquire a lock on the given queue before +transmitting the packets, based on the ``shared_queue`` bit status. + +Data structures +^^^^^^^^^^^^^^^ + +Queue information is stored in data structure ``vnet_hw_if_tx_queue_t``: + +..
code:: c + + typedef struct + { + /* either this queue is shared among multiple threads */ + u8 shared_queue : 1; + /* hw interface index */ + u32 hw_if_index; + + /* hardware queue identifier */ + u32 queue_id; + + /* bitmap of threads which use this queue */ + clib_bitmap_t *threads; + } vnet_hw_if_tx_queue_t; + + +Frame information is stored in data structure ``vnet_hw_if_tx_frame_t``: + +.. code:: c + + typedef enum + { + VNET_HW_IF_TX_FRAME_HINT_NOT_CHAINED = (1 << 0), + VNET_HW_IF_TX_FRAME_HINT_NO_GSO = (1 << 1), + VNET_HW_IF_TX_FRAME_HINT_NO_CKSUM_OFFLOAD = (1 << 2), + } vnet_hw_if_tx_frame_hint_t; + + typedef struct + { + u8 shared_queue : 1; + vnet_hw_if_tx_frame_hint_t hints : 16; + u32 queue_id; + } vnet_hw_if_tx_frame_t; + +Output node runtime information is stored in data structure ``output_node_thread_runtime``: + +.. code:: c + + typedef struct + { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + vnet_hw_if_tx_frame_t *frame; + u32 *lookup_table; + u32 n_queues; + } vnet_hw_if_output_node_runtime_t; + + +Multi-TXQ API +^^^^^^^^^^^^^ + +This API message is used to place a tx queue of an interface on the vpp main thread or on worker thread(s). + +..
code:: c + + autoendian autoreply define sw_interface_set_tx_placement + { + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; + u32 queue_id; + u32 array_size; + u32 threads[array_size]; + option vat_help = "<interface | sw_if_index <index>> queue <n> [threads <list> | mask <hex>]"; + }; + +Multi-TXQ CLI +^^^^^^^^^^^^^ + +:: + + set interface tx-queue set interface tx-queue <interface> queue <n> [threads <list>] + set interface tx-hash set interface tx-hash <interface> hash-name <hash-name> + +:: + + show hardware-interfaces + + Name Idx Link Hardware + tap0 1 up tap0 + Link speed: unknown + RX Queues: + queue thread mode + 0 main (0) polling + TX Queues: + TX Hash: [name: crc32c-5tuple priority: 50 description: IPv4/IPv6 header and TCP/UDP ports] + queue shared thread(s) + 0 no 0 + Ethernet address 02:fe:27:69:5a:b5 + VIRTIO interface + instance 0 + RX QUEUE : Total Packets + 0 : 0 + TX QUEUE : Total Packets + 0 : 0 + diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 5766f2ca21f..c727e519138 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -17,6 +17,9 @@ *------------------------------------------------------------------ */ +#define _GNU_SOURCE +#include <string.h> + #include <vnet/vnet.h> #include <vlibmemory/api.h> @@ -384,8 +387,6 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp) vec_add1 (filter, 0); /* Ensure it's a C string for strcasecmp() */ } - char *strcasestr (char *, char *); /* lnx hdr file botch */ - /* *INDENT-OFF* */ pool_foreach (swif, im->sw_interfaces) { if (!vnet_swif_is_api_visible (swif)) @@ -399,7 +400,6 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp) send_sw_interface_details (am, rp, swif, name, mp->context); } - /* *INDENT-ON* */ vec_free (name); vec_free (filter); @@ -808,14 +808,12 @@ link_state_process (vlib_main_t * vm, if (event_by_sw_if_index[i] == 0) continue; - /* *INDENT-OFF* */ pool_foreach (reg, 
vam->interface_events_registrations) { vl_reg = vl_api_client_index_to_registration (reg->client_index); if (vl_reg) send_sw_interface_event (vam, reg, vl_reg, i, event_by_sw_if_index[i]); } - /* *INDENT-ON* */ } vec_reset_length (event_by_sw_if_index); } @@ -831,13 +829,11 @@ static clib_error_t *sw_interface_add_del_function (vnet_main_t * vm, u32 sw_if_index, u32 flags); -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (link_state_process_node,static) = { .function = link_state_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "vpe-link-state-process", }; -/* *INDENT-ON* */ VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (admin_up_down_function); VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (link_up_down_function); @@ -1024,21 +1020,19 @@ vl_api_sw_interface_set_interface_name_t_handler ( { vl_api_sw_interface_set_interface_name_reply_t *rmp; vnet_main_t *vnm = vnet_get_main (); - u32 sw_if_index = ntohl (mp->sw_if_index); - vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); clib_error_t *error; int rv = 0; + VALIDATE_SW_IF_INDEX (mp); + + u32 sw_if_index = ntohl (mp->sw_if_index); + vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + if (mp->name[0] == 0) { rv = VNET_API_ERROR_INVALID_VALUE; goto out; } - if (si == 0) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto out; - } error = vnet_rename_interface (vnm, si->hw_if_index, (char *) mp->name); if (error) @@ -1048,6 +1042,7 @@ vl_api_sw_interface_set_interface_name_t_handler ( } out: + BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SW_INTERFACE_SET_INTERFACE_NAME_REPLY); } @@ -1214,7 +1209,7 @@ out: static void send_interface_tx_placement_details (vnet_hw_if_tx_queue_t **all_queues, u32 index, vl_api_registration_t *rp, - u32 native_context) + u32 context) { vnet_main_t *vnm = vnet_get_main (); vl_api_sw_interface_tx_placement_details_t *rmp; @@ -1223,29 +1218,24 @@ send_interface_tx_placement_details (vnet_hw_if_tx_queue_t **all_queues, uword *bitmap = q[0]->threads; u32 hw_if_index = 
q[0]->hw_if_index; vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index); - u32 context = clib_host_to_net_u32 (native_context); n_bits = clib_bitmap_count_set_bits (bitmap); u32 n = n_bits * sizeof (u32); - /* - * FIXME: Use the REPLY_MACRO_DETAILS5_END once endian handler is registered - * and available. - */ - REPLY_MACRO_DETAILS5 ( - VL_API_SW_INTERFACE_TX_PLACEMENT_DETAILS, n, rp, context, ({ - rmp->sw_if_index = clib_host_to_net_u32 (hw_if->sw_if_index); - rmp->queue_id = clib_host_to_net_u32 (q[0]->queue_id); - rmp->shared = q[0]->shared_queue; - rmp->array_size = clib_host_to_net_u32 (n_bits); - - v = clib_bitmap_first_set (bitmap); - for (u32 i = 0; i < n_bits; i++) - { - rmp->threads[i] = clib_host_to_net_u32 (v); - v = clib_bitmap_next_set (bitmap, v + 1); - } - })); + REPLY_MACRO_DETAILS5_END (VL_API_SW_INTERFACE_TX_PLACEMENT_DETAILS, n, rp, + context, ({ + rmp->sw_if_index = hw_if->sw_if_index; + rmp->queue_id = q[0]->queue_id; + rmp->shared = q[0]->shared_queue; + rmp->array_size = n_bits; + + v = clib_bitmap_first_set (bitmap); + for (u32 i = 0; i < n_bits; i++) + { + rmp->threads[i] = v; + v = clib_bitmap_next_set (bitmap, v + 1); + } + })); } static void @@ -1480,12 +1470,10 @@ vl_api_create_subif_t_handler (vl_api_create_subif_t * mp) BAD_SW_IF_INDEX_LABEL; - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_CREATE_SUBIF_REPLY, ({ rmp->sw_if_index = ntohl(sub_sw_if_index); })); - /* *INDENT-ON* */ } static void @@ -1527,12 +1515,10 @@ vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp) mac_address_decode (mp->mac_address, &mac); rv = vnet_create_loopback_interface (&sw_if_index, (u8 *) & mac, 0, 0); - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_CREATE_LOOPBACK_REPLY, ({ rmp->sw_if_index = ntohl (sw_if_index); })); - /* *INDENT-ON* */ } static void vl_api_create_loopback_instance_t_handler @@ -1549,12 +1535,10 @@ static void vl_api_create_loopback_instance_t_handler rv = vnet_create_loopback_interface (&sw_if_index, (u8 *) & mac, 
is_specified, user_instance); - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_CREATE_LOOPBACK_INSTANCE_REPLY, ({ rmp->sw_if_index = ntohl (sw_if_index); })); - /* *INDENT-ON* */ } static void @@ -1608,6 +1592,92 @@ static void REPLY_MACRO (VL_API_SW_INTERFACE_ADDRESS_REPLACE_END_REPLY); } +static void +vl_api_pcap_set_filter_function_t_handler ( + vl_api_pcap_set_filter_function_t *mp) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_pcap_t *pp = &vnm->pcap; + vl_api_pcap_set_filter_function_reply_t *rmp; + unformat_input_t input = { 0 }; + vlib_is_packet_traced_fn_t *f; + char *filter_name; + int rv = 0; + filter_name = vl_api_from_api_to_new_c_string (&mp->filter_function_name); + unformat_init_cstring (&input, filter_name); + if (unformat (&input, "%U", unformat_vlib_trace_filter_function, &f) == 0) + { + rv = -1; + goto done; + } + + pp->current_filter_function = f; + +done: + unformat_free (&input); + vec_free (filter_name); + REPLY_MACRO (VL_API_PCAP_SET_FILTER_FUNCTION_REPLY); +} + +static void +vl_api_pcap_trace_on_t_handler (vl_api_pcap_trace_on_t *mp) +{ + vl_api_pcap_trace_on_reply_t *rmp; + unformat_input_t filename, drop_err_name; + vnet_pcap_dispatch_trace_args_t capture_args; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + unformat_init_cstring (&filename, (char *) mp->filename); + if (!unformat_user (&filename, unformat_vlib_tmpfile, + &capture_args.filename)) + { + rv = VNET_API_ERROR_ILLEGAL_NAME; + goto out; + } + + capture_args.rx_enable = mp->capture_rx; + capture_args.tx_enable = mp->capture_tx; + capture_args.preallocate_data = mp->preallocate_data; + capture_args.free_data = mp->free_data; + capture_args.drop_enable = mp->capture_drop; + capture_args.status = 0; + capture_args.packets_to_capture = ntohl (mp->max_packets); + capture_args.sw_if_index = ntohl (mp->sw_if_index); + capture_args.filter = mp->filter; + capture_args.max_bytes_per_pkt = ntohl (mp->max_bytes_per_packet); + capture_args.drop_err = ~0; + + unformat_init_cstring 
(&drop_err_name, (char *) mp->error); + unformat_user (&drop_err_name, unformat_vlib_error, vlib_get_main (), + &capture_args.drop_err); + + rv = vnet_pcap_dispatch_trace_configure (&capture_args); + + BAD_SW_IF_INDEX_LABEL; + +out: + unformat_free (&filename); + unformat_free (&drop_err_name); + + REPLY_MACRO (VL_API_PCAP_TRACE_ON_REPLY); +} + +static void +vl_api_pcap_trace_off_t_handler (vl_api_pcap_trace_off_t *mp) +{ + vl_api_pcap_trace_off_reply_t *rmp; + vnet_pcap_dispatch_trace_args_t capture_args; + int rv = 0; + + clib_memset (&capture_args, 0, sizeof (capture_args)); + + rv = vnet_pcap_dispatch_trace_configure (&capture_args); + + REPLY_MACRO (VL_API_PCAP_TRACE_OFF_REPLY); +} + /* * vpe_api_hookup * Add vpe's API message handlers to the table. diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index 3515c395e53..c56eb9777cf 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -54,6 +54,9 @@ #include <vnet/interface/rx_queue_funcs.h> #include <vnet/interface/tx_queue_funcs.h> #include <vnet/hash/hash.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/dev_funcs.h> + static int compare_interface_names (void *a1, void *a2) { @@ -146,14 +149,12 @@ skip_unformat: vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm, hi, verbose); - /* *INDENT-OFF* */ clib_bitmap_foreach (hw_idx, hi->bond_info) { shi = vnet_get_hw_interface(vnm, hw_idx); vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm, shi, verbose); } - /* *INDENT-ON* */ } } } @@ -247,14 +248,12 @@ clear_hw_interfaces (vlib_main_t * vm, * cpu socket 0 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = { .path = "show hardware-interfaces", .short_help = "show hardware-interfaces [brief|verbose|detail] [bond] " "[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]", .function = show_hw_interfaces, }; -/* *INDENT-ON* */ /*? 
@@ -268,14 +267,12 @@ VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = { * name and software index (where 2 is the software index): * @cliexcmd{clear hardware-interfaces GigabitEthernet7/0/0 2} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_hw_interface_counters_command, static) = { .path = "clear hardware-interfaces", .short_help = "clear hardware-interfaces " "[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]", .function = clear_hw_interfaces, }; -/* *INDENT-ON* */ static int sw_interface_name_compare (void *a1, void *a2) @@ -417,14 +414,12 @@ show_sw_interfaces (vlib_main_t * vm, sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); vec_set_len (sorted_sis, 0); - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { int visible = vnet_swif_is_api_visible (si); if (visible) vec_add1 (sorted_sis, si[0]); } - /* *INDENT-ON* */ /* Sort by name. */ vec_sort_with_function (sorted_sis, sw_interface_name_compare); } @@ -466,7 +461,6 @@ show_sw_interfaces (vlib_main_t * vm, /* Display any L2 info */ vlib_cli_output (vm, "%U", format_l2_input, si->sw_if_index); - /* *INDENT-OFF* */ /* Display any IP4 addressing info */ foreach_ip_interface_address (lm4, ia, si->sw_if_index, 1 /* honor unnumbered */, @@ -481,9 +475,7 @@ show_sw_interfaces (vlib_main_t * vm, vlib_cli_output (vm, " L3 %U/%d", format_ip4_address, r4, ia->address_length); })); - /* *INDENT-ON* */ - /* *INDENT-OFF* */ /* Display any IP6 addressing info */ foreach_ip_interface_address (lm6, ia, si->sw_if_index, 1 /* honor unnumbered */, @@ -498,7 +490,6 @@ show_sw_interfaces (vlib_main_t * vm, vlib_cli_output (vm, " L3 %U/%d", format_ip6_address, r6, ia->address_length); })); - /* *INDENT-ON* */ } } else @@ -514,29 +505,24 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_sw_interfaces_command, static) = { .path = "show interface", - .short_help = "show interface [address|addr|features|feat|vtr] [<interface> [<interface> [..]]] [verbose]", + 
.short_help = "show interface [address|addr|features|feat|vtr|tag] " + "[<interface> [<interface> [..]]] [verbose]", .function = show_sw_interfaces, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* Root of all interface commands. */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vnet_cli_interface_command, static) = { .path = "interface", .short_help = "Interface commands", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vnet_cli_set_interface_command, static) = { .path = "set interface", .short_help = "Interface commands", }; -/* *INDENT-ON* */ static clib_error_t * clear_interface_counters (vlib_main_t * vm, @@ -577,13 +563,11 @@ clear_interface_counters (vlib_main_t * vm, * Example of how to clear the statistics for all interfaces: * @cliexcmd{clear interfaces} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_interface_counters_command, static) = { .path = "clear interfaces", .short_help = "clear interfaces", .function = clear_interface_counters, }; -/* *INDENT-ON* */ /** * Parse subinterface names. 
@@ -908,7 +892,6 @@ done: * @cliexcmd{set interface GigabitEthernet2/0/0.7 up} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = { .path = "create sub-interfaces", .short_help = "create sub-interfaces <interface> " @@ -917,7 +900,6 @@ VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = { "{<subId> dot1q|dot1ad <vlanId>|any [inner-dot1q <vlanId>|any] [exact-match]}", .function = create_sub_interfaces, }; -/* *INDENT-ON* */ static clib_error_t * set_state (vlib_main_t * vm, @@ -966,13 +948,11 @@ done: '<em>down</em>': * @cliexcmd{set interface state GigabitEthernet2/0/0 down} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_state_command, static) = { .path = "set interface state", .short_help = "set interface state <interface> [up|down|punt|enable]", .function = set_state, }; -/* *INDENT-ON* */ static clib_error_t * set_unnumbered (vlib_main_t * vm, @@ -1022,13 +1002,11 @@ set_unnumbered (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_unnumbered_command, static) = { .path = "set interface unnumbered", .short_help = "set interface unnumbered [<interface> use <interface> | del <interface>]", .function = set_unnumbered, }; -/* *INDENT-ON* */ @@ -1065,13 +1043,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_hw_class_command, static) = { .path = "set interface hw-class", .short_help = "Set interface hardware class", .function = set_hw_class, }; -/* *INDENT-ON* */ static clib_error_t * vnet_interface_cli_init (vlib_main_t * vm) @@ -1115,13 +1091,11 @@ renumber_interface_command_fn (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (renumber_interface_command, static) = { .path = "renumber interface", .short_help = "renumber interface <interface> <new-dev-instance>", .function = renumber_interface_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * promiscuous_cmd (vlib_main_t * vm, @@ -1151,13 +1125,11 @@ promiscuous_cmd (vlib_main_t * vm, return 0; } -/* 
*INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_promiscuous_cmd, static) = { .path = "set interface promiscuous", .short_help = "set interface promiscuous [on|off] <interface>", .function = promiscuous_cmd, }; -/* *INDENT-ON* */ static clib_error_t * mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -1208,13 +1180,11 @@ done: return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_mtu_cmd, static) = { .path = "set interface mtu", .short_help = "set interface mtu [packet|ip4|ip6|mpls] <value> <interface>", .function = mtu_cmd, }; -/* *INDENT-ON* */ static clib_error_t * show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input, @@ -1238,14 +1208,12 @@ show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input, sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); vec_set_len (sorted_sis, 0); - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { int visible = vnet_swif_is_api_visible (si); if (visible) vec_add1 (sorted_sis, si[0]); } - /* *INDENT-ON* */ /* Sort by name. 
*/ vec_sort_with_function (sorted_sis, sw_interface_name_compare); } @@ -1286,13 +1254,11 @@ show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input, * @cliexstart{show interface secondary-mac-address} * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_interface_sec_mac_addr, static) = { .path = "show interface secondary-mac-address", .short_help = "show interface secondary-mac-address [<interface>]", .function = show_interface_sec_mac_addr_fn, }; -/* *INDENT-ON* */ static clib_error_t * interface_add_del_mac_address (vlib_main_t * vm, unformat_input_t * input, @@ -1360,13 +1326,11 @@ done: * @cliexcmd{set interface secondary-mac-address GigabitEthernet0/8/0 aa:bb:cc:dd:ee:01 del} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (interface_add_del_mac_address_cmd, static) = { .path = "set interface secondary-mac-address", .short_help = "set interface secondary-mac-address <interface> <mac-address> [(add|del)]", .function = interface_add_del_mac_address, }; -/* *INDENT-ON* */ static clib_error_t * set_interface_mac_address (vlib_main_t * vm, unformat_input_t * input, @@ -1410,13 +1374,11 @@ done: * @cliexcmd{set interface mac address pg0 aa:bb:cc:dd:ee:04} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_mac_address_cmd, static) = { .path = "set interface mac address", .short_help = "set interface mac address <interface> <mac-address>", .function = set_interface_mac_address, }; -/* *INDENT-ON* */ static clib_error_t * set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -1435,13 +1397,11 @@ set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_tag_command, static) = { .path = "set interface tag", .short_help = "set interface tag <interface> <tag>", .function = set_tag, }; -/* *INDENT-ON* */ static clib_error_t * clear_tag (vlib_main_t * vm, unformat_input_t * input, @@ -1459,13 +1419,11 @@ clear_tag 
(vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_tag_command, static) = { .path = "clear interface tag", .short_help = "clear interface tag <interface>", .function = clear_tag, }; -/* *INDENT-ON* */ static clib_error_t * set_ip_directed_broadcast (vlib_main_t * vm, @@ -1499,13 +1457,11 @@ set_ip_directed_broadcast (vlib_main_t * vm, * subnet broadcast address will be sent L2 broadcast on the interface, * otherwise it is dropped. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip_directed_broadcast_command, static) = { .path = "set interface ip directed-broadcast", .short_help = "set interface enable <interface> <enable|disable>", .function = set_ip_directed_broadcast, }; -/* *INDENT-ON* */ clib_error_t * set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index, @@ -1515,6 +1471,33 @@ set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index, clib_error_t *error = 0; vnet_hw_interface_t *hw; u32 *queue_indices = 0; + vnet_dev_port_t *port; + + port = vnet_dev_get_port_from_hw_if_index (hw_if_index); + + if (port) + { + vlib_main_t *vm = vlib_get_main (); + vnet_dev_rv_t rv; + + vnet_dev_port_cfg_change_req_t req = { + .type = mode == VNET_HW_IF_RX_MODE_POLLING ? + VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE : + VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE, + .queue_id = queue_id_valid ? queue_id : 0, + .all_queues = queue_id_valid ? 
0 : 1, + }; + + if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, &req))) + return vnet_dev_port_err ( + vm, port, rv, "rx queue interupt mode enable/disable not supported"); + + if ((rv = vnet_dev_process_port_cfg_change_req (vm, port, &req))) + return vnet_dev_port_err ( + vm, port, rv, + "device failed to enable/disable queue interrupt mode"); + return 0; + } hw = vnet_get_hw_interface (vnm, hw_if_index); @@ -1634,13 +1617,11 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, * VirtualEthernet0/0/13 queue 3 (polling) * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_if_rx_mode,static) = { .path = "set interface rx-mode", .short_help = "set interface rx-mode <interface> [queue <n>] [polling | interrupt | adaptive]", .function = set_interface_rx_mode, }; -/* *INDENT-ON* */ static clib_error_t * show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input, @@ -1706,13 +1687,11 @@ show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input, * VirtualEthernet0/0/13 queue 3 (polling) * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_interface_rx_placement, static) = { .path = "show interface rx-placement", .short_help = "show interface rx-placement", .function = show_interface_rx_placement_fn, }; -/* *INDENT-ON* */ clib_error_t * set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id, u32 thread_index, u8 is_main) @@ -1837,7 +1816,6 @@ set_interface_rx_placement (vlib_main_t *vm, unformat_input_t *input, * VirtualEthernet0/0/13 queue 3 (polling) * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = { .path = "set interface rx-placement", .short_help = "set interface rx-placement <interface> [queue <n>] " @@ -1845,7 +1823,6 @@ VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = { .function = set_interface_rx_placement, .is_mp_safe = 1, }; -/* *INDENT-ON* */ int set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap) @@ -2030,13 +2007,11 
@@ done: * @cliexstart{set interface rss queues VirtualFunctionEthernet18/1/0 list 0,2-5,7} * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_interface_rss_queues,static) = { .path = "set interface rss queues", .short_help = "set interface rss queues <interface> <list <queue-list>>", .function = set_interface_rss_queues_fn, }; -/* *INDENT-ON* */ static u8 * format_vnet_pcap (u8 * s, va_list * args) @@ -2384,13 +2359,13 @@ pcap_trace_command_fn (vlib_main_t * vm, * packet capture are preserved, so '<em>any</em>' can be used to reset * the interface setting. * - * - <b>filter</b> - Use the pcap rx / tx / drop trace filter, which + * - <b>filter</b> - Use the pcap trace rx / tx / drop filter, which * must be configured. Use <b>classify filter pcap...</b> to configure the * filter. The filter will only be executed if the per-interface or * any-interface tests fail. * * - <b>error <node>.<error></b> - filter packets based on a specific error. - * For example: error {ip4-udp-lookup}.{No listener for dst port} + * For example: error {ip4-udp-lookup}.{no_listener} * * - <b>file <name></b> - Used to specify the output filename. The file will * be placed in the '<em>/tmp</em>' directory, so only the filename is @@ -2426,7 +2401,6 @@ pcap_trace_command_fn (vlib_main_t * vm, * saved to /tmp/vppTest.pcap... 
* @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (pcap_tx_trace_command, static) = { .path = "pcap trace", @@ -2436,7 +2410,72 @@ VLIB_CLI_COMMAND (pcap_tx_trace_command, static) = { " [preallocate-data][free-data]", .function = pcap_trace_command_fn, }; -/* *INDENT-ON* */ + +static clib_error_t * +set_pcap_filter_function (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_pcap_t *pp = &vnet_get_main ()->pcap; + unformat_input_t _line_input, *line_input = &_line_input; + vlib_is_packet_traced_fn_t *res = 0; + clib_error_t *error = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != (uword) UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_vlib_trace_filter_function, + &res)) + ; + else + { + error = clib_error_create ( + "expected valid trace filter function, got `%U'", + format_unformat_error, line_input); + goto done; + } + } + pp->current_filter_function = res; + +done: + unformat_free (line_input); + + return error; +} + +VLIB_CLI_COMMAND (set_pcap_filter_function_cli, static) = { + .path = "set pcap filter function", + .short_help = "set pcap filter function <func_name>", + .function = set_pcap_filter_function, +}; + +static clib_error_t * +show_pcap_filter_function (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_pcap_t *pp = &vnet_get_main ()->pcap; + vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main; + vlib_is_packet_traced_fn_t *current_trace_filter_fn = + pp->current_filter_function; + vlib_trace_filter_function_registration_t *reg = + tfm->trace_filter_registration; + + while (reg) + { + vlib_cli_output (vm, "%sname:%s description: %s priority: %u", + reg->function == current_trace_filter_fn ? 
"(*) " : "", + reg->name, reg->description, reg->priority); + reg = reg->next; + } + return 0; +} + +VLIB_CLI_COMMAND (show_pcap_filter_function_cli, static) = { + .path = "show pcap filter function", + .short_help = "show pcap filter function", + .function = show_pcap_filter_function, +}; static clib_error_t * set_interface_name (vlib_main_t *vm, unformat_input_t *input, diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c index 0c051dd4757..0eff8c4597c 100644 --- a/src/vnet/interface_format.c +++ b/src/vnet/interface_format.c @@ -143,11 +143,9 @@ format_vnet_hw_interface_rss_queues (u8 * s, va_list * args) if (bitmap) { - /* *INDENT-OFF* */ clib_bitmap_foreach (i, bitmap) { s = format (s, "%u ", i); } - /* *INDENT-ON* */ } return s; @@ -290,7 +288,7 @@ format_vnet_sw_if_index_name (u8 * s, va_list * args) if (NULL == si) { - return format (s, "DELETED"); + return format (s, "DELETED (%u)", sw_if_index); } return format (s, "%U", format_vnet_sw_interface_name, vnm, si); } @@ -305,7 +303,7 @@ format_vnet_hw_if_index_name (u8 * s, va_list * args) hi = vnet_get_hw_interface (vnm, hw_if_index); if (hi == 0) - return format (s, "DELETED"); + return format (s, "DELETED (%u)", hw_if_index); return format (s, "%v", hi->name); } diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h index 02d80996a15..511df4920e4 100644 --- a/src/vnet/interface_funcs.h +++ b/src/vnet/interface_funcs.h @@ -483,12 +483,14 @@ unformat_function_t unformat_vnet_sw_interface_flags; format_function_t format_vtr; /* Node runtime for interface output function. */ +struct vnet_dev_tx_queue; typedef struct { u32 hw_if_index; u32 sw_if_index; u32 dev_instance; - u32 is_deleted; + u8 is_deleted; + struct vnet_dev_tx_queue *tx_queue; } vnet_interface_output_runtime_t; /* Interface output function. 
*/ diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c index a19bbb867c3..47844dcd68a 100644 --- a/src/vnet/interface_output.c +++ b/src/vnet/interface_output.c @@ -85,9 +85,8 @@ format_vnet_interface_output_trace (u8 * s, va_list * va) else { si = vnet_get_sw_interface (vnm, t->sw_if_index); - s = - format (s, "%U ", format_vnet_sw_interface_name, vnm, si, - t->flags); + s = format (s, "%U flags 0x%08x", format_vnet_sw_interface_name, vnm, + si, t->flags); } s = format (s, "\n%U%U", format_white_space, indent, @@ -1222,7 +1221,6 @@ VLIB_NODE_FN (interface_punt) (vlib_main_t * vm, return interface_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_PUNT); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (interface_drop) = { .name = "error-drop", .vector_size = sizeof (u32), @@ -1233,9 +1231,7 @@ VLIB_REGISTER_NODE (interface_drop) = { [0] = "drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (interface_punt) = { .name = "error-punt", .vector_size = sizeof (u32), @@ -1246,7 +1242,6 @@ VLIB_REGISTER_NODE (interface_punt) = { [0] = "punt", }, }; -/* *INDENT-ON* */ VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node) = { .name = "interface-output", diff --git a/src/vnet/interface_stats.c b/src/vnet/interface_stats.c index 3afde0ea54f..ff1a2af9130 100644 --- a/src/vnet/interface_stats.c +++ b/src/vnet/interface_stats.c @@ -170,7 +170,6 @@ VLIB_NODE_FN (stats_collect_tx_node) (vlib_main_t * vm, return stats_collect_inline (vm, node, frame, VLIB_TX); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (stats_collect_rx_node) = { .vector_size = sizeof (u32), .format_trace = format_stats_collect_trace, @@ -201,7 +200,6 @@ VNET_FEATURE_INIT (stats_collect_tx_node, static) = { .runs_before = VNET_FEATURES ("interface-output-arc-end"), }; -/* *INDENT-ON* */ static clib_error_t * stats_collect_init (vlib_main_t * vm) diff --git a/src/vnet/interface_test.c b/src/vnet/interface_test.c index c3ddcd74cc4..2d0c0ee81d1 100644 --- a/src/vnet/interface_test.c 
+++ b/src/vnet/interface_test.c @@ -1283,6 +1283,30 @@ api_sw_interface_set_interface_name (vat_main_t *vam) return -1; } +static int +api_pcap_set_filter_function (vat_main_t *vam) +{ + vl_api_pcap_set_filter_function_t *mp; + int ret; + + M (PCAP_SET_FILTER_FUNCTION, mp); + S (mp); + W (ret); + return ret; +} + +static int +api_pcap_trace_on (vat_main_t *vam) +{ + return -1; +} + +static int +api_pcap_trace_off (vat_main_t *vam) +{ + return -1; +} + #include <vnet/interface.api_test.c> /* diff --git a/src/vnet/ip-neighbor/ip4_neighbor.c b/src/vnet/ip-neighbor/ip4_neighbor.c index 5a6e8dd154c..61b9e768fe5 100644 --- a/src/vnet/ip-neighbor/ip4_neighbor.c +++ b/src/vnet/ip-neighbor/ip4_neighbor.c @@ -187,12 +187,16 @@ ip4_arp_inline (vlib_main_t * vm, /* resolve the packet's destination */ ip4_header_t *ip0 = vlib_buffer_get_current (p0); resolve0 = ip0->dst_address; - src0 = adj0->sub_type.glean.rx_pfx.fp_addr.ip4; } else + /* resolve the incomplete adj */ + resolve0 = adj0->sub_type.nbr.next_hop.ip4; + + if (is_glean && adj0->sub_type.glean.rx_pfx.fp_len) + /* the glean is for a connected, local prefix */ + src0 = adj0->sub_type.glean.rx_pfx.fp_addr.ip4; + else { - /* resolve the incomplete adj */ - resolve0 = adj0->sub_type.nbr.next_hop.ip4; /* Src IP address in ARP header. 
*/ if (!fib_sas4_get (sw_if_index0, &resolve0, &src0) && !ip4_sas_by_sw_if_index (sw_if_index0, &resolve0, &src0)) @@ -270,7 +274,6 @@ VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return (ip4_arp_inline (vm, node, frame, 1)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_arp_node) = { .name = "ip4-arp", @@ -296,7 +299,6 @@ VLIB_REGISTER_NODE (ip4_glean_node) = [IP4_ARP_NEXT_DROP] = "ip4-drop", }, }; -/* *INDENT-ON* */ #define foreach_notrace_ip4_arp_error \ _(THROTTLED) \ @@ -328,7 +330,7 @@ ip4_neighbor_main_loop_enter (vlib_main_t * vm) vlib_thread_main_t *tm = &vlib_thread_main; u32 n_vlib_mains = tm->n_vlib_mains; - throttle_init (&arp_throttle, n_vlib_mains, 1e-3); + throttle_init (&arp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3); return (NULL); } diff --git a/src/vnet/ip-neighbor/ip6_neighbor.c b/src/vnet/ip-neighbor/ip6_neighbor.c index 576ae570c0f..ca8aed3d4ca 100644 --- a/src/vnet/ip-neighbor/ip6_neighbor.c +++ b/src/vnet/ip-neighbor/ip6_neighbor.c @@ -217,13 +217,14 @@ ip6_discover_neighbor_inline (vlib_main_t * vm, * Choose source address based on destination lookup * adjacency. 
*/ - if (!fib_sas6_get (sw_if_index0, &ip0->dst_address, &src) || - !ip6_sas_by_sw_if_index (sw_if_index0, &ip0->dst_address, &src)) + const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index0); + if (!ll) { /* There is no address on the interface */ p0->error = node->errors[IP6_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]; continue; } + ip6_address_copy (&src, ll); b0 = ip6_neighbor_probe (vm, vnm, sw_if_index0, thread_index, &src, &ip0->dst_address); @@ -263,7 +264,6 @@ ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return (ip6_discover_neighbor_inline (vm, node, frame, 1)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_glean_node) = { .function = ip6_glean, @@ -294,7 +294,6 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = [IP6_NBR_NEXT_REPLY_TX] = "ip6-rewrite-mcast", }, }; -/* *INDENT-ON* */ /* Template used to generate IP6 neighbor solicitation packets. */ vlib_packet_template_t ip6_neighbor_packet_template; @@ -338,7 +337,7 @@ ip6_nd_main_loop_enter (vlib_main_t * vm) { vlib_thread_main_t *tm = &vlib_thread_main; - throttle_init (&nd_throttle, tm->n_vlib_mains, 1e-3); + throttle_init (&nd_throttle, tm->n_vlib_mains, THROTTLE_BITS, 1e-3); return 0; } diff --git a/src/vnet/ip-neighbor/ip_neighbor.api b/src/vnet/ip-neighbor/ip_neighbor.api index a04fcbc569e..24cddd42fab 100644 --- a/src/vnet/ip-neighbor/ip_neighbor.api +++ b/src/vnet/ip-neighbor/ip_neighbor.api @@ -20,7 +20,7 @@ called through a shared memory interface. 
*/ -option version = "1.0.0"; +option version = "1.0.1"; import "vnet/ip/ip_types.api"; import "vnet/ethernet/ethernet_types.api"; @@ -126,6 +126,40 @@ autoreply define ip_neighbor_config bool recycle; }; +/** \brief Get neighbor database configuration per AF + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param af - Address family (v4/v6) +*/ +define ip_neighbor_config_get +{ + option in_progress; + u32 client_index; + u32 context; + vl_api_address_family_t af; +}; + +/** \brief Neighbor database configuration reply + @param context - sender context, to match reply w/ request + @param retval - error (0 is "no error") + @param af - Address family (v4/v6) + @param max_number - The maximum number of neighbours that will be created + @param max_age - The maximum age (in seconds) before an inactive neighbour + is flushed + @param recycle - If max_number of neighbours is reached and new ones need + to be created, should the oldest neighbour be 'recycled' +*/ +define ip_neighbor_config_get_reply +{ + option in_progress; + u32 context; + i32 retval; + vl_api_address_family_t af; + u32 max_number; + u32 max_age; + bool recycle; +}; + /** \brief IP neighbour replace begin The use-case is that, for some unspecified reason, the control plane diff --git a/src/vnet/ip-neighbor/ip_neighbor.c b/src/vnet/ip-neighbor/ip_neighbor.c index b33ca8a3538..d340037a15d 100644 --- a/src/vnet/ip-neighbor/ip_neighbor.c +++ b/src/vnet/ip-neighbor/ip_neighbor.c @@ -130,7 +130,6 @@ typedef struct ip_neighbor_db_t_ static vlib_log_class_t ipn_logger; /* DBs of neighbours one per AF */ -/* *INDENT-OFF* */ static ip_neighbor_db_t ip_neighbor_db[N_AF] = { [AF_IP4] = { .ipndb_limit = 50000, @@ -145,7 +144,6 @@ static ip_neighbor_db_t ip_neighbor_db[N_AF] = { .ipndb_recycle = false, } }; -/* *INDENT-ON* */ #define IP_NEIGHBOR_DBG(...) 
\ vlib_log_debug (ipn_logger, __VA_ARGS__); @@ -797,7 +795,7 @@ ip_neighbor_cmd (vlib_main_t * vm, vnet_main_t *vnm = vnet_get_main (); ip_neighbor_flags_t flags; u32 sw_if_index = ~0; - int is_add = 1; + int is_add = 1, is_flush = 0; int count = 1; flags = IP_NEIGHBOR_FLAG_DYNAMIC; @@ -811,6 +809,8 @@ ip_neighbor_cmd (vlib_main_t * vm, ; else if (unformat (input, "delete") || unformat (input, "del")) is_add = 0; + else if (unformat (input, "flush")) + is_flush = 1; else if (unformat (input, "static")) { flags |= IP_NEIGHBOR_FLAG_STATIC; @@ -824,6 +824,13 @@ ip_neighbor_cmd (vlib_main_t * vm, break; } + if (is_flush) + { + ip_neighbor_del_all (AF_IP4, sw_if_index); + ip_neighbor_del_all (AF_IP6, sw_if_index); + return NULL; + } + if (sw_if_index == ~0 || ip_address_is_zero (&ip) || mac_address_is_zero (&mac)) return clib_error_return (0, @@ -846,11 +853,10 @@ ip_neighbor_cmd (vlib_main_t * vm, return NULL; } -/* *INDENT-OFF* */ /*? * Add or delete IPv4 ARP cache entries. * - * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>', + * @note 'set ip neighbor' options (e.g. delete, static, * 'count <number>', 'interface ip4_addr mac_addr') can be added in * any order and combination. * @@ -859,35 +865,39 @@ ip_neighbor_cmd (vlib_main_t * vm, * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format. 
* @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} - * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be} + * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 + * de:ad:be:ef:ba:be} * - * To add or delete an IPv4 ARP cache entry to or from a specific fib + * To add or delete an IPv4 ARP cache entry * table: - * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} - * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 + * dead.beef.babe} * * Add or delete IPv4 static ARP cache entries as follows: - * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} - * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 + * dead.beef.babe} + * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 + * dead.beef.babe} * * For testing / debugging purposes, the 'set ip neighbor' command can add or * delete multiple entries. 
Supply the 'count N' parameter: - * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 + * dead.beef.babe} * @endparblock ?*/ VLIB_CLI_COMMAND (ip_neighbor_command, static) = { .path = "set ip neighbor", - .short_help = - "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]", + .short_help = "set ip neighbor [del] <intfc> <ip-address> <mac-address> " + "[static] [no-fib-entry] [count <count>]", .function = ip_neighbor_cmd, }; VLIB_CLI_COMMAND (ip_neighbor_command2, static) = { .path = "ip neighbor", - .short_help = - "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]", + .short_help = "ip neighbor [del] [flush] <intfc> <ip-address> <mac-address> " + "[static] [no-fib-entry] [count <count>]", .function = ip_neighbor_cmd, }; -/* *INDENT-ON* */ static int ip_neighbor_sort (void *a1, void *a2) @@ -913,7 +923,6 @@ ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af) index_t *ipnis = NULL; ip_neighbor_t *ipn; - /* *INDENT-OFF* */ pool_foreach (ipn, ip_neighbor_pool) { if ((sw_if_index == ~0 || @@ -923,7 +932,6 @@ ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af) vec_add1 (ipnis, ip_neighbor_get_index(ipn)); } - /* *INDENT-ON* */ if (ipnis) vec_sort_with_function (ipnis, ip_neighbor_sort); @@ -943,7 +951,6 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm, vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP", "Flags", "Ethernet", "Interface"); - /* *INDENT-OFF*/ /* the list is time sorted, newest first, so start from the back * and work forwards. 
Stop when we get to one that is alive */ clib_llist_foreach_reverse(ip_neighbor_elt_pool, @@ -951,7 +958,6 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm, ({ vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index); })); - /* *INDENT-ON*/ return (NULL); } @@ -1033,7 +1039,6 @@ ip4_neighbor_show_sorted (vlib_main_t * vm, * Fib_index 0 6.0.0.1 - 6.0.0.11 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = { .path = "show ip neighbors", .function = ip_neighbor_show, @@ -1074,7 +1079,6 @@ VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = { .function = ip6_neighbor_show_sorted, .short_help = "show ip6 neighbor-sorted", }; -/* *INDENT-ON* */ static ip_neighbor_vft_t ip_nbr_vfts[N_AF]; @@ -1124,13 +1128,11 @@ ip_neighbor_walk (ip_address_family_t af, vec_foreach (hash, ip_neighbor_db[af].ipndb_hash) { - /* *INDENT-OFF* */ hash_foreach (key, ipni, *hash, ({ if (WALK_STOP == cb (ipni, ctx)) break; })); - /* *INDENT-ON* */ } } else @@ -1141,13 +1143,11 @@ ip_neighbor_walk (ip_address_family_t af, return; hash = ip_neighbor_db[af].ipndb_hash[sw_if_index]; - /* *INDENT-OFF* */ hash_foreach (key, ipni, hash, ({ if (WALK_STOP == cb (ipni, ctx)) break; })); - /* *INDENT-ON* */ } } @@ -1226,14 +1226,12 @@ ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index) format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index, format_ip_address_family, af); - /* *INDENT-OFF* */ pool_foreach (ipn, ip_neighbor_pool) { if (ip_neighbor_get_af(ipn) == af && ipn->ipn_key->ipnk_sw_if_index == sw_if_index) vec_add1 (ipnis, ipn - ip_neighbor_pool); } - /* *INDENT-ON* */ vec_foreach (ipni, ipnis) { @@ -1259,7 +1257,6 @@ ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index) format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index, format_ip_address_family, af); - /* *INDENT-OFF* */ pool_foreach (ipn, ip_neighbor_pool) { if (ip_neighbor_get_af(ipn) == af && @@ -1267,7 +1264,6 @@ ip_neighbor_flush (ip_address_family_t af, u32 
sw_if_index) ip_neighbor_is_dynamic (ipn)) vec_add1 (ipnis, ipn - ip_neighbor_pool); } - /* *INDENT-ON* */ vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni)); vec_free (ipnis); @@ -1447,7 +1443,6 @@ ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im, if (is_del) { - /* *INDENT-OFF* */ ip_neighbor_walk_covered_ctx_t ctx = { .addr = { .ip.ip4 = *address, @@ -1455,7 +1450,6 @@ ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im, }, .length = address_length, }; - /* *INDENT-ON* */ index_t *ipni; ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx); @@ -1489,7 +1483,6 @@ ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im, if (is_del) { - /* *INDENT-OFF* */ ip_neighbor_walk_covered_ctx_t ctx = { .addr = { .ip.ip6 = *address, @@ -1497,7 +1490,6 @@ ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im, }, .length = address_length, }; - /* *INDENT-ON* */ index_t *ipni; ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx); @@ -1593,8 +1585,8 @@ ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait) } else { - ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn), af, - vlib_get_thread_index (), + ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn), + vlib_get_thread_index (), af, &ip_addr_46 (&ipn->ipn_key->ipnk_ip)); ipn->ipn_n_probes++; @@ -1653,7 +1645,6 @@ ip_neighbor_age_loop (vlib_main_t * vm, head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]); - /* *INDENT-OFF*/ /* the list is time sorted, newest first, so start from the back * and work forwards. 
Stop when we get to one that is alive */ restart: @@ -1678,7 +1669,6 @@ ip_neighbor_age_loop (vlib_main_t * vm, timeout = clib_min (wait, timeout); })); - /* *INDENT-ON* */ break; } case IP_NEIGHBOR_AGE_PROCESS_WAKEUP: @@ -1725,7 +1715,6 @@ ip6_neighbor_age_process (vlib_main_t * vm, return (ip_neighbor_age_loop (vm, rt, f, AF_IP6)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = { .function = ip4_neighbor_age_process, .type = VLIB_NODE_TYPE_PROCESS, @@ -1736,7 +1725,6 @@ VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = { .type = VLIB_NODE_TYPE_PROCESS, .name = "ip6-neighbor-age-process", }; -/* *INDENT-ON* */ int ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle) @@ -1754,13 +1742,23 @@ ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle) return (0); } +int +ip_neighbor_get_config (ip_address_family_t af, u32 *limit, u32 *age, + bool *recycle) +{ + *limit = ip_neighbor_db[af].ipndb_limit; + *age = ip_neighbor_db[af].ipndb_age; + *recycle = ip_neighbor_db[af].ipndb_recycle; + + return (0); +} + static clib_error_t * ip_neighbor_config_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { ip_address_family_t af; - /* *INDENT-OFF* */ FOR_EACH_IP_ADDRESS_FAMILY(af) { vlib_cli_output (vm, "%U:", format_ip_address_family, af); vlib_cli_output (vm, " limit:%d, age:%d, recycle:%d", @@ -1769,7 +1767,6 @@ ip_neighbor_config_show (vlib_main_t * vm, ip_neighbor_db[af].ipndb_recycle); } - /* *INDENT-ON* */ return (NULL); } @@ -1861,7 +1858,6 @@ ip_neighbor_stats_show (vlib_main_t *vm, unformat_input_t *input, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = { .path = "show ip neighbor-config", .function = ip_neighbor_config_show, @@ -1878,7 +1874,6 @@ VLIB_CLI_COMMAND (show_ip_neighbor_stats_cmd_node, static) = { .function = ip_neighbor_stats_show, .short_help = "show ip neighbor-stats [interface]", }; -/* 
*INDENT-ON* */ static clib_error_t * ip_neighbor_init (vlib_main_t * vm) @@ -1918,12 +1913,10 @@ ip_neighbor_init (vlib_main_t * vm) return (NULL); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip_neighbor_init) = { .runs_after = VLIB_INITS("ip_main_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip-neighbor/ip_neighbor.h b/src/vnet/ip-neighbor/ip_neighbor.h index 8c07df86ba8..cc888ba2054 100644 --- a/src/vnet/ip-neighbor/ip_neighbor.h +++ b/src/vnet/ip-neighbor/ip_neighbor.h @@ -36,6 +36,8 @@ extern int ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index); extern int ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle); +extern int ip_neighbor_get_config (ip_address_family_t af, u32 *limit, + u32 *age, bool *recycle); extern void ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index); diff --git a/src/vnet/ip-neighbor/ip_neighbor_api.c b/src/vnet/ip-neighbor/ip_neighbor_api.c index 81af86211de..2297546f111 100644 --- a/src/vnet/ip-neighbor/ip_neighbor_api.c +++ b/src/vnet/ip-neighbor/ip_neighbor_api.c @@ -234,12 +234,10 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp, BAD_SW_IF_INDEX_LABEL; - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY, ({ rmp->stats_index = htonl (stats_index); })); - /* *INDENT-ON* */ } static void @@ -314,6 +312,32 @@ vl_api_ip_neighbor_config_t_handler (vl_api_ip_neighbor_config_t * mp) } static void +vl_api_ip_neighbor_config_get_t_handler (vl_api_ip_neighbor_config_get_t *mp) +{ + vl_api_ip_neighbor_config_get_reply_t *rmp; + int rv; + ip_address_family_t af = AF_IP4; + u32 max_number = ~0; + u32 max_age = ~0; + bool recycle = false; + + rv = ip_address_family_decode (mp->af, &af); + + if (!rv) + rv = ip_neighbor_get_config (af, &max_number, &max_age, &recycle); + + // clang-format off + REPLY_MACRO2 (VL_API_IP_NEIGHBOR_CONFIG_GET_REPLY, + ({ + rmp->af = ip_address_family_encode (af); + rmp->max_number = htonl 
(max_number); + rmp->max_age = htonl (max_age); + rmp->recycle = recycle; + })); + // clang-format on +} + +static void vl_api_ip_neighbor_replace_begin_t_handler (vl_api_ip_neighbor_replace_begin_t * mp) { diff --git a/src/vnet/ip-neighbor/ip_neighbor_watch.c b/src/vnet/ip-neighbor/ip_neighbor_watch.c index 72908f4e613..74f450114e1 100644 --- a/src/vnet/ip-neighbor/ip_neighbor_watch.c +++ b/src/vnet/ip-neighbor/ip_neighbor_watch.c @@ -66,13 +66,11 @@ ip_neighbor_event_process (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip_neighbor_event_process_node) = { .function = ip_neighbor_event_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "ip-neighbor-event", }; -/* *INDENT-ON* */ static clib_error_t * @@ -84,7 +82,6 @@ want_ip_neighbor_events_reaper (u32 client_index) i32 pos; /* walk the entire IP neighbour DB and removes the client's registrations */ - /* *INDENT-OFF* */ mhash_foreach(key, v, &ipnw_db.ipnwdb_hash, ({ watchers = (ip_neighbor_watcher_t*) *v; @@ -97,7 +94,6 @@ want_ip_neighbor_events_reaper (u32 client_index) if (vec_len(watchers) == 0) vec_add1 (empty_keys, *key); })); - /* *INDENT-OFF* */ vec_foreach (key, empty_keys) mhash_unset (&ipnw_db.ipnwdb_hash, key, NULL); @@ -236,7 +232,6 @@ ip_neighbor_watchers_show (vlib_main_t * vm, ip_neighbor_key_t *key; uword *v; - /* *INDENT-OFF* */ mhash_foreach(key, v, &ipnw_db.ipnwdb_hash, ({ watchers = (ip_neighbor_watcher_t*) *v; @@ -247,17 +242,14 @@ ip_neighbor_watchers_show (vlib_main_t * vm, vec_foreach (watcher, watchers) vlib_cli_output (vm, " %U", format_ip_neighbor_watcher, watcher); })); - /* *INDENT-ON* */ return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip_neighbor_watchers_cmd_node, static) = { .path = "show ip neighbor-watcher", .function = ip_neighbor_watchers_show, .short_help = "show ip neighbors-watcher", }; -/* *INDENT-ON* */ static clib_error_t * ip_neighbor_watch_init (vlib_main_t * vm) @@ -267,12 +259,10 @@ ip_neighbor_watch_init (vlib_main_t * vm) return 
(NULL); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip_neighbor_watch_init) = { .runs_after = VLIB_INITS("ip_neighbor_init"), }; -/* *INDENT-ON* */ /* diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c index 318081b9c9f..fa4a0e12276 100644 --- a/src/vnet/ip/icmp4.c +++ b/src/vnet/ip/icmp4.c @@ -204,7 +204,6 @@ ip4_icmp_input (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_icmp_input_node) = { .function = ip4_icmp_input, .name = "ip4-icmp-input", @@ -221,7 +220,6 @@ VLIB_REGISTER_NODE (ip4_icmp_input_node) = { [ICMP_INPUT_NEXT_ERROR] = "ip4-punt", }, }; -/* *INDENT-ON* */ typedef enum { @@ -318,13 +316,14 @@ ip4_icmp_error (vlib_main_t * vm, sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - vlib_buffer_copy_trace_flag (vm, p0, pi0); + vlib_buffer_copy_trace_flag (vm, org_p0, pi0); /* Add IP header and ICMPv4 header including a 4 byte data field */ vlib_buffer_advance (p0, -sizeof (ip4_header_t) - sizeof (icmp46_header_t) - 4); + p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; p0->current_length = p0->current_length > 576 ? 
576 : p0->current_length; out_ip0 = vlib_buffer_get_current (p0); @@ -342,7 +341,7 @@ ip4_icmp_error (vlib_main_t * vm, /* Prefer a source address from "offending interface" */ if (!ip4_sas_by_sw_if_index (sw_if_index0, &out_ip0->dst_address, &out_ip0->src_address)) - { /* interface has no IP6 address - should not happen */ + { /* interface has no IP4 address - should not happen */ next0 = IP4_ICMP_ERROR_NEXT_DROP; error0 = ICMP4_ERROR_DROP; } @@ -387,7 +386,6 @@ ip4_icmp_error (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_icmp_error_node) = { .function = ip4_icmp_error, .name = "ip4-icmp-error", @@ -404,7 +402,6 @@ VLIB_REGISTER_NODE (ip4_icmp_error_node) = { .format_trace = format_icmp_input_trace, }; -/* *INDENT-ON* */ static uword @@ -590,7 +587,7 @@ icmp4_init (vlib_main_t * vm) vlib_thread_main_t *tm = &vlib_thread_main; u32 n_vlib_mains = tm->n_vlib_mains; - throttle_init (&icmp_throttle, n_vlib_mains, 1e-3); + throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-5); return 0; } diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h index 0545046fe60..08e73f6cd7d 100644 --- a/src/vnet/ip/icmp46_packet.h +++ b/src/vnet/ip/icmp46_packet.h @@ -187,7 +187,6 @@ typedef enum #undef _ } icmp6_code_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u8 type; @@ -195,7 +194,6 @@ typedef CLIB_PACKED (struct /* IP checksum of icmp header plus data which follows. */ u16 checksum; }) icmp46_header_t; -/* *INDENT-ON* */ /* ip6 neighbor discovery */ #define foreach_icmp6_neighbor_discovery_option \ @@ -238,7 +236,6 @@ typedef enum icmp6_neighbor_discovery_option_type #undef _ } icmp6_neighbor_discovery_option_type_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /* Option type. 
*/ @@ -357,6 +354,5 @@ typedef CLIB_PACKED (struct icmp6_neighbor_discovery_ethernet_link_layer_address_option_t link_layer_option; }) icmp6_neighbor_solicitation_header_t; -/* *INDENT-ON* */ #endif /* included_vnet_icmp46_packet_h */ diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c index 4cabc0e083f..b095f679cc8 100644 --- a/src/vnet/ip/icmp6.c +++ b/src/vnet/ip/icmp6.c @@ -235,7 +235,6 @@ ip6_icmp_input (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_icmp_input_node) = { .function = ip6_icmp_input, .name = "ip6-icmp-input", @@ -252,7 +251,6 @@ VLIB_REGISTER_NODE (ip6_icmp_input_node) = { [ICMP_INPUT_NEXT_PUNT] = "ip6-punt", }, }; -/* *INDENT-ON* */ typedef enum { @@ -359,14 +357,13 @@ ip6_icmp_error (vlib_main_t * vm, sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - vlib_buffer_copy_trace_flag (vm, p0, pi0); + vlib_buffer_copy_trace_flag (vm, org_p0, pi0); /* Add IP header and ICMPv6 header including a 4 byte data field */ vlib_buffer_advance (p0, -(sizeof (ip6_header_t) + sizeof (icmp46_header_t) + 4)); - vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; p0->current_length = p0->current_length > 1280 ? 
1280 : p0->current_length; @@ -427,7 +424,6 @@ ip6_icmp_error (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_icmp_error_node) = { .function = ip6_icmp_error, .name = "ip6-icmp-error", @@ -444,7 +440,6 @@ VLIB_REGISTER_NODE (ip6_icmp_error_node) = { .format_trace = format_icmp6_input_trace, }; -/* *INDENT-ON* */ static uword @@ -644,7 +639,7 @@ icmp6_init (vlib_main_t * vm) vlib_thread_main_t *tm = &vlib_thread_main; u32 n_vlib_mains = tm->n_vlib_mains; - throttle_init (&icmp_throttle, n_vlib_mains, 1e-3); + throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3); return (NULL); } diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 23e094b48a0..967f56cf917 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -366,6 +366,41 @@ autoreply define set_ip_flow_hash_v2 vl_api_ip_flow_hash_config_t flow_hash_config; }; +/** + @brief flow hash settings for an IP table + @param src - include src in flow hash + @param dst - include dst in flow hash + @param sport - include sport in flow hash + @param dport - include dport in flow hash + @param proto - include proto in flow hash + @param reverse - include reverse in flow hash + @param symmetric - include symmetry in flow hash + @param flowlabel - include flowlabel in flow hash + @param gtpv1teid - include gtpv1teid in flow hash +*/ +enumflag ip_flow_hash_config_v2 +{ + IP_API_V2_FLOW_HASH_SRC_IP = 0x01, + IP_API_V2_FLOW_HASH_DST_IP = 0x02, + IP_API_V2_FLOW_HASH_SRC_PORT = 0x04, + IP_API_V2_FLOW_HASH_DST_PORT = 0x08, + IP_API_V2_FLOW_HASH_PROTO = 0x10, + IP_API_V2_FLOW_HASH_REVERSE = 0x20, + IP_API_V2_FLOW_HASH_SYMETRIC = 0x40, + IP_API_V2_FLOW_HASH_FLOW_LABEL = 0x80, + IP_API_V2_FLOW_HASH_GTPV1_TEID = 0x100, +}; + +autoreply define set_ip_flow_hash_v3 +{ + u32 client_index; + u32 context; + u32 table_id; + vl_api_address_family_t af; + vl_api_ip_flow_hash_config_v2_t flow_hash_config; + option status="in_progress"; +}; + /** \brief Set the ip flow hash router ID 
@param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -587,6 +622,7 @@ typedef punt_redirect autoreply define ip_punt_redirect { option deprecated; + u32 client_index; u32 context; vl_api_punt_redirect_t punt; @@ -595,6 +631,8 @@ autoreply define ip_punt_redirect define ip_punt_redirect_dump { + option deprecated; + u32 client_index; u32 context; vl_api_interface_index_t sw_if_index; @@ -603,6 +641,8 @@ define ip_punt_redirect_dump define ip_punt_redirect_details { + option deprecated; + u32 context; vl_api_punt_redirect_t punt; }; @@ -1020,6 +1060,12 @@ counters ip4 { units "packets"; description "ip4 ttl <= 1"; }; + hdr_too_short { + severity error; + type counter64; + units "packets"; + description "ip4 IHL < 5"; + }; /* Errors signalled by ip4-rewrite. */ mtu_exceeded { diff --git a/src/vnet/ip/ip.c b/src/vnet/ip/ip.c index 0a602b43ac7..586f7dfbc85 100644 --- a/src/vnet/ip/ip.c +++ b/src/vnet/ip/ip.c @@ -118,7 +118,6 @@ ip_set (ip46_address_t * dst, void *src, u8 is_ip4) sizeof (ip6_address_t)); } -/* *INDENT-OFF* */ static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = { [IP_FEATURE_INPUT] = { [AF_IP4] = { @@ -171,7 +170,6 @@ static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = { }, }, }; -/* *INDENT-ON* */ void ip_feature_enable_disable (ip_address_family_t af, @@ -203,7 +201,8 @@ ip_feature_enable_disable (ip_address_family_t af, } int -ip_flow_hash_set (ip_address_family_t af, u32 table_id, u32 flow_hash_config) +ip_flow_hash_set (ip_address_family_t af, u32 table_id, + flow_hash_config_t flow_hash_config) { fib_protocol_t fproto; u32 fib_index; diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index e969594ec00..45d07c2e0f6 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -211,7 +211,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im, ip_interface_address_t *ia; ip4_address_t *result = 0; - /* *INDENT-OFF* */ 
foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */, ({ @@ -222,7 +221,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im, break; } })); - /* *INDENT-ON* */ if (result_ia) *result_ia = result ? ia : 0; return result; diff --git a/src/vnet/ip/ip46_address.h b/src/vnet/ip/ip46_address.h index f726178ee63..90f766464f6 100644 --- a/src/vnet/ip/ip46_address.h +++ b/src/vnet/ip/ip46_address.h @@ -34,7 +34,6 @@ typedef enum extern u8 *format_ip46_type (u8 * s, va_list * args); -/* *INDENT-OFF* */ typedef CLIB_PACKED (union ip46_address_t_ { struct { u32 pad[3]; @@ -44,7 +43,6 @@ typedef CLIB_PACKED (union ip46_address_t_ { u8 as_u8[16]; u64 as_u64[2]; }) ip46_address_t; -/* *INDENT-ON* */ format_function_t format_ip46_address; diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c index f58be898d9b..e3da27914bd 100644 --- a/src/vnet/ip/ip46_cli.c +++ b/src/vnet/ip/ip46_cli.c @@ -71,12 +71,10 @@ ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2) return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip_command, static) = { .path = "set interface ip", .short_help = "IP4/IP6 commands", }; -/* *INDENT-ON* */ void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) @@ -90,7 +88,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) ip_interface_address_t *ia; int i; - /* *INDENT-OFF* */ foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ @@ -99,9 +96,7 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) vec_add1 (ip4_addrs, x[0]); vec_add1 (ip4_masks, ia->address_length); })); - /* *INDENT-ON* */ - /* *INDENT-OFF* */ foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ @@ -110,7 +105,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) vec_add1 (ip6_addrs, x[0]); vec_add1 (ip6_masks, ia->address_length); })); - /* *INDENT-ON* */ for (i = 0; i < vec_len 
(ip4_addrs); i++) ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i], @@ -212,13 +206,11 @@ done: * @cliexcmd{set interface ip address del GigabitEthernet2/0/0 all} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = { .path = "set interface ip address", .function = add_del_ip_address, .short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]", }; -/* *INDENT-ON* */ static clib_error_t * set_reassembly_command_fn (vlib_main_t * vm, @@ -294,13 +286,11 @@ set_reassembly_command_fn (vlib_main_t * vm, return NULL; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_reassembly_command, static) = { .path = "set interface reassembly", .short_help = "set interface reassembly <interface-name> [on|off|ip4|ip6]", .function = set_reassembly_command_fn, }; -/* *INDENT-ON* */ /* Dummy init function to get us linked in. */ static clib_error_t * diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 826fa573e9c..ff74b52eb18 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -103,7 +103,6 @@ VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node, static u8 *format_ip4_lookup_trace (u8 * s, va_list * args); -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_lookup_node) = { .name = "ip4-lookup", @@ -112,7 +111,6 @@ VLIB_REGISTER_NODE (ip4_lookup_node) = .n_next_nodes = IP_LOOKUP_N_NEXT, .next_nodes = IP4_LOOKUP_NEXT_NODES, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -268,7 +266,6 @@ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_load_balance_node) = { .name = "ip4-load-balance", @@ -276,7 +273,6 @@ VLIB_REGISTER_NODE (ip4_load_balance_node) = .sibling_of = "ip4-lookup", .format_trace = format_ip4_lookup_trace, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT /* get first interface address */ @@ -288,7 +284,6 
@@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, ip_interface_address_t *ia = 0; ip4_address_t *result = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */ , @@ -298,7 +293,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, result = a; break; })); - /* *INDENT-OFF* */ if (result_ia) *result_ia = result ? ia : 0; return result; @@ -671,7 +665,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, * subnets on interfaces. Easy fix - disallow overlapping subnets, like * most routers do. */ - /* *INDENT-OFF* */ if (!is_del) { /* When adding an address check that it does not conflict @@ -732,7 +725,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, } } } - /* *INDENT-ON* */ if_address_index = ip_interface_address_find (lm, addr_fib, address_length); @@ -853,7 +845,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable) * when directed broadcast is enabled, the subnet braodcast route will forward * packets using an adjacency with a broadcast MAC. 
otherwise it drops */ - /* *INDENT-OFF* */ foreach_ip_interface_address(&im->lookup_main, ia, sw_if_index, 0, ({ @@ -877,7 +868,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable) &pfx, sw_if_index); } })); - /* *INDENT-ON* */ } #endif @@ -897,7 +887,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - /* *INDENT-OFF* */ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ @@ -911,7 +900,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) im, fib_index, a, ia->address_length); })); - /* *INDENT-ON* */ return 0; } @@ -919,7 +907,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down); /* Built-in ip4 unicast rx feature path definition */ -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_unicast, static) = { .arc_name = "ip4-unicast", @@ -1058,7 +1045,6 @@ VNET_FEATURE_INIT (ip4_interface_output, static) = .node_name = "interface-output", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ static clib_error_t * ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) @@ -1083,13 +1069,11 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) vlib_main_t *vm = vlib_get_main (); vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 0, ({ address = ip_interface_address_get_address (lm4, ia); ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); })); - /* *INDENT-ON* */ ip4_mfib_interface_enable_disable (sw_if_index, 0); if (0 != im4->fib_index_by_sw_if_index[sw_if_index]) @@ -1206,9 +1190,11 @@ format_ip4_forward_next_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t 
*t = va_arg (*args, ip4_forward_next_trace_t *); u32 indent = format_get_indent (s); - s = format (s, "%U%U", - format_white_space, indent, - format_ip4_header, t->packet_data, sizeof (t->packet_data)); + + s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent, + t->fib_index, t->dpo_index, t->flow_hash); + s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header, + t->packet_data, sizeof (t->packet_data)); return s; } #endif @@ -1397,13 +1383,11 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) } #endif -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_local) = { .arc_name = "ip4-local", .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"), .last_in_arc = "ip4-local-end-of-arc", }; -/* *INDENT-ON* */ static inline void ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p, @@ -1479,10 +1463,10 @@ ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b, if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0]) || ip4_local_need_csum_check (is_tcp_udp[1], b[1]))) { - if (is_tcp_udp[0]) + if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0])) ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0], &good_tcp_udp[0]); - if (is_tcp_udp[1]) + if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1])) ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1], &good_tcp_udp[1]); } @@ -1989,14 +1973,12 @@ show_ip_local_command_fn (vlib_main_t * vm, * 47 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip_local, static) = { .path = "show ip local", .function = show_ip_local_command_fn, .short_help = "show ip local", }; -/* *INDENT-ON* */ typedef enum { @@ -2243,9 +2225,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node, adj0->ia_cfg_index); next[0] = next_index; - if (is_midchain) - vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , - 0 /* is_ip6 */ ); } else { @@ -2268,9 +2247,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t 
*node, &next_index, b[1], adj1->ia_cfg_index); next[1] = next_index; - if (is_midchain) - vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ , - 0 /* is_ip6 */ ); } else { @@ -2420,9 +2396,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (is_midchain) { - vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , - 0 /* is_ip6 */ ); - /* Guess we are only writing on ipv4 header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t)); } @@ -2526,10 +2499,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (is_midchain) { - /* this acts on the packet that is about to be encapped */ - vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , - 0 /* is_ip6 */ ); - /* Guess we are only writing on ipv4 header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t)); } @@ -2656,7 +2625,6 @@ VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm, return ip4_rewrite_inline (vm, node, frame, 0, 1, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .name = "ip4-rewrite", .vector_size = sizeof (u32), @@ -2701,7 +2669,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = { .format_trace = format_ip4_rewrite_trace, .sibling_of = "ip4-rewrite", }; -/* *INDENT-ON */ static clib_error_t * set_ip_flow_hash_command_fn (vlib_main_t * vm, @@ -2833,15 +2800,12 @@ set_ip_flow_hash_command_fn (vlib_main_t * vm, * [0] [@0]: dpo-drop ip6 * @cliexend ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = -{ +VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = { .path = "set ip flow-hash", - .short_help = - "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]", + .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] " + "[dport] [proto] [reverse] [gtpv1teid]", .function = set_ip_flow_hash_command_fn, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int @@ -2958,7 +2922,6 @@ set_ip_classify_command_fn (vlib_main_t * vm, * Example of how to assign a 
classification table to an interface: * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip_classify_command, static) = { .path = "set ip classify", @@ -2966,7 +2929,6 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = "set ip classify intfc <interface> table-index <classify-idx>", .function = set_ip_classify_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_inlines.h b/src/vnet/ip/ip4_inlines.h index ca7327fbcdc..b4fcebc9896 100644 --- a/src/vnet/ip/ip4_inlines.h +++ b/src/vnet/ip/ip4_inlines.h @@ -43,6 +43,7 @@ #include <vnet/ip/ip_flow_hash.h> #include <vnet/ip/ip4_packet.h> #include <vnet/tcp/tcp_packet.h> +#include <vnet/udp/udp_packet.h> #define IP_DF 0x4000 /* don't fragment */ @@ -53,9 +54,11 @@ ip4_compute_flow_hash (const ip4_header_t * ip, flow_hash_config_t flow_hash_config) { tcp_header_t *tcp = (void *) (ip + 1); + udp_header_t *udp = (void *) (ip + 1); + gtpv1u_header_t *gtpu = (void *) (udp + 1); u32 a, b, c, t1, t2; - uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP - || ip->protocol == IP_PROTOCOL_UDP); + uword is_udp = ip->protocol == IP_PROTOCOL_UDP; + uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP || is_udp); t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? ip->src_address.data_u32 : 0; @@ -90,6 +93,13 @@ ip4_compute_flow_hash (const ip4_header_t * ip, b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? 
(t1 << 16) | t2 : (t2 << 16) | t1; + if (PREDICT_TRUE (is_udp) && + PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) && + udp->dst_port == GTPV1_PORT_BE)) + { + t1 = gtpu->teid; + c ^= t1; + } a ^= ip_flow_hash_router_id; hash_v3_mix32 (a, b, c); diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c index 436e52ff12c..106d17da3cb 100644 --- a/src/vnet/ip/ip4_input.c +++ b/src/vnet/ip/ip4_input.c @@ -374,7 +374,6 @@ VLIB_NODE_FN (ip4_input_no_checksum_node) (vlib_main_t * vm, return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_input_node) = { .name = "ip4-input", .vector_size = sizeof (u32), @@ -405,7 +404,6 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_input_trace, }; -/* *INDENT-ON* */ static clib_error_t * ip4_init (vlib_main_t * vm) diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h index 57aef0bf77a..d2ed13fa35f 100644 --- a/src/vnet/ip/ip4_input.h +++ b/src/vnet/ip/ip4_input.h @@ -60,15 +60,17 @@ check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum) { if (PREDICT_FALSE (ip->ip_version_and_header_length != 0x45)) { - if ((ip->ip_version_and_header_length & 0xf) != 5) + if ((ip->ip_version_and_header_length & 0xf0) != 0x40) + *error = IP4_ERROR_VERSION; + else if ((ip->ip_version_and_header_length & 0x0f) < 5) + *error = IP4_ERROR_HDR_TOO_SHORT; + else { *error = IP4_ERROR_OPTIONS; if (verify_checksum && clib_ip_csum ((u8 *) ip, ip4_header_bytes (ip)) != 0) *error = IP4_ERROR_BAD_CHECKSUM; } - else - *error = IP4_ERROR_VERSION; } else if (PREDICT_FALSE (verify_checksum && clib_ip_csum ((u8 *) ip, sizeof (ip4_header_t)) != diff --git a/src/vnet/ip/ip4_options.c b/src/vnet/ip/ip4_options.c index 6ef6b6030cc..bbe311ffb20 100644 --- a/src/vnet/ip/ip4_options.c +++ b/src/vnet/ip/ip4_options.c @@ -127,7 +127,6 @@ format_ip4_options_trace (u8 * s, va_list * args) return s; } -/* 
*INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_options_node) = { .name = "ip4-options", .vector_size = sizeof (u32), @@ -140,7 +139,6 @@ VLIB_REGISTER_NODE (ip4_options_node) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_options_trace, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index 2673558e19e..269049194e6 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -129,19 +129,15 @@ typedef union /* For checksumming we'll want to access IP header in word sized chunks. */ /* For 64 bit machines. */ - /* *INDENT-OFF* */ CLIB_PACKED (struct { u64 checksum_data_64[2]; u32 checksum_data_64_32[1]; }); - /* *INDENT-ON* */ /* For 32 bit machines. */ - /* *INDENT-OFF* */ CLIB_PACKED (struct { u32 checksum_data_32[5]; }); - /* *INDENT-ON* */ } ip4_header_t; /* Value of ip_version_and_header_length for packets w/o options. */ @@ -200,9 +196,7 @@ ip4_next_header (ip4_header_t * i) /* Turn off array bounds check due to ip4_header_t option field operations. 
*/ -/* *INDENT-OFF* */ WARN_OFF(array-bounds) -/* *INDENT-ON* */ static_always_inline u16 ip4_header_checksum_inline (ip4_header_t * i, int with_checksum) @@ -305,9 +299,7 @@ ip4_header_checksum_inline (ip4_header_t * i, int with_checksum) return ~((u16) sum); } -/* *INDENT-OFF* */ WARN_ON(array-bounds) -/* *INDENT-ON* */ always_inline u16 ip4_header_checksum (ip4_header_t * i) diff --git a/src/vnet/ip/ip4_punt_drop.c b/src/vnet/ip/ip4_punt_drop.c index f2985a244aa..b8cc3304437 100644 --- a/src/vnet/ip/ip4_punt_drop.c +++ b/src/vnet/ip/ip4_punt_drop.c @@ -18,7 +18,6 @@ #include <vnet/policer/policer.h> #include <vnet/policer/police_inlines.h> -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_punt) = { .arc_name = "ip4-punt", @@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip4_drop) = .arc_name = "ip4-drop", .start_nodes = VNET_FEATURES ("ip4-drop", "ip4-not-enabled"), }; -/* *INDENT-ON* */ extern ip_punt_policer_t ip4_punt_policer_cfg; @@ -89,7 +87,6 @@ VLIB_NODE_FN (ip4_punt_policer_node) (vlib_main_t * vm, ip4_punt_policer_cfg.policer_index)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_punt_policer_node) = { .name = "ip4-punt-policer", .vector_size = sizeof (u32), @@ -109,7 +106,6 @@ VNET_FEATURE_INIT (ip4_punt_policer_node) = { .node_name = "ip4-punt-policer", .runs_before = VNET_FEATURES("ip4-punt-redirect"), }; -/* *INDENT-ON* */ #define foreach_ip4_punt_redirect_error \ @@ -138,7 +134,6 @@ VLIB_NODE_FN (ip4_punt_redirect_node) (vlib_main_t * vm, FIB_PROTOCOL_IP4)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_punt_redirect_node) = { .name = "ip4-punt-redirect", .vector_size = sizeof (u32), @@ -160,7 +155,6 @@ VNET_FEATURE_INIT (ip4_punt_redirect_node, static) = { .node_name = "ip4-punt-redirect", .runs_before = VNET_FEATURES("error-punt"), }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -194,7 +188,6 @@ ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) 
vnet_feat_arc_ip4_punt.feature_arc_index); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_drop_node) = { .name = "ip4-drop", @@ -237,7 +230,6 @@ VNET_FEATURE_INIT (ip4_drop_end_of_arc, static) = { .node_name = "error-drop", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON */ #ifndef CLIB_MARCH_VARIANT void @@ -301,14 +293,12 @@ done: * @cliexpar * @cliexcmd{set ip punt policer <INDEX>} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip4_punt_policer_command, static) = { .path = "ip punt policer", .function = ip4_punt_police_cmd, .short_help = "ip punt policer [add|del] <index>", }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT @@ -404,14 +394,12 @@ done: * @cliexpar * @cliexcmd{set ip punt policer} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip4_punt_redirect_command, static) = { .path = "ip punt redirect", .function = ip4_punt_redirect_cmd, .short_help = "ip punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>", }; -/* *INDENT-ON* */ static clib_error_t * ip4_punt_redirect_show_cmd (vlib_main_t * vm, @@ -428,7 +416,6 @@ ip4_punt_redirect_show_cmd (vlib_main_t * vm, * @cliexpar * @cliexcmd{set ip punt redierect} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) = { .path = "show ip punt redirect", @@ -436,7 +423,6 @@ VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) = .short_help = "show ip punt redirect", .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c index 2edbeeddf10..27b2d549ea7 100644 --- a/src/vnet/ip/ip4_source_and_port_range_check.c +++ b/src/vnet/ip/ip4_source_and_port_range_check.c @@ -563,7 +563,6 @@ ip4_source_and_port_range_check_tx (vlib_main_t * vm, if this changes can easily make new function */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = { .function = ip4_source_and_port_range_check_rx, .name = 
"ip4-source-and-port-range-check-rx", @@ -580,9 +579,7 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_source_and_port_range_check_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = { .function = ip4_source_and_port_range_check_tx, .name = "ip4-source-and-port-range-check-tx", @@ -599,7 +596,6 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_source_and_port_range_check_trace, }; -/* *INDENT-ON* */ int set_ip_source_and_port_range_check (vlib_main_t * vm, @@ -797,13 +793,11 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm, * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = { .path = "set interface ip source-and-port-range-check", .function = set_ip_source_and_port_range_check_fn, .short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]", }; -/* *INDENT-ON* */ static u8 * format_ppr_dpo (u8 * s, va_list * args) @@ -1264,14 +1258,12 @@ ip_source_and_port_range_check_command_fn (vlib_main_t * vm, * Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table: * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = { .path = "set ip source-and-port-range-check", .function = ip_source_and_port_range_check_command_fn, .short_help = "set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]", }; -/* *INDENT-ON* */ static clib_error_t * @@ -1390,14 +1382,12 @@ show_source_and_port_range_check_fn (vlib_main_t * vm, * 172.16.2.2 port 250 FAIL * @cliexend ?*/ -/* *INDENT-OFF* 
*/ VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = { .path = "show ip source-and-port-range-check", .function = show_source_and_port_range_check_fn, .short_help = "show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h index a6d87f1f962..57c2b6ff78b 100644 --- a/src/vnet/ip/ip4_to_ip6.h +++ b/src/vnet/ip/ip4_to_ip6.h @@ -28,14 +28,12 @@ typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4, ip6_header_t * ip6, void *ctx); -/* *INDENT-OFF* */ static u8 icmp_to_icmp6_updater_pointer_table[] = { 0, 1, 4, 4, ~0, ~0, ~0, ~0, 7, 6, ~0, ~0, 8, 8, 8, 8, 24, 24, 24, 24 }; -/* *INDENT-ON* */ #define frag_id_4to6(id) (id) diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index f33780f1a98..56eec523d5b 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -238,7 +238,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im, ip_interface_address_t *ia; ip6_address_t *result = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */, ({ @@ -249,7 +248,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im, break; } })); - /* *INDENT-ON* */ if (result_ia) *result_ia = result ? 
ia : 0; return result; diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 84ea5a068aa..48fb633fd32 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -71,7 +71,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im, ip_lookup_main_t *lm = &im->lookup_main; ip_interface_prefix_t *if_prefix; - /* *INDENT-OFF* */ ip_interface_prefix_key_t key = { .prefix = { .fp_len = address_length, @@ -85,7 +84,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im, }, .sw_if_index = sw_if_index, }; - /* *INDENT-ON* */ /* If prefix already set on interface, just increment ref count & return */ if_prefix = ip_get_interface_prefix (lm, &key); @@ -178,7 +176,6 @@ ip6_del_interface_prefix_routes (ip6_main_t * im, ip_lookup_main_t *lm = &im->lookup_main; ip_interface_prefix_t *if_prefix; - /* *INDENT-OFF* */ ip_interface_prefix_key_t key = { .prefix = { .fp_len = address_length, @@ -192,13 +189,12 @@ ip6_del_interface_prefix_routes (ip6_main_t * im, }, .sw_if_index = sw_if_index, }; - /* *INDENT-ON* */ if_prefix = ip_get_interface_prefix (lm, &key); if (!if_prefix) { clib_warning ("Prefix not found while deleting %U", - format_ip4_address_and_length, address, address_length); + format_ip6_address_and_length, address, address_length); return; } @@ -283,7 +279,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index) ip_interface_address_t *ia = 0; ip6_address_t *result = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */, ({ @@ -291,7 +286,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index) result = a; break; })); - /* *INDENT-ON* */ return result; } @@ -359,7 +353,6 @@ ip6_add_del_interface_address (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip6_af); - /* *INDENT-OFF* */ if (!is_del) { /* When adding an address check that it does not conflict @@ -417,7 +410,6 @@ ip6_add_del_interface_address (vlib_main_t * vm, } } } - /* 
*INDENT-ON* */ if_address_index = ip_interface_address_find (lm, addr_fib, address_length); @@ -537,7 +529,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - /* *INDENT-OFF* */ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ @@ -550,7 +541,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) ip6_del_interface_routes (sw_if_index, im, fib_index, a, ia->address_length); })); - /* *INDENT-ON* */ return 0; } @@ -558,7 +548,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down); /* Built-in ip6 unicast rx feature path definition */ -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip6_unicast, static) = { .arc_name = "ip6-unicast", @@ -683,7 +672,6 @@ VNET_FEATURE_INIT (ip6_interface_output, static) = { .node_name = "interface-output", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ static clib_error_t * ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) @@ -709,13 +697,11 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) vlib_main_t *vm = vlib_get_main (); vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); - /* *INDENT-OFF* */ foreach_ip_interface_address (lm6, ia, sw_if_index, 0, ({ address = ip_interface_address_get_address (lm6, ia); ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); })); - /* *INDENT-ON* */ ip6_mfib_interface_enable_disable (sw_if_index, 0); if (0 != im6->fib_index_by_sw_if_index[sw_if_index]) @@ -748,7 +734,6 @@ VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm, static u8 *format_ip6_lookup_trace (u8 * s, va_list * args); -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_lookup_node) = { .name = "ip6-lookup", @@ -757,7 +742,6 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = 
.n_next_nodes = IP6_LOOKUP_N_NEXT, .next_nodes = IP6_LOOKUP_NEXT_NODES, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -935,7 +919,6 @@ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_load_balance_node) = { .name = "ip6-load-balance", @@ -943,7 +926,6 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) = .sibling_of = "ip6-lookup", .format_trace = format_ip6_lookup_trace, }; -/* *INDENT-ON* */ typedef struct { @@ -966,8 +948,7 @@ format_ip6_forward_next_trace (u8 * s, va_list * args) ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *); u32 indent = format_get_indent (s); - s = format (s, "%Ufib:%d adj:%d flow:%d", - format_white_space, indent, + s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent, t->fib_index, t->adj_index, t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, @@ -1234,12 +1215,10 @@ ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0, return 0; } -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip6_local) = { .arc_name = "ip6-local", .start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"), }; -/* *INDENT-ON* */ static_always_inline u8 ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0) @@ -1991,13 +1970,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, if (is_midchain) { - /* before we paint on the next header, update the L4 - * checksums if required, since there's no offload on a tunnel */ - vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ , - 1 /* is_ip6 */ ); - vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ , - 1 /* is_ip6 */ ); - /* Guess we are only writing on ipv6 header. 
*/ vnet_rewrite_two_headers (adj0[0], adj1[0], ip0, ip1, sizeof (ip6_header_t)); @@ -2091,9 +2063,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, if (is_midchain) { - vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ , - 1 /* is_ip6 */ ); - /* Guess we are only writing on ip6 header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t)); } @@ -2243,7 +2212,6 @@ VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 1, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_midchain_node) = { .name = "ip6-midchain", .vector_size = sizeof (u32), @@ -2256,8 +2224,6 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = .name = "ip6-rewrite", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, - .n_errors = IP6_N_ERROR, - .error_counters = ip6_error_counters, .n_next_nodes = IP6_REWRITE_N_NEXT, .next_nodes = { @@ -2292,7 +2258,6 @@ VLIB_REGISTER_NODE (ip6_mcast_midchain_node) = .sibling_of = "ip6-rewrite", }; -/* *INDENT-ON* */ /* * Hop-by-Hop handling @@ -2306,7 +2271,6 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \ _(FORMAT, "incorrectly formatted hop-by-hop options") \ _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options") -/* *INDENT-OFF* */ typedef enum { #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym, @@ -2314,7 +2278,6 @@ typedef enum #undef _ IP6_HOP_BY_HOP_N_ERROR, } ip6_hop_by_hop_error_t; -/* *INDENT-ON* */ /* * Primary h-b-h handler trace support @@ -2741,7 +2704,6 @@ VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = { .name = "ip6-hop-by-hop", @@ -2753,7 +2715,6 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = .error_strings = ip6_hop_by_hop_error_strings, .n_next_nodes = 0, }; -/* *INDENT-ON* */ static clib_error_t * ip6_hop_by_hop_init (vlib_main_t * vm) @@ -3005,14 +2966,12 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm, * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND 
(set_ip6_flow_hash_command, static) = { .path = "set ip6 flow-hash", .short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] " "[dport] [proto] [reverse] [flowlabel]", .function = set_ip6_flow_hash_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_ip6_local_command_fn (vlib_main_t * vm, @@ -3053,14 +3012,12 @@ show_ip6_local_command_fn (vlib_main_t * vm, * 115 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip6_local, static) = { .path = "show ip6 local", .function = show_ip6_local_command_fn, .short_help = "show ip6 local", }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int @@ -3172,7 +3129,6 @@ set_ip6_classify_command_fn (vlib_main_t * vm, * Example of how to assign a classification table to an interface: * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip6_classify_command, static) = { .path = "set ip6 classify", @@ -3180,7 +3136,6 @@ VLIB_CLI_COMMAND (set_ip6_classify_command, static) = "set ip6 classify intfc <interface> table-index <classify-idx>", .function = set_ip6_classify_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c index e66084c2c4d..412741abcf8 100644 --- a/src/vnet/ip/ip6_hop_by_hop.c +++ b/src/vnet/ip/ip6_hop_by_hop.c @@ -438,8 +438,7 @@ VLIB_NODE_FN (ip6_add_hop_by_hop_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = { .name = "ip6-add-hop-by-hop", .vector_size = sizeof (u32), @@ -455,7 +454,6 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */ #undef _ }, }; -/* *INDENT-ON* */ /* The main h-b-h tracer was already invoked, no need to do much here */ typedef struct @@ -778,7 +776,6 @@ VLIB_NODE_FN (ip6_pop_hop_by_hop_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* 
*/ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = { .name = "ip6-pop-hop-by-hop", @@ -791,7 +788,6 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = /* See ip/lookup.h */ .n_next_nodes = 0, }; -/* *INDENT-ON* */ typedef struct { @@ -1006,7 +1002,6 @@ VLIB_NODE_FN (ip6_local_hop_by_hop_node) (vlib_main_t * vm, } #ifndef CLIB_MARCH_VARIANT -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) = { .name = "ip6-local-hop-by-hop", @@ -1025,7 +1020,6 @@ VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) = [IP6_LOCAL_HOP_BY_HOP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ clib_error_t * show_ip6_hbh_command_fn (vlib_main_t * vm, @@ -1059,13 +1053,11 @@ show_ip6_hbh_command_fn (vlib_main_t * vm, * Display ip6 local hop-by-hop next protocol handler nodes * @cliexcmd{show ip6 hbh} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip6_hbh, static) = { .path = "show ip6 hbh", .short_help = "show ip6 hbh", .function = show_ip6_hbh_command_fn, }; -/* *INDENT-ON* */ #endif /* CLIB_MARCH_VARIANT */ @@ -1105,12 +1097,10 @@ ip6_hop_by_hop_ioam_init (vlib_main_t * vm) return (0); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init) = { .runs_after = VLIB_INITS("ip_main_init", "ip6_lookup_init"), }; -/* *INDENT-ON* */ void ip6_local_hop_by_hop_register_protocol (u32 protocol, u32 node_index) @@ -1264,13 +1254,11 @@ clear_ioam_rewrite_command_fn (vlib_main_t * vm, * Example of how to clear iOAM features: * @cliexcmd{clear ioam rewrite} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = { .path = "clear ioam rewrite", .short_help = "clear ioam rewrite", .function = clear_ioam_rewrite_command_fn, }; -/* *INDENT-ON* */ clib_error_t * ip6_ioam_enable (int has_trace_option, int has_pot_option, @@ -1371,13 +1359,11 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm, * Example of how to enable trace and pot with ppc set to encap: * @cliexcmd{set ioam rewrite trace pot ppc encap} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND 
(ip6_set_ioam_rewrite_cmd, static) = { .path = "set ioam rewrite", .short_help = "set ioam [trace] [pot] [seqno] [analyse]", .function = ip6_set_ioam_rewrite_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm, @@ -1455,13 +1441,11 @@ ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm, * EDGE TO EDGE - PPC OPTION - 1 (Encap) * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = { .path = "show ioam summary", .short_help = "show ioam summary", .function = ip6_show_ioam_summary_cmd_fn, }; -/* *INDENT-ON* */ void vnet_register_ioam_end_of_path_callback (void *cb) diff --git a/src/vnet/ip/ip6_inlines.h b/src/vnet/ip/ip6_inlines.h index 9c2be60b267..9bd475224eb 100644 --- a/src/vnet/ip/ip6_inlines.h +++ b/src/vnet/ip/ip6_inlines.h @@ -49,29 +49,40 @@ always_inline u32 ip6_compute_flow_hash (const ip6_header_t * ip, flow_hash_config_t flow_hash_config) { - tcp_header_t *tcp; + const tcp_header_t *tcp; + const udp_header_t *udp = (void *) (ip + 1); + const gtpv1u_header_t *gtpu = (void *) (udp + 1); u64 a, b, c; u64 t1, t2; + u32 t3; uword is_tcp_udp = 0; u8 protocol = ip->protocol; + uword is_udp = protocol == IP_PROTOCOL_UDP; - if (PREDICT_TRUE - ((ip->protocol == IP_PROTOCOL_TCP) - || (ip->protocol == IP_PROTOCOL_UDP))) + if (PREDICT_TRUE ((protocol == IP_PROTOCOL_TCP) || is_udp)) { is_tcp_udp = 1; tcp = (void *) (ip + 1); } - else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + else { - ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1); - if ((hbh->protocol == IP_PROTOCOL_TCP) || - (hbh->protocol == IP_PROTOCOL_UDP)) + const void *cur = ip + 1; + if (protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + { + const ip6_hop_by_hop_header_t *hbh = cur; + protocol = hbh->protocol; + cur += (hbh->length + 1) * 8; + } + if (protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + const ip6_fragment_ext_header_t *frag = cur; + protocol = frag->protocol; + } + else if (protocol == 
IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_UDP) { is_tcp_udp = 1; - tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3)); + tcp = cur; } - protocol = hbh->protocol; } t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]); @@ -113,7 +124,13 @@ ip6_compute_flow_hash (const ip6_header_t * ip, ((flow_hash_config & IP_FLOW_HASH_FL) ? ip6_flow_label_network_order (ip) : 0); c ^= t1; - + if (PREDICT_TRUE (is_udp) && + PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) && + udp->dst_port == GTPV1_PORT_BE)) + { + t3 = gtpu->teid; + a ^= t3; + } hash_mix64 (a, b, c); return (u32) c; } diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c index 8d89890f999..64c9d76ebaa 100644 --- a/src/vnet/ip/ip6_input.c +++ b/src/vnet/ip/ip6_input.c @@ -219,7 +219,6 @@ VLIB_NODE_FN (ip6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_input_node) = { .name = "ip6-input", .vector_size = sizeof (u32), @@ -238,7 +237,6 @@ VLIB_REGISTER_NODE (ip6_input_node) = { .format_buffer = format_ip6_header, .format_trace = format_ip6_input_trace, }; -/* *INDENT-ON* */ static clib_error_t * ip6_init (vlib_main_t * vm) diff --git a/src/vnet/ip/ip6_link.c b/src/vnet/ip/ip6_link.c index afa9d8e3ea9..c2a7ccacbc1 100644 --- a/src/vnet/ip/ip6_link.c +++ b/src/vnet/ip/ip6_link.c @@ -242,12 +242,10 @@ ip6_link_delegate_flush (ip6_link_t * il) { ip6_link_delegate_t *ild; - /* *INDENT-OFF* */ FOREACH_IP6_LINK_DELEGATE (ild, il, ({ il_delegate_vfts[ild->ild_type].ildv_disable(ild->ild_index); })); - /* *INDENT-ON* */ vec_free (il->il_delegates); il->il_delegates = NULL; @@ -357,14 +355,12 @@ ip6_link_set_local_address (u32 sw_if_index, const ip6_address_t * address) ip6_address_copy (&ilp.ilp_addr, address); ip6_ll_table_entry_update (&ilp, FIB_ROUTE_PATH_LOCAL); - /* *INDENT-OFF* */ FOREACH_IP6_LINK_DELEGATE (ild, il, ({ if (NULL != il_delegate_vfts[ild->ild_type].ildv_ll_change) 
il_delegate_vfts[ild->ild_type].ildv_ll_change(ild->ild_index, &il->il_ll_addr); })); - /* *INDENT-ON* */ return (0); } @@ -465,7 +461,6 @@ ip6_link_add_del_address (ip6_main_t * im, if (NULL == il) return; - /* *INDENT-OFF* */ FOREACH_IP6_LINK_DELEGATE (ild, il, ({ if (is_delete) @@ -481,7 +476,6 @@ ip6_link_add_del_address (ip6_main_t * im, address, address_length); } })); - /* *INDENT-ON* */ } static clib_error_t * @@ -555,14 +549,12 @@ test_ip6_link_command_fn (vlib_main_t * vm, * Original MAC address: 16:d9:e0:91:79:86 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (test_link_command, static) = { .path = "test ip6 link", .function = test_ip6_link_command_fn, .short_help = "test ip6 link <mac-address>", }; -/* *INDENT-ON* */ static u8 * ip6_print_addrs (u8 * s, u32 * addrs) @@ -594,11 +586,10 @@ format_ip6_link (u8 * s, va_list * arg) if (!ip6_link_is_enabled_i (il)) return (s); - s = format (s, "%U is admin %s\n", - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface (vnm, il->il_sw_if_index), - (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ? - "up" : "down")); + s = format ( + s, "%U is admin %s\n", format_vnet_sw_if_index_name, vnm, + il->il_sw_if_index, + (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ? 
"up" : "down")); u32 ai; u32 *link_scope = 0, *global_scope = 0; @@ -660,13 +651,11 @@ format_ip6_link (u8 * s, va_list * arg) s = format (s, "%U%U\n", format_white_space, 4, format_ip6_address, &il->il_ll_addr); - /* *INDENT-OFF* */ FOREACH_IP6_LINK_DELEGATE(ild, il, ({ s = format (s, "%U", il_delegate_vfts[ild->ild_type].ildv_format, ild->ild_index, 2); })); - /* *INDENT-ON* */ return (s); } @@ -739,14 +728,12 @@ ip6_link_show (vlib_main_t * vm, * show ip6 interface: IPv6 not enabled on interface * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_link_show_command, static) = { .path = "show ip6 interface", .function = ip6_link_show, .short_help = "show ip6 interface <interface>", }; -/* *INDENT-ON* */ static clib_error_t * enable_ip6_interface_cmd (vlib_main_t * vm, @@ -779,14 +766,12 @@ enable_ip6_interface_cmd (vlib_main_t * vm, * Example of how enable IPv6 on a given interface: * @cliexcmd{enable ip6 interface GigabitEthernet2/0/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (enable_ip6_interface_command, static) = { .path = "enable ip6 interface", .function = enable_ip6_interface_cmd, .short_help = "enable ip6 interface <interface>", }; -/* *INDENT-ON* */ static clib_error_t * disable_ip6_interface_cmd (vlib_main_t * vm, @@ -819,14 +804,12 @@ disable_ip6_interface_cmd (vlib_main_t * vm, * Example of how disable IPv6 on a given interface: * @cliexcmd{disable ip6 interface GigabitEthernet2/0/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (disable_ip6_interface_command, static) = { .path = "disable ip6 interface", .function = disable_ip6_interface_cmd, .short_help = "disable ip6 interface <interface>", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip6_ll_table.c b/src/vnet/ip/ip6_ll_table.c index b3f42daf26c..f9172f6c50c 100644 --- a/src/vnet/ip/ip6_ll_table.c +++ b/src/vnet/ip/ip6_ll_table.c @@ -52,9 +52,8 @@ ip6_ll_fib_create (u32 sw_if_index) vnet_main_t *vnm = vnet_get_main (); u8 *desc; - desc = format (NULL, 
"IP6-link-local:%U", - format_vnet_sw_interface_name, - vnm, vnet_get_sw_interface (vnm, sw_if_index)); + desc = format (NULL, "IP6-link-local:%U", format_vnet_sw_if_index_name, vnm, + sw_if_index); ip6_ll_table.ilt_fibs[sw_if_index] = ip6_fib_table_create_and_lock (FIB_SOURCE_IP6_ND, @@ -64,7 +63,6 @@ ip6_ll_fib_create (u32 sw_if_index) * leave the default route as a drop, but fix fe::/10 to be a glean * via the interface. */ - /* *INDENT-OFF* */ fib_prefix_t pfx = { .fp_proto = FIB_PROTOCOL_IP6, .fp_len = 10, @@ -90,7 +88,6 @@ ip6_ll_fib_create (u32 sw_if_index) 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); - /* *INDENT-ON* */ } static void @@ -111,8 +108,13 @@ ip6_ll_table_entry_update (const ip6_ll_prefix_t * ilp, .frp_flags = flags, .frp_sw_if_index = ilp->ilp_sw_if_index, .frp_proto = DPO_PROTO_IP6, + .frp_fib_index = ~0, + .frp_weight = 1, }; - fib_prefix_t fp; + fib_prefix_t fp = { 0 }; + + if (flags & FIB_ROUTE_PATH_LOCAL) + rpath.frp_addr.ip6 = ilp->ilp_addr; vec_validate_init_empty (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index, ~0); @@ -345,13 +347,11 @@ ip6_ll_show_fib (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { .path = "show ip6-ll", .short_help = "show ip6-ll [summary] [interface] [<ip6-addr>[/<width>]] [detail]", .function = ip6_ll_show_fib, }; -/* *INDENT-ON* */ static clib_error_t * ip6_ll_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add) diff --git a/src/vnet/ip/ip6_ll_types.c b/src/vnet/ip/ip6_ll_types.c index a7ac164b05a..b074b6e991c 100644 --- a/src/vnet/ip/ip6_ll_types.c +++ b/src/vnet/ip/ip6_ll_types.c @@ -23,10 +23,8 @@ format_ip6_ll_prefix (u8 * s, va_list * args) ip6_ll_prefix_t *ilp = va_arg (*args, ip6_ll_prefix_t *); vnet_main_t *vnm = vnet_get_main (); - s = format (s, "(%U, %U)", - format_ip6_address, &ilp->ilp_addr, - format_vnet_sw_interface_name, - vnm, vnet_get_sw_interface (vnm, ilp->ilp_sw_if_index)); + s = format (s, "(%U, %U)", format_ip6_address, 
&ilp->ilp_addr, + format_vnet_sw_if_index_name, vnm, ilp->ilp_sw_if_index); return (s); } diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index e71604ce7d3..c506792ddcf 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -441,6 +441,13 @@ typedef CLIB_PACKED (struct { }) ip6_router_alert_option_t; typedef CLIB_PACKED (struct { + u8 protocol; + u8 reserved; + u16 fragoff; + u32 id; +}) ip6_fragment_ext_header_t; + +typedef CLIB_PACKED (struct { u8 next_hdr; /* Length of this header plus option data in 8 byte units. */ u8 n_data_u64s; diff --git a/src/vnet/ip/ip6_punt_drop.c b/src/vnet/ip/ip6_punt_drop.c index 32a2ab760ff..78ca9521f53 100644 --- a/src/vnet/ip/ip6_punt_drop.c +++ b/src/vnet/ip/ip6_punt_drop.c @@ -18,7 +18,6 @@ #include <vnet/policer/policer.h> #include <vnet/policer/police_inlines.h> -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip6_punt) = { .arc_name = "ip6-punt", @@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip6_drop) = .arc_name = "ip6-drop", .start_nodes = VNET_FEATURES ("ip6-drop", "ip6-not-enabled"), }; -/* *INDENT-ON* */ extern ip_punt_policer_t ip6_punt_policer_cfg; @@ -77,7 +75,6 @@ VLIB_NODE_FN (ip6_punt_policer_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_punt_policer_node) = { .name = "ip6-punt-policer", @@ -99,7 +96,6 @@ VNET_FEATURE_INIT (ip6_punt_policer_node, static) = { .node_name = "ip6-punt-policer", .runs_before = VNET_FEATURES("ip6-punt-redirect") }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -134,7 +130,6 @@ VLIB_NODE_FN (ip6_punt_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_feat_arc_ip6_punt.feature_arc_index); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_drop_node) = { .name = "ip6-drop", @@ -175,7 +170,6 @@ VNET_FEATURE_INIT (ip6_drop_end_of_arc, static) = { .node_name = "error-drop", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON */ #ifndef 
CLIB_MARCH_VARIANT void @@ -239,7 +233,6 @@ done: * @cliexpar * @cliexcmd{set ip punt policer <INDEX>} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_punt_policer_command, static) = { .path = "ip6 punt policer", @@ -247,7 +240,6 @@ VLIB_CLI_COMMAND (ip6_punt_policer_command, static) = .short_help = "ip6 punt policer [add|del] <index>", }; -/* *INDENT-ON* */ #define foreach_ip6_punt_redirect_error \ _(DROP, "ip6 punt redirect drop") @@ -275,7 +267,6 @@ VLIB_NODE_FN (ip6_punt_redirect_node) (vlib_main_t * vm, FIB_PROTOCOL_IP6)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_punt_redirect_node) = { .name = "ip6-punt-redirect", .vector_size = sizeof (u32), @@ -297,7 +288,6 @@ VNET_FEATURE_INIT (ip6_punt_redirect_node, static) = { .node_name = "ip6-punt-redirect", .runs_before = VNET_FEATURES("error-punt") }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT @@ -393,14 +383,12 @@ done: * @cliexpar * @cliexcmd{set ip punt policer <INDEX>} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_punt_redirect_command, static) = { .path = "ip6 punt redirect", .function = ip6_punt_redirect_cmd, .short_help = "ip6 punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>", }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT @@ -421,7 +409,6 @@ ip6_punt_redirect_show_cmd (vlib_main_t * vm, * @cliexpar * @cliexcmd{set ip punt policer <INDEX>} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) = { .path = "show ip6 punt redirect", @@ -429,7 +416,6 @@ VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) = .short_help = "show ip6 punt redirect", .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h index b1b5bdb2d11..29d5718d4da 100644 --- a/src/vnet/ip/ip6_to_ip4.h +++ b/src/vnet/ip/ip6_to_ip4.h @@ -31,7 +31,6 @@ typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b, ip6_header_t * ip6, ip4_header_t * ip4, void *ctx); -/* *INDENT-OFF* */ static 
u8 icmp6_to_icmp_updater_pointer_table[] = { 0, 1, ~0, ~0, 2, 2, 9, 8, @@ -44,7 +43,6 @@ static u8 icmp6_to_icmp_updater_pointer_table[] = 24, 24, 24, 24, 24, 24, 24, 24 }; -/* *INDENT-ON* */ #define frag_id_6to4(id) ((id) ^ ((id) >> 16)) diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index e03b0103391..644b4988abc 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -106,7 +106,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (fib_table, ip4_main.fibs) { send_ip_table_details(am, reg, mp->context, fib_table); @@ -118,7 +117,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp) continue; send_ip_table_details(am, reg, mp->context, fib_table); } - /* *INDENT-ON* */ } typedef struct vl_api_ip_fib_dump_walk_ctx_t_ @@ -326,7 +324,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (mfib_table, ip4_main.mfibs) { send_ip_mtable_details (reg, mp->context, mfib_table); @@ -335,7 +332,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp) { send_ip_mtable_details (reg, mp->context, mfib_table); } - /* *INDENT-ON* */ } typedef struct vl_api_ip_mfib_dump_ctx_t_ @@ -782,12 +778,10 @@ vl_api_ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp) rv = ip_route_add_del_t_handler (mp, &stats_index); - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IP_ROUTE_ADD_DEL_REPLY, ({ rmp->stats_index = htonl (stats_index); })) - /* *INDENT-ON* */ } void @@ -839,7 +833,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp) } } - /* *INDENT-OFF* */ REPLY_MACRO3_ZERO(VL_API_IP_ROUTE_LOOKUP_REPLY, npaths * sizeof (*fp), ({ @@ -859,7 +852,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp) } } })); - /* *INDENT-ON* */ vec_free (rpaths); } @@ -1049,12 +1041,10 @@ vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) rv = api_mroute_add_del_t_handler (mp, &stats_index); - 
/* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IP_MROUTE_ADD_DEL_REPLY, ({ rmp->stats_index = htonl (stats_index); })); - /* *INDENT-ON* */ } static void @@ -1117,7 +1107,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp) if (mp->is_ipv6) { - /* *INDENT-OFF* */ /* Do not send subnet details of the IP-interface for * unnumbered interfaces. otherwise listening clients * will be confused that the subnet is applied on more @@ -1131,11 +1120,9 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp) }; send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context); })); - /* *INDENT-ON* */ } else { - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 0, ({ fib_prefix_t pfx = { @@ -1146,7 +1133,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp) send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context); })); - /* *INDENT-ON* */ } BAD_SW_IF_INDEX_LABEL; @@ -1203,7 +1189,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp) } else { - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { if ((si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)) @@ -1214,7 +1199,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp) mp->context); } } - /* *INDENT-ON* */ } BAD_SW_IF_INDEX_LABEL; @@ -1238,12 +1222,10 @@ vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp) /* Gather interfaces. 
*/ sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); vec_set_len (sorted_sis, 0); - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { vec_add1 (sorted_sis, si[0]); } - /* *INDENT-ON* */ vec_foreach (si, sorted_sis) { @@ -1298,6 +1280,22 @@ vl_api_set_ip_flow_hash_v2_t_handler (vl_api_set_ip_flow_hash_v2_t *mp) } static void +vl_api_set_ip_flow_hash_v3_t_handler (vl_api_set_ip_flow_hash_v3_t *mp) +{ + vl_api_set_ip_flow_hash_v3_reply_t *rmp; + ip_address_family_t af; + int rv; + + rv = ip_address_family_decode (mp->af, &af); + + if (!rv) + rv = ip_flow_hash_set (af, htonl (mp->table_id), + htonl (mp->flow_hash_config)); + + REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_V3_REPLY); +} + +static void vl_api_set_ip_flow_hash_router_id_t_handler ( vl_api_set_ip_flow_hash_router_id_t *mp) { @@ -1707,7 +1705,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp) vnet_sw_interface_t *si; /* Shut down interfaces in this FIB / clean out intfc routes */ - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { if (fib_index == fib_table_get_index_for_sw_if_index (fproto, @@ -1718,7 +1715,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp) vnet_sw_interface_set_flags (vnm, si->sw_if_index, flags); } } - /* *INDENT-ON* */ fib_table_flush (fib_index, fproto, FIB_SOURCE_API); mfib_table_flush (mfib_table_find (fproto, ntohl (mp->table.table_id)), @@ -2117,17 +2113,21 @@ ip_api_hookup (vlib_main_t * vm) api_main_t *am = vlibapi_get_main (); /* - * Mark the route add/del API as MP safe + * Set up the (msg_name, crc, message-id) table */ - vl_api_set_msg_thread_safe (am, VL_API_IP_ROUTE_ADD_DEL, 1); - vl_api_set_msg_thread_safe (am, VL_API_IP_ROUTE_ADD_DEL_REPLY, 1); - vl_api_set_msg_thread_safe (am, VL_API_IP_ROUTE_ADD_DEL_V2, 1); - vl_api_set_msg_thread_safe (am, VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1); + REPLY_MSG_ID_BASE = setup_message_id_table (); /* - * Set up the (msg_name, crc, message-id) table + * Mark the route add/del 
API as MP safe */ - REPLY_MSG_ID_BASE = setup_message_id_table (); + vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL, + 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_REPLY, 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2, 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1); return 0; } diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c index 1ac7248ea05..4fbf1fb74fa 100644 --- a/src/vnet/ip/ip_checksum.c +++ b/src/vnet/ip/ip_checksum.c @@ -165,14 +165,12 @@ test_ip_checksum_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (test_checksum, static) = { .path = "test ip checksum", .short_help = "test ip checksum", .function = test_ip_checksum_fn, }; -/* *INDENT-ON* */ #endif /* CLIB_DEBUG */ diff --git a/src/vnet/ip/ip_container_proxy.c b/src/vnet/ip/ip_container_proxy.c index 18d07ba6082..1618704e804 100644 --- a/src/vnet/ip/ip_container_proxy.c +++ b/src/vnet/ip/ip_container_proxy.c @@ -138,7 +138,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx) }; u32 fib_index; - /* *INDENT-OFF* */ pool_foreach_index (fib_index, ip4_main.fibs) { fib_table_walk (fib_index, FIB_PROTOCOL_IP4, @@ -149,7 +148,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx) fib_table_walk (fib_index, FIB_PROTOCOL_IP6, ip_container_proxy_fib_table_walk, &wctx); } - /* *INDENT-ON* */ } clib_error_t * @@ -216,14 +214,12 @@ ip_container_cmd (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_container_command_node, static) = { .path = "ip container", .function = ip_container_cmd, .short_help = "ip container <address> <interface>", .is_mp_safe = 1, }; -/* *INDENT-ON* */ clib_error_t * show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input, @@ -275,14 +271,12 @@ show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input, 
return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip_container_command, static) = { .path = "show ip container", .function = show_ip_container_cmd_fn, .short_help = "show ip container <address> <interface>", .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip_flow_hash.h b/src/vnet/ip/ip_flow_hash.h index bd37ef7307b..30dfcd70a1b 100644 --- a/src/vnet/ip/ip_flow_hash.h +++ b/src/vnet/ip/ip_flow_hash.h @@ -38,7 +38,17 @@ _ (proto, 4, IP_FLOW_HASH_PROTO) \ _ (reverse, 5, IP_FLOW_HASH_REVERSE_SRC_DST) \ _ (symmetric, 6, IP_FLOW_HASH_SYMMETRIC) \ - _ (flowlabel, 7, IP_FLOW_HASH_FL) + _ (flowlabel, 7, IP_FLOW_HASH_FL) \ + _ (gtpv1teid, 8, IP_FLOW_HASH_GTPV1_TEID) + +typedef struct +{ + u8 ver_flags; + u8 type; + u16 length; + u32 teid; +} __attribute__ ((packed)) gtpv1u_header_t; +#define GTPV1_PORT_BE 0x6808 /** * A flow hash configuration is a mask of the flow hash options diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index 5e8d3682eaa..934e40a5d18 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -500,7 +500,6 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, return IP_FRAG_ERROR_NONE; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_frag_node) = { .function = ip4_frag, .name = IP4_FRAG_NODE_NAME, @@ -519,9 +518,7 @@ VLIB_REGISTER_NODE (ip4_frag_node) = { [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP_FRAG_NEXT_DROP] = "ip4-drop" }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_frag_node) = { .function = ip6_frag, .name = IP6_FRAG_NODE_NAME, @@ -540,7 +537,6 @@ VLIB_REGISTER_NODE (ip6_frag_node) = { [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop", [IP_FRAG_NEXT_DROP] = "ip6-drop" }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip_in_out_acl.c b/src/vnet/ip/ip_in_out_acl.c index d8d6d768e93..eb3c94a188a 100644 --- a/src/vnet/ip/ip_in_out_acl.c +++ b/src/vnet/ip/ip_in_out_acl.c @@ -327,8 +327,9 @@ 
ip_in_out_acl_inline_trace ( { while (1) { - if (PREDICT_TRUE (t[0]->next_table_index != ~0)) - t[0] = pool_elt_at_index (tables, t[0]->next_table_index); + table_index[0] = t[0]->next_table_index; + if (PREDICT_TRUE (table_index[0] != ~0)) + t[0] = pool_elt_at_index (tables, table_index[0]); else { _next[0] = (t[0]->miss_next_index < n_next_nodes) ? @@ -434,8 +435,9 @@ ip_in_out_acl_inline_trace ( { while (1) { - if (PREDICT_TRUE (t[1]->next_table_index != ~0)) - t[1] = pool_elt_at_index (tables, t[1]->next_table_index); + table_index[1] = t[1]->next_table_index; + if (PREDICT_TRUE (table_index[1] != ~0)) + t[1] = pool_elt_at_index (tables, table_index[1]); else { _next[1] = (t[1]->miss_next_index < n_next_nodes) ? @@ -636,8 +638,9 @@ ip_in_out_acl_inline_trace ( { while (1) { - if (PREDICT_TRUE (t0->next_table_index != ~0)) - t0 = pool_elt_at_index (tables, t0->next_table_index); + table_index0 = t0->next_table_index; + if (PREDICT_TRUE (table_index0 != ~0)) + t0 = pool_elt_at_index (tables, table_index0); else { next0 = (t0->miss_next_index < n_next_nodes) ? 
@@ -813,7 +816,6 @@ VLIB_NODE_FN (ip4_outacl_node) VLIB_TX, 1 /* is_output */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_inacl_node) = { .name = "ip4-inacl", .vector_size = sizeof (u32), @@ -852,7 +854,6 @@ VLIB_REGISTER_NODE (ip4_outacl_node) = { [ACL_NEXT_INDEX_DENY] = "ip4-drop", }, }; -/* *INDENT-ON* */ VNET_FEATURE_INIT (ip4_punt_acl_feature) = { .arc_name = "ip4-punt", @@ -888,7 +889,6 @@ VLIB_NODE_FN (ip6_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node, VLIB_TX, 1 /* is_output */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_inacl_node) = { .name = "ip6-inacl", .vector_size = sizeof (u32), @@ -927,7 +927,6 @@ VLIB_REGISTER_NODE (ip6_outacl_node) = { [ACL_NEXT_INDEX_DENY] = "ip6-drop", }, }; -/* *INDENT-ON* */ VNET_FEATURE_INIT (ip6_punt_acl_feature) = { .arc_name = "ip6-punt", diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c index 8894a878881..c2490f196ef 100644 --- a/src/vnet/ip/ip_init.c +++ b/src/vnet/ip/ip_init.c @@ -104,7 +104,6 @@ do { \ return error; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip_main_init) = { .init_order = VLIB_INITS ("vnet_main_init", "ip4_init", "ip6_init", "icmp4_init", "icmp6_init", "ip6_hop_by_hop_init", @@ -112,7 +111,6 @@ VLIB_INIT_FUNCTION (ip_main_init) = { "in_out_acl_init", "policer_classify_init", "flow_classify_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip_interface.c b/src/vnet/ip/ip_interface.c index d5ee7fd9b2b..ca1938f651a 100644 --- a/src/vnet/ip/ip_interface.c +++ b/src/vnet/ip/ip_interface.c @@ -145,27 +145,23 @@ ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4) { ip_lookup_main_t *lm4 = &ip4_main.lookup_main; ip4_address_t *ip4; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ , ({ ip4 = ip_interface_address_get_address (lm4, ia); if (ip4_address_compare (ip4, &ip->ip4) == 0) return 1; })); - /* *INDENT-ON* */ } else { ip_lookup_main_t *lm6 = 
&ip6_main.lookup_main; ip6_address_t *ip6; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ , ({ ip6 = ip_interface_address_get_address (lm6, ia); if (ip6_address_compare (ip6, &ip->ip6) == 0) return 1; })); - /* *INDENT-ON* */ } return 0; } @@ -179,16 +175,13 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4) if (is_ip4) { - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ , ({ return ip_interface_address_get_address (lm4, ia); })); - /* *INDENT-ON* */ } else { - /* *INDENT-OFF* */ foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ , ({ ip6_address_t *rv; @@ -197,7 +190,6 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4) if (!ip6_address_is_link_local_unicast (rv)) return rv; })); - /* *INDENT-ON* */ } return 0; @@ -211,7 +203,6 @@ ip_interface_address_mark_one_interface (vnet_main_t *vnm, ip_lookup_main_t *lm6 = &ip6_main.lookup_main; ip_interface_address_t *ia = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, si->sw_if_index, 1 /* unnumbered */ , ({ ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE; @@ -220,7 +211,6 @@ ip_interface_address_mark_one_interface (vnet_main_t *vnm, ({ ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE; })); - /* *INDENT-ON* */ return (WALK_CONTINUE); } @@ -246,7 +236,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm, u32 *ip4_masks = 0; int i; - /* *INDENT-OFF* */ foreach_ip_interface_address (&im4->lookup_main, ia, si->sw_if_index, 1, ({ if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE) @@ -268,7 +257,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm, vec_add1 (ip6_masks, ia->address_length); } })); - /* *INDENT-ON* */ for (i = 0; i < vec_len (ip4_addrs); i++) ip4_add_del_interface_address (vm, si->sw_if_index, &ip4_addrs[i], diff --git a/src/vnet/ip/ip_interface.h b/src/vnet/ip/ip_interface.h index f0474c1bf9a..f0034ed0314 100644 --- a/src/vnet/ip/ip_interface.h +++ 
b/src/vnet/ip/ip_interface.h @@ -56,7 +56,6 @@ ip_get_interface_prefix (ip_lookup_main_t * lm, ip_interface_prefix_key_t * k) return p ? pool_elt_at_index (lm->if_prefix_pool, p[0]) : 0; } -/* *INDENT-OFF* */ #define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \ do { \ vnet_main_t *_vnm = vnet_get_main(); \ @@ -90,7 +89,6 @@ do { \ body; \ } \ } while (0) -/* *INDENT-ON* */ #endif /* included_ip_interface_h */ diff --git a/src/vnet/ip/ip_psh_cksum.h b/src/vnet/ip/ip_psh_cksum.h index 8723749865f..a80211561b7 100644 --- a/src/vnet/ip/ip_psh_cksum.h +++ b/src/vnet/ip/ip_psh_cksum.h @@ -38,8 +38,7 @@ ip4_pseudo_header_cksum (ip4_header_t *ip4) psh.proto = ip4->protocol; psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t)); - return ~clib_net_to_host_u16 ( - clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t))); + return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t))); } static_always_inline u16 @@ -50,8 +49,7 @@ ip6_pseudo_header_cksum (ip6_header_t *ip6) psh.dst = ip6->dst_address; psh.l4len = ip6->payload_length; psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol); - return ~clib_net_to_host_u16 ( - clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t))); + return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t))); } #endif /* included_ip_psh_cksum_h */ diff --git a/src/vnet/ip/ip_punt_drop.c b/src/vnet/ip/ip_punt_drop.c index bf01adadb10..dc113f51386 100644 --- a/src/vnet/ip/ip_punt_drop.c +++ b/src/vnet/ip/ip_punt_drop.c @@ -143,9 +143,8 @@ format_ip_punt_redirect (u8 * s, va_list * args) rx = ip_punt_redirect_get (rxs[rx_sw_if_index]); - s = format (s, " rx %U via:\n", - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface (vnm, rx_sw_if_index)); + s = format (s, " rx %U via:\n", format_vnet_sw_if_index_name, vnm, + rx_sw_if_index); s = format (s, " %U", format_fib_path_list, rx->pl, 2); s = format (s, " forwarding\n", format_dpo_id, &rx->dpo, 0); s = format (s, " %U\n", format_dpo_id, &rx->dpo, 0); diff --git 
a/src/vnet/ip/ip_test.c b/src/vnet/ip/ip_test.c index 7c994868d87..727afba67f4 100644 --- a/src/vnet/ip/ip_test.c +++ b/src/vnet/ip/ip_test.c @@ -1277,6 +1277,12 @@ api_set_ip_flow_hash_v2 (vat_main_t *vat) } static int +api_set_ip_flow_hash_v3 (vat_main_t *vat) +{ + return -1; +} + +static int api_ip_mroute_add_del (vat_main_t *vam) { unformat_input_t *i = vam->input; diff --git a/src/vnet/ip/ip_types.c b/src/vnet/ip/ip_types.c index 88b3f7b9820..ec80a96f15c 100644 --- a/src/vnet/ip/ip_types.c +++ b/src/vnet/ip/ip_types.c @@ -41,16 +41,17 @@ uword unformat_ip_address (unformat_input_t * input, va_list * args) { ip_address_t *a = va_arg (*args, ip_address_t *); + ip_address_t tmp, *p_tmp = &tmp; - if (unformat_user (input, unformat_ip46_address, &ip_addr_46 (a), - IP46_TYPE_ANY)) - { - ip_addr_version (a) = - ip46_address_is_ip4 (&ip_addr_46 (a)) ? AF_IP4 : AF_IP6; - return 1; - } - - return 0; + clib_memset (p_tmp, 0, sizeof (*p_tmp)); + if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (p_tmp))) + ip_addr_version (p_tmp) = AF_IP4; + else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (p_tmp))) + ip_addr_version (p_tmp) = AF_IP6; + else + return 0; + *a = *p_tmp; + return 1; } u8 * diff --git a/src/vnet/ip/ip_types.h b/src/vnet/ip/ip_types.h index e4d89ebd88d..f1b387df194 100644 --- a/src/vnet/ip/ip_types.h +++ b/src/vnet/ip/ip_types.h @@ -75,13 +75,11 @@ typedef enum ip_feature_location_t_ #define N_IP_FEATURE_LOCATIONS (IP_FEATURE_DROP+1) -/* *INDENT-OFF* */ typedef struct ip_address { ip46_address_t ip; ip_address_family_t version; } __clib_packed ip_address_t; -/* *INDENT-ON* */ #define IP_ADDRESS_V4_ALL_0S {.ip.ip4.as_u32 = 0, .version = AF_IP4} #define IP_ADDRESS_V6_ALL_0S {.ip.ip6.as_u64 = {0, 0}, .version = AF_IP6} @@ -112,13 +110,11 @@ extern void ip_address_from_46 (const ip46_address_t * a, extern void ip_address_increment (ip_address_t * ip); extern void ip_address_reset (ip_address_t * ip); -/* *INDENT-OFF* */ typedef struct 
ip_prefix { ip_address_t addr; u8 len; } __clib_packed ip_prefix_t; -/* *INDENT-ON* */ #define ip_prefix_addr(_a) (_a)->addr #define ip_prefix_version(_a) ip_addr_version(&ip_prefix_addr(_a)) diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 26bdaa635aa..c225c222a38 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -145,13 +145,13 @@ unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args) { if (unformat (input, "%_,")) ; -#define _(a, b) \ +#define _(a, b, c) \ else if (unformat (input, "%_" #a)) \ { \ - *flow_hash_config |= b; \ + *flow_hash_config |= c; \ matched_once = 1; \ } - foreach_flow_hash_bit_v1 + foreach_flow_hash_bit #undef _ else { @@ -220,6 +220,27 @@ const ip46_address_t zero_addr = { 0, 0}, }; +bool +fib_prefix_validate (const fib_prefix_t *prefix) +{ + if (FIB_PROTOCOL_IP4 == prefix->fp_proto) + { + if (prefix->fp_len > 32) + { + return false; + } + } + + if (FIB_PROTOCOL_IP6 == prefix->fp_proto) + { + if (prefix->fp_len > 128) + { + return false; + } + } + return true; +} + static clib_error_t * vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd) @@ -353,6 +374,12 @@ vnet_ip_route_cmd (vlib_main_t * vm, .fp_addr = prefixs[i].fp_addr, }; + if (!fib_prefix_validate (&rpfx)) + { + vlib_cli_output (vm, "Invalid prefix len: %d", rpfx.fp_len); + continue; + } + if (is_del) fib_table_entry_path_remove2 (fib_index, &rpfx, FIB_SOURCE_CLI, rpaths); @@ -530,33 +557,25 @@ vnet_show_ip6_table_cmd (vlib_main_t *vm, unformat_input_t *main_input, return (vnet_show_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6)); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = { .path = "ip", .short_help = "Internet protocol (IP) commands", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = { .path = "ip6", .short_help = "Internet protocol version 6 (IPv6) commands", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND 
(vlib_cli_show_ip_command, static) = { .path = "show ip", .short_help = "Internet protocol (IP) show commands", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = { .path = "show ip6", .short_help = "Internet protocol version 6 (IPv6) show commands", }; -/* *INDENT-ON* */ /*? * This command is used to add or delete IPv4 or IPv6 routes. All @@ -585,7 +604,6 @@ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = { * To add a route to a particular FIB table (VRF), use: * @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_route_command, static) = { .path = "ip route", .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table " @@ -593,35 +611,30 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { "[next-hop-table <value>] [weight <value>] [preference " "<value>] [udp-encap <value>] [ip4-lookup-in-table <value>] " "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] " - "[resolve-via-host] [resolve-via-connected] [rx-ip4 " + "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 " "<interface>] [out-labels <value value value>]", .function = vnet_ip_route_cmd, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /*? * This command is used to add or delete IPv4 Tables. All * Tables must be explicitly added before that can be used. Creating a * table will add both unicast and multicast FIBs * ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip4_table_command, static) = { .path = "ip table", .short_help = "ip table [add|del] <table-id>", .function = vnet_ip4_table_cmd, }; -/* *INDENT-ON* */ -/* *INDENT-ON* */ /*? * This command is used to add or delete IPv4 Tables. All * Tables must be explicitly added before that can be used. 
Creating a * table will add both unicast and multicast FIBs * ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_table_command, static) = { .path = "ip6 table", .short_help = "ip6 table [add|del] <table-id>", @@ -726,14 +739,12 @@ ip6_table_bind_cmd (vlib_main_t * vm, * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = { .path = "set interface ip table", .function = ip4_table_bind_cmd, .short_help = "set interface ip table <interface> <table-id>", }; -/* *INDENT-ON* */ /*? * Place the indicated interface into the supplied IPv6 FIB table (also known @@ -754,14 +765,12 @@ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = { .path = "set interface ip6 table", .function = ip6_table_bind_cmd, .short_help = "set interface ip6 table <interface> <table-id>" }; -/* *INDENT-ON* */ clib_error_t * vnet_ip_mroute_cmd (vlib_main_t * vm, @@ -998,7 +1007,6 @@ done: * @cliexcmd{ip mroute add 232.1.1.1 Signal} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_mroute_command, static) = { .path = "ip mroute", @@ -1006,7 +1014,6 @@ VLIB_CLI_COMMAND (ip_mroute_command, static) = .function = vnet_ip_mroute_cmd, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h index aa998273213..8083d974df6 100644 --- a/src/vnet/ip/lookup.h +++ b/src/vnet/ip/lookup.h @@ -168,17 +168,16 @@ always_inline void ip_lookup_set_buffer_fib_index (u32 * fib_index_by_sw_if_index, vlib_buffer_t * b) { - /* *INDENT-OFF* */ vnet_buffer (b)->ip.fib_index = vec_elt (fib_index_by_sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_RX]); 
vnet_buffer (b)->ip.fib_index = ((vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? vnet_buffer (b)->ip.fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX]); - /* *INDENT-ON* */ } void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index); +bool fib_prefix_validate (const fib_prefix_t *prefix); #endif /* included_ip_lookup_h */ /* diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 10deb2e8849..3c46549634a 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -148,14 +148,31 @@ punt_socket_register_l4 (vlib_main_t * vm, punt_main_t *pm = &punt_main; punt_client_t *c; - /* For now we only support UDP punt */ - if (protocol != IP_PROTOCOL_UDP) - return clib_error_return (0, - "only UDP protocol (%d) is supported, got %d", - IP_PROTOCOL_UDP, protocol); - if (port == (u16) ~ 0) - return clib_error_return (0, "UDP port number required"); + return clib_error_return (0, "Port number required"); + + u32 node_index; + switch (protocol) + { + case IP_PROTOCOL_UDP: + node_index = (af == AF_IP4 ? udp4_punt_socket_node.index : + udp6_punt_socket_node.index); + udp_register_dst_port (vm, port, node_index, af == AF_IP4); + break; + case IP_PROTOCOL_ICMP6: + if (af != AF_IP6) + return clib_error_return ( + 0, "only UDP or ICMP6 protocol (%d, %d) is supported, got %d", + IP_PROTOCOL_UDP, IP_PROTOCOL_ICMP6, protocol); + + node_index = icmp6_punt_socket_node.index; + icmp6_register_type (vm, port, node_index); + break; + default: + return clib_error_return ( + 0, "only UDP or ICMP6 protocol (%d) is supported, got %d", + IP_PROTOCOL_UDP, protocol); + } c = punt_client_l4_get (af, port); @@ -173,12 +190,6 @@ punt_socket_register_l4 (vlib_main_t * vm, c->reg.punt.l4.protocol = protocol; c->reg.punt.l4.af = af; - u32 node_index = (af == AF_IP4 ? 
- udp4_punt_socket_node.index : - udp6_punt_socket_node.index); - - udp_register_dst_port (vm, port, node_index, af == AF_IP4); - return (NULL); } @@ -463,7 +474,6 @@ punt_cli (vlib_main_t * vm, unformat_input_t line_input, *input = &line_input; clib_error_t *error = NULL; bool is_add = true; - /* *INDENT-OFF* */ punt_reg_t pr = { .punt = { .l4 = { @@ -475,7 +485,6 @@ punt_cli (vlib_main_t * vm, .type = PUNT_TYPE_L4, }; u32 port; - /* *INDENT-ON* */ if (!unformat_user (input__, unformat_line_input, input)) return 0; @@ -541,13 +550,11 @@ done: * @cliexcmd{set punt udp del all} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_command, static) = { .path = "set punt", .short_help = "set punt [IPV4|ip6|ipv6] [UDP|tcp] [del] [ALL|<port-num>]", .function = punt_cli, }; -/* *INDENT-ON* */ static clib_error_t * punt_socket_register_cmd (vlib_main_t * vm, @@ -557,7 +564,6 @@ punt_socket_register_cmd (vlib_main_t * vm, unformat_input_t line_input, *input = &line_input; u8 *socket_name = 0; clib_error_t *error = NULL; - /* *INDENT-OFF* */ punt_reg_t pr = { .punt = { .l4 = { @@ -568,7 +574,6 @@ punt_socket_register_cmd (vlib_main_t * vm, }, .type = PUNT_TYPE_L4, }; - /* *INDENT-ON* */ if (!unformat_user (input__, unformat_line_input, input)) return 0; @@ -616,7 +621,6 @@ done: * @cliexcmd{punt socket register socket punt_l4_foo.sock} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_socket_register_command, static) = { .path = "punt socket register", @@ -624,7 +628,6 @@ VLIB_CLI_COMMAND (punt_socket_register_command, static) = .short_help = "punt socket register [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>] socket <socket>", .is_mp_safe = 1, }; -/* *INDENT-ON* */ static clib_error_t * punt_socket_deregister_cmd (vlib_main_t * vm, @@ -633,7 +636,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm, { unformat_input_t line_input, *input = &line_input; clib_error_t *error = NULL; - /* *INDENT-OFF* */ punt_reg_t pr = { .punt = { .l4 = { @@ -644,7 +646,6 @@ 
punt_socket_deregister_cmd (vlib_main_t * vm, }, .type = PUNT_TYPE_L4, }; - /* *INDENT-ON* */ if (!unformat_user (input__, unformat_line_input, input)) return 0; @@ -685,7 +686,6 @@ done: * @cliexpar * @cliexcmd{punt socket register} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) = { .path = "punt socket deregister", @@ -693,7 +693,6 @@ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) = .short_help = "punt socket deregister [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>]", .is_mp_safe = 1, }; -/* *INDENT-ON* */ void punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx) @@ -706,24 +705,20 @@ punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx) { u32 pci, key; - /* *INDENT-OFF* */ hash_foreach(key, pci, pm->db.clients_by_l4_port, ({ cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx); })); - /* *INDENT-ON* */ break; } case PUNT_TYPE_IP_PROTO: { u32 pci, key; - /* *INDENT-OFF* */ hash_foreach(key, pci, pm->db.clients_by_ip_proto, ({ cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx); })); - /* *INDENT-ON* */ break; } case PUNT_TYPE_EXCEPTION: @@ -821,7 +816,6 @@ done: * @cliexpar * @cliexcmd{show punt socket ipv4} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) = { .path = "show punt socket registrations", @@ -829,7 +823,6 @@ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) = .short_help = "show punt socket registrations [l4|exception]", .is_mp_safe = 1, }; -/* *INDENT-ON* */ clib_error_t * ip_punt_init (vlib_main_t * vm) diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h index a2612d60f07..e8495caad61 100644 --- a/src/vnet/ip/punt.h +++ b/src/vnet/ip/punt.h @@ -20,7 +20,12 @@ #ifndef included_punt_h #define included_punt_h +#ifdef __linux__ #include <linux/un.h> +#elif __FreeBSD__ +#include <sys/un.h> +#define UNIX_PATH_MAX SUNPATHLEN +#endif /* __linux__ */ #include <stdbool.h> #include <vnet/ip/ip.h> @@ -239,6 +244,7 @@ extern 
vlib_node_registration_t udp4_punt_node; extern vlib_node_registration_t udp6_punt_node; extern vlib_node_registration_t udp4_punt_socket_node; extern vlib_node_registration_t udp6_punt_socket_node; +extern vlib_node_registration_t icmp6_punt_socket_node; extern vlib_node_registration_t ip4_proto_punt_socket_node; extern vlib_node_registration_t ip6_proto_punt_socket_node; extern vlib_node_registration_t punt_socket_rx_node; diff --git a/src/vnet/ip/punt_api.c b/src/vnet/ip/punt_api.c index bcbf939f69d..20297af2e75 100644 --- a/src/vnet/ip/punt_api.c +++ b/src/vnet/ip/punt_api.c @@ -224,12 +224,10 @@ vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp) char *p = vnet_punt_get_server_pathname (); - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_PUNT_SOCKET_REGISTER_REPLY, ({ memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname)); })); - /* *INDENT-ON* */ } typedef struct punt_socket_send_ctx_t_ diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c index 7f9beef0ffe..6400e49c626 100644 --- a/src/vnet/ip/punt_node.c +++ b/src/vnet/ip/punt_node.c @@ -23,6 +23,7 @@ */ #include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> #include <vlib/vlib.h> #include <vnet/ip/punt.h> #include <vlib/unix/unix.h> @@ -182,7 +183,6 @@ VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm, return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4_punt_node) = { .name = "ip4-udp-punt", /* Takes a vector of packets. 
*/ @@ -214,7 +214,6 @@ VLIB_REGISTER_NODE (udp6_punt_node) = { #undef _ }, }; -/* *INDENT-ON* */ typedef struct { @@ -243,10 +242,9 @@ format_udp_punt_trace (u8 * s, va_list * args) } always_inline uword -punt_socket_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - punt_type_t pt, ip_address_family_t af) +punt_socket_inline2 (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, punt_type_t pt, + ip_address_family_t af, ip_protocol_t protocol) { u32 *buffers = vlib_frame_vector_args (frame); u32 thread_index = vm->thread_index; @@ -266,33 +264,42 @@ punt_socket_inline (vlib_main_t * vm, uword l; punt_packetdesc_t packetdesc; punt_client_t *c; - + u16 port = 0; b = vlib_get_buffer (vm, buffers[i]); if (PUNT_TYPE_L4 == pt) { - /* Reverse UDP Punt advance */ - udp_header_t *udp; - if (AF_IP4 == af) + if (protocol == IP_PROTOCOL_UDP) { - vlib_buffer_advance (b, -(sizeof (ip4_header_t) + - sizeof (udp_header_t))); - ip4_header_t *ip = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip + 1); + /* Reverse UDP Punt advance */ + udp_header_t *udp; + if (AF_IP4 == af) + { + vlib_buffer_advance ( + b, -(sizeof (ip4_header_t) + sizeof (udp_header_t))); + ip4_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + else + { + vlib_buffer_advance ( + b, -(sizeof (ip6_header_t) + sizeof (udp_header_t))); + ip6_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + port = clib_net_to_host_u16 (udp->dst_port); } - else + else if (protocol == IP_PROTOCOL_ICMP6) { - vlib_buffer_advance (b, -(sizeof (ip6_header_t) + - sizeof (udp_header_t))); ip6_header_t *ip = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip + 1); + icmp46_header_t *icmp = ip6_next_header (ip); + port = icmp->type; } - /* * Find registerered client * If no registered client, drop packet and count */ - c = punt_client_l4_get (af, clib_net_to_host_u16 (udp->dst_port)); + c = punt_client_l4_get (af, 
port); } else if (PUNT_TYPE_IP_PROTO == pt) { @@ -339,7 +346,7 @@ punt_socket_inline (vlib_main_t * vm, iov->iov_len = sizeof (packetdesc); /** VLIB buffer chain -> Unix iovec(s). */ - vlib_buffer_advance (b, -(sizeof (ethernet_header_t))); + vlib_buffer_advance (b, -ethernet_buffer_header_size (b)); vec_add2 (ptd->iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; @@ -396,6 +403,14 @@ error: return n_packets; } +always_inline uword +punt_socket_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, punt_type_t pt, + ip_address_family_t af) +{ + return punt_socket_inline2 (vm, node, frame, pt, af, IP_PROTOCOL_UDP); +} + static uword udp4_punt_socket (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) @@ -427,6 +442,14 @@ ip6_proto_punt_socket (vlib_main_t * vm, } static uword +icmp6_punt_socket (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *from_frame) +{ + return punt_socket_inline2 (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6, + IP_PROTOCOL_ICMP6); +} + +static uword exception_punt_socket (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { @@ -435,7 +458,6 @@ exception_punt_socket (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4_punt_socket_node) = { .function = udp4_punt_socket, .name = "ip4-udp-punt-socket", @@ -483,7 +505,16 @@ VLIB_REGISTER_NODE (exception_punt_socket_node) = { .n_errors = PUNT_N_ERROR, .error_strings = punt_error_strings, }; -/* *INDENT-ON* */ +VLIB_REGISTER_NODE (icmp6_punt_socket_node) = { + .function = icmp6_punt_socket, + .name = "ip6-icmp-punt-socket", + .format_trace = format_udp_punt_trace, + .flags = VLIB_NODE_FLAG_IS_DROP, + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; + typedef struct { @@ -614,7 +645,6 @@ punt_socket_rx (vlib_main_t * vm, return total_count; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (punt_socket_rx_node) = { .function = 
punt_socket_rx, @@ -633,7 +663,6 @@ VLIB_REGISTER_NODE (punt_socket_rx_node) = }, .format_trace = format_punt_trace, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c index 5b69234e438..bab7d479dcf 100644 --- a/src/vnet/ip/reass/ip4_full_reass.c +++ b/src/vnet/ip/reass/ip4_full_reass.c @@ -427,8 +427,7 @@ ip4_full_reass_free (ip4_full_reass_main_t * rm, * with local variables would cause either buffer leak or corruption */ always_inline void ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node, - ip4_full_reass_t *reass, u32 *n_left_to_next, - u32 **to_next) + ip4_full_reass_t *reass) { u32 range_bi = reass->first_bi; vlib_buffer_t *range_b; @@ -452,40 +451,23 @@ ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node, if (~0 != reass->error_next_index && reass->error_next_index < node->n_next_nodes) { - u32 next_index; - - next_index = reass->error_next_index; - u32 bi = ~0; + u32 n_free = vec_len (to_free); /* record number of packets sent to custom app */ vlib_node_increment_counter (vm, node->node_index, - IP4_ERROR_REASS_TO_CUSTOM_APP, - vec_len (to_free)); - - while (vec_len (to_free) > 0) - { - vlib_get_next_frame (vm, node, next_index, *to_next, - (*n_left_to_next)); + IP4_ERROR_REASS_TO_CUSTOM_APP, n_free); - while (vec_len (to_free) > 0 && (*n_left_to_next) > 0) - { - bi = vec_pop (to_free); + if (node->flags & VLIB_NODE_FLAG_TRACE) + for (u32 i = 0; i < n_free; i++) + { + vlib_buffer_t *b = vlib_get_buffer (vm, to_free[i]); + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) + ip4_full_reass_add_trace (vm, node, reass, to_free[i], + RANGE_DISCARD, 0, ~0); + } - if (~0 != bi) - { - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) - { - ip4_full_reass_add_trace (vm, node, reass, bi, - RANGE_DISCARD, 0, ~0); - } - *to_next[0] = bi; - (*to_next) += 1; - (*n_left_to_next) -= 
1; - } - } - vlib_put_next_frame (vm, node, next_index, (*n_left_to_next)); - } + vlib_buffer_enqueue_to_single_next (vm, node, to_free, + reass->error_next_index, n_free); } else { @@ -564,8 +546,7 @@ always_inline ip4_full_reass_t * ip4_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node, ip4_full_reass_main_t *rm, ip4_full_reass_per_thread_t *rt, - ip4_full_reass_kv_t *kv, u8 *do_handoff, - u32 *n_left_to_next, u32 **to_next) + ip4_full_reass_kv_t *kv, u8 *do_handoff) { ip4_full_reass_t *reass; f64 now; @@ -590,7 +571,7 @@ again: { vlib_node_increment_counter (vm, node->node_index, IP4_ERROR_REASS_TIMEOUT, 1); - ip4_full_reass_drop_all (vm, node, reass, n_left_to_next, to_next); + ip4_full_reass_drop_all (vm, node, reass); ip4_full_reass_free (rm, rt, reass); reass = NULL; } @@ -647,7 +628,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *last_b = NULL; u32 sub_chain_bi = reass->first_bi; u32 total_length = 0; - u32 buf_cnt = 0; do { u32 tmp_bi = sub_chain_bi; @@ -684,7 +664,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end; while (1) { - ++buf_cnt; if (trim_front) { if (trim_front > tmp->current_length) @@ -1184,205 +1163,195 @@ ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, bool is_local) { u32 *from = vlib_frame_vector_args (frame); - u32 n_left_from, n_left_to_next, *to_next, next_index; + u32 n_left, n_next = 0, to_next[VLIB_FRAME_SIZE]; ip4_full_reass_main_t *rm = &ip4_full_reass_main; ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; + u16 nexts[VLIB_FRAME_SIZE]; + clib_spinlock_lock (&rt->lock); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - while (n_left_from > 0) + n_left = frame->n_vectors; + while (n_left > 0) { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 
error0 = IP4_ERROR_NONE; - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - u32 next0; - u32 error0 = IP4_ERROR_NONE; - - bi0 = from[0]; - b0 = vlib_get_buffer (vm, bi0); + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); - ip4_header_t *ip0 = vlib_buffer_get_current (b0); - if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0)) + ip4_header_t *ip0 = vlib_buffer_get_current (b0); + if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0)) + { + // this is a whole packet - no fragmentation + if (CUSTOM != type) { - // this is a whole packet - no fragmentation - if (CUSTOM != type) - { - next0 = IP4_FULL_REASS_NEXT_INPUT; - } - else - { - next0 = vnet_buffer (b0)->ip.reass.next_index; - } - ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0, - ~0); - goto packet_enqueue; + next0 = IP4_FULL_REASS_NEXT_INPUT; } - - if (is_local && !rm->is_local_reass_enabled) + else { - next0 = IP4_FULL_REASS_NEXT_DROP; - goto packet_enqueue; + next0 = vnet_buffer (b0)->ip.reass.next_index; } + ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0, ~0); + goto packet_enqueue; + } - const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); - const u32 fragment_length = - clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); - const u32 fragment_last = fragment_first + fragment_length - 1; + if (is_local && !rm->is_local_reass_enabled) + { + next0 = IP4_FULL_REASS_NEXT_DROP; + goto packet_enqueue; + } - /* Keep track of received fragments */ - vlib_node_increment_counter (vm, node->node_index, - IP4_ERROR_REASS_FRAGMENTS_RCVD, 1); + const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); + const u32 fragment_length = + clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); + const u32 fragment_last = fragment_first + fragment_length - 1; - if (fragment_first > fragment_last || - fragment_first + fragment_length > UINT16_MAX - 20 || - (fragment_length < 8 && // 8 is minimum frag length 
per RFC 791 - ip4_get_fragment_more (ip0))) - { - next0 = IP4_FULL_REASS_NEXT_DROP; - error0 = IP4_ERROR_REASS_MALFORMED_PACKET; - goto packet_enqueue; - } + /* Keep track of received fragments */ + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_FRAGMENTS_RCVD, 1); - u32 fib_index = vec_elt (ip4_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX]); + if (fragment_first > fragment_last || + fragment_first + fragment_length > UINT16_MAX - 20 || + (fragment_length < 8 && // 8 is minimum frag length per RFC 791 + ip4_get_fragment_more (ip0))) + { + next0 = IP4_FULL_REASS_NEXT_DROP; + error0 = IP4_ERROR_REASS_MALFORMED_PACKET; + goto packet_enqueue; + } + + u32 fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ? + vec_elt (ip4_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]) : + vnet_buffer (b0)->sw_if_index[VLIB_TX]; - ip4_full_reass_kv_t kv = { .k.fib_index = fib_index, - .k.src.as_u32 = ip0->src_address.as_u32, - .k.dst.as_u32 = ip0->dst_address.as_u32, - .k.frag_id = ip0->fragment_id, - .k.proto = ip0->protocol + ip4_full_reass_kv_t kv = { .k.fib_index = fib_index, + .k.src.as_u32 = ip0->src_address.as_u32, + .k.dst.as_u32 = ip0->dst_address.as_u32, + .k.frag_id = ip0->fragment_id, + .k.proto = ip0->protocol - }; - u8 do_handoff = 0; + }; + u8 do_handoff = 0; - ip4_full_reass_t *reass = ip4_full_reass_find_or_create ( - vm, node, rm, rt, &kv, &do_handoff, &n_left_to_next, &to_next); + ip4_full_reass_t *reass = + ip4_full_reass_find_or_create (vm, node, rm, rt, &kv, &do_handoff); - if (reass) + if (reass) + { + const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); + if (0 == fragment_first) { - const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); - if (0 == fragment_first) - { - reass->sendout_thread_index = vm->thread_index; - } + reass->sendout_thread_index = vm->thread_index; } + } - if (PREDICT_FALSE (do_handoff)) + if (PREDICT_FALSE (do_handoff)) + { + next0 = 
IP4_FULL_REASS_NEXT_HANDOFF; + vnet_buffer (b0)->ip.reass.owner_thread_index = + kv.v.memory_owner_thread_index; + } + else if (reass) + { + u32 handoff_thread_idx; + u32 counter = ~0; + switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0, + &error0, CUSTOM == type, + &handoff_thread_idx)) { + case IP4_REASS_RC_OK: + /* nothing to do here */ + break; + case IP4_REASS_RC_HANDOFF: next0 = IP4_FULL_REASS_NEXT_HANDOFF; + b0 = vlib_get_buffer (vm, bi0); vnet_buffer (b0)->ip.reass.owner_thread_index = - kv.v.memory_owner_thread_index; + handoff_thread_idx; + break; + case IP4_REASS_RC_TOO_MANY_FRAGMENTS: + counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG; + break; + case IP4_REASS_RC_NO_BUF: + counter = IP4_ERROR_REASS_NO_BUF; + break; + case IP4_REASS_RC_INTERNAL_ERROR: + counter = IP4_ERROR_REASS_INTERNAL_ERROR; + /* Sanitization is needed in internal error cases only, as + * the incoming packet is already dropped in other cases, + * also adding bi0 back to the reassembly list, fixes the + * leaking of buffers during internal errors. 
+ * + * Also it doesnt make sense to send these buffers custom + * app, these fragments are with internal errors */ + sanitize_reass_buffers_add_missing (vm, reass, &bi0); + reass->error_next_index = ~0; + break; } - else if (reass) - { - u32 handoff_thread_idx; - u32 counter = ~0; - switch (ip4_full_reass_update - (vm, node, rm, rt, reass, &bi0, &next0, - &error0, CUSTOM == type, &handoff_thread_idx)) - { - case IP4_REASS_RC_OK: - /* nothing to do here */ - break; - case IP4_REASS_RC_HANDOFF: - next0 = IP4_FULL_REASS_NEXT_HANDOFF; - b0 = vlib_get_buffer (vm, bi0); - vnet_buffer (b0)->ip.reass.owner_thread_index = - handoff_thread_idx; - break; - case IP4_REASS_RC_TOO_MANY_FRAGMENTS: - counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG; - break; - case IP4_REASS_RC_NO_BUF: - counter = IP4_ERROR_REASS_NO_BUF; - break; - case IP4_REASS_RC_INTERNAL_ERROR: - counter = IP4_ERROR_REASS_INTERNAL_ERROR; - /* Sanitization is needed in internal error cases only, as - * the incoming packet is already dropped in other cases, - * also adding bi0 back to the reassembly list, fixes the - * leaking of buffers during internal errors. 
- * - * Also it doesnt make sense to send these buffers custom - * app, these fragments are with internal errors */ - sanitize_reass_buffers_add_missing (vm, reass, &bi0); - reass->error_next_index = ~0; - break; - } - if (~0 != counter) - { - vlib_node_increment_counter (vm, node->node_index, counter, - 1); - ip4_full_reass_drop_all (vm, node, reass, &n_left_to_next, - &to_next); - ip4_full_reass_free (rm, rt, reass); - goto next_packet; - } - } - else + if (~0 != counter) { - next0 = IP4_FULL_REASS_NEXT_DROP; - error0 = IP4_ERROR_REASS_LIMIT_REACHED; + vlib_node_increment_counter (vm, node->node_index, counter, 1); + ip4_full_reass_drop_all (vm, node, reass); + ip4_full_reass_free (rm, rt, reass); + goto next_packet; } + } + else + { + next0 = IP4_FULL_REASS_NEXT_DROP; + error0 = IP4_ERROR_REASS_LIMIT_REACHED; + } + packet_enqueue: - packet_enqueue: - - if (bi0 != ~0) + if (bi0 != ~0) + { + /* bi0 might have been updated by reass_finalize, reload */ + b0 = vlib_get_buffer (vm, bi0); + if (IP4_ERROR_NONE != error0) { - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - - /* bi0 might have been updated by reass_finalize, reload */ - b0 = vlib_get_buffer (vm, bi0); - if (IP4_ERROR_NONE != error0) - { - b0->error = node->errors[error0]; - } - - if (next0 == IP4_FULL_REASS_NEXT_HANDOFF) - { - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - ip4_full_reass_add_trace ( - vm, node, NULL, bi0, HANDOFF, 0, - vnet_buffer (b0)->ip.reass.owner_thread_index); - } - } - else if (FEATURE == type && IP4_ERROR_NONE == error0) - { - vnet_feature_next (&next0, b0); - } + b0->error = node->errors[error0]; + } - /* Increment the counter to-custom-app also as this fragment is - * also going to application */ - if (CUSTOM == type) + if (next0 == IP4_FULL_REASS_NEXT_HANDOFF) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - vlib_node_increment_counter ( - vm, node->node_index, IP4_ERROR_REASS_TO_CUSTOM_APP, 1); + ip4_full_reass_add_trace ( + vm, node, 
NULL, bi0, HANDOFF, 0, + vnet_buffer (b0)->ip.reass.owner_thread_index); } + } + else if (FEATURE == type && IP4_ERROR_NONE == error0) + { + vnet_feature_next (&next0, b0); + } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next); + /* Increment the counter to-custom-app also as this fragment is + * also going to application */ + if (CUSTOM == type) + { + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_TO_CUSTOM_APP, 1); } - next_packet: - from += 1; - n_left_from -= 1; + to_next[n_next] = bi0; + nexts[n_next] = next0; + n_next++; + IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next); } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + next_packet: + from += 1; + n_left -= 1; } clib_spinlock_unlock (&rt->lock); + + vlib_buffer_enqueue_to_next (vm, node, to_next, nexts, n_next); return frame->n_vectors; } @@ -1455,11 +1424,11 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = { }; VNET_FEATURE_INIT (ip4_full_reass_feature, static) = { - .arc_name = "ip4-unicast", - .node_name = "ip4-full-reassembly-feature", - .runs_before = VNET_FEATURES ("ip4-lookup", - "ipsec4-input-feature"), - .runs_after = 0, + .arc_name = "ip4-unicast", + .node_name = "ip4-full-reassembly-feature", + .runs_before = VNET_FEATURES ("ip4-lookup", "ipsec4-input-feature", + "ip4-sv-reassembly-feature"), + .runs_after = 0, }; VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm, @@ -1484,15 +1453,6 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = { }, }; -VNET_FEATURE_INIT (ip4_full_reass_custom, static) = { - .arc_name = "ip4-unicast", - .node_name = "ip4-full-reassembly-feature", - .runs_before = VNET_FEATURES ("ip4-lookup", - "ipsec4-input-feature"), - .runs_after = 0, -}; - - #ifndef CLIB_MARCH_VARIANT uword ip4_full_reass_custom_register_next_node (uword node_index) @@ -1688,7 +1648,6 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, uword 
thread_index = 0; int index; const uword nthreads = vlib_num_workers () + 1; - u32 n_left_to_next, *to_next; for (thread_index = 0; thread_index < nthreads; ++thread_index) { @@ -1734,8 +1693,7 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, vec_foreach (i, pool_indexes_to_free) { ip4_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); - ip4_full_reass_drop_all (vm, node, reass, &n_left_to_next, - &to_next); + ip4_full_reass_drop_all (vm, node, reass); ip4_full_reass_free (rm, rt, reass); } @@ -2101,7 +2059,7 @@ ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) "ip4-full-reassembly-feature", sw_if_index, 0, 0, 0); } - return -1; + return 0; } void diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c index 4ef144e9bee..7c3c2fff217 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.c +++ b/src/vnet/ip/reass/ip4_sv_reass.c @@ -150,6 +150,7 @@ typedef struct /** Worker handoff */ u32 fq_index; u32 fq_feature_index; + u32 fq_custom_context_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; @@ -457,14 +458,19 @@ l4_layer_truncated (ip4_header_t *ip) } always_inline uword -ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature, - bool is_output_feature, bool is_custom) +ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool is_output_feature, bool is_custom, + bool with_custom_context) { u32 *from = vlib_frame_vector_args (frame); - u32 n_left_from, n_left_to_next, *to_next, next_index; + u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index; ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; + u32 *context; + if (with_custom_context) + context = vlib_frame_aux_args (frame); + clib_spinlock_lock (&rt->lock); n_left_from = frame->n_vectors; @@ -621,6 
+627,8 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, next[0] = next0; next[1] = next1; next += 2; + if (with_custom_context) + context += 2; } while (n_left_from > 0) @@ -696,6 +704,8 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_left_from -= 1; next[0] = next0; next += 1; + if (with_custom_context) + context += 1; } vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, @@ -709,7 +719,11 @@ slow_path: while (n_left_from > 0) { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + if (with_custom_context) + vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, + to_next_aux, n_left_to_next); + else + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { @@ -717,6 +731,7 @@ slow_path: vlib_buffer_t *b0; u32 next0; u32 error0 = IP4_ERROR_NONE; + u8 forward_context = 0; bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); @@ -792,13 +807,17 @@ slow_path: ip4_sv_reass_kv_t kv; u8 do_handoff = 0; - kv.k.as_u64[0] = - (u64) vec_elt (ip4_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX]) | - (u64) ip0->src_address.as_u32 << 32; - kv.k.as_u64[1] = - (u64) ip0->dst_address. 
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48; + if (with_custom_context) + kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32 + << 32; + else + kv.k.as_u64[0] = + (u64) vec_elt (ip4_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]) | + (u64) ip0->src_address.as_u32 << 32; + kv.k.as_u64[1] = (u64) ip0->dst_address.as_u32 | + (u64) ip0->fragment_id << 32 | + (u64) ip0->protocol << 48; ip4_sv_reass_t *reass = ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff); @@ -808,6 +827,8 @@ slow_path: next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF; vnet_buffer (b0)->ip.reass.owner_thread_index = kv.v.thread_index; + if (with_custom_context) + forward_context = 1; goto packet_enqueue; } @@ -938,13 +959,26 @@ slow_path: b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); + if (with_custom_context && forward_context) + { + if (to_next_aux) + { + to_next_aux[0] = *context; + to_next_aux += 1; + } + vlib_validate_buffer_enqueue_with_aux_x1 ( + vm, node, next_index, to_next, to_next_aux, n_left_to_next, + bi0, *context, next0); + } + else + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); next_packet: from += 1; n_left_from -= 1; + if (with_custom_context) + context += 1; } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -959,12 +993,11 @@ VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ , - false /* is_output_feature */ , - false /* is_custom */ ); + return ip4_sv_reass_inline ( + vm, node, frame, false /* is_feature */, false /* is_output_feature */, + false /* is_custom */, false /* with_custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_node) = { .name = "ip4-sv-reassembly", .vector_size = sizeof (u32), @@ 
-980,18 +1013,16 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = { }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ , - false /* is_output_feature */ , - false /* is_custom */ ); + return ip4_sv_reass_inline ( + vm, node, frame, true /* is_feature */, false /* is_output_feature */, + false /* is_custom */, false /* with_custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = { .name = "ip4-sv-reassembly-feature", .vector_size = sizeof (u32), @@ -1006,28 +1037,24 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = { [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip4_sv_reass_feature) = { .arc_name = "ip4-unicast", .node_name = "ip4-sv-reassembly-feature", .runs_before = VNET_FEATURES ("ip4-lookup"), .runs_after = 0, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ , - true /* is_output_feature */ , - false /* is_custom */ ); + return ip4_sv_reass_inline ( + vm, node, frame, true /* is_feature */, true /* is_output_feature */, + false /* is_custom */, false /* with_custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { .name = "ip4-sv-reassembly-output-feature", .vector_size = sizeof (u32), @@ -1042,18 +1069,14 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = { .arc_name = "ip4-output", .node_name = "ip4-sv-reassembly-output-feature", .runs_before = 0, .runs_after = 0, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE 
(ip4_sv_reass_custom_node) = { .name = "ip4-sv-reassembly-custom-next", .vector_size = sizeof (u32), @@ -1069,15 +1092,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ , - false /* is_output_feature */ , - true /* is_custom */ ); + return ip4_sv_reass_inline ( + vm, node, frame, false /* is_feature */, false /* is_output_feature */, + true /* is_custom */, false /* with_custom_context */); +} + +VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = { + .name = "ip4-sv-reassembly-custom-context", + .vector_size = sizeof (u32), + .aux_size = sizeof(u32), + .format_trace = format_ip4_sv_reass_trace, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, + .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT, + .next_nodes = + { + [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input", + [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop", + [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-custom-context-handoff", + + }, +}; + +VLIB_NODE_FN (ip4_sv_reass_custom_context_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_sv_reass_inline ( + vm, node, frame, false /* is_feature */, false /* is_output_feature */, + true /* is_custom */, true /* with_custom_context */); } #ifndef CLIB_MARCH_VARIANT @@ -1222,6 +1269,8 @@ ip4_sv_reass_init_function (vlib_main_t * vm) rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0); + rm->fq_custom_context_index = + vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0); rm->feature_use_refcount_per_intf = NULL; rm->output_feature_use_refcount_per_intf = NULL; @@ -1274,7 +1323,6 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, clib_spinlock_lock (&rt->lock); vec_reset_length 
(pool_indexes_to_free); - /* *INDENT-OFF* */ pool_foreach_index (index, rt->pool) { reass = pool_elt_at_index (rt->pool, index); if (now > reass->last_heard + rm->timeout) @@ -1282,15 +1330,12 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, vec_add1 (pool_indexes_to_free, index); } } - /* *INDENT-ON* */ int *i; - /* *INDENT-OFF* */ vec_foreach (i, pool_indexes_to_free) { ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); ip4_sv_reass_free (vm, rm, rt, reass); } - /* *INDENT-ON* */ clib_spinlock_unlock (&rt->lock); } @@ -1305,7 +1350,6 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = { .function = ip4_sv_reass_walk_expired, .type = VLIB_NODE_TYPE_PROCESS, @@ -1314,7 +1358,6 @@ VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = { .n_errors = IP4_N_ERROR, .error_counters = ip4_error_counters, }; -/* *INDENT-ON* */ static u8 * format_ip4_sv_reass_key (u8 * s, va_list * args) @@ -1381,11 +1424,9 @@ show_ip4_reass (vlib_main_t * vm, clib_spinlock_lock (&rt->lock); if (details) { - /* *INDENT-OFF* */ pool_foreach (reass, rt->pool) { vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass); } - /* *INDENT-ON* */ } sum_reass_n += rt->reass_n; clib_spinlock_unlock (&rt->lock); @@ -1409,13 +1450,11 @@ show_ip4_reass (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = { .path = "show ip4-sv-reassembly", .short_help = "show ip4-sv-reassembly [details]", .function = show_ip4_reass, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT vnet_api_error_t @@ -1466,25 +1505,30 @@ format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args) } always_inline uword -ip4_sv_reass_handoff_node_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature) +ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool is_custom_context) { ip4_sv_reass_main_t *rm = 
&ip4_sv_reass_main; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; - u32 n_enq, n_left_from, *from; + u32 n_enq, n_left_from, *from, *context; u16 thread_indices[VLIB_FRAME_SIZE], *ti; u32 fq_index; from = vlib_frame_vector_args (frame); + if (is_custom_context) + context = vlib_frame_aux_args (frame); + n_left_from = frame->n_vectors; vlib_get_buffers (vm, from, bufs, n_left_from); b = bufs; ti = thread_indices; - fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index; + fq_index = (is_feature) ? rm->fq_feature_index : + (is_custom_context ? rm->fq_custom_context_index : + rm->fq_index); while (n_left_from > 0) { @@ -1503,8 +1547,12 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t * vm, ti += 1; b += 1; } - n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from, - thread_indices, frame->n_vectors, 1); + if (is_custom_context) + n_enq = vlib_buffer_enqueue_to_thread_with_aux ( + vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1); + else + n_enq = vlib_buffer_enqueue_to_thread ( + vm, node, fq_index, from, thread_indices, frame->n_vectors, 1); if (n_enq < frame->n_vectors) vlib_node_increment_counter (vm, node->node_index, @@ -1517,12 +1565,11 @@ VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_handoff_node_inline (vm, node, frame, - false /* is_feature */ ); + return ip4_sv_reass_handoff_node_inline ( + vm, node, frame, false /* is_feature */, false /* is_custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = { .name = "ip4-sv-reassembly-handoff", .vector_size = sizeof (u32), @@ -1536,22 +1583,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ +VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_sv_reass_handoff_node_inline ( + vm, node, frame, false /* is_feature */, true /* 
is_custom_context */); +} + +VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = { + .name = "ip4-sv-reassembly-custom-context-handoff", + .vector_size = sizeof (u32), + .aux_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings), + .error_strings = ip4_sv_reass_handoff_error_strings, + .format_trace = format_ip4_sv_reass_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; -/* *INDENT-OFF* */ VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_handoff_node_inline (vm, node, frame, - true /* is_feature */ ); + return ip4_sv_reass_handoff_node_inline ( + vm, node, frame, true /* is_feature */, false /* is_custom_context */); } -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = { .name = "ip4-sv-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1565,7 +1629,6 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int @@ -1603,6 +1666,13 @@ ip4_sv_reass_custom_register_next_node (uword node_index) node_index); } +uword +ip4_sv_reass_custom_context_register_next_node (uword node_index) +{ + return vlib_node_add_next ( + vlib_get_main (), ip4_sv_reass_custom_context_node.index, node_index); +} + int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h index e926dbeebcc..3a684eb9809 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.h +++ b/src/vnet/ip/reass/ip4_sv_reass.h @@ -49,6 +49,7 @@ int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); uword ip4_sv_reass_custom_register_next_node (uword node_index); +uword ip4_sv_reass_custom_context_register_next_node (uword node_index); #endif /* __included_ip4_sv_reass_h__ */ diff --git 
a/src/vnet/ip/reass/ip6_full_reass.c b/src/vnet/ip/reass/ip6_full_reass.c index 97815572ee2..27647985877 100644 --- a/src/vnet/ip/reass/ip6_full_reass.c +++ b/src/vnet/ip/reass/ip6_full_reass.c @@ -705,8 +705,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *last_b = NULL; u32 sub_chain_bi = reass->first_bi; u32 total_length = 0; - u32 buf_cnt = 0; - u32 dropped_cnt = 0; u32 *vec_drop_compress = NULL; ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK; do @@ -748,7 +746,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end; while (1) { - ++buf_cnt; if (trim_front) { if (trim_front > tmp->current_length) @@ -804,7 +801,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, goto free_buffers_and_return; } vec_add1 (vec_drop_compress, tmp_bi); - ++dropped_cnt; } if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -1283,15 +1279,17 @@ ip6_full_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } else { + u32 fib_index = + (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ? + vec_elt (ip6_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]) : + vnet_buffer (b0)->sw_if_index[VLIB_TX]; kv.k.as_u64[0] = ip0->src_address.as_u64[0]; kv.k.as_u64[1] = ip0->src_address.as_u64[1]; kv.k.as_u64[2] = ip0->dst_address.as_u64[0]; kv.k.as_u64[3] = ip0->dst_address.as_u64[1]; kv.k.as_u64[4] = - ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX])) - << 32 | - (u64) frag_hdr->identification; + ((u64) fib_index) << 32 | (u64) frag_hdr->identification; /* RFC 8200: The Next Header values in the Fragment headers of * different fragments of the same original packet may differ. 
* Only the value from the Offset zero fragment packet is used @@ -2187,7 +2185,7 @@ ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) "ip6-full-reassembly-feature", sw_if_index, 0, 0, 0); } - return -1; + return 0; } void diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c index c7f64ca3338..fe2ed05555c 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.c +++ b/src/vnet/ip/reass/ip6_sv_reass.c @@ -150,6 +150,7 @@ typedef struct /** Worker handoff */ u32 fq_index; u32 fq_feature_index; + u32 fq_custom_context_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; @@ -513,14 +514,18 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm, } always_inline uword -ip6_sv_reassembly_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature) +ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool custom_next, bool custom_context) { u32 *from = vlib_frame_vector_args (frame); - u32 n_left_from, n_left_to_next, *to_next, next_index; + u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index; ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; + u32 *context; + if (custom_context) + context = vlib_frame_aux_args (frame); + clib_spinlock_lock (&rt->lock); n_left_from = frame->n_vectors; @@ -528,7 +533,11 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, while (n_left_from > 0) { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + if (custom_context) + vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, + to_next_aux, n_left_to_next); + else + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { @@ -536,7 +545,7 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, vlib_buffer_t *b0; u32 next0 = 
IP6_SV_REASSEMBLY_NEXT_DROP; u32 error0 = IP6_ERROR_NONE; - + u8 forward_context = 0; bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); @@ -576,7 +585,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, goto packet_enqueue; } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; - next0 = IP6_SV_REASSEMBLY_NEXT_INPUT; + next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index : + IP6_SV_REASSEMBLY_NEXT_INPUT; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_sv_reass_add_trace ( @@ -615,10 +625,15 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, kv.k.as_u64[1] = ip0->src_address.as_u64[1]; kv.k.as_u64[2] = ip0->dst_address.as_u64[0]; kv.k.as_u64[3] = ip0->dst_address.as_u64[1]; - kv.k.as_u64[4] = - ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 | - (u64) frag_hdr->identification; + if (custom_context) + kv.k.as_u64[4] = + (u64) *context << 32 | (u64) frag_hdr->identification; + else + kv.k.as_u64[4] = + ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX])) + << 32 | + (u64) frag_hdr->identification; kv.k.as_u64[5] = ip0->protocol; ip6_sv_reass_t *reass = @@ -629,6 +644,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF; vnet_buffer (b0)->ip.reass.owner_thread_index = kv.v.thread_index; + if (custom_context) + forward_context = 1; goto packet_enqueue; } @@ -653,7 +670,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - next0 = IP6_SV_REASSEMBLY_NEXT_INPUT; + next0 = custom_next ? 
vnet_buffer (b0)->ip.reass.next_index : + IP6_SV_REASSEMBLY_NEXT_INPUT; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_sv_reass_add_trace ( @@ -748,11 +766,25 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); + if (custom_context && forward_context) + { + if (to_next_aux) + { + to_next_aux[0] = *context; + to_next_aux += 1; + } + vlib_validate_buffer_enqueue_with_aux_x1 ( + vm, node, next_index, to_next, to_next_aux, n_left_to_next, + bi0, *context, next0); + } + else + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); next_packet: from += 1; + if (custom_context) + context += 1; n_left_from -= 1; } @@ -767,10 +799,11 @@ VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */ ); + return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */, + false /* custom next */, + false /* custom context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reass_node) = { .name = "ip6-sv-reassembly", .vector_size = sizeof (u32), @@ -786,16 +819,16 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = { [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-handoff", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */ ); + return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */, + false /* custom next */, + false /* custom context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = { .name = "ip6-sv-reassembly-feature", .vector_size = sizeof (u32), @@ -811,16 +844,38 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = { 
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-feature-hoff", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = { .arc_name = "ip6-unicast", .node_name = "ip6-sv-reassembly-feature", .runs_before = VNET_FEATURES ("ip6-lookup"), .runs_after = 0, }; -/* *INDENT-ON* */ + +VLIB_NODE_FN (ip6_sv_reass_custom_context_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */, + true /* custom next */, + true /* custom context */); +} + +VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = { + .name = "ip6-sv-reassembly-custom-context", + .vector_size = sizeof (u32), + .aux_size = sizeof (u32), + .format_trace = format_ip6_sv_reass_trace, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, + .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT, + .next_nodes = + { + [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input", + [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop", + [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-custom-context-handoff", + }, +}; #ifndef CLIB_MARCH_VARIANT static u32 @@ -971,6 +1026,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm) rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0); + rm->fq_custom_context_index = + vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0); rm->feature_use_refcount_per_intf = NULL; @@ -1021,7 +1078,6 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); - /* *INDENT-OFF* */ pool_foreach_index (index, rt->pool) { reass = pool_elt_at_index (rt->pool, index); if (now > reass->last_heard + rm->timeout) @@ -1029,15 +1085,12 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, vec_add1 (pool_indexes_to_free, index); } } - /* *INDENT-ON* */ int *i; - /* 
*INDENT-OFF* */ vec_foreach (i, pool_indexes_to_free) { ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); ip6_sv_reass_free (vm, rm, rt, reass); } - /* *INDENT-ON* */ clib_spinlock_unlock (&rt->lock); } @@ -1052,7 +1105,6 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = { .function = ip6_sv_reass_walk_expired, .format_trace = format_ip6_sv_reass_trace, @@ -1062,7 +1114,6 @@ VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = { .n_errors = IP6_N_ERROR, .error_counters = ip6_error_counters, }; -/* *INDENT-ON* */ static u8 * format_ip6_sv_reass_key (u8 * s, va_list * args) @@ -1128,11 +1179,9 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, clib_spinlock_lock (&rt->lock); if (details) { - /* *INDENT-OFF* */ pool_foreach (reass, rt->pool) { vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass); } - /* *INDENT-ON* */ } sum_reass_n += rt->reass_n; clib_spinlock_unlock (&rt->lock); @@ -1158,13 +1207,11 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = { .path = "show ip6-sv-reassembly", .short_help = "show ip6-sv-reassembly [details]", .function = show_ip6_sv_reass, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT vnet_api_error_t @@ -1214,25 +1261,29 @@ format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args) } always_inline uword -ip6_sv_reassembly_handoff_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature) +ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool custom_context) { ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; - u32 n_enq, n_left_from, *from; + u32 n_enq, n_left_from, *from, *context; u16 thread_indices[VLIB_FRAME_SIZE], *ti; u32 fq_index; from = vlib_frame_vector_args (frame); + if 
(custom_context) + context = vlib_frame_aux_args (frame); n_left_from = frame->n_vectors; vlib_get_buffers (vm, from, bufs, n_left_from); b = bufs; ti = thread_indices; - fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index; + fq_index = (is_feature) ? + rm->fq_feature_index : + (custom_context ? rm->fq_custom_context_index : rm->fq_index); while (n_left_from > 0) { @@ -1251,8 +1302,12 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t * vm, ti += 1; b += 1; } - n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from, - thread_indices, frame->n_vectors, 1); + if (custom_context) + n_enq = vlib_buffer_enqueue_to_thread_with_aux ( + vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1); + else + n_enq = vlib_buffer_enqueue_to_thread ( + vm, node, fq_index, from, thread_indices, frame->n_vectors, 1); if (n_enq < frame->n_vectors) vlib_node_increment_counter (vm, node->node_index, @@ -1265,11 +1320,10 @@ VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_sv_reassembly_handoff_inline (vm, node, frame, - false /* is_feature */ ); + return ip6_sv_reassembly_handoff_inline ( + vm, node, frame, false /* is_feature */, false /* custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = { .name = "ip6-sv-reassembly-handoff", .vector_size = sizeof (u32), @@ -1288,11 +1342,11 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = { VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_sv_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ ); + return ip6_sv_reassembly_handoff_inline ( + vm, node, frame, true /* is_feature */, false /* custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = { .name = "ip6-sv-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1306,7 +1360,28 @@ 
VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ + +VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_sv_reassembly_handoff_inline ( + vm, node, frame, false /* is_feature */, true /* custom_context */); +} + +VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = { + .name = "ip6-sv-reassembly-custom-context-handoff", + .vector_size = sizeof (u32), + .aux_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings), + .error_strings = ip6_sv_reassembly_handoff_error_strings, + .format_trace = format_ip6_sv_reassembly_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; #ifndef CLIB_MARCH_VARIANT int @@ -1335,6 +1410,14 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) } return 0; } + +uword +ip6_sv_reass_custom_context_register_next_node (uword node_index) +{ + return vlib_node_add_next ( + vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index, + node_index); +} #endif /* diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h index 81ac2312bdf..7dc9df132dd 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.h +++ b/src/vnet/ip/reass/ip6_sv_reass.h @@ -44,6 +44,7 @@ vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable); int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); +uword ip6_sv_reass_custom_context_register_next_node (uword node_index); #endif /* __included_ip6_sv_reass_h */ diff --git a/src/vnet/ip/vtep.h b/src/vnet/ip/vtep.h index 8b2c7fe723f..97e74429e88 100644 --- a/src/vnet/ip/vtep.h +++ b/src/vnet/ip/vtep.h @@ -29,7 +29,6 @@ * processing and go directly to the tunnel protocol handler node. 
*/ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { union { @@ -40,7 +39,6 @@ typedef CLIB_PACKED u64 as_u64; }; }) vtep4_key_t; -/* *INDENT-ON* */ /** * @brief Tunnel endpoint key (IPv6) @@ -51,13 +49,11 @@ typedef CLIB_PACKED * processing and go directly to the tunnel protocol handler node. */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { ip6_address_t addr; u32 fib_index; }) vtep6_key_t; -/* *INDENT-ON* */ typedef struct { diff --git a/src/vnet/ip6-nd/ip6_mld.c b/src/vnet/ip6-nd/ip6_mld.c index ea70bcc5d19..74428ec93c3 100644 --- a/src/vnet/ip6-nd/ip6_mld.c +++ b/src/vnet/ip6-nd/ip6_mld.c @@ -33,7 +33,6 @@ * adjacency tables and neighbor discovery logic. */ -/* *INDENT-OFF*/ /* multicast listener report packet format for ethernet. */ typedef CLIB_PACKED (struct { @@ -51,7 +50,6 @@ typedef CLIB_PACKED (struct ip6_header_t ip; icmp6_multicast_listener_report_header_t report_hdr; }) icmp6_multicast_listener_report_packet_t; -/* *INDENT-ON*/ typedef struct { @@ -224,12 +222,10 @@ ip6_mld_delegate_disable (index_t imdi) imd = pool_elt_at_index (ip6_mld_pool, imdi); /* clean MLD pools */ - /* *INDENT-OFF* */ pool_flush (m, imd->mldp_group_pool, ({ mhash_unset (&imd->address_to_mldp_index, &m->mcast_address, 0); })); - /* *INDENT-ON* */ pool_free (imd->mldp_group_pool); @@ -326,7 +322,6 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index) rh0->icmp.checksum = 0; - /* *INDENT-OFF* */ pool_foreach (m, imd->mldp_group_pool) { rr.type = m->type; @@ -345,7 +340,6 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index) payload_length += sizeof( icmp6_multicast_address_record_t); } - /* *INDENT-ON* */ rh0->rsvd = 0; rh0->num_addr_records = clib_host_to_net_u16 (num_addr_records); @@ -388,7 +382,6 @@ ip6_mld_timer_event (vlib_main_t * vm, ip6_mld_t *imd; /* Interface ip6 radv info list */ - /* *INDENT-OFF* */ pool_foreach (imd, ip6_mld_pool) { if (!vnet_sw_interface_is_admin_up (vnm, imd->sw_if_index)) @@ -405,7 +398,6 @@ ip6_mld_timer_event (vlib_main_t * vm, 
imd->all_routers_mcast = 1; } } - /* *INDENT-ON* */ return 0; } @@ -433,13 +425,11 @@ ip6_mld_event_process (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_mld_event_process_node) = { .function = ip6_mld_event_process, .name = "ip6-mld-process", .type = VLIB_NODE_TYPE_PROCESS, }; -/* *INDENT-ON* */ static u8 * format_ip6_mld (u8 * s, va_list * args) @@ -453,7 +443,6 @@ format_ip6_mld (u8 * s, va_list * args) s = format (s, "%UJoined group address(es):\n", format_white_space, indent); - /* *INDENT-OFF* */ pool_foreach (m, imd->mldp_group_pool) { s = format (s, "%U%U\n", @@ -461,7 +450,6 @@ format_ip6_mld (u8 * s, va_list * args) format_ip6_address, &m->mcast_address); } - /* *INDENT-ON* */ return (s); } @@ -526,12 +514,10 @@ ip6_mld_init (vlib_main_t * vm) return (NULL); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip6_mld_init) = { .runs_after = VLIB_INITS("icmp6_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip6-nd/ip6_nd.api b/src/vnet/ip6-nd/ip6_nd.api index 0a519c16f7f..3ddf25103c1 100644 --- a/src/vnet/ip6-nd/ip6_nd.api +++ b/src/vnet/ip6-nd/ip6_nd.api @@ -20,7 +20,7 @@ called through a shared memory interface. 
*/ -option version = "1.0.0"; +option version = "1.1.0"; import "vnet/ip/ip_types.api"; import "vnet/interface_types.api"; @@ -106,6 +106,134 @@ autoreply define sw_interface_ip6nd_ra_prefix u32 pref_lifetime; }; +/** \brief IPv6 Router Advertisements prefix entry + @param prefix - prefix to advertise + @param onlink_flag - if true, the prefix can be used for on-link + determination + @param autonomous_flag - if true, the prefix can be used for stateless + address configuration + @param val_lifetime - valid lifetime in seconds (0xffffffff represents + infinity) + @param pref_lifetime - preferred lifetime in seconds (0xffffffff represents + infinity) + @param valid_lifetime_expires - number of seconds in which valid lifetime + expires (zero means never, negative value + means expired this number of seconds ago) + @param pref_lifetime_expires - number of seconds in which preferred + lifetime expires (zero means never, negative + value means expired this number of seconds + ago) + @param decrement_lifetime_flag - if true, decrement valid lifetime and + preferred lifetime + @param no_advertise - if true, the prefix will not be advertised +*/ +typedef ip6nd_ra_prefix +{ + vl_api_prefix_t prefix; + bool onlink_flag; + bool autonomous_flag; + u32 val_lifetime; + u32 pref_lifetime; + f64 valid_lifetime_expires; + f64 pref_lifetime_expires; + bool decrement_lifetime_flag; + bool no_advertise; +}; + +/** \brief Dump IPv6 Router Advertisements details on a per-interface basis + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index to use as a filter (0xffffffff + represents all interfaces) +*/ +define sw_interface_ip6nd_ra_dump +{ + option in_progress; + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; + option vat_help = "[(<if-name>|sw_if_index <if-idx>)]"; +}; + +/** \brief Details on IPv6 Router Advertisements for a single interface + @param 
context - returned sender context, to match reply w/ request + @param sw_if_index - interface index the details are belong to + @param cur_hop_limit - current hop limit + @param adv_managed_flag - if true, enable DHCP for address + @param adv_other_flag - if true, Enable DHCP for other information + @param adv_router_lifetime - lifetime associated with the default router in + seconds (zero indicates that the router is not + a default router) + @param adv_neighbor_reachable_time - number of milliseconds within which a + neighbor is assumed to be reachable + (zero means unspecified) + @param adv_retransmit_interval - number of milliseconds between + retransmitted Neighbor Solicitation + messages (zero means unspecified) + @param adv_link_mtu - MTU that all the nodes on a link use + @param send_radv - if true, send periodic Router Advertisements + @param cease_radv - if true, cease to send periodic Router Advertisements + @param send_unicast - if true, destination address of a Router + Advertisement message will use the source address of + the Router Solicitation message (when available). 
+ Otherwise, multicast address will be used + @param adv_link_layer_address - if true, add link layer address option + @param max_radv_interval - maximum time in seconds allowed between sending + unsolicited multicast Router Advertisements + @param min_radv_interval - minimum time in seconds allowed between sending + unsolicited multicast Router Advertisements + @param last_radv_time - number of seconds since the last time a solicited + Router Advertisement message was sent (zero means + never) + @param last_multicast_time - number of seconds since the last time a + multicast Router Advertisements message was + sent (zero means never) + @param next_multicast_time - number of seconds within which next time a + multicast Router Advertisement message will be + sent (zero means never) + @param initial_adverts_count - number of initial Router Advertisement + messages to send + @param initial_adverts_interval - number of seconds between initial Router + Advertisement messages + @param initial_adverts_sent - if true, all initial Router Advertisement + messages were sent + @param n_advertisements_sent - number of Router Advertisements sent + @param n_solicitations_rcvd - number of Router Solicitations received + @param n_solicitations_dropped - number of Router Solicitations dropped + @param n_prefixes - number of prefix entries + @param prefixes - array of prefix entries +*/ +define sw_interface_ip6nd_ra_details +{ + option in_progress; + u32 context; + vl_api_interface_index_t sw_if_index; + u8 cur_hop_limit; + bool adv_managed_flag; + bool adv_other_flag; + u16 adv_router_lifetime; + u32 adv_neighbor_reachable_time; + u32 adv_retransmit_interval; + u32 adv_link_mtu; + bool send_radv; + bool cease_radv; + bool send_unicast; + bool adv_link_layer_address; + f64 max_radv_interval; + f64 min_radv_interval; + f64 last_radv_time; + f64 last_multicast_time; + f64 next_multicast_time; + u32 initial_adverts_count; + f64 initial_adverts_interval; + bool initial_adverts_sent; + 
u32 n_advertisements_sent; + u32 n_solicitations_rcvd; + u32 n_solicitations_dropped; + u32 n_prefixes; + vl_api_ip6nd_ra_prefix_t prefixes[n_prefixes]; +}; + /** \brief IPv6 ND (mirror) proxy @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/ip6-nd/ip6_nd.c b/src/vnet/ip6-nd/ip6_nd.c index 513d2bf6e87..763aca290e6 100644 --- a/src/vnet/ip6-nd/ip6_nd.c +++ b/src/vnet/ip6-nd/ip6_nd.c @@ -149,7 +149,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 && !ip6_sadd_unspecified)) { - /* *INDENT-OFF* */ ip_neighbor_learn_t learn = { .sw_if_index = sw_if_index0, .ip = { @@ -159,7 +158,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, h0->target_address), } }; - /* *INDENT-ON* */ memcpy (&learn.mac, o0->ethernet_address, sizeof (learn.mac)); ip_neighbor_learn_dp (&learn); } @@ -343,7 +341,6 @@ icmp6_neighbor_advertisement (vlib_main_t * vm, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) = { .function = icmp6_neighbor_solicitation, @@ -374,7 +371,6 @@ VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) = [0] = "ip6-punt", }, }; -/* *INDENT-ON* */ static u8 * format_ip6_nd (u8 * s, va_list * args) @@ -427,12 +423,10 @@ ip6_nd_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip6_nd_init) = { .runs_after = VLIB_INITS("icmp6_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip6-nd/ip6_nd_api.c b/src/vnet/ip6-nd/ip6_nd_api.c index 6520a61f691..5555d8fea64 100644 --- a/src/vnet/ip6-nd/ip6_nd_api.c +++ b/src/vnet/ip6-nd/ip6_nd_api.c @@ -95,13 +95,11 @@ vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach_index (fib_index, im6->fibs) { fib_table_walk (fib_index, FIB_PROTOCOL_IP6, 
api_ip6nd_proxy_fib_table_walk, &ctx); } - /* *INDENT-ON* */ vec_sort_with_function (ctx.indices, fib_entry_cmp_for_sort); @@ -222,6 +220,175 @@ static void } static void +ip6_radv_prefix_encode (f64 now, const ip6_radv_prefix_t *in, + vl_api_ip6nd_ra_prefix_t *out) +{ + fib_prefix_t in_ip6_pfx = { + .fp_addr = { + .ip6 = in->prefix, + }, + .fp_len = in->prefix_len, + .fp_proto = FIB_PROTOCOL_IP6, + }; + + ip_prefix_encode (&in_ip6_pfx, &out->prefix); + + out->onlink_flag = in->adv_on_link_flag; + out->autonomous_flag = in->adv_autonomous_flag; + out->val_lifetime = htonl (in->adv_valid_lifetime_in_secs); + out->pref_lifetime = htonl (in->adv_pref_lifetime_in_secs); + + if (in->adv_valid_lifetime_in_secs != ~0) + { + out->valid_lifetime_expires = + clib_host_to_net_f64 (in->valid_lifetime_expires - now); + } + + if (in->adv_pref_lifetime_in_secs != ~0) + { + out->pref_lifetime_expires = + clib_host_to_net_f64 (in->pref_lifetime_expires - now); + } + + out->decrement_lifetime_flag = in->decrement_lifetime_flag; + out->no_advertise = (in->enabled == 0); +} + +static void +send_sw_interface_ip6nd_ra_details (vl_api_registration_t *reg, u32 context, + ip6_ra_t *radv_info) +{ + vl_api_sw_interface_ip6nd_ra_details_t *rmp = 0; + vl_api_ip6nd_ra_prefix_t *api_radv_pfx; + u32 n_prefixes = pool_elts (radv_info->adv_prefixes_pool); + ip6_radv_prefix_t *radv_pfx; + u32 msg_size = sizeof (*rmp) + n_prefixes * sizeof (*api_radv_pfx); + vlib_main_t *vm = vlib_get_main (); + f64 now = vlib_time_now (vm); + + rmp = vl_msg_api_alloc (msg_size); + if (!rmp) + return; + clib_memset (rmp, 0, msg_size); + rmp->_vl_msg_id = + ntohs (VL_API_SW_INTERFACE_IP6ND_RA_DETAILS + REPLY_MSG_ID_BASE); + rmp->context = context; + + rmp->sw_if_index = htonl (radv_info->sw_if_index); + rmp->cur_hop_limit = radv_info->curr_hop_limit; + rmp->adv_managed_flag = radv_info->adv_managed_flag; + rmp->adv_other_flag = radv_info->adv_other_flag; + rmp->adv_router_lifetime = htons 
(radv_info->adv_router_lifetime_in_sec); + rmp->adv_neighbor_reachable_time = + htonl (radv_info->adv_neighbor_reachable_time_in_msec); + rmp->adv_retransmit_interval = htonl ( + radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations); + rmp->adv_link_mtu = htonl (radv_info->adv_link_mtu); + rmp->send_radv = radv_info->send_radv; + rmp->cease_radv = radv_info->cease_radv; + rmp->send_unicast = radv_info->send_unicast; + rmp->adv_link_layer_address = radv_info->adv_link_layer_address; + rmp->max_radv_interval = clib_host_to_net_f64 (radv_info->max_radv_interval); + rmp->min_radv_interval = clib_host_to_net_f64 (radv_info->min_radv_interval); + + if (radv_info->last_radv_time > 0.0) + { + rmp->last_radv_time = + clib_host_to_net_f64 (now - radv_info->last_radv_time); + } + + if ((radv_info->next_multicast_time - radv_info->last_multicast_time) > 0.0) + { + rmp->last_multicast_time = + clib_host_to_net_f64 (now - radv_info->last_multicast_time); + rmp->next_multicast_time = + clib_host_to_net_f64 (radv_info->next_multicast_time - now); + } + + rmp->initial_adverts_count = htonl (radv_info->initial_adverts_count); + rmp->initial_adverts_interval = + clib_host_to_net_f64 (radv_info->initial_adverts_interval); + rmp->initial_adverts_sent = (radv_info->initial_adverts_sent == 0); + rmp->n_advertisements_sent = htonl (radv_info->n_advertisements_sent); + rmp->n_solicitations_rcvd = htonl (radv_info->n_solicitations_rcvd); + rmp->n_solicitations_dropped = htonl (radv_info->n_solicitations_dropped); + rmp->n_prefixes = htonl (n_prefixes); + + api_radv_pfx = rmp->prefixes; + pool_foreach (radv_pfx, radv_info->adv_prefixes_pool) + { + ip6_radv_prefix_encode (now, radv_pfx, api_radv_pfx); + + api_radv_pfx++; + } + + vl_api_send_msg (reg, (u8 *) rmp); +} + +typedef struct +{ + u32 *sw_if_indices; +} api_dump_ip6_ra_itf_walk_ctx_t; + +static walk_rc_t +api_dump_ip6_ra_itf_walk_fn (u32 sw_if_index, void *arg) +{ + api_dump_ip6_ra_itf_walk_ctx_t *ctx = arg; + + 
vec_add1 (ctx->sw_if_indices, sw_if_index); + + return (WALK_CONTINUE); +} + +static void +vl_api_sw_interface_ip6nd_ra_dump_t_handler ( + vl_api_sw_interface_ip6nd_ra_dump_t *mp) +{ + vl_api_registration_t *reg; + u32 sw_if_index; + ip6_ra_t *radv_info; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + sw_if_index = ntohl (mp->sw_if_index); + + if (sw_if_index == INDEX_INVALID) + { + /* dump all interfaces */ + + api_dump_ip6_ra_itf_walk_ctx_t ctx = { + .sw_if_indices = NULL, + }; + u32 *sw_if_i; + + ip6_ra_itf_walk (api_dump_ip6_ra_itf_walk_fn, &ctx); + + vec_foreach (sw_if_i, ctx.sw_if_indices) + { + radv_info = ip6_ra_get_itf (*sw_if_i); + if (radv_info != NULL) + { + send_sw_interface_ip6nd_ra_details (reg, mp->context, radv_info); + } + } + + vec_free (ctx.sw_if_indices); + } + else + { + /* dump a single interface */ + + radv_info = ip6_ra_get_itf (sw_if_index); + if (radv_info != NULL) + { + send_sw_interface_ip6nd_ra_details (reg, mp->context, radv_info); + } + } +} + +static void vl_api_ip6nd_send_router_solicitation_t_handler (vl_api_ip6nd_send_router_solicitation_t * mp) { @@ -250,7 +417,6 @@ static void static void ip6_ra_handle_report (const ip6_ra_report_t * rap) { - /* *INDENT-OFF* */ vpe_client_registration_t *rp; pool_foreach (rp, vpe_api_main.ip6_ra_events_registrations) @@ -304,7 +470,6 @@ ip6_ra_handle_report (const ip6_ra_report_t * rap) vl_api_send_msg (vl_reg, (u8 *) event); } } - /* *INDENT-ON* */ } static void diff --git a/src/vnet/ip6-nd/ip6_nd_inline.h b/src/vnet/ip6-nd/ip6_nd_inline.h index 5e8b9d6e4c0..c959c94ed1d 100644 --- a/src/vnet/ip6-nd/ip6_nd_inline.h +++ b/src/vnet/ip6-nd/ip6_nd_inline.h @@ -23,6 +23,7 @@ #include <vnet/ip/icmp46_packet.h> #include <vnet/ip/ip6.h> #include <vnet/ip-neighbor/ip_neighbor_types.h> +#include <vnet/ip6-nd/ip6_ra.h> typedef enum { @@ -71,6 +72,13 @@ icmp6_send_neighbor_advertisement ( clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED | 
ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE); + /* if sending RAs is enabled, the "router" flag should be set, + * otherwise, neighbors may believe we have changed from a router + * to a host - RFC 4861 section 4.4 */ + if (ip6_ra_adv_enabled (sw_if_index0)) + icmp6_nsa->advertisement_flags |= + clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER); + icmp6_nsa->icmp.checksum = 0; icmp6_nsa->icmp.checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6_h, &bogus_length); diff --git a/src/vnet/ip6-nd/ip6_nd_proxy.c b/src/vnet/ip6-nd/ip6_nd_proxy.c index 256b48581bb..f7f07cb59f6 100644 --- a/src/vnet/ip6-nd/ip6_nd_proxy.c +++ b/src/vnet/ip6-nd/ip6_nd_proxy.c @@ -23,7 +23,6 @@ static int ip6_nd_proxy_add_del (u32 sw_if_index, const ip6_address_t * addr, u8 is_del) { - /* *INDENT-OFF* */ u32 fib_index; fib_prefix_t pfx = { .fp_len = 128, @@ -35,7 +34,6 @@ ip6_nd_proxy_add_del (u32 sw_if_index, const ip6_address_t * addr, u8 is_del) ip46_address_t nh = { .ip6 = *addr, }; - /* *INDENT-ON* */ fib_index = ip6_fib_table_get_index_for_sw_if_index (sw_if_index); @@ -117,14 +115,12 @@ set_ip6_nd_proxy_cmd (vlib_main_t * vm, return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip6_nd_proxy_command, static) = { .path = "set ip6 nd proxy", .short_help = "set ip6 nd proxy <interface> [del] <host-ip>", .function = set_ip6_nd_proxy_cmd, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip6-nd/ip6_nd_test.c b/src/vnet/ip6-nd/ip6_nd_test.c index 933029d7593..488ca591ba0 100644 --- a/src/vnet/ip6-nd/ip6_nd_test.c +++ b/src/vnet/ip6-nd/ip6_nd_test.c @@ -325,6 +325,63 @@ api_ip6nd_proxy_enable_disable (vat_main_t *vam) return -1; } +static int +api_sw_interface_ip6nd_ra_dump (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + vl_api_sw_interface_ip6nd_ra_dump_t *mp; + vl_api_control_ping_t *mp_ping; + u32 sw_if_index = ~0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != 
UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %u", &sw_if_index)) + ; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + /* Construct the API message */ + M (SW_INTERFACE_IP6ND_RA_DUMP, mp); + mp->sw_if_index = ntohl (sw_if_index); + + /* Send it */ + S (mp); + + /* Use control ping for synchronization */ + PING (&ip6_nd_test_main, mp_ping); + S (mp_ping); + + /* Wait for a reply... */ + W (ret); + + return ret; +} + +static void +vl_api_sw_interface_ip6nd_ra_details_t_handler ( + vl_api_sw_interface_ip6nd_ra_details_t *mp) +{ + vat_main_t *vam = ip6_nd_test_main.vat_main; + u32 sw_if_index; + u8 send_radv; + + /* Read the message */ + sw_if_index = ntohl (mp->sw_if_index); + send_radv = mp->send_radv; + + /* Print it */ + print (vam->ofp, "sw_if_index: %u, send_radv: %s", sw_if_index, + (send_radv ? "on" : "off")); +} + #include <ip6-nd/ip6_nd.api_test.c> /* diff --git a/src/vnet/ip6-nd/ip6_ra.c b/src/vnet/ip6-nd/ip6_ra.c index 53f5a41418c..ffc02e813e2 100644 --- a/src/vnet/ip6-nd/ip6_ra.c +++ b/src/vnet/ip6-nd/ip6_ra.c @@ -30,7 +30,6 @@ * The files contains the API and CLI code for managing IPv6 RAs */ -/* *INDENT-OFF* */ /* Router solicitation packet format for ethernet. 
*/ typedef CLIB_PACKED (struct { @@ -51,7 +50,6 @@ typedef CLIB_PACKED (struct icmp6_neighbor_discovery_prefix_information_option_t prefix[0]; }) icmp6_router_advertisement_packet_t; -/* *INDENT-ON* */ #define DEF_MAX_RADV_INTERVAL 200 #define DEF_MIN_RADV_INTERVAL .75 * DEF_MAX_RADV_INTERVAL @@ -65,95 +63,6 @@ typedef CLIB_PACKED (struct #define MAX_DELAY_BETWEEN_RAS 1800 /* seconds */ #define MAX_RA_DELAY_TIME .5 /* seconds */ -/* advertised prefix option */ -typedef struct -{ - /* basic advertised information */ - ip6_address_t prefix; - u8 prefix_len; - int adv_on_link_flag; - int adv_autonomous_flag; - u32 adv_valid_lifetime_in_secs; - u32 adv_pref_lifetime_in_secs; - - /* advertised values are computed from these times if decrementing */ - f64 valid_lifetime_expires; - f64 pref_lifetime_expires; - - /* local information */ - int enabled; - int deprecated_prefix_flag; - int decrement_lifetime_flag; - -#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */ -#define DEF_ADV_VALID_LIFETIME 2592000 -#define DEF_ADV_PREF_LIFETIME 604800 - - /* extensions are added here, mobile, DNS etc.. */ -} ip6_radv_prefix_t; - -typedef struct ip6_ra_t_ -{ - /* advertised config information, zero means unspecified */ - u8 curr_hop_limit; - int adv_managed_flag; - int adv_other_flag; - u16 adv_router_lifetime_in_sec; - u32 adv_neighbor_reachable_time_in_msec; - u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations; - - /* mtu option */ - u32 adv_link_mtu; - - /* local information */ - u32 sw_if_index; - int send_radv; /* radv on/off on this interface - set by config */ - int cease_radv; /* we are ceasing to send - set byf config */ - int send_unicast; - int adv_link_layer_address; - int prefix_option; - int failed_device_check; - int ref_count; - - /* prefix option */ - ip6_radv_prefix_t *adv_prefixes_pool; - - /* Hash table mapping address to index in interface advertised prefix pool. 
*/ - mhash_t address_to_prefix_index; - - f64 max_radv_interval; - f64 min_radv_interval; - f64 min_delay_between_radv; - f64 max_delay_between_radv; - f64 max_rtr_default_lifetime; - - f64 last_radv_time; - f64 last_multicast_time; - f64 next_multicast_time; - - - u32 initial_adverts_count; - f64 initial_adverts_interval; - u32 initial_adverts_sent; - - /* stats */ - u32 n_advertisements_sent; - u32 n_solicitations_rcvd; - u32 n_solicitations_dropped; - - /* router solicitations sending state */ - u8 keep_sending_rs; /* when true then next fields are valid */ - icmp6_send_router_solicitation_params_t params; - f64 sleep_interval; - f64 due_time; - u32 n_left; - f64 start_time; - vlib_buffer_t *buffer; - - u32 seed; - -} ip6_ra_t; - static ip6_link_delegate_id_t ip6_ra_delegate_id; static ip6_ra_t *ip6_ra_pool; @@ -191,7 +100,7 @@ ip6_ra_report_unregister (ip6_ra_report_notify_t fn) } } -static inline ip6_ra_t * +ip6_ra_t * ip6_ra_get_itf (u32 sw_if_index) { index_t rai; @@ -204,6 +113,28 @@ ip6_ra_get_itf (u32 sw_if_index) return (NULL); } +u8 +ip6_ra_adv_enabled (u32 sw_if_index) +{ + ip6_ra_t *ra; + + ra = ip6_ra_get_itf (sw_if_index); + + return ((ra != NULL) && (ra->send_radv != 0)); +} + +void +ip6_ra_itf_walk (ip6_ra_itf_walk_fn_t fn, void *ctx) +{ + ip6_ra_t *radv_info; + + pool_foreach (radv_info, ip6_ra_pool) + { + if (WALK_STOP == fn (radv_info->sw_if_index, ctx)) + break; + } +} + /* for "syslogging" - use elog for now */ #define foreach_log_level \ _ (DEBUG, "DEBUG") \ @@ -372,7 +303,6 @@ icmp6_router_solicitation (vlib_main_t * vm, if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 && !is_unspecified && !is_link_local)) { - /* *INDENT-OFF* */ ip_neighbor_learn_t learn = { .sw_if_index = sw_if_index0, .ip = { @@ -380,7 +310,6 @@ icmp6_router_solicitation (vlib_main_t * vm, .version = AF_IP6, }, }; - /* *INDENT-ON* */ memcpy (&learn.mac, o0->ethernet_address, sizeof (learn.mac)); ip_neighbor_learn_dp (&learn); } @@ -527,7 +456,6 @@ 
icmp6_router_solicitation (vlib_main_t * vm, /* add advertised prefix options */ ip6_radv_prefix_t *pr_info; - /* *INDENT-OFF* */ pool_foreach (pr_info, radv_info->adv_prefixes_pool) { if(pr_info->enabled && @@ -593,7 +521,6 @@ icmp6_router_solicitation (vlib_main_t * vm, } } - /* *INDENT-ON* */ /* add additional options before here */ @@ -701,7 +628,6 @@ icmp6_router_solicitation (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = { .function = icmp6_router_solicitation, @@ -718,7 +644,6 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output", }, }; -/* *INDENT-ON* */ /* validate advertised info for consistancy (see RFC-4861 section 6.2.7) - log any inconsistencies, packet will always be dropped */ static_always_inline uword @@ -1011,7 +936,6 @@ icmp6_router_advertisement (vlib_main_t * vm, prefix->prefix.fp_proto = FIB_PROTOCOL_IP6; /* look for matching prefix - if we our advertising it, it better be consistant */ - /* *INDENT-OFF* */ pool_foreach (pr_info, radv_info->adv_prefixes_pool) { @@ -1042,7 +966,6 @@ icmp6_router_advertisement (vlib_main_t * vm, } break; } - /* *INDENT-ON* */ break; } default: @@ -1076,7 +999,6 @@ icmp6_router_advertisement (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) = { .function = icmp6_router_advertisement, @@ -1091,7 +1013,6 @@ VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) = [0] = "ip6-drop", }, }; -/* *INDENT-ON* */ static inline f64 random_f64_from_to (f64 from, f64 to) @@ -1281,14 +1202,12 @@ send_rs_process (vlib_main_t * vm, vlib_node_runtime_t * rt, do { due_time = current_time + 1e9; - /* *INDENT-OFF* */ pool_foreach (radv_info, ip6_ra_pool) { if (check_send_rs (vm, radv_info, current_time, &dt) && (dt < due_time)) due_time = dt; } - /* *INDENT-ON* */ current_time = 
vlib_time_now (vm); } while (due_time < current_time); @@ -1299,13 +1218,11 @@ send_rs_process (vlib_main_t * vm, vlib_node_runtime_t * rt, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_rs_process_node) = { .function = send_rs_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "ip6-rs-process", }; -/* *INDENT-ON* */ void icmp6_send_router_solicitation (vlib_main_t * vm, u32 sw_if_index, u8 stop, @@ -1413,12 +1330,10 @@ ip6_ra_delegate_disable (index_t rai) radv_info = pool_elt_at_index (ip6_ra_pool, rai); /* clean up prefix and MDP pools */ - /* *INDENT-OFF* */ pool_flush(p, radv_info->adv_prefixes_pool, ({ mhash_unset (&radv_info->address_to_prefix_index, &p->prefix, 0); })); - /* *INDENT-ON* */ pool_free (radv_info->adv_prefixes_pool); @@ -1440,12 +1355,10 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len, ip6_address_mask_from_width (&mask, prefix_len); vec_reset_length (radv_indices); - /* *INDENT-OFF* */ pool_foreach (radv_info, ip6_ra_pool) { vec_add1 (radv_indices, radv_info - ip6_ra_pool); } - /* *INDENT-ON* */ /* * If we have another customer for this prefix, @@ -1460,7 +1373,6 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len, if (radv_info->sw_if_index == primary_sw_if_index) continue; - /* *INDENT-OFF* */ pool_foreach (this_prefix, radv_info->adv_prefixes_pool) { if (this_prefix->prefix_len == prefix_len @@ -1483,7 +1395,6 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len, clib_warning ("ip6_neighbor_ra_prefix returned %d", rv); } } - /* *INDENT-ON*/ } } @@ -1504,7 +1415,6 @@ ip6_ra_process_timer_event (vlib_main_t * vm, f64 now = vlib_time_now (vm); /* Interface ip6 radv info list */ - /* *INDENT-OFF* */ pool_foreach (radv_info, ip6_ra_pool) { if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index)) @@ -1594,7 +1504,6 @@ ip6_ra_process_timer_event (vlib_main_t * vm, } } } - /* *INDENT-ON* */ if (f) { @@ -1651,14 +1560,12 @@ ip6_ra_event_process 
(vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_ra_process_node) = { .function = ip6_ra_event_process, .name = "ip6-ra-process", .type = VLIB_NODE_TYPE_PROCESS, }; -/* *INDENT-ON* */ static void ip6_ra_signal_report (ip6_ra_report_t * r) @@ -1700,6 +1607,9 @@ ip6_ra_config (vlib_main_t * vm, u32 sw_if_index, if (!radv_info) return (VNET_API_ERROR_IP6_NOT_ENABLED); + /* Start off believing that we're going to send radv's */ + radv_info->send_radv = 1; + if ((max_interval != 0) && (min_interval == 0)) min_interval = .75 * max_interval; @@ -2117,14 +2027,12 @@ format_ip6_ra (u8 * s, va_list * args) indent += 2; - /* *INDENT-OFF* */ pool_foreach (p, radv_info->adv_prefixes_pool) { s = format (s, "%Uprefix %U, length %d\n", format_white_space, indent+2, format_ip6_address, &p->prefix, p->prefix_len); } - /* *INDENT-ON* */ s = format (s, "%UMTU is %d\n", format_white_space, indent, radv_info->adv_link_mtu); @@ -2300,14 +2208,12 @@ format_ip6_ra (u8 * s, va_list * args) * Example of how to delete a prefix: * @cliexcmd{ip6 nd GigabitEthernet2/0/0 no prefix fe80::fe:28ff:fe9c:75b3/64} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_nd_command, static) = { .path = "ip6 nd", .short_help = "ip6 nd <interface> ...", .function = ip6_ra_cmd, }; -/* *INDENT-ON* */ /** * VFT for registering as a delegate to an IP6 link @@ -2333,12 +2239,10 @@ ip6_ra_init (vlib_main_t * vm) return (NULL); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip6_ra_init) = { .runs_after = VLIB_INITS("icmp6_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip6-nd/ip6_ra.h b/src/vnet/ip6-nd/ip6_ra.h index d09e8c0c975..958845b0a55 100644 --- a/src/vnet/ip6-nd/ip6_ra.h +++ b/src/vnet/ip6-nd/ip6_ra.h @@ -21,6 +21,105 @@ #include <vnet/fib/fib_types.h> +/* advertised prefix option */ +typedef struct +{ + /* basic advertised information */ + ip6_address_t prefix; + u8 prefix_len; + int adv_on_link_flag; + int adv_autonomous_flag; 
+ u32 adv_valid_lifetime_in_secs; + u32 adv_pref_lifetime_in_secs; + + /* advertised values are computed from these times if decrementing */ + f64 valid_lifetime_expires; + f64 pref_lifetime_expires; + + /* local information */ + int enabled; + int deprecated_prefix_flag; + int decrement_lifetime_flag; + +#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */ +#define DEF_ADV_VALID_LIFETIME 2592000 +#define DEF_ADV_PREF_LIFETIME 604800 + + /* extensions are added here, mobile, DNS etc.. */ +} ip6_radv_prefix_t; + +typedef struct +{ + u32 irt; + u32 mrt; + u32 mrc; + u32 mrd; +} icmp6_send_router_solicitation_params_t; + +typedef struct ip6_ra_t_ +{ + /* advertised config information, zero means unspecified */ + u8 curr_hop_limit; + int adv_managed_flag; + int adv_other_flag; + u16 adv_router_lifetime_in_sec; + u32 adv_neighbor_reachable_time_in_msec; + u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations; + + /* mtu option */ + u32 adv_link_mtu; + + /* local information */ + u32 sw_if_index; + int send_radv; /* radv on/off on this interface - set by config */ + int cease_radv; /* we are ceasing to send - set by config */ + int send_unicast; + int adv_link_layer_address; + int prefix_option; + int failed_device_check; + int ref_count; + + /* prefix option */ + ip6_radv_prefix_t *adv_prefixes_pool; + + /* Hash table mapping address to index in interface advertised prefix pool. 
+ */ + mhash_t address_to_prefix_index; + + f64 max_radv_interval; + f64 min_radv_interval; + f64 min_delay_between_radv; + f64 max_delay_between_radv; + f64 max_rtr_default_lifetime; + + f64 last_radv_time; + f64 last_multicast_time; + f64 next_multicast_time; + + u32 initial_adverts_count; + f64 initial_adverts_interval; + u32 initial_adverts_sent; + + /* stats */ + u32 n_advertisements_sent; + u32 n_solicitations_rcvd; + u32 n_solicitations_dropped; + + /* router solicitations sending state */ + u8 keep_sending_rs; /* when true then next fields are valid */ + icmp6_send_router_solicitation_params_t params; + f64 sleep_interval; + f64 due_time; + u32 n_left; + f64 start_time; + vlib_buffer_t *buffer; + + u32 seed; + +} ip6_ra_t; + +extern ip6_ra_t *ip6_ra_get_itf (u32 sw_if_index); + extern int ip6_ra_config (vlib_main_t * vm, u32 sw_if_index, u8 suppress, u8 managed, u8 other, u8 ll_option, u8 send_unicast, u8 cease, @@ -35,13 +134,9 @@ extern int ip6_ra_prefix (vlib_main_t * vm, u32 sw_if_index, u8 off_link, u8 no_autoconfig, u8 no_onlink, u8 is_no); -typedef struct -{ - u32 irt; - u32 mrt; - u32 mrc; - u32 mrd; -} icmp6_send_router_solicitation_params_t; +typedef walk_rc_t (*ip6_ra_itf_walk_fn_t) (u32 sw_if_index, void *ctx); + +extern void ip6_ra_itf_walk (ip6_ra_itf_walk_fn_t fn, void *ctx); extern void icmp6_send_router_solicitation (vlib_main_t * vm, u32 sw_if_index, @@ -82,7 +177,7 @@ extern void ip6_ra_update_secondary_radv_info (ip6_address_t * address, u32 primary_sw_if_index, u32 valid_time, u32 preferred_time); - +extern u8 ip6_ra_adv_enabled (u32 sw_if_index); #endif /* included_ip6_neighbor_h */ /* diff --git a/src/vnet/ip6-nd/rd_cp.c b/src/vnet/ip6-nd/rd_cp.c index 13fd90db288..5d419286051 100644 --- a/src/vnet/ip6-nd/rd_cp.c +++ b/src/vnet/ip6-nd/rd_cp.c @@ -72,8 +72,6 @@ enum RD_CP_EVENT_INTERRUPT, }; -#define vl_api_ip6_nd_address_autoconfig_t_print vl_noop_handler - static void router_solicitation_start_stop (u32 sw_if_index, u8 start) { @@ 
-262,7 +260,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r) { router_lifetime_in_sec = r->router_lifetime_in_sec; u8 route_already_present = 0; - /* *INDENT-OFF* */ pool_foreach (default_route, rm->default_route_pool) { if (default_route->sw_if_index != sw_if_index) @@ -276,7 +273,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r) goto default_route_pool_foreach_out; } } - /* *INDENT-ON* */ default_route_pool_foreach_out: if (!route_already_present) @@ -333,7 +329,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r) continue; u8 address_already_present = 0; - /* *INDENT-OFF* */ pool_foreach (slaac_address, rm->slaac_address_pool) { if (slaac_address->sw_if_index != sw_if_index) @@ -349,7 +344,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r) goto slaac_address_pool_foreach_out; } } - /* *INDENT-ON* */ slaac_address_pool_foreach_out: if (address_already_present) @@ -414,7 +408,6 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) * we do not use pool_foreach() to iterate over pool elements here * as we are removing elements inside the loop body */ - /* *INDENT-OFF* */ pool_foreach_index (index, rm->slaac_address_pool) { slaac_address = pool_elt_at_index(rm->slaac_address_pool, index); @@ -442,7 +435,6 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) else remove_default_route (vm, default_route); } - /* *INDENT-ON* */ current_time = vlib_time_now (vm); } while (due_time < current_time); @@ -453,13 +445,11 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (rd_cp_process_node) = { .function = rd_cp_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "rd-cp-process", }; -/* *INDENT-ON* */ static void interrupt_process (void) @@ -514,21 +504,17 @@ rd_cp_set_address_autoconfig (u32 sw_if_index, if (if_config->enabled && !enable) { - /* *INDENT-OFF* */ pool_foreach (slaac_address, rm->slaac_address_pool) { 
remove_slaac_address (vm, slaac_address); } - /* *INDENT-ON* */ } if (if_config->install_default_routes && !install_default_routes) { - /* *INDENT-OFF* */ pool_foreach (default_route, rm->default_route_pool) { remove_default_route (vm, default_route); } - /* *INDENT-ON* */ } if_config->enabled = enable; @@ -588,13 +574,11 @@ ip6_nd_address_autoconfig (vlib_main_t * vm, * @cliexcmd{ip6 nd address autoconfig GigabitEthernet2/0/0 disable} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_nd_address_autoconfig_command, static) = { .path = "ip6 nd address autoconfig", .short_help = "ip6 nd address autoconfig <interface> [default-route|disable]", .function = ip6_nd_address_autoconfig, }; -/* *INDENT-ON* */ static clib_error_t * rd_cp_init (vlib_main_t * vm) diff --git a/src/vnet/ipfix-export/flow_report.c b/src/vnet/ipfix-export/flow_report.c index de4c72c437f..4eb93520ed8 100644 --- a/src/vnet/ipfix-export/flow_report.c +++ b/src/vnet/ipfix-export/flow_report.c @@ -579,13 +579,11 @@ flow_report_process (vlib_main_t * vm, return 0; /* not so much */ } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (flow_report_process_node) = { .function = flow_report_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "flow-report-process", }; -/* *INDENT-ON* */ int vnet_flow_report_add_del (ipfix_exporter_t *exp, @@ -862,7 +860,6 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = { .path = "set ipfix exporter", .short_help = "set ipfix exporter " @@ -873,7 +870,6 @@ VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = { "[udp-checksum]", .function = set_ipfix_exporter_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * @@ -885,13 +881,11 @@ ipfix_flush_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipfix_flush_command, static) = { .path = "ipfix flush", .short_help = "flush the current ipfix data [for make test]", .function = ipfix_flush_command_fn, }; -/* 
*INDENT-ON* */ static clib_error_t * flow_report_init (vlib_main_t * vm) diff --git a/src/vnet/ipfix-export/flow_report_classify.c b/src/vnet/ipfix-export/flow_report_classify.c index ea6ba5cab58..9e1b99f252d 100644 --- a/src/vnet/ipfix-export/flow_report_classify.c +++ b/src/vnet/ipfix-export/flow_report_classify.c @@ -179,7 +179,6 @@ ipfix_classify_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp, tcpudp_header_t *tcpudp; udp_header_t *udp; int field_index; - u32 records_this_buffer; u16 new_l0, old_l0; ip_csum_t sum0; vlib_main_t *vm = frm->vlib_main; @@ -251,7 +250,6 @@ ipfix_classify_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp, next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp); record_offset = next_offset; - records_this_buffer = 0; } field_index = 0; @@ -275,7 +273,6 @@ ipfix_classify_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp, sizeof (packets)); next_offset += sizeof (packets); } - records_this_buffer++; stream->sequence_number++; /* Next record will have the same size as this record */ @@ -483,13 +480,11 @@ ipfix_classify_table_add_del_command_fn (vlib_main_t * vm, return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipfix_classify_table_add_del_command, static) = { .path = "ipfix classify table", .short_help = "ipfix classify table add|del <table-index>", .function = ipfix_classify_table_add_del_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * set_ipfix_classify_stream_command_fn (vlib_main_t * vm, @@ -526,14 +521,12 @@ set_ipfix_classify_stream_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ipfix_classify_stream_command, static) = { .path = "set ipfix classify stream", .short_help = "set ipfix classify stream" "[domain <domain-id>] [src-port <src-port>]", .function = set_ipfix_classify_stream_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * flow_report_classify_init (vlib_main_t * vm) diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c index 
600f5421125..aaf21468d1e 100644 --- a/src/vnet/ipip/ipip.c +++ b/src/vnet/ipip/ipip.c @@ -148,7 +148,14 @@ ipip64_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b, ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); tunnel_encap_fixup_6o4 (flags, ((ip6_header_t *) (ip4 + 1)), ip4); - ip4->checksum = ip4_header_checksum (ip4); + if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO)) + { + vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data; + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_IPIP); + } + else + ip4->checksum = ip4_header_checksum (ip4); } static void @@ -164,7 +171,14 @@ ipip44_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b, ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4); - ip4->checksum = ip4_header_checksum (ip4); + if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO)) + { + vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data; + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_IPIP); + } + else + ip4->checksum = ip4_header_checksum (ip4); } static void @@ -185,6 +199,12 @@ ipip46_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b, clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip6)); tunnel_encap_fixup_4o6 (flags, b, ((ip4_header_t *) (ip6 + 1)), ip6); + + if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO)) + { + vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data; + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP); + } } static void @@ -205,6 +225,12 @@ ipip66_fixup (vlib_main_t * vm, clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip6)); tunnel_encap_fixup_6o6 (flags, ip6 + 1, ip6); + + if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO)) + { + vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data; + 
vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP); + } } static void @@ -226,6 +252,12 @@ ipipm6_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b, clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip6)); tunnel_encap_fixup_mplso6 (flags, b, (mpls_unicast_header_t *) (ip6 + 1), ip6); + + if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO)) + { + vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data; + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP); + } } static void @@ -245,7 +277,15 @@ ipipm4_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b, ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip4)); tunnel_encap_fixup_mplso4 (flags, (mpls_unicast_header_t *) (ip4 + 1), ip4); - ip4->checksum = ip4_header_checksum (ip4); + + if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO)) + { + vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data; + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_IPIP); + } + else + ip4->checksum = ip4_header_checksum (ip4); } static void @@ -269,7 +309,6 @@ ipip_tunnel_stack (adj_index_t ai) } else { - /* *INDENT-OFF* */ fib_prefix_t dst = { .fp_len = t->transport == IPIP_TRANSPORT_IP6 ? 128 : 32, .fp_proto = (t->transport == IPIP_TRANSPORT_IP6 ? 
@@ -277,7 +316,6 @@ ipip_tunnel_stack (adj_index_t ai) FIB_PROTOCOL_IP4), .fp_addr = t->tunnel_dst }; - /* *INDENT-ON* */ adj_midchain_delegate_stack (ai, t->fib_index, &dst); } @@ -512,7 +550,6 @@ ipip_tunnel_desc (u32 sw_if_index, return (0); } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS(ipip_device_class) = { .name = "IPIP tunnel device", .format_device_name = format_ipip_tunnel_name, @@ -542,7 +579,6 @@ VNET_HW_INTERFACE_CLASS(mipip_hw_interface_class) = { .update_adjacency = mipip_update_adj, .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA, }; -/* *INDENT-ON* */ ipip_tunnel_t * ipip_tunnel_db_find (const ipip_tunnel_key_t * key) diff --git a/src/vnet/ipip/ipip_api.c b/src/vnet/ipip/ipip_api.c index 50b6731af44..2cb7bdf8dae 100644 --- a/src/vnet/ipip/ipip_api.c +++ b/src/vnet/ipip/ipip_api.c @@ -86,12 +86,10 @@ vl_api_ipip_add_tunnel_t_handler (vl_api_ipip_add_tunnel_t * mp) } out: - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_IPIP_ADD_TUNNEL_REPLY, ({ rmp->sw_if_index = ntohl(sw_if_index); })); - /* *INDENT-ON* */ } static void @@ -105,29 +103,45 @@ vl_api_ipip_del_tunnel_t_handler (vl_api_ipip_del_tunnel_t * mp) REPLY_MACRO (VL_API_IPIP_DEL_TUNNEL_REPLY); } +static vl_api_tunnel_mode_t +ipip_tunnel_mode_encode (ipip_mode_t mode) +{ + switch (mode) + { + case IPIP_MODE_P2P: + return TUNNEL_API_MODE_P2P; + case IPIP_MODE_P2MP: + return TUNNEL_API_MODE_MP; + case IPIP_MODE_6RD: + return TUNNEL_API_MODE_P2P; + default: + return TUNNEL_API_MODE_P2P; + } +} + static void send_ipip_tunnel_details (ipip_tunnel_t * t, vl_api_ipip_tunnel_dump_t * mp) { ipip_main_t *im = &ipip_main; vl_api_ipip_tunnel_details_t *rmp; bool is_ipv6 = t->transport == IPIP_TRANSPORT_IP6 ? true : false; + ip46_type_t ip_type = is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4; fib_table_t *ft; - ft = fib_table_get (t->fib_index, (is_ipv6 ? 
FIB_PROTOCOL_IP6 : - FIB_PROTOCOL_IP4)); - - /* *INDENT-OFF* */ - REPLY_MACRO_DETAILS2(VL_API_IPIP_TUNNEL_DETAILS, - ({ - ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src); - ip_address_encode (&t->tunnel_dst, IP46_TYPE_ANY, &rmp->tunnel.dst); - rmp->tunnel.table_id = htonl (ft->ft_table_id); - rmp->tunnel.instance = htonl (t->user_instance); - rmp->tunnel.sw_if_index = htonl (t->sw_if_index); - rmp->tunnel.dscp = ip_dscp_encode(t->dscp); - rmp->tunnel.flags = tunnel_encap_decap_flags_encode(t->flags); - })); - /* *INDENT-ON* */ + ft = fib_table_get (t->fib_index, + (is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4)); + + REPLY_MACRO_DETAILS2 ( + VL_API_IPIP_TUNNEL_DETAILS, ({ + ip_address_encode (&t->tunnel_src, ip_type, &rmp->tunnel.src); + ip_address_encode (&t->tunnel_dst, ip_type, &rmp->tunnel.dst); + rmp->tunnel.table_id = htonl (ft->ft_table_id); + rmp->tunnel.instance = htonl (t->user_instance); + rmp->tunnel.sw_if_index = htonl (t->sw_if_index); + rmp->tunnel.dscp = ip_dscp_encode (t->dscp); + rmp->tunnel.flags = tunnel_encap_decap_flags_encode (t->flags); + rmp->tunnel.mode = ipip_tunnel_mode_encode (t->mode); + })); } static void @@ -141,12 +155,10 @@ vl_api_ipip_tunnel_dump_t_handler (vl_api_ipip_tunnel_dump_t * mp) if (sw_if_index == ~0) { - /* *INDENT-OFF* */ pool_foreach (t, im->tunnels) { send_ipip_tunnel_details(t, mp); } - /* *INDENT-ON* */ } else { @@ -185,12 +197,10 @@ vl_api_ipip_6rd_add_tunnel_t_handler (vl_api_ipip_6rd_add_tunnel_t * mp) &sixrd_tunnel_index); } - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IPIP_6RD_ADD_TUNNEL_REPLY, ({ rmp->sw_if_index = htonl (sixrd_tunnel_index); })); - /* *INDENT-ON* */ } static void diff --git a/src/vnet/ipip/ipip_cli.c b/src/vnet/ipip/ipip_cli.c index 1a8e8896965..606a1f53f9a 100644 --- a/src/vnet/ipip/ipip_cli.c +++ b/src/vnet/ipip/ipip_cli.c @@ -197,7 +197,6 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(create_ipip_tunnel_command, static) = { .path = "create ipip tunnel", 
.short_help = "create ipip tunnel src <addr> dst <addr> [instance <n>] " @@ -209,7 +208,6 @@ VLIB_CLI_COMMAND(delete_ipip_tunnel_command, static) = { .short_help = "delete ipip tunnel sw_if_index <sw_if_index>", .function = delete_ipip_tunnel_command_fn, }; -/* *INDENT-ON* */ static u8 * format_ipip_tunnel (u8 * s, va_list * args) @@ -274,10 +272,8 @@ show_ipip_tunnel_command_fn (vlib_main_t * vm, if (ti == ~0) { - /* *INDENT-OFF* */ pool_foreach (t, gm->tunnels) {vlib_cli_output(vm, "%U", format_ipip_tunnel, t); } - /* *INDENT-ON* */ } else { @@ -290,12 +286,10 @@ show_ipip_tunnel_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(show_ipip_tunnel_command, static) = { .path = "show ipip tunnel", .function = show_ipip_tunnel_command_fn, }; -/* *INDENT-ON* */ static u8 * format_ipip_tunnel_key (u8 * s, va_list * args) @@ -318,12 +312,10 @@ ipip_tunnel_hash_show (vlib_main_t * vm, ipip_tunnel_key_t *key; u32 index; - /* *INDENT-OFF* */ hash_foreach(key, index, im->tunnel_by_key, ({ vlib_cli_output (vm, " %U -> %d", format_ipip_tunnel_key, key, index); })); - /* *INDENT-ON* */ return NULL; } @@ -331,14 +323,12 @@ ipip_tunnel_hash_show (vlib_main_t * vm, /** * show IPSEC tunnel protection hash tables */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipip_tunnel_hash_show_node, static) = { .path = "show ipip tunnel-hash", .function = ipip_tunnel_hash_show, .short_help = "show ipip tunnel-hash", }; -/* *INDENT-ON* */ static clib_error_t * create_sixrd_tunnel_command_fn (vlib_main_t * vm, @@ -464,7 +454,6 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(create_sixrd_tunnel_command, static) = { .path = "create 6rd tunnel", .short_help = "create 6rd tunnel ip6-pfx <ip6-pfx> ip4-pfx <ip4-pfx> " @@ -477,7 +466,6 @@ VLIB_CLI_COMMAND(delete_sixrd_tunnel_command, static) = { .short_help = "delete 6rd tunnel sw_if_index <sw_if_index>", .function = delete_sixrd_tunnel_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON 
diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c index b008a21a20f..a289cc885df 100644 --- a/src/vnet/ipip/node.c +++ b/src/vnet/ipip/node.c @@ -260,7 +260,6 @@ static char *ipip_error_strings[] = { #undef _ }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ipip4_input_node) = { .name = "ipip4-input", /* Takes a vector of packets. */ @@ -293,7 +292,6 @@ VLIB_REGISTER_NODE(ipip6_input_node) = { .format_trace = format_ipip_rx_trace, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ipip/sixrd.c b/src/vnet/ipip/sixrd.c index 3fb7b52dca6..6e0bfb042cc 100644 --- a/src/vnet/ipip/sixrd.c +++ b/src/vnet/ipip/sixrd.c @@ -250,7 +250,6 @@ sixrd_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) return /* no error */ 0; } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS(sixrd_hw_interface_class) = { .name = "ip6ip-6rd", .build_rewrite = sixrd_build_rewrite, @@ -265,7 +264,6 @@ VNET_DEVICE_CLASS(sixrd_device_class) = { #endif } ; -/* *INDENT-ON* */ int sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len, @@ -341,7 +339,6 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len, ip6_sw_interface_enable_disable (t->sw_if_index, true); /* Create IPv6 route/adjacency */ - /* *INDENT-OFF* */ fib_prefix_t pfx6 = { .fp_proto = FIB_PROTOCOL_IP6, .fp_len = t->sixrd.ip6_prefix_len, @@ -349,7 +346,6 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len, .ip6 = t->sixrd.ip6_prefix, }, }; - /* *INDENT-ON* */ fib_table_lock (ip6_fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_6RD); fib_table_entry_update_one_path (ip6_fib_index, &pfx6, FIB_SOURCE_6RD, @@ -386,7 +382,6 @@ sixrd_del_tunnel (u32 sw_if_index) return -1; } - /* *INDENT-OFF* */ fib_prefix_t pfx6 = { .fp_proto = FIB_PROTOCOL_IP6, .fp_len = t->sixrd.ip6_prefix_len, @@ -394,7 +389,6 @@ sixrd_del_tunnel (u32 sw_if_index) .ip6 = t->sixrd.ip6_prefix, }, }; - /* *INDENT-ON* */ fib_table_entry_path_remove (t->sixrd.ip6_fib_index, &pfx6, FIB_SOURCE_6RD, 
diff --git a/src/vnet/ipsec/ah.h b/src/vnet/ipsec/ah.h index d0b4c21a4bc..450c9cfd6dc 100644 --- a/src/vnet/ipsec/ah.h +++ b/src/vnet/ipsec/ah.h @@ -17,6 +17,7 @@ #include <vnet/ip/ip.h> #include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/ipsec.api_enum.h> typedef struct { @@ -29,19 +30,67 @@ typedef struct } ah_header_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { ip4_header_t ip4; ah_header_t ah; }) ip4_and_ah_header_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { ip6_header_t ip6; ah_header_t ah; }) ip6_and_ah_header_t; -/* *INDENT-ON* */ + +always_inline u32 +ah_encrypt_err_to_sa_err (u32 err) +{ + switch (err) + { + case AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR: + return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR; + case AH_ENCRYPT_ERROR_SEQ_CYCLED: + return IPSEC_SA_ERROR_SEQ_CYCLED; + } + return ~0; +} + +always_inline u32 +ah_decrypt_err_to_sa_err (u32 err) +{ + switch (err) + { + case AH_DECRYPT_ERROR_DECRYPTION_FAILED: + return IPSEC_SA_ERROR_DECRYPTION_FAILED; + case AH_DECRYPT_ERROR_INTEG_ERROR: + return IPSEC_SA_ERROR_INTEG_ERROR; + case AH_DECRYPT_ERROR_NO_TAIL_SPACE: + return IPSEC_SA_ERROR_NO_TAIL_SPACE; + case AH_DECRYPT_ERROR_DROP_FRAGMENTS: + return IPSEC_SA_ERROR_DROP_FRAGMENTS; + case AH_DECRYPT_ERROR_REPLAY: + return IPSEC_SA_ERROR_REPLAY; + } + return ~0; +} + +always_inline void +ah_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, + u32 thread_index, u32 err, u16 index, u16 *nexts, + u16 drop_next, u32 sa_index) +{ + ipsec_set_next_index (b, node, thread_index, err, + ah_encrypt_err_to_sa_err (err), index, nexts, + drop_next, sa_index); +} + +always_inline void +ah_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, + u32 thread_index, u32 err, u16 index, u16 *nexts, + u16 drop_next, u32 sa_index) +{ + ipsec_set_next_index (b, node, thread_index, err, + ah_decrypt_err_to_sa_err (err), index, nexts, + drop_next, sa_index); +} always_inline u8 ah_calc_icv_padding_len (u8 icv_size, int 
is_ipv6) diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c index c9209d6ceb0..918ebf03f67 100644 --- a/src/vnet/ipsec/ah_decrypt.c +++ b/src/vnet/ipsec/ah_decrypt.c @@ -23,7 +23,6 @@ #include <vnet/ipsec/esp.h> #include <vnet/ipsec/ah.h> #include <vnet/ipsec/ipsec_io.h> -#include <vnet/ipsec/ipsec.api_enum.h> #define foreach_ah_decrypt_next \ _(DROP, "error-drop") \ @@ -104,8 +103,9 @@ ah_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node, if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED) { u32 bi = op->user_data; - b[bi]->error = node->errors[AH_DECRYPT_ERROR_INTEG_ERROR]; - nexts[bi] = AH_DECRYPT_NEXT_DROP; + ah_decrypt_set_next_index ( + b[bi], node, vm->thread_index, AH_DECRYPT_ERROR_INTEG_ERROR, bi, + nexts, AH_DECRYPT_NEXT_DROP, vnet_buffer (b[bi])->ipsec.sad_index); n_fail--; } op++; @@ -128,6 +128,7 @@ ah_decrypt_inline (vlib_main_t * vm, from = vlib_frame_vector_args (from_frame); n_left = from_frame->n_vectors; ipsec_sa_t *sa0 = 0; + bool anti_replay_result; u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0; clib_memset (pkt_data, 0, VLIB_FRAME_SIZE * sizeof (pkt_data[0])); @@ -145,8 +146,7 @@ ah_decrypt_inline (vlib_main_t * vm, { if (current_sa_index != ~0) vlib_increment_combined_counter (&ipsec_sa_counters, thread_index, - current_sa_index, - current_sa_pkts, + current_sa_index, current_sa_pkts, current_sa_bytes); current_sa_index = vnet_buffer (b[0])->ipsec.sad_index; sa0 = ipsec_sa_get (current_sa_index); @@ -156,7 +156,7 @@ ah_decrypt_inline (vlib_main_t * vm, thread_index, current_sa_index); } - if (PREDICT_FALSE (~0 == sa0->thread_index)) + if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) { /* this is the first packet to use this SA, claim the SA * for this thread. 
this could happen simultaneously on @@ -190,8 +190,9 @@ ah_decrypt_inline (vlib_main_t * vm, { if (ip4_is_fragment (ih4)) { - b[0]->error = node->errors[AH_DECRYPT_ERROR_DROP_FRAGMENTS]; - next[0] = AH_DECRYPT_NEXT_DROP; + ah_decrypt_set_next_index ( + b[0], node, vm->thread_index, AH_DECRYPT_ERROR_DROP_FRAGMENTS, + 0, next, AH_DECRYPT_NEXT_DROP, current_sa_index); goto next; } pd->ip_hdr_size = ip4_header_bytes (ih4); @@ -201,11 +202,21 @@ ah_decrypt_inline (vlib_main_t * vm, pd->seq = clib_host_to_net_u32 (ah0->seq_no); /* anti-replay check */ - if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, ~0, false, - &pd->seq_hi)) + if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0))) { - b[0]->error = node->errors[AH_DECRYPT_ERROR_REPLAY]; - next[0] = AH_DECRYPT_NEXT_DROP; + anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( + sa0, pd->seq, ~0, false, &pd->seq_hi, true); + } + else + { + anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( + sa0, pd->seq, ~0, false, &pd->seq_hi, false); + } + if (anti_replay_result) + { + ah_decrypt_set_next_index (b[0], node, vm->thread_index, + AH_DECRYPT_ERROR_REPLAY, 0, next, + AH_DECRYPT_NEXT_DROP, current_sa_index); goto next; } @@ -220,8 +231,9 @@ ah_decrypt_inline (vlib_main_t * vm, pd->current_data + b[0]->current_length + sizeof (u32) > buffer_data_size)) { - b[0]->error = node->errors[AH_DECRYPT_ERROR_NO_TAIL_SPACE]; - next[0] = AH_DECRYPT_NEXT_DROP; + ah_decrypt_set_next_index ( + b[0], node, vm->thread_index, AH_DECRYPT_ERROR_NO_TAIL_SPACE, + 0, next, AH_DECRYPT_NEXT_DROP, current_sa_index); goto next; } @@ -304,23 +316,43 @@ ah_decrypt_inline (vlib_main_t * vm, if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE)) { /* redo the anti-reply check. 
see esp_decrypt for details */ - if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, - true, NULL)) + if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0))) { - b[0]->error = node->errors[AH_DECRYPT_ERROR_REPLAY]; - next[0] = AH_DECRYPT_NEXT_DROP; - goto trace; + if (ipsec_sa_anti_replay_and_sn_advance ( + sa0, pd->seq, pd->seq_hi, true, NULL, true)) + { + ah_decrypt_set_next_index ( + b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0, + next, AH_DECRYPT_NEXT_DROP, pd->sa_index); + goto trace; + } + n_lost = ipsec_sa_anti_replay_advance ( + sa0, thread_index, pd->seq, pd->seq_hi, true); + } + else + { + if (ipsec_sa_anti_replay_and_sn_advance ( + sa0, pd->seq, pd->seq_hi, true, NULL, false)) + { + ah_decrypt_set_next_index ( + b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0, + next, AH_DECRYPT_NEXT_DROP, pd->sa_index); + goto trace; + } + n_lost = ipsec_sa_anti_replay_advance ( + sa0, thread_index, pd->seq, pd->seq_hi, false); } - n_lost = ipsec_sa_anti_replay_advance (sa0, thread_index, pd->seq, - pd->seq_hi); - vlib_prefetch_simple_counter (&ipsec_sa_lost_counters, thread_index, - pd->sa_index); + vlib_prefetch_simple_counter ( + &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index, + pd->sa_index); } u16 ah_hdr_len = sizeof (ah_header_t) + pd->icv_size + pd->icv_padding_len; vlib_buffer_advance (b[0], pd->ip_hdr_size + ah_hdr_len); b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | + VNET_BUFFER_F_L4_CHECKSUM_CORRECT); if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0))) { /* tunnel mode */ @@ -330,8 +362,10 @@ ah_decrypt_inline (vlib_main_t * vm, next[0] = AH_DECRYPT_NEXT_IP6_INPUT; else { - b[0]->error = node->errors[AH_DECRYPT_ERROR_DECRYPTION_FAILED]; - next[0] = AH_DECRYPT_NEXT_DROP; + ah_decrypt_set_next_index (b[0], node, vm->thread_index, + AH_DECRYPT_ERROR_DECRYPTION_FAILED, 0, + next, AH_DECRYPT_NEXT_DROP, + pd->sa_index); goto trace; } } @@ -382,8 +416,9 @@ 
ah_decrypt_inline (vlib_main_t * vm, } if (PREDICT_FALSE (n_lost)) - vlib_increment_simple_counter (&ipsec_sa_lost_counters, thread_index, - pd->sa_index, n_lost); + vlib_increment_simple_counter ( + &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index, + pd->sa_index, n_lost); vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~ 0; trace: @@ -415,7 +450,6 @@ VLIB_NODE_FN (ah4_decrypt_node) (vlib_main_t * vm, return ah_decrypt_inline (vm, node, from_frame, 0 /* is_ip6 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ah4_decrypt_node) = { .name = "ah4-decrypt", .vector_size = sizeof (u32), @@ -433,7 +467,6 @@ VLIB_REGISTER_NODE (ah4_decrypt_node) = { [AH_DECRYPT_NEXT_HANDOFF] = "ah4-decrypt-handoff", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ah6_decrypt_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -442,7 +475,6 @@ VLIB_NODE_FN (ah6_decrypt_node) (vlib_main_t * vm, return ah_decrypt_inline (vm, node, from_frame, 1 /* is_ip6 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ah6_decrypt_node) = { .name = "ah6-decrypt", .vector_size = sizeof (u32), @@ -460,7 +492,6 @@ VLIB_REGISTER_NODE (ah6_decrypt_node) = { [AH_DECRYPT_NEXT_HANDOFF] = "ah6-decrypt-handoff", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c index 7116a160926..960327f071d 100644 --- a/src/vnet/ipsec/ah_encrypt.c +++ b/src/vnet/ipsec/ah_encrypt.c @@ -81,8 +81,10 @@ ah_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node, if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED) { u32 bi = op->user_data; - b[bi]->error = node->errors[AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR]; - nexts[bi] = AH_ENCRYPT_NEXT_DROP; + ah_encrypt_set_next_index (b[bi], node, vm->thread_index, + AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR, bi, + nexts, AH_ENCRYPT_NEXT_DROP, + vnet_buffer (b[bi])->ipsec.sad_index); n_fail--; } op++; @@ -153,19 +155,20 @@ ah_encrypt_inline (vlib_main_t * vm, { if (current_sa_index != ~0) vlib_increment_combined_counter 
(&ipsec_sa_counters, thread_index, - current_sa_index, - current_sa_pkts, + current_sa_index, current_sa_pkts, current_sa_bytes); current_sa_index = vnet_buffer (b[0])->ipsec.sad_index; sa0 = ipsec_sa_get (current_sa_index); current_sa_bytes = current_sa_pkts = 0; + vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index, + current_sa_index); } pd->sa_index = current_sa_index; next[0] = AH_ENCRYPT_NEXT_DROP; - if (PREDICT_FALSE (~0 == sa0->thread_index)) + if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) { /* this is the first packet to use this SA, claim the SA * for this thread. this could happen simultaneously on @@ -183,7 +186,9 @@ ah_encrypt_inline (vlib_main_t * vm, if (PREDICT_FALSE (esp_seq_advance (sa0))) { - b[0]->error = node->errors[AH_ENCRYPT_ERROR_SEQ_CYCLED]; + ah_encrypt_set_next_index (b[0], node, vm->thread_index, + AH_ENCRYPT_ERROR_SEQ_CYCLED, 0, next, + AH_ENCRYPT_NEXT_DROP, current_sa_index); pd->skip = 1; goto next; } @@ -437,7 +442,6 @@ VLIB_NODE_FN (ah4_encrypt_node) (vlib_main_t * vm, return ah_encrypt_inline (vm, node, from_frame, 0 /* is_ip6 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ah4_encrypt_node) = { .name = "ah4-encrypt", .vector_size = sizeof (u32), @@ -454,7 +458,6 @@ VLIB_REGISTER_NODE (ah4_encrypt_node) = { [AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ah6_encrypt_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -463,7 +466,6 @@ VLIB_NODE_FN (ah6_encrypt_node) (vlib_main_t * vm, return ah_encrypt_inline (vm, node, from_frame, 1 /* is_ip6 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ah6_encrypt_node) = { .name = "ah6-encrypt", .vector_size = sizeof (u32), @@ -480,7 +482,6 @@ VLIB_REGISTER_NODE (ah6_encrypt_node) = { [AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h index 8d7e0563a59..1c3ce776ad2 100644 --- a/src/vnet/ipsec/esp.h +++ 
b/src/vnet/ipsec/esp.h @@ -18,6 +18,7 @@ #include <vnet/ip/ip.h> #include <vnet/crypto/crypto.h> #include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/ipsec.api_enum.h> typedef struct { @@ -36,27 +37,21 @@ typedef struct u8 next_header; } esp_footer_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { ip4_header_t ip4; esp_header_t esp; }) ip4_and_esp_header_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { ip4_header_t ip4; udp_header_t udp; esp_header_t esp; }) ip4_and_udp_and_esp_header_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { ip6_header_t ip6; esp_header_t esp; }) ip6_and_esp_header_t; -/* *INDENT-ON* */ /** * AES counter mode nonce @@ -85,9 +80,6 @@ typedef struct esp_aead_t_ } __clib_packed esp_aead_t; #define ESP_SEQ_MAX (4294967295UL) -#define ESP_MAX_BLOCK_SIZE (16) -#define ESP_MAX_IV_SIZE (16) -#define ESP_MAX_ICV_SIZE (32) u8 *format_esp_header (u8 * s, va_list * args); @@ -141,38 +133,76 @@ esp_aad_fill (u8 *data, const esp_header_t *esp, const ipsec_sa_t *sa, } } -/* Special case to drop or hand off packets for sync/async modes. - * - * Different than sync mode, async mode only enqueue drop or hand-off packets - * to next nodes. 
- */ -always_inline void -esp_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, u32 err, - u16 index, u16 *nexts, u16 drop_next) +always_inline u32 +esp_encrypt_err_to_sa_err (u32 err) { - nexts[index] = drop_next; - b->error = node->errors[err]; + switch (err) + { + case ESP_ENCRYPT_ERROR_HANDOFF: + return IPSEC_SA_ERROR_HANDOFF; + case ESP_ENCRYPT_ERROR_SEQ_CYCLED: + return IPSEC_SA_ERROR_SEQ_CYCLED; + case ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR: + return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR; + case ESP_ENCRYPT_ERROR_CRYPTO_QUEUE_FULL: + return IPSEC_SA_ERROR_CRYPTO_QUEUE_FULL; + case ESP_ENCRYPT_ERROR_NO_BUFFERS: + return IPSEC_SA_ERROR_NO_BUFFERS; + case ESP_ENCRYPT_ERROR_NO_ENCRYPTION: + return IPSEC_SA_ERROR_NO_ENCRYPTION; + } + return ~0; } -/* when submitting a frame is failed, drop all buffers in the frame */ always_inline u32 -esp_async_recycle_failed_submit (vlib_main_t *vm, vnet_crypto_async_frame_t *f, - vlib_node_runtime_t *node, u32 err, u16 index, - u32 *from, u16 *nexts, u16 drop_next_index) +esp_decrypt_err_to_sa_err (u32 err) { - u32 n_drop = f->n_elts; - u32 *bi = f->buffer_indices; - - while (n_drop--) + switch (err) { - from[index] = bi[0]; - esp_set_next_index (vlib_get_buffer (vm, bi[0]), node, err, index, nexts, - drop_next_index); - bi++; - index++; + case ESP_DECRYPT_ERROR_HANDOFF: + return IPSEC_SA_ERROR_HANDOFF; + case ESP_DECRYPT_ERROR_DECRYPTION_FAILED: + return IPSEC_SA_ERROR_DECRYPTION_FAILED; + case ESP_DECRYPT_ERROR_INTEG_ERROR: + return IPSEC_SA_ERROR_INTEG_ERROR; + case ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR: + return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR; + case ESP_DECRYPT_ERROR_REPLAY: + return IPSEC_SA_ERROR_REPLAY; + case ESP_DECRYPT_ERROR_RUNT: + return IPSEC_SA_ERROR_RUNT; + case ESP_DECRYPT_ERROR_NO_BUFFERS: + return IPSEC_SA_ERROR_NO_BUFFERS; + case ESP_DECRYPT_ERROR_OVERSIZED_HEADER: + return IPSEC_SA_ERROR_OVERSIZED_HEADER; + case ESP_DECRYPT_ERROR_NO_TAIL_SPACE: + return IPSEC_SA_ERROR_NO_TAIL_SPACE; + case 
ESP_DECRYPT_ERROR_TUN_NO_PROTO: + return IPSEC_SA_ERROR_TUN_NO_PROTO; + case ESP_DECRYPT_ERROR_UNSUP_PAYLOAD: + return IPSEC_SA_ERROR_UNSUP_PAYLOAD; } + return ~0; +} - return (f->n_elts); +always_inline void +esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, + u32 thread_index, u32 err, u16 index, u16 *nexts, + u16 drop_next, u32 sa_index) +{ + ipsec_set_next_index (b, node, thread_index, err, + esp_encrypt_err_to_sa_err (err), index, nexts, + drop_next, sa_index); +} + +always_inline void +esp_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, + u32 thread_index, u32 err, u16 index, u16 *nexts, + u16 drop_next, u32 sa_index) +{ + ipsec_set_next_index (b, node, thread_index, err, + esp_decrypt_err_to_sa_err (err), index, nexts, + drop_next, sa_index); } /** @@ -249,6 +279,43 @@ typedef struct extern esp_async_post_next_t esp_encrypt_async_next; extern esp_async_post_next_t esp_decrypt_async_next; +/* when submitting a frame is failed, drop all buffers in the frame */ +always_inline u32 +esp_async_recycle_failed_submit (vlib_main_t *vm, vnet_crypto_async_frame_t *f, + vlib_node_runtime_t *node, u32 err, + u32 ipsec_sa_err, u16 index, u32 *from, + u16 *nexts, u16 drop_next_index, + bool is_encrypt) +{ + vlib_buffer_t *b; + u32 n_drop = f->n_elts; + u32 *bi = f->buffer_indices; + + while (n_drop--) + { + u32 sa_index; + + from[index] = bi[0]; + b = vlib_get_buffer (vm, bi[0]); + + if (is_encrypt) + { + sa_index = vnet_buffer (b)->ipsec.sad_index; + } + else + { + sa_index = esp_post_data (b)->decrypt_data.sa_index; + } + + ipsec_set_next_index (b, node, vm->thread_index, err, ipsec_sa_err, + index, nexts, drop_next_index, sa_index); + bi++; + index++; + } + + return (f->n_elts); +} + #endif /* __ESP_H__ */ /* diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index af90bc4c7ba..26d8ca1deee 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -23,7 +23,6 @@ #include 
<vnet/ipsec/esp.h> #include <vnet/ipsec/ipsec_io.h> #include <vnet/ipsec/ipsec_tun.h> -#include <vnet/ipsec/ipsec.api_enum.h> #include <vnet/gre/packet.h> @@ -114,8 +113,9 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node, err = e; else err = ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR; - b[bi]->error = node->errors[err]; - nexts[bi] = ESP_DECRYPT_NEXT_DROP; + esp_decrypt_set_next_index (b[bi], node, vm->thread_index, err, bi, + nexts, ESP_DECRYPT_NEXT_DROP, + vnet_buffer (b[bi])->ipsec.sad_index); n_fail--; } op++; @@ -146,8 +146,9 @@ esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node, err = e; else err = ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR; - b[bi]->error = node->errors[err]; - nexts[bi] = ESP_DECRYPT_NEXT_DROP; + esp_decrypt_set_next_index (b[bi], node, vm->thread_index, err, bi, + nexts, ESP_DECRYPT_NEXT_DROP, + vnet_buffer (b[bi])->ipsec.sad_index); n_fail--; } op++; @@ -160,6 +161,9 @@ esp_remove_tail (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_t * last, { vlib_buffer_t *before_last = b; + if (b != last) + b->total_length_not_including_first_buffer -= tail; + if (last->current_length > tail) { last->current_length -= tail; @@ -177,6 +181,37 @@ esp_remove_tail (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_t * last, before_last->flags &= ~VLIB_BUFFER_NEXT_PRESENT; } +always_inline void +esp_remove_tail_and_tfc_padding (vlib_main_t *vm, vlib_node_runtime_t *node, + const esp_decrypt_packet_data_t *pd, + vlib_buffer_t *b, vlib_buffer_t *last, + u16 *next, u16 tail, int is_ip6) +{ + const u16 total_buffer_length = vlib_buffer_length_in_chain (vm, b); + u16 ip_packet_length; + if (is_ip6) + { + const ip6_header_t *ip6 = vlib_buffer_get_current (b); + ip_packet_length = + clib_net_to_host_u16 (ip6->payload_length) + sizeof (ip6_header_t); + } + else + { + const ip4_header_t *ip4 = vlib_buffer_get_current (b); + ip_packet_length = clib_net_to_host_u16 (ip4->length); + } + /* In case of TFC padding, the size of the buffer data 
needs to be adjusted + * to the ip packet length */ + if (PREDICT_FALSE (total_buffer_length < ip_packet_length + tail)) + { + esp_decrypt_set_next_index (b, node, vm->thread_index, + ESP_DECRYPT_ERROR_NO_TAIL_SPACE, 0, next, + ESP_DECRYPT_NEXT_DROP, pd->sa_index); + return; + } + esp_remove_tail (vm, b, last, total_buffer_length - ip_packet_length); +} + /* ICV is splitted in last two buffers so move it to the last buffer and return pointer to it */ static_always_inline u8 * @@ -202,9 +237,12 @@ esp_move_icv (vlib_main_t * vm, vlib_buffer_t * first, before_last->current_length -= first_sz; if (before_last == first) pd->current_length -= first_sz; + else + first->total_length_not_including_first_buffer -= first_sz; clib_memset (vlib_buffer_get_tail (before_last), 0, first_sz); if (dif) dif[0] = first_sz; + first->total_length_not_including_first_buffer -= last_sz; pd2->lb = before_last; pd2->icv_removed = 1; pd2->free_buffer_index = before_last->next_buffer; @@ -456,18 +494,16 @@ esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd, return total_len; } -static_always_inline void -esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node, - ipsec_per_thread_data_t * ptd, - vnet_crypto_op_t *** crypto_ops, - vnet_crypto_op_t *** integ_ops, - vnet_crypto_op_t * op, - ipsec_sa_t * sa0, u8 * payload, - u16 len, u8 icv_sz, u8 iv_sz, - esp_decrypt_packet_data_t * pd, - esp_decrypt_packet_data2_t * pd2, - vlib_buffer_t * b, u16 * next, u32 index) +static_always_inline esp_decrypt_error_t +esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, + ipsec_sa_t *sa0, u8 *payload, u16 len, u8 icv_sz, + u8 iv_sz, esp_decrypt_packet_data_t *pd, + esp_decrypt_packet_data2_t *pd2, vlib_buffer_t *b, + u32 index) { + vnet_crypto_op_t **crypto_ops; + vnet_crypto_op_t **integ_ops; + vnet_crypto_op_t _op, *op = &_op; const u8 esp_sz = sizeof (esp_header_t); if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE)) @@ -484,6 
+520,8 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node, if (pd->is_chain) { /* buffer is chained */ + integ_ops = &ptd->chained_integ_ops; + op->len = pd->current_length; /* special case when ICV is splitted and needs to be reassembled @@ -509,8 +547,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node, { /* we now have a single buffer of crypto data, adjust * the length (second buffer contains only ICV) */ - *integ_ops = &ptd->integ_ops; - *crypto_ops = &ptd->crypto_ops; + integ_ops = &ptd->integ_ops; len = b->current_length; goto out; } @@ -524,17 +561,16 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node, if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, sa0, b, icv_sz, payload, pd->current_length, &op->digest, &op->n_chunks, 0) < 0) - { - b->error = node->errors[ESP_DECRYPT_ERROR_NO_BUFFERS]; - next[0] = ESP_DECRYPT_NEXT_DROP; - return; - } + return ESP_DECRYPT_ERROR_NO_BUFFERS; } else - esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b, - payload); + { + integ_ops = &ptd->integ_ops; + esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b, + payload); + } out: - vec_add_aligned (*(integ_ops[0]), op, 1, CLIB_CACHE_LINE_BYTES); + vec_add_aligned (*integ_ops, op, 1, CLIB_CACHE_LINE_BYTES); } payload += esp_sz; @@ -560,6 +596,12 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node, op->aad_len = esp_aad_fill (op->aad, esp0, sa0, pd->seq_hi); op->tag = payload + len; op->tag_len = 16; + if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0))) + { + /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */ + payload -= iv_sz; + len += iv_sz; + } } else { @@ -582,26 +624,32 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node, esp_decrypt_chain_crypto (vm, ptd, pd, pd2, sa0, b, icv_sz, payload, len - pd->iv_sz + pd->icv_sz, &op->tag, &op->n_chunks); + crypto_ops = &ptd->chained_crypto_ops; + } + else + { + crypto_ops = 
&ptd->crypto_ops; } - vec_add_aligned (*(crypto_ops[0]), op, 1, CLIB_CACHE_LINE_BYTES); + vec_add_aligned (*crypto_ops, op, 1, CLIB_CACHE_LINE_BYTES); } + + return ESP_DECRYPT_ERROR_RX_PKTS; } static_always_inline esp_decrypt_error_t -esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node, - ipsec_per_thread_data_t *ptd, +esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, vnet_crypto_async_frame_t *f, ipsec_sa_t *sa0, u8 *payload, u16 len, u8 icv_sz, u8 iv_sz, esp_decrypt_packet_data_t *pd, esp_decrypt_packet_data2_t *pd2, u32 bi, - vlib_buffer_t *b, u16 *next, u16 async_next) + vlib_buffer_t *b, u16 async_next) { const u8 esp_sz = sizeof (esp_header_t); esp_decrypt_packet_data_t *async_pd = &(esp_post_data (b))->decrypt_data; esp_decrypt_packet_data2_t *async_pd2 = esp_post_data2 (b); u8 *tag = payload + len, *iv = payload + esp_sz, *aad = 0; - u32 key_index; + const u32 key_index = sa0->crypto_key_index; u32 crypto_len, integ_len = 0; i16 crypto_start_offset, integ_start_offset = 0; u8 flags = 0; @@ -609,7 +657,6 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node, if (!ipsec_sa_is_set_IS_AEAD (sa0)) { /* linked algs */ - key_index = sa0->linked_key_index; integ_start_offset = payload - b->data; integ_len = len; if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE)) @@ -662,8 +709,6 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node, else esp_insert_esn (vm, sa0, pd, pd2, &integ_len, &tag, &len, b, payload); } - else - key_index = sa0->crypto_key_index; out: /* crypto */ @@ -683,6 +728,12 @@ out: aad = (u8 *) nonce - sizeof (esp_aead_t); esp_aad_fill (aad, esp0, sa0, pd->seq_hi); tag = payload + len; + if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0))) + { + /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */ + payload -= iv_sz; + len += iv_sz; + } } else { @@ -721,7 +772,7 @@ out: } static_always_inline void -esp_decrypt_post_crypto 
(vlib_main_t *vm, const vlib_node_runtime_t *node, +esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node, const u16 *next_by_next_header, const esp_decrypt_packet_data_t *pd, const esp_decrypt_packet_data2_t *pd2, @@ -734,6 +785,7 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, const u8 tun_flags = IPSEC_SA_FLAG_IS_TUNNEL | IPSEC_SA_FLAG_IS_TUNNEL_V6; u8 pad_length = 0, next_header = 0; u16 icv_sz; + u64 n_lost; /* * redo the anti-reply check @@ -742,34 +794,50 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, * check above we did so against the state of the window (W), * after packet s-1. So each of the packets in the sequence will be * accepted. - * This time s will be cheked against Ws-1, s+1 chceked against Ws - * (i.e. the window state is updated/advnaced) - * so this time the successive s+! packet will be dropped. + * This time s will be cheked against Ws-1, s+1 checked against Ws + * (i.e. the window state is updated/advanced) + * so this time the successive s+1 packet will be dropped. * This is a consequence of batching the decrypts. If the - * check-dcrypt-advance process was done for each packet it would + * check-decrypt-advance process was done for each packet it would * be fine. But we batch the decrypts because it's much more efficient * to do so in SW and if we offload to HW and the process is async. * * You're probably thinking, but this means an attacker can send the - * above sequence and cause VPP to perform decrpyts that will fail, + * above sequence and cause VPP to perform decrypts that will fail, * and that's true. But if the attacker can determine s (a valid * sequence number in the window) which is non-trivial, it can generate * a sequence s, s+1, s+2, s+3, ... s+n and nothing will prevent any * implementation, sequential or batching, from decrypting these. 
*/ - if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true, - NULL)) + if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0))) { - b->error = node->errors[ESP_DECRYPT_ERROR_REPLAY]; - next[0] = ESP_DECRYPT_NEXT_DROP; - return; + if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true, + NULL, true)) + { + esp_decrypt_set_next_index (b, node, vm->thread_index, + ESP_DECRYPT_ERROR_REPLAY, 0, next, + ESP_DECRYPT_NEXT_DROP, pd->sa_index); + return; + } + n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq, + pd->seq_hi, true); + } + else + { + if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true, + NULL, false)) + { + esp_decrypt_set_next_index (b, node, vm->thread_index, + ESP_DECRYPT_ERROR_REPLAY, 0, next, + ESP_DECRYPT_NEXT_DROP, pd->sa_index); + return; + } + n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq, + pd->seq_hi, false); } - u64 n_lost = - ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq, pd->seq_hi); - - vlib_prefetch_simple_counter (&ipsec_sa_lost_counters, vm->thread_index, - pd->sa_index); + vlib_prefetch_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], + vm->thread_index, pd->sa_index); if (pd->is_chain) { @@ -828,7 +896,8 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, u16 adv = pd->iv_sz + esp_sz; u16 tail = sizeof (esp_footer_t) + pad_length + icv_sz; u16 tail_orig = sizeof (esp_footer_t) + pad_length + pd->icv_sz; - b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; + b->flags &= + ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); if ((pd->flags & tun_flags) == 0 && !is_tun) /* transport mode */ { @@ -878,14 +947,16 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, next[0] = ESP_DECRYPT_NEXT_IP4_INPUT; b->current_data = pd->current_data + adv; b->current_length = pd->current_length - adv; - esp_remove_tail (vm, b, lb, tail); + 
esp_remove_tail_and_tfc_padding (vm, node, pd, b, lb, next, tail, + false); } else if (next_header == IP_PROTOCOL_IPV6) { next[0] = ESP_DECRYPT_NEXT_IP6_INPUT; b->current_data = pd->current_data + adv; b->current_length = pd->current_length - adv; - esp_remove_tail (vm, b, lb, tail); + esp_remove_tail_and_tfc_padding (vm, node, pd, b, lb, next, tail, + true); } else if (next_header == IP_PROTOCOL_MPLS_IN_IP) { @@ -918,8 +989,9 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, next[0] = ESP_DECRYPT_NEXT_IP6_INPUT; break; default: - b->error = node->errors[ESP_DECRYPT_ERROR_UNSUP_PAYLOAD]; - next[0] = ESP_DECRYPT_NEXT_DROP; + esp_decrypt_set_next_index ( + b, node, vm->thread_index, ESP_DECRYPT_ERROR_UNSUP_PAYLOAD, 0, + next, ESP_DECRYPT_NEXT_DROP, pd->sa_index); break; } } @@ -932,8 +1004,9 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, } else { - next[0] = ESP_DECRYPT_NEXT_DROP; - b->error = node->errors[ESP_DECRYPT_ERROR_UNSUP_PAYLOAD]; + esp_decrypt_set_next_index (b, node, vm->thread_index, + ESP_DECRYPT_ERROR_UNSUP_PAYLOAD, 0, next, + ESP_DECRYPT_NEXT_DROP, pd->sa_index); return; } @@ -973,8 +1046,10 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, !ip46_address_is_equal_v4 (&itp->itp_tun.dst, &ip4->src_address)) { - next[0] = ESP_DECRYPT_NEXT_DROP; - b->error = node->errors[ESP_DECRYPT_ERROR_TUN_NO_PROTO]; + esp_decrypt_set_next_index ( + b, node, vm->thread_index, + ESP_DECRYPT_ERROR_TUN_NO_PROTO, 0, next, + ESP_DECRYPT_NEXT_DROP, pd->sa_index); } } else if (next_header == IP_PROTOCOL_IPV6) @@ -988,8 +1063,10 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, !ip46_address_is_equal_v6 (&itp->itp_tun.dst, &ip6->src_address)) { - next[0] = ESP_DECRYPT_NEXT_DROP; - b->error = node->errors[ESP_DECRYPT_ERROR_TUN_NO_PROTO]; + esp_decrypt_set_next_index ( + b, node, vm->thread_index, + ESP_DECRYPT_ERROR_TUN_NO_PROTO, 0, next, + ESP_DECRYPT_NEXT_DROP, 
pd->sa_index); } } } @@ -997,8 +1074,8 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node, } if (PREDICT_FALSE (n_lost)) - vlib_increment_simple_counter (&ipsec_sa_lost_counters, vm->thread_index, - pd->sa_index, n_lost); + vlib_increment_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], + vm->thread_index, pd->sa_index, n_lost); } always_inline uword @@ -1016,8 +1093,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE]; u16 sync_nexts[VLIB_FRAME_SIZE], *sync_next = sync_nexts, n_sync = 0; - u16 async_nexts[VLIB_FRAME_SIZE], *async_next = async_nexts; - u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0; + u16 noop_nexts[VLIB_FRAME_SIZE], n_noop = 0; u32 sync_bi[VLIB_FRAME_SIZE]; u32 noop_bi[VLIB_FRAME_SIZE]; esp_decrypt_packet_data_t pkt_data[VLIB_FRAME_SIZE], *pd = pkt_data; @@ -1026,9 +1102,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0; const u8 esp_sz = sizeof (esp_header_t); ipsec_sa_t *sa0 = 0; - vnet_crypto_op_t _op, *op = &_op; - vnet_crypto_op_t **crypto_ops; - vnet_crypto_op_t **integ_ops; + bool anti_replay_result; int is_async = im->async_mode; vnet_crypto_async_op_id_t async_op = ~0; vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_ASYNC_OP_N_IDS]; @@ -1066,8 +1140,9 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (n_bufs == 0) { err = ESP_DECRYPT_ERROR_NO_BUFFERS; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - ESP_DECRYPT_NEXT_DROP); + esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, ESP_DECRYPT_NEXT_DROP, + vnet_buffer (b[0])->ipsec.sad_index); goto next; } @@ -1075,12 +1150,13 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { if (current_sa_pkts) vlib_increment_combined_counter (&ipsec_sa_counters, 
thread_index, - current_sa_index, - current_sa_pkts, + current_sa_index, current_sa_pkts, current_sa_bytes); current_sa_bytes = current_sa_pkts = 0; current_sa_index = vnet_buffer (b[0])->ipsec.sad_index; + vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index, + current_sa_index); sa0 = ipsec_sa_get (current_sa_index); /* fetch the second cacheline ASAP */ @@ -1092,7 +1168,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0); } - if (PREDICT_FALSE (~0 == sa0->thread_index)) + if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) { /* this is the first packet to use this SA, claim the SA * for this thread. this could happen simultaneously on @@ -1105,8 +1181,9 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index; err = ESP_DECRYPT_ERROR_HANDOFF; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - ESP_DECRYPT_NEXT_HANDOFF); + esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, ESP_DECRYPT_NEXT_HANDOFF, + current_sa_index); goto next; } @@ -1127,33 +1204,37 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, /* find last buffer in the chain */ while (pd2->lb->flags & VLIB_BUFFER_NEXT_PRESENT) pd2->lb = vlib_get_buffer (vm, pd2->lb->next_buffer); + } - crypto_ops = &ptd->chained_crypto_ops; - integ_ops = &ptd->chained_integ_ops; + pd->current_length = b[0]->current_length; + + /* anti-reply check */ + if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0))) + { + anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( + sa0, pd->seq, ~0, false, &pd->seq_hi, true); } else { - crypto_ops = &ptd->crypto_ops; - integ_ops = &ptd->integ_ops; + anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( + sa0, pd->seq, ~0, false, &pd->seq_hi, false); } - pd->current_length = b[0]->current_length; - - /* anti-reply check */ - if 
(ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, ~0, false, - &pd->seq_hi)) + if (anti_replay_result) { err = ESP_DECRYPT_ERROR_REPLAY; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - ESP_DECRYPT_NEXT_DROP); + esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, ESP_DECRYPT_NEXT_DROP, + current_sa_index); goto next; } if (pd->current_length < cpd.icv_sz + esp_sz + cpd.iv_sz) { err = ESP_DECRYPT_ERROR_RUNT; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - ESP_DECRYPT_NEXT_DROP); + esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, ESP_DECRYPT_NEXT_DROP, + current_sa_index); goto next; } @@ -1172,31 +1253,47 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { async_frames[async_op] = vnet_crypto_async_get_frame (vm, async_op); + if (PREDICT_FALSE (!async_frames[async_op])) + { + err = ESP_DECRYPT_ERROR_NO_AVAIL_FRAME; + esp_decrypt_set_next_index ( + b[0], node, thread_index, err, n_noop, noop_nexts, + ESP_DECRYPT_NEXT_DROP, current_sa_index); + goto next; + } + /* Save the frame to the list we'll submit at the end */ vec_add1 (ptd->async_frames, async_frames[async_op]); } err = esp_decrypt_prepare_async_frame ( - vm, node, ptd, async_frames[async_op], sa0, payload, len, - cpd.icv_sz, cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next, - async_next_node); + vm, ptd, async_frames[async_op], sa0, payload, len, cpd.icv_sz, + cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next_node); if (ESP_DECRYPT_ERROR_RX_PKTS != err) { - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - ESP_DECRYPT_NEXT_DROP); + esp_decrypt_set_next_index ( + b[0], node, thread_index, err, n_noop, noop_nexts, + ESP_DECRYPT_NEXT_DROP, current_sa_index); } } else - esp_decrypt_prepare_sync_op ( - vm, node, ptd, &crypto_ops, &integ_ops, op, sa0, payload, len, - cpd.icv_sz, cpd.iv_sz, pd, pd2, b[0], sync_next, b - bufs); + { + err = esp_decrypt_prepare_sync_op (vm, ptd, sa0, payload, len, + 
cpd.icv_sz, cpd.iv_sz, pd, pd2, + b[0], n_sync); + if (err != ESP_DECRYPT_ERROR_RX_PKTS) + { + esp_decrypt_set_next_index (b[0], node, thread_index, err, 0, + sync_next, ESP_DECRYPT_NEXT_DROP, + current_sa_index); + } + } /* next */ next: if (ESP_DECRYPT_ERROR_RX_PKTS != err) { noop_bi[n_noop] = from[b - bufs]; n_noop++; - noop_next++; } else if (!is_async) { @@ -1207,8 +1304,6 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, pd += 1; pd2 += 1; } - else - async_next++; n_left -= 1; b += 1; @@ -1234,7 +1329,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { n_noop += esp_async_recycle_failed_submit ( vm, *async_frame, node, ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR, - n_noop, noop_bi, noop_nexts, ESP_DECRYPT_NEXT_DROP); + IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR, n_noop, noop_bi, noop_nexts, + ESP_DECRYPT_NEXT_DROP, false); vnet_crypto_async_reset_frame (*async_frame); vnet_crypto_async_free_frame (vm, *async_frame); } @@ -1448,7 +1544,6 @@ VLIB_NODE_FN (esp6_decrypt_tun_post_node) (vlib_main_t * vm, return esp_decrypt_post_inline (vm, node, from_frame, 1, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp4_decrypt_node) = { .name = "esp4-decrypt", .vector_size = sizeof (u32), @@ -1572,7 +1667,6 @@ VLIB_REGISTER_NODE (esp6_decrypt_tun_post_node) = { .sibling_of = "esp6-decrypt-tun", }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index 4ed3bf72c3f..dd47053874c 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -18,6 +18,7 @@ #include <vnet/vnet.h> #include <vnet/api_errno.h> #include <vnet/ip/ip.h> +#include <vnet/interface_output.h> #include <vnet/crypto/crypto.h> @@ -94,8 +95,7 @@ format_esp_post_encrypt_trace (u8 * s, va_list * args) /* pad packet in input buffer */ static_always_inline u8 * esp_add_footer_and_icv (vlib_main_t *vm, vlib_buffer_t **last, u8 esp_align, - u8 icv_sz, vlib_node_runtime_t *node, - u16 buffer_data_size, uword 
total_len) + u8 icv_sz, u16 buffer_data_size, uword total_len) { static const u8 pad_data[ESP_MAX_BLOCK_SIZE] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, @@ -149,11 +149,9 @@ esp_update_ip4_hdr (ip4_header_t * ip4, u16 len, int is_transport, int is_udp) if (is_transport) { u8 prot = is_udp ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP; - - sum = ip_csum_update (ip4->checksum, ip4->protocol, - prot, ip4_header_t, protocol); + sum = ip_csum_update (ip4->checksum, ip4->protocol, prot, ip4_header_t, + protocol); ip4->protocol = prot; - sum = ip_csum_update (sum, old_len, len, ip4_header_t, length); } else @@ -182,9 +180,9 @@ ext_hdr_is_pre_esp (u8 nexthdr) return !u8x16_is_all_zero (ext_hdr_types == u8x16_splat (nexthdr)); #else - return ((nexthdr ^ IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) | - (nexthdr ^ IP_PROTOCOL_IPV6_ROUTE) | - ((nexthdr ^ IP_PROTOCOL_IPV6_FRAGMENTATION) != 0)); + return (!(nexthdr ^ IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) || + !(nexthdr ^ IP_PROTOCOL_IPV6_ROUTE) || + !(nexthdr ^ IP_PROTOCOL_IPV6_FRAGMENTATION)); #endif } @@ -215,6 +213,25 @@ esp_get_ip6_hdr_len (ip6_header_t * ip6, ip6_ext_header_t ** ext_hdr) return len; } +/* IPsec IV generation: IVs requirements differ depending of the + * encryption mode: IVs must be unpredictable for AES-CBC whereas it can + * be predictable but should never be reused with the same key material + * for CTR and GCM. + * To avoid reusing the same IVs between multiple VPP instances and between + * restarts, we use a properly chosen PRNG to generate IVs. To ensure the IV is + * unpredictable for CBC, it is then encrypted using the same key as the + * message. You can refer to NIST SP800-38a and NIST SP800-38d for more + * details. 
*/ +static_always_inline void * +esp_generate_iv (ipsec_sa_t *sa, void *payload, int iv_sz) +{ + ASSERT (iv_sz >= sizeof (u64)); + u64 *iv = (u64 *) (payload - iv_sz); + clib_memset_u8 (iv, 0, iv_sz); + *iv = clib_pcg64i_random_r (&sa->iv_prng); + return iv; +} + static_always_inline void esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_crypto_op_t * ops, vlib_buffer_t * b[], @@ -236,8 +253,10 @@ esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node, if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED) { u32 bi = op->user_data; - b[bi]->error = node->errors[ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR]; - nexts[bi] = drop_next; + esp_encrypt_set_next_index (b[bi], node, vm->thread_index, + ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR, + bi, nexts, drop_next, + vnet_buffer (b[bi])->ipsec.sad_index); n_fail--; } op++; @@ -264,8 +283,10 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node, if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED) { u32 bi = op->user_data; - b[bi]->error = node->errors[ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR]; - nexts[bi] = drop_next; + esp_encrypt_set_next_index (b[bi], node, vm->thread_index, + ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR, + bi, nexts, drop_next, + vnet_buffer (b[bi])->ipsec.sad_index); n_fail--; } op++; @@ -368,28 +389,36 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, vnet_crypto_op_t *op; vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES); vnet_crypto_op_init (op, sa0->crypto_enc_op_id); + u8 *crypto_start = payload; + /* esp_add_footer_and_icv() in esp_encrypt_inline() makes sure we always + * have enough space for ESP header and footer which includes ICV */ + ASSERT (payload_len > icv_sz); + u16 crypto_len = payload_len - icv_sz; + + /* generate the IV in front of the payload */ + void *pkt_iv = esp_generate_iv (sa0, payload, iv_sz); - op->src = op->dst = payload; op->key_index = sa0->crypto_key_index; - op->len = payload_len - icv_sz; op->user_data = bi; 
if (ipsec_sa_is_set_IS_CTR (sa0)) { - ASSERT (sizeof (u64) == iv_sz); /* construct nonce in a scratch space in front of the IP header */ esp_ctr_nonce_t *nonce = - (esp_ctr_nonce_t *) (payload - sizeof (u64) - hdr_len - - sizeof (*nonce)); - u64 *pkt_iv = (u64 *) (payload - sizeof (u64)); - + (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce)); if (ipsec_sa_is_set_IS_AEAD (sa0)) { /* constuct aad in a scratch space in front of the nonce */ op->aad = (u8 *) nonce - sizeof (esp_aead_t); op->aad_len = esp_aad_fill (op->aad, esp, sa0, seq_hi); - op->tag = payload + op->len; + op->tag = payload + crypto_len; op->tag_len = 16; + if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0))) + { + /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */ + crypto_start -= iv_sz; + crypto_len += iv_sz; + } } else { @@ -397,23 +426,34 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, } nonce->salt = sa0->salt; - nonce->iv = *pkt_iv = clib_host_to_net_u64 (sa0->ctr_iv_counter++); + nonce->iv = *(u64 *) pkt_iv; op->iv = (u8 *) nonce; } else { - op->iv = payload - iv_sz; - op->flags = VNET_CRYPTO_OP_FLAG_INIT_IV; + /* construct zero iv in front of the IP header */ + op->iv = pkt_iv - hdr_len - iv_sz; + clib_memset_u8 (op->iv, 0, iv_sz); + /* include iv field in crypto */ + crypto_start -= iv_sz; + crypto_len += iv_sz; } - if (lb != b[0]) + if (PREDICT_FALSE (lb != b[0])) { /* is chained */ op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; op->chunk_index = vec_len (ptd->chunks); op->tag = vlib_buffer_get_tail (lb) - icv_sz; - esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz, payload, - payload_len, &op->n_chunks); + esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz, + crypto_start, crypto_len + icv_sz, + &op->n_chunks); + } + else + { + /* not chained */ + op->src = op->dst = crypto_start; + op->len = crypto_len; } } @@ -462,33 +502,36 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, esp_post_data_t *post = 
esp_post_data (b); u8 *tag, *iv, *aad = 0; u8 flag = 0; - u32 key_index; - i16 crypto_start_offset, integ_start_offset = 0; + const u32 key_index = sa->crypto_key_index; + i16 crypto_start_offset, integ_start_offset; u16 crypto_total_len, integ_total_len; post->next_index = next; /* crypto */ - crypto_start_offset = payload - b->data; + crypto_start_offset = integ_start_offset = payload - b->data; crypto_total_len = integ_total_len = payload_len - icv_sz; tag = payload + crypto_total_len; - key_index = sa->linked_key_index; + /* generate the IV in front of the payload */ + void *pkt_iv = esp_generate_iv (sa, payload, iv_sz); if (ipsec_sa_is_set_IS_CTR (sa)) { - ASSERT (sizeof (u64) == iv_sz); /* construct nonce in a scratch space in front of the IP header */ - esp_ctr_nonce_t *nonce = (esp_ctr_nonce_t *) (payload - sizeof (u64) - - hdr_len - sizeof (*nonce)); - u64 *pkt_iv = (u64 *) (payload - sizeof (u64)); - + esp_ctr_nonce_t *nonce = + (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce)); if (ipsec_sa_is_set_IS_AEAD (sa)) { /* constuct aad in a scratch space in front of the nonce */ aad = (u8 *) nonce - sizeof (esp_aead_t); esp_aad_fill (aad, esp, sa, sa->seq_hi); - key_index = sa->crypto_key_index; + if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa))) + { + /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */ + crypto_start_offset -= iv_sz; + crypto_total_len += iv_sz; + } } else { @@ -496,13 +539,17 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, } nonce->salt = sa->salt; - nonce->iv = *pkt_iv = clib_host_to_net_u64 (sa->ctr_iv_counter++); + nonce->iv = *(u64 *) pkt_iv; iv = (u8 *) nonce; } else { - iv = payload - iv_sz; - flag |= VNET_CRYPTO_OP_FLAG_INIT_IV; + /* construct zero iv in front of the IP header */ + iv = pkt_iv - hdr_len - iv_sz; + clib_memset_u8 (iv, 0, iv_sz); + /* include iv field in crypto */ + crypto_start_offset -= iv_sz; + crypto_total_len += iv_sz; } if (lb != b) @@ -510,13 +557,14 @@ 
esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd, /* chain */ flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; tag = vlib_buffer_get_tail (lb) - icv_sz; - crypto_total_len = esp_encrypt_chain_crypto (vm, ptd, sa, b, lb, icv_sz, - payload, payload_len, 0); + crypto_total_len = esp_encrypt_chain_crypto ( + vm, ptd, sa, b, lb, icv_sz, b->data + crypto_start_offset, + crypto_total_len + icv_sz, 0); } if (sa->integ_op_id) { - integ_start_offset = crypto_start_offset - iv_sz - sizeof (esp_header_t); + integ_start_offset -= iv_sz + sizeof (esp_header_t); integ_total_len += iv_sz + sizeof (esp_header_t); if (b != lb) @@ -557,6 +605,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 current_sa_bytes = 0, spi = 0; u8 esp_align = 4, iv_sz = 0, icv_sz = 0; ipsec_sa_t *sa0 = 0; + u8 sa_drop_no_crypto = 0; vlib_buffer_t *lb; vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops; vnet_crypto_op_t **integ_ops = &ptd->integ_ops; @@ -573,8 +622,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, ESP_ENCRYPT_NEXT_HANDOFF_MPLS)); vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE]; u16 sync_nexts[VLIB_FRAME_SIZE], *sync_next = sync_nexts, n_sync = 0; - u16 async_nexts[VLIB_FRAME_SIZE], *async_next = async_nexts, n_async = 0; - u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0; + u16 n_async = 0; + u16 noop_nexts[VLIB_FRAME_SIZE], n_noop = 0; u32 sync_bi[VLIB_FRAME_SIZE]; u32 noop_bi[VLIB_FRAME_SIZE]; esp_encrypt_error_t err; @@ -613,6 +662,10 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, CLIB_CACHE_LINE_BYTES, LOAD); } + vnet_calc_checksums_inline (vm, b[0], b[0]->flags & VNET_BUFFER_F_IS_IP4, + b[0]->flags & VNET_BUFFER_F_IS_IP6); + vnet_calc_outer_checksums_inline (vm, b[0]); + if (is_tun) { /* we are on a ipsec tunnel's feature arc */ @@ -623,8 +676,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (PREDICT_FALSE (INDEX_INVALID == sa_index0)) { err = 
ESP_ENCRYPT_ERROR_NO_PROTECTION; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - drop_next); + noop_nexts[n_noop] = drop_next; + b[0]->error = node->errors[err]; goto trace; } } @@ -634,27 +687,24 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (sa_index0 != current_sa_index) { if (current_sa_packets) - vlib_increment_combined_counter (&ipsec_sa_counters, thread_index, - current_sa_index, - current_sa_packets, - current_sa_bytes); + vlib_increment_combined_counter ( + &ipsec_sa_counters, thread_index, current_sa_index, + current_sa_packets, current_sa_bytes); current_sa_packets = current_sa_bytes = 0; sa0 = ipsec_sa_get (sa_index0); + current_sa_index = sa_index0; + + sa_drop_no_crypto = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE && + sa0->integ_alg == IPSEC_INTEG_ALG_NONE) && + !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0)); + + vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index, + current_sa_index); - if (PREDICT_FALSE ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE && - sa0->integ_alg == IPSEC_INTEG_ALG_NONE) && - !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0))) - { - err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - drop_next); - goto trace; - } /* fetch the second cacheline ASAP */ clib_prefetch_load (sa0->cacheline1); - current_sa_index = sa_index0; spi = clib_net_to_host_u32 (sa0->spi); esp_align = sa0->esp_block_align; icv_sz = sa0->integ_icv_size; @@ -662,7 +712,15 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0); } - if (PREDICT_FALSE (~0 == sa0->thread_index)) + if (PREDICT_FALSE (sa_drop_no_crypto != 0)) + { + err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION; + esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, drop_next, sa_index0); + goto trace; + } + + if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) { /* this is the first packet to use this SA, claim the SA * for this thread. 
this could happen simultaneously on @@ -675,8 +733,9 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index; err = ESP_ENCRYPT_ERROR_HANDOFF; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - handoff_next); + esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, handoff_next, + current_sa_index); goto trace; } @@ -685,7 +744,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (n_bufs == 0) { err = ESP_ENCRYPT_ERROR_NO_BUFFERS; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, drop_next); + esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, drop_next, current_sa_index); goto trace; } @@ -699,7 +759,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (PREDICT_FALSE (esp_seq_advance (sa0))) { err = ESP_ENCRYPT_ERROR_SEQ_CYCLED; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, drop_next); + esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, drop_next, current_sa_index); goto trace; } @@ -710,13 +771,14 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { payload = vlib_buffer_get_current (b[0]); next_hdr_ptr = esp_add_footer_and_icv ( - vm, &lb, esp_align, icv_sz, node, buffer_data_size, + vm, &lb, esp_align, icv_sz, buffer_data_size, vlib_buffer_length_in_chain (vm, b[0])); if (!next_hdr_ptr) { err = ESP_ENCRYPT_ERROR_NO_BUFFERS; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - drop_next); + esp_encrypt_set_next_index (b[0], node, thread_index, err, + n_noop, noop_nexts, drop_next, + current_sa_index); goto trace; } b[0]->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -837,21 +899,23 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if ((old_ip_hdr - ip_len) < &b[0]->pre_data[0]) { err = ESP_ENCRYPT_ERROR_NO_BUFFERS; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - drop_next); + 
esp_encrypt_set_next_index (b[0], node, thread_index, err, + n_noop, noop_nexts, drop_next, + current_sa_index); goto trace; } vlib_buffer_advance (b[0], ip_len); payload = vlib_buffer_get_current (b[0]); next_hdr_ptr = esp_add_footer_and_icv ( - vm, &lb, esp_align, icv_sz, node, buffer_data_size, + vm, &lb, esp_align, icv_sz, buffer_data_size, vlib_buffer_length_in_chain (vm, b[0])); if (!next_hdr_ptr) { err = ESP_ENCRYPT_ERROR_NO_BUFFERS; - esp_set_next_index (b[0], node, err, n_noop, noop_nexts, - drop_next); + esp_encrypt_set_next_index (b[0], node, thread_index, err, + n_noop, noop_nexts, drop_next, + current_sa_index); goto trace; } @@ -952,6 +1016,16 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { async_frames[async_op] = vnet_crypto_async_get_frame (vm, async_op); + + if (PREDICT_FALSE (!async_frames[async_op])) + { + err = ESP_ENCRYPT_ERROR_NO_AVAIL_FRAME; + esp_encrypt_set_next_index (b[0], node, thread_index, err, + n_noop, noop_nexts, drop_next, + current_sa_index); + goto trace; + } + /* Save the frame to the list we'll submit at the end */ vec_add1 (ptd->async_frames, async_frames[async_op]); } @@ -995,7 +1069,6 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { noop_bi[n_noop] = from[b - bufs]; n_noop++; - noop_next++; } else if (!is_async) { @@ -1007,7 +1080,6 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, else { n_async++; - async_next++; } n_left -= 1; b += 1; @@ -1042,7 +1114,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { n_noop += esp_async_recycle_failed_submit ( vm, *async_frame, node, ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR, - n_noop, noop_bi, noop_nexts, drop_next); + IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR, n_noop, noop_bi, + noop_nexts, drop_next, true); vnet_crypto_async_reset_frame (*async_frame); vnet_crypto_async_free_frame (vm, *async_frame); } @@ -1151,7 +1224,6 @@ VLIB_NODE_FN (esp4_encrypt_node) (vlib_main_t * vm, esp_encrypt_async_next.esp4_post_next); } 
-/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp4_encrypt_node) = { .name = "esp4-encrypt", .vector_size = sizeof (u32), @@ -1170,7 +1242,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_node) = { [ESP_ENCRYPT_NEXT_HANDOFF_MPLS] = "error-drop", [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output" }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (esp4_encrypt_post_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1179,7 +1250,6 @@ VLIB_NODE_FN (esp4_encrypt_post_node) (vlib_main_t * vm, return esp_encrypt_post_inline (vm, node, from_frame); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp4_encrypt_post_node) = { .name = "esp4-encrypt-post", .vector_size = sizeof (u32), @@ -1190,7 +1260,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_post_node) = { .n_errors = ESP_ENCRYPT_N_ERROR, .error_counters = esp_encrypt_error_counters, }; -/* *INDENT-ON* */ VLIB_NODE_FN (esp6_encrypt_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1200,7 +1269,6 @@ VLIB_NODE_FN (esp6_encrypt_node) (vlib_main_t * vm, esp_encrypt_async_next.esp6_post_next); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp6_encrypt_node) = { .name = "esp6-encrypt", .vector_size = sizeof (u32), @@ -1211,7 +1279,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_node) = { .n_errors = ESP_ENCRYPT_N_ERROR, .error_counters = esp_encrypt_error_counters, }; -/* *INDENT-ON* */ VLIB_NODE_FN (esp6_encrypt_post_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1220,7 +1287,6 @@ VLIB_NODE_FN (esp6_encrypt_post_node) (vlib_main_t * vm, return esp_encrypt_post_inline (vm, node, from_frame); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp6_encrypt_post_node) = { .name = "esp6-encrypt-post", .vector_size = sizeof (u32), @@ -1231,7 +1297,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_post_node) = { .n_errors = ESP_ENCRYPT_N_ERROR, .error_counters = esp_encrypt_error_counters, }; -/* *INDENT-ON* */ VLIB_NODE_FN (esp4_encrypt_tun_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1241,7 +1306,6 @@ VLIB_NODE_FN (esp4_encrypt_tun_node) (vlib_main_t * vm, 
esp_encrypt_async_next.esp4_tun_post_next); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp4_encrypt_tun_node) = { .name = "esp4-encrypt-tun", .vector_size = sizeof (u32), @@ -1270,7 +1334,6 @@ VLIB_NODE_FN (esp4_encrypt_tun_post_node) (vlib_main_t * vm, return esp_encrypt_post_inline (vm, node, from_frame); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = { .name = "esp4-encrypt-tun-post", .vector_size = sizeof (u32), @@ -1281,7 +1344,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = { .n_errors = ESP_ENCRYPT_N_ERROR, .error_counters = esp_encrypt_error_counters, }; -/* *INDENT-ON* */ VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1291,7 +1353,6 @@ VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm, esp_encrypt_async_next.esp6_tun_post_next); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = { .name = "esp6-encrypt-tun", .vector_size = sizeof (u32), @@ -1313,7 +1374,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = { }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1322,7 +1382,6 @@ VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm, return esp_encrypt_post_inline (vm, node, from_frame); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp6_encrypt_tun_post_node) = { .name = "esp6-encrypt-tun-post", .vector_size = sizeof (u32), @@ -1333,7 +1392,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_post_node) = { .n_errors = ESP_ENCRYPT_N_ERROR, .error_counters = esp_encrypt_error_counters, }; -/* *INDENT-ON* */ VLIB_NODE_FN (esp_mpls_encrypt_tun_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index 401564bd39b..68efe8f50f7 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -96,6 +96,7 @@ define ipsec_spd_entry_add_del_v2 */ define ipsec_spd_entry_add_del_reply { + option deprecated; u32 context; i32 retval; 
u32 stat_index; @@ -166,6 +167,7 @@ define ipsec_spd_details { define ipsec_sad_entry_add_del { option deprecated; + u32 client_index; u32 context; bool is_add; @@ -174,6 +176,8 @@ define ipsec_sad_entry_add_del define ipsec_sad_entry_add_del_v2 { + option deprecated; + u32 client_index; u32 context; bool is_add; @@ -187,12 +191,21 @@ define ipsec_sad_entry_add_del_v3 bool is_add; vl_api_ipsec_sad_entry_v3_t entry; }; + define ipsec_sad_entry_add { u32 client_index; u32 context; vl_api_ipsec_sad_entry_v3_t entry; }; + +define ipsec_sad_entry_add_v2 +{ + u32 client_index; + u32 context; + vl_api_ipsec_sad_entry_v4_t entry; +}; + autoreply define ipsec_sad_entry_del { u32 client_index; @@ -200,9 +213,55 @@ autoreply define ipsec_sad_entry_del u32 id; }; + +/** \brief An API to bind an SAD entry to a specific worker + + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sa_id - the id of the SA to bind + @param worker - the worker's index to which the SA will be bound to + */ +autoreply define ipsec_sad_bind +{ + u32 client_index; + u32 context; + u32 sa_id; + u32 worker; +}; + +autoreply define ipsec_sad_unbind +{ + u32 client_index; + u32 context; + u32 sa_id; +}; + +/** \brief An API to update the tunnel parameters and the ports associated with an SA + + Used in the NAT-T case when the NAT data changes + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sa_id - the id of the SA to update + @param is_tun - update the tunnel if non-zero, else update only the ports + @param tunnel - sender context, to match reply w/ request + @param udp_src_port - new src port for NAT-T. Used if different from 0xffff + @param udp_dst_port - new dst port for NAT-T. 
Used if different from 0xffff + */ +autoreply define ipsec_sad_entry_update +{ + u32 client_index; + u32 context; + u32 sad_id; + bool is_tun; + vl_api_tunnel_t tunnel; + u16 udp_src_port [default=0xffff]; + u16 udp_dst_port [default=0xffff]; +}; + define ipsec_sad_entry_add_del_reply { option deprecated; + u32 context; i32 retval; u32 stat_index; @@ -210,6 +269,8 @@ define ipsec_sad_entry_add_del_reply define ipsec_sad_entry_add_del_v2_reply { + option deprecated; + u32 context; i32 retval; u32 stat_index; @@ -221,6 +282,7 @@ define ipsec_sad_entry_add_del_v3_reply i32 retval; u32 stat_index; }; + define ipsec_sad_entry_add_reply { u32 context; @@ -228,6 +290,13 @@ define ipsec_sad_entry_add_reply u32 stat_index; }; +define ipsec_sad_entry_add_v2_reply +{ + u32 context; + i32 retval; + u32 stat_index; +}; + /** \brief Add or Update Protection for a tunnel with IPSEC Tunnel protection directly associates an SA with all packets @@ -391,12 +460,15 @@ define ipsec_itf_details define ipsec_sa_dump { option deprecated; + u32 client_index; u32 context; u32 sa_id; }; define ipsec_sa_v2_dump { + option deprecated; + u32 client_index; u32 context; u32 sa_id; @@ -407,6 +479,18 @@ define ipsec_sa_v3_dump u32 context; u32 sa_id; }; +define ipsec_sa_v4_dump +{ + u32 client_index; + u32 context; + u32 sa_id; +}; +define ipsec_sa_v5_dump +{ + u32 client_index; + u32 context; + u32 sa_id; +}; /** \brief IPsec security association database response @param context - sender context which was passed in the request @@ -422,6 +506,7 @@ define ipsec_sa_v3_dump */ define ipsec_sa_details { option deprecated; + u32 context; vl_api_ipsec_sad_entry_t entry; @@ -434,6 +519,8 @@ define ipsec_sa_details { u32 stat_index; }; define ipsec_sa_v2_details { + option deprecated; + u32 context; vl_api_ipsec_sad_entry_v2_t entry; @@ -456,6 +543,28 @@ define ipsec_sa_v3_details { u32 stat_index; }; +define ipsec_sa_v4_details { + u32 context; + vl_api_ipsec_sad_entry_v3_t entry; + + 
vl_api_interface_index_t sw_if_index; + u64 seq_outbound; + u64 last_seq_inbound; + u64 replay_window; + u32 thread_index; + u32 stat_index; +}; +define ipsec_sa_v5_details { + u32 context; + vl_api_ipsec_sad_entry_v4_t entry; + + vl_api_interface_index_t sw_if_index; + u64 seq_outbound; + u64 last_seq_inbound; + u64 replay_window; + u32 thread_index; + u32 stat_index; +}; /** \brief Dump IPsec backends @param client_index - opaque cookie to identify the sender @@ -584,6 +693,12 @@ counters esp_decrypt { units "packets"; description "unsupported payload"; }; + no_avail_frame { + severity error; + type counter64; + units "packets"; + description "no available frame (packet dropped)"; + }; }; counters esp_encrypt { @@ -641,6 +756,12 @@ counters esp_encrypt { units "packets"; description "no Encrypting SA (packet dropped)"; }; + no_avail_frame { + severity error; + type counter64; + units "packets"; + description "no available frame (packet dropped)"; + }; }; counters ah_encrypt { diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c index 3ea2e4d62df..f8c39c327ed 100644 --- a/src/vnet/ipsec/ipsec.c +++ b/src/vnet/ipsec/ipsec.c @@ -275,8 +275,7 @@ ipsec_register_esp_backend ( const char *esp6_decrypt_node_name, const char *esp6_decrypt_tun_node_name, const char *esp_mpls_encrypt_node_tun_name, check_support_cb_t esp_check_support_cb, - add_del_sa_sess_cb_t esp_add_del_sa_sess_cb, - enable_disable_cb_t enable_disable_cb) + add_del_sa_sess_cb_t esp_add_del_sa_sess_cb) { ipsec_esp_backend_t *b; @@ -307,7 +306,6 @@ ipsec_register_esp_backend ( b->check_support_cb = esp_check_support_cb; b->add_del_sa_sess_cb = esp_add_del_sa_sess_cb; - b->enable_disable_cb = enable_disable_cb; return b - im->esp_backends; } @@ -358,18 +356,6 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx) if (pool_is_free_index (im->esp_backends, backend_idx)) return VNET_API_ERROR_INVALID_VALUE; - /* disable current backend */ - if (im->esp_current_backend != ~0) - { - 
ipsec_esp_backend_t *cb = pool_elt_at_index (im->esp_backends, - im->esp_current_backend); - if (cb->enable_disable_cb) - { - if ((cb->enable_disable_cb) (0) != 0) - return -1; - } - } - ipsec_esp_backend_t *b = pool_elt_at_index (im->esp_backends, backend_idx); im->esp_current_backend = backend_idx; im->esp4_encrypt_node_index = b->esp4_encrypt_node_index; @@ -388,11 +374,6 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx) im->esp6_encrypt_tun_node_index = b->esp6_encrypt_tun_node_index; im->esp_mpls_encrypt_tun_node_index = b->esp_mpls_encrypt_tun_node_index; - if (b->enable_disable_cb) - { - if ((b->enable_disable_cb) (1) != 0) - return -1; - } return 0; } @@ -402,16 +383,11 @@ ipsec_set_async_mode (u32 is_enabled) ipsec_main_t *im = &ipsec_main; ipsec_sa_t *sa; - vnet_crypto_request_async_mode (is_enabled); - im->async_mode = is_enabled; /* change SA crypto op data */ pool_foreach (sa, ipsec_sa_pool) - { - sa->crypto_op_data = - (is_enabled ? sa->async_op_data.data : sa->sync_op_data.data); - } + ipsec_sa_set_async_mode (sa, is_enabled); } static void @@ -485,7 +461,7 @@ ipsec_init (vlib_main_t * vm) vm, im, "crypto engine backend", "esp4-encrypt", "esp4-encrypt-tun", "esp4-decrypt", "esp4-decrypt-tun", "esp6-encrypt", "esp6-encrypt-tun", "esp6-decrypt", "esp6-decrypt-tun", "esp-mpls-encrypt-tun", - ipsec_check_esp_support, NULL, crypto_dispatch_enable_disable); + ipsec_check_esp_support, NULL); im->esp_default_backend = idx; rv = ipsec_select_esp_backend (im, idx); @@ -586,6 +562,30 @@ ipsec_init (vlib_main_t * vm) a->iv_size = 8; a->icv_size = 16; + a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128; + a->enc_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_ENC; + a->dec_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_DEC; + a->alg = VNET_CRYPTO_ALG_AES_128_GCM; + a->iv_size = 8; + a->block_align = 1; + a->icv_size = 16; + + a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192; + a->enc_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_ENC; + a->dec_op_id = 
VNET_CRYPTO_OP_AES_192_NULL_GMAC_DEC; + a->alg = VNET_CRYPTO_ALG_AES_192_GCM; + a->iv_size = 8; + a->block_align = 1; + a->icv_size = 16; + + a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256; + a->enc_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_ENC; + a->dec_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_DEC; + a->alg = VNET_CRYPTO_ALG_AES_256_GCM; + a->iv_size = 8; + a->block_align = 1; + a->icv_size = 16; + vec_validate (im->integ_algs, IPSEC_INTEG_N_ALG - 1); ipsec_main_integ_alg_t *i; diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 69aa661683a..4aa09d7560e 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -93,8 +93,6 @@ typedef struct add_del_sa_sess_cb_t add_del_sa_sess_cb; /* check support function */ check_support_cb_t check_support_cb; - /* enable or disable function */ - enable_disable_cb_t enable_disable_cb; u32 esp4_encrypt_node_index; u32 esp4_decrypt_node_index; u32 esp4_encrypt_next_index; @@ -347,6 +345,23 @@ ipsec_spinlock_unlock (i32 *lock) clib_atomic_release (lock); } +/* Special case to drop or hand off packets for sync/async modes. + * + * Different than sync mode, async mode only enqueue drop or hand-off packets + * to next nodes. 
+ */ +always_inline void +ipsec_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, + u32 thread_index, u32 err, u32 ipsec_sa_err, u16 index, + u16 *nexts, u16 drop_next, u32 sa_index) +{ + nexts[index] = drop_next; + b->error = node->errors[err]; + if (PREDICT_TRUE (ipsec_sa_err != ~0)) + vlib_increment_simple_counter (&ipsec_sa_err_counters[ipsec_sa_err], + thread_index, sa_index, 1); +} + u32 ipsec_register_ah_backend (vlib_main_t * vm, ipsec_main_t * im, const char *name, const char *ah4_encrypt_node_name, @@ -364,8 +379,7 @@ u32 ipsec_register_esp_backend ( const char *esp6_decrypt_node_name, const char *esp6_decrypt_tun_node_name, const char *esp_mpls_encrypt_tun_node_name, check_support_cb_t esp_check_support_cb, - add_del_sa_sess_cb_t esp_add_del_sa_sess_cb, - enable_disable_cb_t enable_disable_cb); + add_del_sa_sess_cb_t esp_add_del_sa_sess_cb); int ipsec_select_ah_backend (ipsec_main_t * im, u32 ah_backend_idx); int ipsec_select_esp_backend (ipsec_main_t * im, u32 esp_backend_idx); diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 767dc82dca7..21216b1a614 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -150,12 +150,10 @@ send_ipsec_tunnel_protect_details (index_t itpi, void *arg) sa = ipsec_sa_get (itp->itp_out_sa); mp->tun.sa_out = htonl (sa->id); mp->tun.n_sa_in = itp->itp_n_sa_in; - /* *INDENT-OFF* */ FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa, ({ mp->tun.sa_in[ii++] = htonl (sa->id); })); - /* *INDENT-ON* */ vl_api_send_msg (ctx->reg, (u8 *) mp); @@ -264,12 +262,10 @@ static void vl_api_ipsec_spd_entry_add_del_t_handler goto out; out: - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_REPLY, ({ rmp->stat_index = ntohl(stat_index); })); - /* *INDENT-ON* */ } static void @@ -327,7 +323,7 @@ vl_api_ipsec_spd_entry_add_del_v2_t_handler ( goto out; out: - REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_REPLY, + REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_V2_REPLY, ({ rmp->stat_index = 
ntohl (stat_index); })); } @@ -382,18 +378,16 @@ static void vl_api_ipsec_sad_entry_add_del_t_handler ip_address_decode2 (&mp->entry.tunnel_src, &tun.t_src); ip_address_decode2 (&mp->entry.tunnel_dst, &tun.t_dst); - rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &crypto_key, - integ_alg, &integ_key, flags, mp->entry.salt, - htons (mp->entry.udp_src_port), - htons (mp->entry.udp_dst_port), &tun, &sa_index); + rv = ipsec_sa_add_and_lock ( + id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags, + mp->entry.salt, htons (mp->entry.udp_src_port), + htons (mp->entry.udp_dst_port), 0, &tun, &sa_index); out: - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_REPLY, { rmp->stat_index = htonl (sa_index); }); - /* *INDENT-ON* */ } static void vl_api_ipsec_sad_entry_add_del_v2_t_handler @@ -456,18 +450,16 @@ static void vl_api_ipsec_sad_entry_add_del_v2_t_handler ip_address_decode2 (&mp->entry.tunnel_src, &tun.t_src); ip_address_decode2 (&mp->entry.tunnel_dst, &tun.t_dst); - rv = ipsec_sa_add_and_lock ( - id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags, - mp->entry.salt, htons (mp->entry.udp_src_port), - htons (mp->entry.udp_dst_port), &tun, &sa_index); + rv = ipsec_sa_add_and_lock ( + id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags, + mp->entry.salt, htons (mp->entry.udp_src_port), + htons (mp->entry.udp_dst_port), 0, &tun, &sa_index); out: - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_V2_REPLY, { rmp->stat_index = htonl (sa_index); }); - /* *INDENT-ON* */ } static int @@ -514,10 +506,10 @@ ipsec_sad_entry_add_v3 (const vl_api_ipsec_sad_entry_v3_t *entry, ipsec_key_decode (&entry->crypto_key, &crypto_key); ipsec_key_decode (&entry->integrity_key, &integ_key); - return ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &crypto_key, - integ_alg, &integ_key, flags, entry->salt, - htons (entry->udp_src_port), - htons (entry->udp_dst_port), &tun, sa_index); + return 
ipsec_sa_add_and_lock ( + id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags, + entry->salt, htons (entry->udp_src_port), htons (entry->udp_dst_port), 0, + &tun, sa_index); } static void @@ -543,6 +535,56 @@ vl_api_ipsec_sad_entry_add_del_v3_t_handler ( { rmp->stat_index = htonl (sa_index); }); } +static int +ipsec_sad_entry_add_v4 (const vl_api_ipsec_sad_entry_v4_t *entry, + u32 *sa_index) +{ + ipsec_key_t crypto_key, integ_key; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; + ipsec_protocol_t proto; + ipsec_sa_flags_t flags; + u32 id, spi; + tunnel_t tun = { 0 }; + int rv; + + id = ntohl (entry->sad_id); + spi = ntohl (entry->spi); + + rv = ipsec_proto_decode (entry->protocol, &proto); + + if (rv) + return rv; + + rv = ipsec_crypto_algo_decode (entry->crypto_algorithm, &crypto_alg); + + if (rv) + return rv; + + rv = ipsec_integ_algo_decode (entry->integrity_algorithm, &integ_alg); + + if (rv) + return rv; + + flags = ipsec_sa_flags_decode (entry->flags); + + if (flags & IPSEC_SA_FLAG_IS_TUNNEL) + { + rv = tunnel_decode (&entry->tunnel, &tun); + + if (rv) + return rv; + } + + ipsec_key_decode (&entry->crypto_key, &crypto_key); + ipsec_key_decode (&entry->integrity_key, &integ_key); + + return ipsec_sa_add_and_lock ( + id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags, + entry->salt, htons (entry->udp_src_port), htons (entry->udp_dst_port), + ntohl (entry->anti_replay_window_size), &tun, sa_index); +} + static void vl_api_ipsec_sad_entry_del_t_handler (vl_api_ipsec_sad_entry_del_t *mp) { @@ -568,6 +610,74 @@ vl_api_ipsec_sad_entry_add_t_handler (vl_api_ipsec_sad_entry_add_t *mp) } static void +vl_api_ipsec_sad_entry_add_v2_t_handler (vl_api_ipsec_sad_entry_add_v2_t *mp) +{ + vl_api_ipsec_sad_entry_add_reply_t *rmp; + u32 sa_index = ~0; + int rv; + + rv = ipsec_sad_entry_add_v4 (&mp->entry, &sa_index); + + REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_V2_REPLY, + { rmp->stat_index = htonl (sa_index); }); +} + 
+static void +vl_api_ipsec_sad_entry_update_t_handler (vl_api_ipsec_sad_entry_update_t *mp) +{ + vl_api_ipsec_sad_entry_update_reply_t *rmp; + u32 id; + tunnel_t tun = { 0 }; + int rv; + + id = ntohl (mp->sad_id); + + if (mp->is_tun) + { + rv = tunnel_decode (&mp->tunnel, &tun); + + if (rv) + goto out; + } + + rv = ipsec_sa_update (id, htons (mp->udp_src_port), htons (mp->udp_dst_port), + &tun, mp->is_tun); + +out: + REPLY_MACRO (VL_API_IPSEC_SAD_ENTRY_UPDATE_REPLY); +} + +static void +vl_api_ipsec_sad_bind_t_handler (vl_api_ipsec_sad_bind_t *mp) +{ + vl_api_ipsec_sad_bind_reply_t *rmp; + u32 sa_id; + u32 worker; + int rv; + + sa_id = ntohl (mp->sa_id); + worker = ntohl (mp->worker); + + rv = ipsec_sa_bind (sa_id, worker, true /* bind */); + + REPLY_MACRO (VL_API_IPSEC_SAD_BIND_REPLY); +} + +static void +vl_api_ipsec_sad_unbind_t_handler (vl_api_ipsec_sad_unbind_t *mp) +{ + vl_api_ipsec_sad_unbind_reply_t *rmp; + u32 sa_id; + int rv; + + sa_id = ntohl (mp->sa_id); + + rv = ipsec_sa_bind (sa_id, ~0, false /* bind */); + + REPLY_MACRO (VL_API_IPSEC_SAD_UNBIND_REPLY); +} + +static void send_ipsec_spds_details (ipsec_spd_t * spd, vl_api_registration_t * reg, u32 context) { @@ -721,12 +831,10 @@ vl_api_ipsec_spd_interface_dump_t_handler (vl_api_ipsec_spd_interface_dump_t * if (mp->spd_index_valid) { spd_index = ntohl (mp->spd_index); - /* *INDENT-OFF* */ hash_foreach(k, v, im->spd_index_by_sw_if_index, ({ if (v == spd_index) send_ipsec_spd_interface_details(reg, v, k, mp->context); })); - /* *INDENT-ON* */ } else { @@ -749,12 +857,10 @@ vl_api_ipsec_itf_create_t_handler (vl_api_ipsec_itf_create_t * mp) if (!rv) rv = ipsec_itf_create (ntohl (mp->itf.user_instance), mode, &sw_if_index); - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IPSEC_ITF_CREATE_REPLY, ({ rmp->sw_if_index = htonl (sw_if_index); })); - /* *INDENT-ON* */ } static void @@ -898,7 +1004,10 @@ send_ipsec_sa_details (ipsec_sa_t * sa, void *arg) mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); } 
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) - mp->replay_window = clib_host_to_net_u64 (sa->replay_window); + { + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); + } mp->stat_index = clib_host_to_net_u32 (sa->stat_index); @@ -985,7 +1094,10 @@ send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg) mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); } if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) - mp->replay_window = clib_host_to_net_u64 (sa->replay_window); + { + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); + } mp->stat_index = clib_host_to_net_u32 (sa->stat_index); @@ -1065,7 +1177,10 @@ send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg) mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); } if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) - mp->replay_window = clib_host_to_net_u64 (sa->replay_window); + { + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); + } mp->stat_index = clib_host_to_net_u32 (sa->stat_index); @@ -1091,8 +1206,179 @@ vl_api_ipsec_sa_v3_dump_t_handler (vl_api_ipsec_sa_v3_dump_t *mp) ipsec_sa_walk (send_ipsec_sa_v3_details, &ctx); } +static walk_rc_t +send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg) +{ + ipsec_dump_walk_ctx_t *ctx = arg; + vl_api_ipsec_sa_v4_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + clib_memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_SA_V4_DETAILS); + mp->context = ctx->context; + + mp->entry.sad_id = htonl (sa->id); + mp->entry.spi = htonl (sa->spi); + mp->entry.protocol = ipsec_proto_encode (sa->protocol); + + mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg); + ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key); + + mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg); + ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key); + + mp->entry.flags = ipsec_sad_flags_encode (sa); + 
mp->entry.salt = clib_host_to_net_u32 (sa->salt); + + if (ipsec_sa_is_set_IS_PROTECT (sa)) + { + ipsec_sa_dump_match_ctx_t ctx = { + .sai = sa - ipsec_sa_pool, + .sw_if_index = ~0, + }; + ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx); + + mp->sw_if_index = htonl (ctx.sw_if_index); + } + else + mp->sw_if_index = ~0; + + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + tunnel_encode (&sa->tunnel, &mp->entry.tunnel); + + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + { + mp->entry.udp_src_port = sa->udp_hdr.src_port; + mp->entry.udp_dst_port = sa->udp_hdr.dst_port; + } + + mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); + mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq)); + if (ipsec_sa_is_set_USE_ESN (sa)) + { + mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); + mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); + } + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) + { + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); + } + + mp->thread_index = clib_host_to_net_u32 (sa->thread_index); + mp->stat_index = clib_host_to_net_u32 (sa->stat_index); + + vl_api_send_msg (ctx->reg, (u8 *) mp); + + return (WALK_CONTINUE); +} + +static void +vl_api_ipsec_sa_v4_dump_t_handler (vl_api_ipsec_sa_v4_dump_t *mp) +{ + vl_api_registration_t *reg; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + ipsec_dump_walk_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + ipsec_sa_walk (send_ipsec_sa_v4_details, &ctx); +} + +static walk_rc_t +send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg) +{ + ipsec_dump_walk_ctx_t *ctx = arg; + vl_api_ipsec_sa_v5_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + clib_memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_SA_V5_DETAILS); + mp->context = ctx->context; + + mp->entry.sad_id = htonl (sa->id); + mp->entry.spi = htonl (sa->spi); + mp->entry.protocol = ipsec_proto_encode 
(sa->protocol); + + mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg); + ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key); + + mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg); + ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key); + + mp->entry.flags = ipsec_sad_flags_encode (sa); + mp->entry.salt = clib_host_to_net_u32 (sa->salt); + + if (ipsec_sa_is_set_IS_PROTECT (sa)) + { + ipsec_sa_dump_match_ctx_t ctx = { + .sai = sa - ipsec_sa_pool, + .sw_if_index = ~0, + }; + ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx); + + mp->sw_if_index = htonl (ctx.sw_if_index); + } + else + mp->sw_if_index = ~0; + + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + tunnel_encode (&sa->tunnel, &mp->entry.tunnel); + + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + { + mp->entry.udp_src_port = sa->udp_hdr.src_port; + mp->entry.udp_dst_port = sa->udp_hdr.dst_port; + } + + mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); + mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq)); + if (ipsec_sa_is_set_USE_ESN (sa)) + { + mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); + mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); + } + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) + { + mp->replay_window = + clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa)); + + mp->entry.anti_replay_window_size = + clib_host_to_net_u32 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa)); + } + + mp->thread_index = clib_host_to_net_u32 (sa->thread_index); + mp->stat_index = clib_host_to_net_u32 (sa->stat_index); + + vl_api_send_msg (ctx->reg, (u8 *) mp); + + return (WALK_CONTINUE); +} + +static void +vl_api_ipsec_sa_v5_dump_t_handler (vl_api_ipsec_sa_v5_dump_t *mp) +{ + vl_api_registration_t *reg; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + ipsec_dump_walk_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + ipsec_sa_walk (send_ipsec_sa_v5_details, &ctx); +} + static 
void -vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp) +vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t *mp) { vl_api_registration_t *rp; ipsec_main_t *im = &ipsec_main; @@ -1108,7 +1394,6 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp) ipsec_ah_backend_t *ab; ipsec_esp_backend_t *eb; - /* *INDENT-OFF* */ pool_foreach (ab, im->ah_backends) { vl_api_ipsec_backend_details_t *mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); @@ -1133,7 +1418,6 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp) mp->active = mp->index == im->esp_current_backend ? 1 : 0; vl_api_send_msg (rp, (u8 *)mp); } - /* *INDENT-ON* */ } static void diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c index 8b436b6b805..07d9df8f204 100644 --- a/src/vnet/ipsec/ipsec_cli.c +++ b/src/vnet/ipsec/ipsec_cli.c @@ -71,14 +71,12 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_spd_command, static) = { .path = "set interface ipsec spd", .short_help = "set interface ipsec spd <int> <id>", .function = set_interface_spd_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * ipsec_sa_add_del_command_fn (vlib_main_t * vm, @@ -88,6 +86,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; ipsec_crypto_alg_t crypto_alg; ipsec_integ_alg_t integ_alg; + u32 anti_replay_window_size; ipsec_protocol_t proto; ipsec_sa_flags_t flags; clib_error_t *error; @@ -105,6 +104,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, is_add = 0; flags = IPSEC_SA_FLAG_NONE; proto = IPSEC_PROTOCOL_ESP; + anti_replay_window_size = 0; integ_alg = IPSEC_INTEG_ALG_NONE; crypto_alg = IPSEC_CRYPTO_ALG_NONE; udp_src = udp_dst = IPSEC_UDP_PORT_NONE; @@ -153,6 +153,9 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, udp_src = i; else if (unformat (line_input, "udp-dst-port %d", &i)) udp_dst = i; + else if (unformat (line_input, 
"anti-replay-size %d", + &anti_replay_window_size)) + flags |= IPSEC_SA_FLAG_USE_ANTI_REPLAY; else if (unformat (line_input, "inbound")) flags |= IPSEC_SA_FLAG_IS_INBOUND; else if (unformat (line_input, "use-anti-replay")) @@ -184,9 +187,10 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, error = clib_error_return (0, "missing spi"); goto done; } - rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg, - &ik, flags, clib_host_to_net_u32 (salt), - udp_src, udp_dst, &tun, &sai); + rv = + ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg, &ik, + flags, clib_host_to_net_u32 (salt), udp_src, + udp_dst, anti_replay_window_size, &tun, &sai); } else { @@ -202,14 +206,77 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_sa_add_del_command, static) = { .path = "ipsec sa", .short_help = "ipsec sa [add|del]", .function = ipsec_sa_add_del_command_fn, }; -/* *INDENT-ON* */ + +static clib_error_t * +ipsec_sa_bind_cli (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 id = ~0; + u32 worker = ~0; + bool bind = 1; + int rv; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "unbind")) + bind = 0; + else if (id == ~0 && unformat (line_input, "%u", &id)) + ; + else if (unformat (line_input, "%u", &worker)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (id == ~0) + { + error = clib_error_return (0, "please specify SA ID"); + goto done; + } + + if (bind && ~0 == worker) + { + error = clib_error_return (0, "please specify worker to bind to"); + goto done; + } + + rv = ipsec_sa_bind (id, worker, bind); + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "please specify a 
valid SA ID"); + break; + case VNET_API_ERROR_INVALID_WORKER: + error = clib_error_return (0, "please specify a valid worker index"); + break; + } + +done: + unformat_free (line_input); + + return error; +} + +VLIB_CLI_COMMAND (ipsec_sa_bind_cmd, static) = { + .path = "ipsec sa bind", + .short_help = "ipsec sa [unbind] <sa-id> <worker>", + .function = ipsec_sa_bind_cli, +}; static clib_error_t * ipsec_spd_add_del_command_fn (vlib_main_t * vm, @@ -254,14 +321,12 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_spd_add_del_command, static) = { .path = "ipsec spd", .short_help = "ipsec spd [add|del] <id>", .function = ipsec_spd_add_del_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * @@ -396,27 +461,23 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_policy_add_del_command, static) = { .path = "ipsec policy", .short_help = "ipsec policy [add|del] spd <id> priority <n> ", .function = ipsec_policy_add_del_command_fn, }; -/* *INDENT-ON* */ static void ipsec_sa_show_all (vlib_main_t * vm, ipsec_main_t * im, u8 detail) { u32 sai; - /* *INDENT-OFF* */ pool_foreach_index (sai, ipsec_sa_pool) { vlib_cli_output (vm, "%U", format_ipsec_sa, sai, (detail ? 
IPSEC_FORMAT_DETAIL : IPSEC_FORMAT_BRIEF)); } - /* *INDENT-ON* */ } static void @@ -424,7 +485,6 @@ ipsec_spd_show_all (vlib_main_t * vm, ipsec_main_t * im) { u32 spdi; - /* *INDENT-OFF* */ pool_foreach_index (spdi, im->spds) { vlib_cli_output(vm, "%U", format_ipsec_spd, spdi); } @@ -437,7 +497,6 @@ ipsec_spd_show_all (vlib_main_t * vm, ipsec_main_t * im) { vlib_cli_output (vm, "%U", format_ipsec_in_spd_flow_cache); } - /* *INDENT-ON* */ } static void @@ -448,14 +507,12 @@ ipsec_spd_bindings_show_all (vlib_main_t * vm, ipsec_main_t * im) vlib_cli_output (vm, "SPD Bindings:"); - /* *INDENT-OFF* */ hash_foreach(sw_if_index, spd_id, im->spd_index_by_sw_if_index, ({ spd = pool_elt_at_index (im->spds, spd_id); vlib_cli_output (vm, " %d -> %U", spd->id, format_vnet_sw_if_index_name, im->vnet_main, sw_if_index); })); - /* *INDENT-ON* */ } static walk_rc_t @@ -489,13 +546,11 @@ show_ipsec_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ipsec_command, static) = { .path = "show ipsec all", .short_help = "show ipsec all", .function = show_ipsec_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_ipsec_sa_command_fn (vlib_main_t * vm, @@ -540,12 +595,10 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm, if (~0 == sai) { - /* *INDENT-OFF* */ pool_foreach_index (sai, ipsec_sa_pool) { ipsec_sa_clear (sai); } - /* *INDENT-ON* */ } else { @@ -558,7 +611,6 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ipsec_sa_command, static) = { .path = "show ipsec sa", .short_help = "show ipsec sa [index]", @@ -570,7 +622,6 @@ VLIB_CLI_COMMAND (clear_ipsec_sa_command, static) = { .short_help = "clear ipsec sa [index]", .function = clear_ipsec_sa_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_ipsec_spd_command_fn (vlib_main_t * vm, @@ -600,13 +651,11 @@ show_ipsec_spd_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ipsec_spd_command, static) = { 
.path = "show ipsec spd", .short_help = "show ipsec spd [index]", .function = show_ipsec_spd_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_ipsec_tunnel_command_fn (vlib_main_t * vm, @@ -618,13 +667,11 @@ show_ipsec_tunnel_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ipsec_tunnel_command, static) = { .path = "show ipsec tunnel", .short_help = "show ipsec tunnel", .function = show_ipsec_tunnel_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * ipsec_show_backends_command_fn (vlib_main_t * vm, @@ -639,7 +686,6 @@ ipsec_show_backends_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "IPsec AH backends available:"); u8 *s = format (NULL, "%=25s %=25s %=10s\n", "Name", "Index", "Active"); ipsec_ah_backend_t *ab; - /* *INDENT-OFF* */ pool_foreach (ab, im->ah_backends) { s = format (s, "%=25s %=25u %=10s\n", ab->name, ab - im->ah_backends, ab - im->ah_backends == im->ah_current_backend ? "yes" : "no"); @@ -655,13 +701,11 @@ ipsec_show_backends_command_fn (vlib_main_t * vm, s = format (s, " dec6 %s (next %d)\n", n->name, ab->ah6_decrypt_next_index); } } - /* *INDENT-ON* */ vlib_cli_output (vm, "%v", s); vec_set_len (s, 0); vlib_cli_output (vm, "IPsec ESP backends available:"); s = format (s, "%=25s %=25s %=10s\n", "Name", "Index", "Active"); ipsec_esp_backend_t *eb; - /* *INDENT-OFF* */ pool_foreach (eb, im->esp_backends) { s = format (s, "%=25s %=25u %=10s\n", eb->name, eb - im->esp_backends, eb - im->esp_backends == im->esp_current_backend ? 
"yes" @@ -678,20 +722,17 @@ ipsec_show_backends_command_fn (vlib_main_t * vm, s = format (s, " dec6 %s (next %d)\n", n->name, eb->esp6_decrypt_next_index); } } - /* *INDENT-ON* */ vlib_cli_output (vm, "%v", s); vec_free (s); return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_show_backends_command, static) = { .path = "show ipsec backends", .short_help = "show ipsec backends", .function = ipsec_show_backends_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * ipsec_select_backend_command_fn (vlib_main_t * vm, @@ -753,14 +794,12 @@ ipsec_select_backend_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_select_backend_command, static) = { .path = "ipsec select backend", .short_help = "ipsec select backend <ah|esp> <backend index>", .function = ipsec_select_backend_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * clear_ipsec_counters_command_fn (vlib_main_t * vm, @@ -769,18 +808,17 @@ clear_ipsec_counters_command_fn (vlib_main_t * vm, { vlib_clear_combined_counters (&ipsec_spd_policy_counters); vlib_clear_combined_counters (&ipsec_sa_counters); - vlib_clear_simple_counters (&ipsec_sa_lost_counters); + for (int i = 0; i < IPSEC_SA_N_ERRORS; i++) + vlib_clear_simple_counters (&ipsec_sa_err_counters[i]); return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_ipsec_counters_command, static) = { .path = "clear ipsec counters", .short_help = "clear ipsec counters", .function = clear_ipsec_counters_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * ipsec_tun_protect_cmd (vlib_main_t * vm, @@ -830,7 +868,6 @@ ipsec_tun_protect_cmd (vlib_main_t * vm, /** * Protect tunnel with IPSEC */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_tun_protect_cmd_node, static) = { .path = "ipsec tunnel protect", @@ -838,7 +875,6 @@ VLIB_CLI_COMMAND (ipsec_tun_protect_cmd_node, static) = .short_help = "ipsec tunnel protect <interface> input-sa <SA> output-sa <SA> [add|del]", // this is not MP safe }; -/* *INDENT-ON* */ static 
clib_error_t * @@ -853,14 +889,12 @@ ipsec_tun_protect_show (vlib_main_t * vm, /** * show IPSEC tunnel protection */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_tun_protect_show_node, static) = { .path = "show ipsec protect", .function = ipsec_tun_protect_show, .short_help = "show ipsec protect", }; -/* *INDENT-ON* */ static int ipsec_tun_protect4_hash_show_one (clib_bihash_kv_8_16_t * kv, void *arg) @@ -909,14 +943,12 @@ ipsec_tun_protect_hash_show (vlib_main_t * vm, /** * show IPSEC tunnel protection hash tables */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_tun_protect_hash_show_node, static) = { .path = "show ipsec protect-hash", .function = ipsec_tun_protect_hash_show, .short_help = "show ipsec protect-hash", }; -/* *INDENT-ON* */ clib_error_t * ipsec_cli_init (vlib_main_t * vm) @@ -953,13 +985,11 @@ set_async_mode_command_fn (vlib_main_t * vm, unformat_input_t * input, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_async_mode_command, static) = { .path = "set ipsec async mode", .short_help = "set ipsec async mode on|off", .function = set_async_mode_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c index 9204b1c090d..e421a0d96b4 100644 --- a/src/vnet/ipsec/ipsec_format.c +++ b/src/vnet/ipsec/ipsec_format.c @@ -210,11 +210,145 @@ format_ipsec_policy (u8 *s, va_list *args) } u8 * -format_ipsec_policy_fp (u8 *s, va_list *args) +format_ipsec_fp_policy (u8 *s, va_list *args) { return format_ipsec_policy_with_suffix (s, args, (u8 *) "<fast-path>"); } +/** + * @brief Context when walking the fp bihash table. We need to filter + * only those policies that are of given type as we walk the table. 
+ */ +typedef struct ipsec_spd_policy_ctx_t_ +{ + u32 *policies; + ipsec_spd_policy_type_t t; +} ipsec_fp_walk_ctx_t; + +static int +ipsec_fp_table_walk_ip4_cb (clib_bihash_kv_16_8_t *kvp, void *arg) +{ + ipsec_fp_walk_ctx_t *ctx = (ipsec_fp_walk_ctx_t *) arg; + ipsec_main_t *im = &ipsec_main; + ipsec_policy_t *p; + + ipsec_fp_lookup_value_t *val = (ipsec_fp_lookup_value_t *) &kvp->value; + + u32 *policy_id; + + vec_foreach (policy_id, val->fp_policies_ids) + { + p = pool_elt_at_index (im->policies, *policy_id); + if (p->type == ctx->t) + vec_add1 (ctx->policies, *policy_id); + } + + return BIHASH_WALK_CONTINUE; +} + +static int +ipsec_fp_table_walk_ip6_cb (clib_bihash_kv_40_8_t *kvp, void *arg) +{ + ipsec_fp_walk_ctx_t *ctx = (ipsec_fp_walk_ctx_t *) arg; + ipsec_main_t *im = &ipsec_main; + ipsec_policy_t *p; + + ipsec_fp_lookup_value_t *val = (ipsec_fp_lookup_value_t *) &kvp->value; + + u32 *policy_id; + + vec_foreach (policy_id, val->fp_policies_ids) + { + p = pool_elt_at_index (im->policies, *policy_id); + if (p->type == ctx->t) + vec_add1 (ctx->policies, *policy_id); + } + + return BIHASH_WALK_CONTINUE; +} + +u8 * +format_ipsec_fp_policies (u8 *s, va_list *args) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_spd_t *spd = va_arg (*args, ipsec_spd_t *); + ipsec_spd_policy_type_t t = va_arg (*args, ipsec_spd_policy_type_t); + u32 *i; + ipsec_fp_walk_ctx_t ctx = { + .policies = 0, + .t = t, + }; + + u32 ip4_in_lookup_hash_idx = spd->fp_spd.ip4_in_lookup_hash_idx; + u32 ip4_out_lookup_hash_idx = spd->fp_spd.ip4_out_lookup_hash_idx; + u32 ip6_in_lookup_hash_idx = spd->fp_spd.ip6_in_lookup_hash_idx; + u32 ip6_out_lookup_hash_idx = spd->fp_spd.ip6_out_lookup_hash_idx; + + switch (t) + { + case IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT: + case IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS: + case IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD: + if (INDEX_INVALID != ip4_in_lookup_hash_idx) + { + clib_bihash_16_8_t *bihash_table = pool_elt_at_index ( + im->fp_ip4_lookup_hashes_pool, 
ip4_in_lookup_hash_idx); + + clib_bihash_foreach_key_value_pair_16_8 ( + bihash_table, ipsec_fp_table_walk_ip4_cb, &ctx); + } + + break; + + case IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT: + case IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS: + case IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD: + if (INDEX_INVALID != ip6_in_lookup_hash_idx) + { + clib_bihash_40_8_t *bihash_table = pool_elt_at_index ( + im->fp_ip6_lookup_hashes_pool, ip6_in_lookup_hash_idx); + + clib_bihash_foreach_key_value_pair_40_8 ( + bihash_table, ipsec_fp_table_walk_ip6_cb, &ctx); + } + + break; + case IPSEC_SPD_POLICY_IP4_OUTBOUND: + if (INDEX_INVALID != ip4_out_lookup_hash_idx) + { + clib_bihash_16_8_t *bihash_table = pool_elt_at_index ( + im->fp_ip4_lookup_hashes_pool, ip4_out_lookup_hash_idx); + + clib_bihash_foreach_key_value_pair_16_8 ( + bihash_table, ipsec_fp_table_walk_ip4_cb, &ctx); + } + + break; + case IPSEC_SPD_POLICY_IP6_OUTBOUND: + if (INDEX_INVALID != ip6_out_lookup_hash_idx) + { + clib_bihash_40_8_t *bihash_table = pool_elt_at_index ( + im->fp_ip6_lookup_hashes_pool, ip6_out_lookup_hash_idx); + + clib_bihash_foreach_key_value_pair_40_8 ( + bihash_table, ipsec_fp_table_walk_ip6_cb, &ctx); + } + + break; + default: + break; + } + + vec_foreach (i, ctx.policies) + { + s = format (s, "\n %U", format_ipsec_fp_policy, *i); + } + + vec_free (ctx.policies); + + return s; +} + u8 * format_ipsec_spd (u8 * s, va_list * args) { @@ -239,10 +373,7 @@ format_ipsec_spd (u8 * s, va_list * args) { \ s = format (s, "\n %U", format_ipsec_policy, *i); \ } \ - vec_foreach (i, spd->fp_spd.fp_policies[IPSEC_SPD_POLICY_##v]) \ - { \ - s = format (s, "\n %U", format_ipsec_policy_fp, *i); \ - } + s = format (s, "\n %U", format_ipsec_fp_policies, spd, IPSEC_SPD_POLICY_##v); foreach_ipsec_spd_policy_type; #undef _ @@ -313,7 +444,7 @@ format_ipsec_sa (u8 * s, va_list * args) u32 sai = va_arg (*args, u32); ipsec_format_flags_t flags = va_arg (*args, ipsec_format_flags_t); vlib_counter_t counts; - counter_t lost; + counter_t 
errors; ipsec_sa_t *sa; if (pool_is_free_index (ipsec_sa_pool, sai)) @@ -335,16 +466,18 @@ format_ipsec_sa (u8 * s, va_list * args) s = format (s, "\n salt 0x%x", clib_net_to_host_u32 (sa->salt)); s = format (s, "\n thread-index:%d", sa->thread_index); s = format (s, "\n seq %u seq-hi %u", sa->seq, sa->seq_hi); - s = format (s, "\n window %U", format_ipsec_replay_window, - sa->replay_window); - s = format (s, "\n crypto alg %U", - format_ipsec_crypto_alg, sa->crypto_alg); + s = format (s, "\n window-size: %llu", + IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa)); + s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window, + ipsec_sa_anti_replay_get_64b_window (sa)); + s = + format (s, "\n crypto alg %U", format_ipsec_crypto_alg, sa->crypto_alg); if (sa->crypto_alg && (flags & IPSEC_FORMAT_INSECURE)) s = format (s, " key %U", format_ipsec_key, &sa->crypto_key); else s = format (s, " key [redacted]"); - s = format (s, "\n integrity alg %U", - format_ipsec_integ_alg, sa->integ_alg); + s = + format (s, "\n integrity alg %U", format_ipsec_integ_alg, sa->integ_alg); if (sa->integ_alg && (flags & IPSEC_FORMAT_INSECURE)) s = format (s, " key %U", format_ipsec_key, &sa->integ_key); else @@ -354,12 +487,17 @@ format_ipsec_sa (u8 * s, va_list * args) clib_host_to_net_u16 (sa->udp_hdr.dst_port)); vlib_get_combined_counter (&ipsec_sa_counters, sai, &counts); - lost = vlib_get_simple_counter (&ipsec_sa_lost_counters, sai); - s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld], lost:[packets:%Ld]", - counts.packets, counts.bytes, lost); + s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld]", counts.packets, + counts.bytes); + s = format (s, "\n SA errors:"); +#define _(index, val, err, desc) \ + errors = vlib_get_simple_counter (&ipsec_sa_err_counters[index], sai); \ + s = format (s, "\n " #desc ":[packets:%Ld]", errors); + foreach_ipsec_sa_err +#undef _ - if (ipsec_sa_is_set_IS_TUNNEL (sa)) - s = format (s, "\n%U", format_tunnel, &sa->tunnel, 3); + if (ipsec_sa_is_set_IS_TUNNEL (sa)) 
s = + format (s, "\n%U", format_tunnel, &sa->tunnel, 3); done: return (s); @@ -411,12 +549,10 @@ format_ipsec_tun_protect (u8 * s, va_list * args) IPSEC_FORMAT_BRIEF); s = format (s, "\n input-sa:"); - /* *INDENT-OFF* */ FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai, ({ s = format (s, "\n %U", format_ipsec_sa, sai, IPSEC_FORMAT_BRIEF); })); - /* *INDENT-ON* */ return (s); } diff --git a/src/vnet/ipsec/ipsec_handoff.c b/src/vnet/ipsec/ipsec_handoff.c index e8daa1a6a23..68a859cf732 100644 --- a/src/vnet/ipsec/ipsec_handoff.c +++ b/src/vnet/ipsec/ipsec_handoff.c @@ -259,7 +259,6 @@ VLIB_NODE_FN (ah6_decrypt_handoff) (vlib_main_t * vm, return ipsec_handoff (vm, node, from_frame, im->ah6_dec_fq_index); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (esp4_encrypt_handoff) = { .name = "esp4-encrypt-handoff", .vector_size = sizeof (u32), @@ -416,7 +415,6 @@ VLIB_REGISTER_NODE (ah6_decrypt_handoff) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c index 62723d4ffa8..6ccc0be2622 100644 --- a/src/vnet/ipsec/ipsec_input.c +++ b/src/vnet/ipsec/ipsec_input.c @@ -153,24 +153,24 @@ ipsec4_input_spd_find_flow_cache_entry (ipsec_main_t *im, u32 sa, u32 da, } always_inline void -ipsec_fp_in_5tuple_from_ip4_range (ipsec_fp_5tuple_t *tuple, u32 la, u32 ra, +ipsec_fp_in_5tuple_from_ip4_range (ipsec_fp_5tuple_t *tuple, u32 sa, u32 da, u32 spi, u8 action) { clib_memset (tuple->l3_zero_pad, 0, sizeof (tuple->l3_zero_pad)); - tuple->laddr.as_u32 = la; - tuple->raddr.as_u32 = ra; + tuple->laddr.as_u32 = da; + tuple->raddr.as_u32 = sa; tuple->spi = spi; tuple->action = action; tuple->is_ipv6 = 0; } always_inline void -ipsec_fp_in_5tuple_from_ip6_range (ipsec_fp_5tuple_t *tuple, ip6_address_t *la, - ip6_address_t *ra, u32 spi, u8 action) +ipsec_fp_in_5tuple_from_ip6_range (ipsec_fp_5tuple_t *tuple, ip6_address_t *sa, + ip6_address_t *da, u32 spi, u8 action) { - clib_memcpy 
(&tuple->ip6_laddr, la, sizeof (ip6_address_t)); - clib_memcpy (&tuple->ip6_raddr, ra, sizeof (ip6_address_t)); + clib_memcpy (&tuple->ip6_laddr, da, sizeof (ip6_address_t)); + clib_memcpy (&tuple->ip6_raddr, sa, sizeof (ip6_address_t)); tuple->spi = spi; tuple->action = action; @@ -273,6 +273,193 @@ ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la, return 0; } +always_inline void +ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, + esp_header_t *esp0, u32 thread_index, + ipsec_spd_t *spd0, vlib_buffer_t **b, + vlib_node_runtime_t *node, u64 *ipsec_bypassed, + u64 *ipsec_dropped, u64 *ipsec_matched, + u64 *ipsec_unprocessed, u16 *next) + +{ + ipsec_policy_t *p0 = NULL; + u32 pi0; + u8 has_space0; + bool search_flow_cache = false; + ipsec_policy_t *policies[1]; + ipsec_fp_5tuple_t tuples[1]; + bool ip_v6 = true; + + /* if flow cache is enabled, first search through flow cache for a + * policy match for either protect, bypass or discard rules, in that + * order. 
if no match is found search_flow_cache is set to false (1) + * and we revert back to linear search + */ + + search_flow_cache = im->input_flow_cache_flag; +udp_or_esp: + + /* SPI ID field in the ESP header MUST NOT be a zero value */ + if (esp0->spi == 0) + { + /* Drop the packet if SPI ID is zero */ + *ipsec_unprocessed += 1; + next[0] = IPSEC_INPUT_NEXT_DROP; + return; + } + + if (im->fp_spd_ipv4_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx)) + { + ipsec_fp_in_5tuple_from_ip4_range (&tuples[0], ip0->src_address.as_u32, + ip0->dst_address.as_u32, + clib_net_to_host_u32 (esp0->spi), + IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT); + ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1); + p0 = policies[0]; + } + else if (search_flow_cache) /* attempt to match policy in flow cache */ + { + p0 = ipsec4_input_spd_find_flow_cache_entry ( + im, ip0->src_address.as_u32, ip0->dst_address.as_u32, + IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT); + } + + else /* linear search if flow cache is not enabled, + or flow cache search just failed */ + { + p0 = ipsec_input_protect_policy_match ( + spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), + clib_net_to_host_u32 (ip0->dst_address.as_u32), + clib_net_to_host_u32 (esp0->spi)); + } + has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) - + (clib_address_t) ip0); + + if (PREDICT_TRUE ((p0 != NULL) & (has_space0))) + { + *ipsec_matched += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->length)); + + vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index; + next[0] = im->esp4_decrypt_next_index; + vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0)); + goto trace0; + } + else + { + p0 = 0; + pi0 = ~0; + } + if (im->fp_spd_ipv4_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx)) + { + tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS; 
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1); + p0 = policies[0]; + } + else if (search_flow_cache) + { + p0 = ipsec4_input_spd_find_flow_cache_entry ( + im, ip0->src_address.as_u32, ip0->dst_address.as_u32, + IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS); + } + + else + { + p0 = ipsec_input_policy_match ( + spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), + clib_net_to_host_u32 (ip0->dst_address.as_u32), + IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS); + } + + if (PREDICT_TRUE ((p0 != NULL))) + { + *ipsec_bypassed += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->length)); + + goto trace0; + } + else + { + p0 = 0; + pi0 = ~0; + }; + if (im->fp_spd_ipv4_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx)) + { + tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD; + ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1); + p0 = policies[0]; + } + else + + if (search_flow_cache) + { + p0 = ipsec4_input_spd_find_flow_cache_entry ( + im, ip0->src_address.as_u32, ip0->dst_address.as_u32, + IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD); + } + + else + { + p0 = ipsec_input_policy_match ( + spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), + clib_net_to_host_u32 (ip0->dst_address.as_u32), + IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD); + } + + if (PREDICT_TRUE ((p0 != NULL))) + { + *ipsec_dropped += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->length)); + + next[0] = IPSEC_INPUT_NEXT_DROP; + goto trace0; + } + else + { + p0 = 0; + pi0 = ~0; + }; + /* flow cache search failed, try again with linear search */ + if (search_flow_cache && p0 == NULL) + { + search_flow_cache = false; + goto udp_or_esp; + } + + /* Drop by default if no match on PROTECT, BYPASS or DISCARD */ + *ipsec_unprocessed += 1; + next[0] = 
IPSEC_INPUT_NEXT_DROP; + +trace0: + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr)); + + tr->proto = ip0->protocol; + tr->sa_id = p0 ? p0->sa_id : ~0; + tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0; + tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0; + tr->spd = spd0->id; + tr->policy_index = pi0; + } +} + always_inline ipsec_policy_t * ipsec6_input_protect_policy_match (ipsec_spd_t * spd, ip6_address_t * sa, @@ -345,9 +532,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, ipsec_policy_t *p0 = NULL; u8 has_space0; bool search_flow_cache = false; - ipsec_policy_t *policies[1]; - ipsec_fp_5tuple_t tuples[1]; - bool ip_v6 = true; if (n_left_from > 2) { @@ -363,189 +547,37 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, ip0 = vlib_buffer_get_current (b[0]); - if (PREDICT_TRUE - (ip0->protocol == IP_PROTOCOL_IPSEC_ESP - || ip0->protocol == IP_PROTOCOL_UDP)) + if (ip0->protocol == IP_PROTOCOL_UDP) { + udp_header_t *udp0 = NULL; + udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_UDP)) - { - /* FIXME Skip, if not a UDP encapsulated packet */ - esp0 = (esp_header_t *) ((u8 *) esp0 + sizeof (udp_header_t)); - } - - // if flow cache is enabled, first search through flow cache for a - // policy match for either protect, bypass or discard rules, in that - // order. 
if no match is found search_flow_cache is set to false (1) - // and we revert back to linear search - search_flow_cache = im->input_flow_cache_flag; - - esp_or_udp: - if (im->fp_spd_ipv4_in_is_enabled && - PREDICT_TRUE (INDEX_INVALID != - spd0->fp_spd.ip4_in_lookup_hash_idx)) - { - ipsec_fp_in_5tuple_from_ip4_range ( - &tuples[0], ip0->src_address.as_u32, ip0->dst_address.as_u32, - clib_net_to_host_u32 (esp0->spi), - IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT); - ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, - policies, 1); - p0 = policies[0]; - } - else if (search_flow_cache) // attempt to match policy in flow cache - { - p0 = ipsec4_input_spd_find_flow_cache_entry ( - im, ip0->src_address.as_u32, ip0->dst_address.as_u32, - IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT); - } - - else // linear search if flow cache is not enabled, - // or flow cache search just failed - { - p0 = ipsec_input_protect_policy_match ( - spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), - clib_net_to_host_u32 (ip0->dst_address.as_u32), - clib_net_to_host_u32 (esp0->spi)); - } - - has_space0 = - vlib_buffer_has_space (b[0], - (clib_address_t) (esp0 + 1) - - (clib_address_t) ip0); - - if (PREDICT_TRUE ((p0 != NULL) & (has_space0))) - { - ipsec_matched += 1; - - pi0 = p0 - im->policies; - vlib_increment_combined_counter - (&ipsec_spd_policy_counters, - thread_index, pi0, 1, clib_net_to_host_u16 (ip0->length)); - - vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index; - next[0] = im->esp4_decrypt_next_index; - vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0)); - goto trace0; - } - else - { - p0 = 0; - pi0 = ~0; - }; - - if (im->fp_spd_ipv4_in_is_enabled && - PREDICT_TRUE (INDEX_INVALID != - spd0->fp_spd.ip4_in_lookup_hash_idx)) - { - tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS; - ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, - policies, 1); - p0 = policies[0]; - } - else if (search_flow_cache) - { - p0 = ipsec4_input_spd_find_flow_cache_entry ( - im, 
ip0->src_address.as_u32, ip0->dst_address.as_u32, - IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS); - } - - else - { - p0 = ipsec_input_policy_match ( - spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), - clib_net_to_host_u32 (ip0->dst_address.as_u32), - IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS); - } - - if (PREDICT_TRUE ((p0 != NULL))) - { - ipsec_bypassed += 1; - - pi0 = p0 - im->policies; - vlib_increment_combined_counter ( - &ipsec_spd_policy_counters, thread_index, pi0, 1, - clib_net_to_host_u16 (ip0->length)); - - goto trace0; - } - else - { - p0 = 0; - pi0 = ~0; - }; - - if (im->fp_spd_ipv4_in_is_enabled && - PREDICT_TRUE (INDEX_INVALID != - spd0->fp_spd.ip4_in_lookup_hash_idx)) - { - tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD; - ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, - policies, 1); - p0 = policies[0]; - } - else - - if (search_flow_cache) - { - p0 = ipsec4_input_spd_find_flow_cache_entry ( - im, ip0->src_address.as_u32, ip0->dst_address.as_u32, - IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD); - } - - else - { - p0 = ipsec_input_policy_match ( - spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), - clib_net_to_host_u32 (ip0->dst_address.as_u32), - IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD); - } - - if (PREDICT_TRUE ((p0 != NULL))) - { - ipsec_dropped += 1; - - pi0 = p0 - im->policies; - vlib_increment_combined_counter ( - &ipsec_spd_policy_counters, thread_index, pi0, 1, - clib_net_to_host_u16 (ip0->length)); - - next[0] = IPSEC_INPUT_NEXT_DROP; - goto trace0; - } - else - { - p0 = 0; - pi0 = ~0; - }; - - // flow cache search failed, try again with linear search - if (search_flow_cache && p0 == NULL) - { - search_flow_cache = false; - goto esp_or_udp; - } - - /* Drop by default if no match on PROTECT, BYPASS or DISCARD */ - ipsec_unprocessed += 1; - next[0] = IPSEC_INPUT_NEXT_DROP; + /* As per rfc3948 in UDP Encapsulated Header, UDP checksum must be + * Zero, and receivers must not depen upon UPD checksum. 
+ * inside ESP header , SPI ID value MUST NOT be a zero value + * */ - trace0: - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) + if (udp0->checksum == 0) { - ipsec_input_trace_t *tr = - vlib_add_trace (vm, node, b[0], sizeof (*tr)); + esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t)); - tr->proto = ip0->protocol; - tr->sa_id = p0 ? p0->sa_id : ~0; - tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0; - tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0; - tr->spd = spd0->id; - tr->policy_index = pi0; + ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, + b, node, &ipsec_bypassed, + &ipsec_dropped, &ipsec_matched, + &ipsec_unprocessed, next); + if (ipsec_bypassed > 0) + goto ipsec_bypassed; } } + else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) + { + esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); + ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, b, + node, &ipsec_bypassed, &ipsec_dropped, + &ipsec_matched, &ipsec_unprocessed, next); + if (ipsec_bypassed > 0) + goto ipsec_bypassed; + } else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH) { ah0 = (ah_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); @@ -687,6 +719,7 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, } else { + ipsec_bypassed: ipsec_unprocessed += 1; } n_left_from -= 1; @@ -718,8 +751,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, return frame->n_vectors; } - -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ipsec4_input_node) = { .name = "ipsec4-input-feature", .vector_size = sizeof (u32), @@ -734,7 +765,6 @@ VLIB_REGISTER_NODE (ipsec4_input_node) = { #undef _ }, }; -/* *INDENT-ON* */ extern vlib_node_registration_t ipsec6_input_node; @@ -916,7 +946,6 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ipsec6_input_node) = { .name = "ipsec6-input-feature", .vector_size = sizeof (u32), @@ -931,7 
+960,6 @@ VLIB_REGISTER_NODE (ipsec6_input_node) = { #undef _ }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ipsec/ipsec_itf.c b/src/vnet/ipsec/ipsec_itf.c index f9c1d77a37d..b86bf6a110c 100644 --- a/src/vnet/ipsec/ipsec_itf.c +++ b/src/vnet/ipsec/ipsec_itf.c @@ -188,7 +188,6 @@ ipsec_itf_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) (ai, NULL, NULL, ADJ_FLAG_MIDCHAIN_IP_STACK, ipsec_itf_build_rewrite ()); } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (ipsec_itf_device_class) = { .name = "IPSEC Tunnel", .format_device_name = format_ipsec_itf_name, @@ -208,7 +207,6 @@ VNET_HW_INTERFACE_CLASS(ipsec_p2mp_hw_interface_class) = { .update_adjacency = ipsec_itf_update_adj, .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA, }; -/* *INDENT-ON* */ /* * Maintain a bitmap of allocated ipsec_itf instance numbers. @@ -383,6 +381,7 @@ ipsec_itf_create_cli (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; + tunnel_mode_t mode = TUNNEL_MODE_P2P; u32 instance, sw_if_index; clib_error_t *error; mac_address_t mac; @@ -398,6 +397,8 @@ ipsec_itf_create_cli (vlib_main_t * vm, { if (unformat (line_input, "instance %d", &instance)) ; + else if (unformat (line_input, "p2mp")) + mode = TUNNEL_MODE_MP; else { error = clib_error_return (0, "unknown input: %U", @@ -412,7 +413,7 @@ ipsec_itf_create_cli (vlib_main_t * vm, return error; } - rv = ipsec_itf_create (instance, TUNNEL_MODE_P2P, &sw_if_index); + rv = ipsec_itf_create (instance, mode, &sw_if_index); if (rv) return clib_error_return (0, "iPSec interface create failed"); @@ -427,17 +428,15 @@ ipsec_itf_create_cli (vlib_main_t * vm, * * @cliexpar * The following two command syntaxes are equivalent: - * @cliexcmd{ipsec itf create [instance <instance>]} + * @cliexcmd{ipsec itf create [instance <instance>] [p2mp]} * Example of how to create a ipsec interface: * @cliexcmd{ipsec itf create} ?*/ -/* *INDENT-OFF* */ 
VLIB_CLI_COMMAND (ipsec_itf_create_command, static) = { .path = "ipsec itf create", - .short_help = "ipsec itf create [instance <instance>]", + .short_help = "ipsec itf create [instance <instance>] [p2mp]", .function = ipsec_itf_create_cli, }; -/* *INDENT-ON* */ static clib_error_t * ipsec_itf_delete_cli (vlib_main_t * vm, @@ -482,13 +481,11 @@ ipsec_itf_delete_cli (vlib_main_t * vm, * Example of how to create a ipsec_itf interface: * @cliexcmd{ipsec itf delete ipsec0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_itf_delete_command, static) = { .path = "ipsec itf delete", .short_help = "ipsec itf delete <interface>", .function = ipsec_itf_delete_cli, }; -/* *INDENT-ON* */ static clib_error_t * ipsec_interface_show (vlib_main_t * vm, @@ -496,12 +493,10 @@ ipsec_interface_show (vlib_main_t * vm, { index_t ii; - /* *INDENT-OFF* */ pool_foreach_index (ii, ipsec_itf_pool) { vlib_cli_output (vm, "%U", format_ipsec_itf, ii); } - /* *INDENT-ON* */ return NULL; } @@ -509,14 +504,12 @@ ipsec_interface_show (vlib_main_t * vm, /** * show IPSEC tunnel protection hash tables */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ipsec_interface_show_node, static) = { .path = "show ipsec interface", .function = ipsec_interface_show, .short_help = "show ipsec interface", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c index 028d9761c07..787da9359e0 100644 --- a/src/vnet/ipsec/ipsec_output.c +++ b/src/vnet/ipsec/ipsec_output.c @@ -335,7 +335,6 @@ VLIB_NODE_FN (ipsec4_output_node) (vlib_main_t * vm, return ipsec_output_inline (vm, node, frame, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ipsec4_output_node) = { .name = "ipsec4-output-feature", .vector_size = sizeof (u32), @@ -352,7 +351,6 @@ VLIB_REGISTER_NODE (ipsec4_output_node) = { #undef _ }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ipsec6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node, diff --git a/src/vnet/ipsec/ipsec_sa.c 
b/src/vnet/ipsec/ipsec_sa.c index a330abcb244..1d5195ec793 100644 --- a/src/vnet/ipsec/ipsec_sa.c +++ b/src/vnet/ipsec/ipsec_sa.c @@ -13,12 +13,14 @@ * limitations under the License. */ +#include <sys/random.h> #include <vnet/ipsec/ipsec.h> #include <vnet/ipsec/esp.h> #include <vnet/udp/udp_local.h> #include <vnet/fib/fib_table.h> #include <vnet/fib/fib_entry_track.h> #include <vnet/ipsec/ipsec_tun.h> +#include <vnet/ipsec/ipsec.api_enum.h> /** * @brief @@ -28,10 +30,8 @@ vlib_combined_counter_main_t ipsec_sa_counters = { .name = "SA", .stat_segment_name = "/net/ipsec/sa", }; -vlib_simple_counter_main_t ipsec_sa_lost_counters = { - .name = "SA-lost", - .stat_segment_name = "/net/ipsec/sa/lost", -}; +/* Per-SA error counters */ +vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS]; ipsec_sa_t *ipsec_sa_pool; @@ -93,14 +93,35 @@ ipsec_sa_stack (ipsec_sa_t * sa) } void +ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled) +{ + if (is_enabled) + { + sa->crypto_key_index = sa->crypto_async_key_index; + sa->crypto_enc_op_id = sa->crypto_async_enc_op_id; + sa->crypto_dec_op_id = sa->crypto_async_dec_op_id; + sa->integ_key_index = ~0; + sa->integ_op_id = ~0; + } + else + { + sa->crypto_key_index = sa->crypto_sync_key_index; + sa->crypto_enc_op_id = sa->crypto_sync_enc_op_id; + sa->crypto_dec_op_id = sa->crypto_sync_dec_op_id; + sa->integ_key_index = sa->integ_sync_key_index; + sa->integ_op_id = sa->integ_sync_op_id; + } +} + +void ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg) { ipsec_main_t *im = &ipsec_main; sa->crypto_alg = crypto_alg; sa->crypto_iv_size = im->crypto_algs[crypto_alg].iv_size; sa->esp_block_align = clib_max (4, im->crypto_algs[crypto_alg].block_align); - sa->sync_op_data.crypto_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id; - sa->sync_op_data.crypto_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id; + sa->crypto_sync_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id; + sa->crypto_sync_dec_op_id = 
im->crypto_algs[crypto_alg].dec_op_id; sa->crypto_calg = im->crypto_algs[crypto_alg].alg; ASSERT (sa->crypto_iv_size <= ESP_MAX_IV_SIZE); ASSERT (sa->esp_block_align <= ESP_MAX_BLOCK_SIZE); @@ -115,6 +136,13 @@ ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg) { ipsec_sa_set_IS_CTR (sa); } + else if (IPSEC_CRYPTO_ALG_IS_NULL_GMAC (crypto_alg)) + { + sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size; + ipsec_sa_set_IS_CTR (sa); + ipsec_sa_set_IS_AEAD (sa); + ipsec_sa_set_IS_NULL_GMAC (sa); + } } void @@ -123,7 +151,7 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg) ipsec_main_t *im = &ipsec_main; sa->integ_alg = integ_alg; sa->integ_icv_size = im->integ_algs[integ_alg].icv_size; - sa->sync_op_data.integ_op_id = im->integ_algs[integ_alg].op_id; + sa->integ_sync_op_id = im->integ_algs[integ_alg].op_id; sa->integ_calg = im->integ_algs[integ_alg].alg; ASSERT (sa->integ_icv_size <= ESP_MAX_ICV_SIZE); } @@ -131,44 +159,167 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg) void ipsec_sa_set_async_op_ids (ipsec_sa_t * sa) { - /* *INDENT-OFF* */ if (ipsec_sa_is_set_USE_ESN (sa)) { -#define _(n, s, k) \ - if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##n##_ENC ) \ - sa->async_op_data.crypto_async_enc_op_id = \ - VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \ - if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##n##_DEC ) \ - sa->async_op_data.crypto_async_dec_op_id = \ - VNET_CRYPTO_OP_##n##_TAG16_AAD12_DEC; - foreach_crypto_aead_alg +#define _(n, s, k) \ + if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \ + sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \ + if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \ + sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_DEC; + foreach_crypto_aead_alg #undef _ } else { -#define _(n, s, k) \ - if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##n##_ENC ) \ - 
sa->async_op_data.crypto_async_enc_op_id = \ - VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \ - if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##n##_DEC ) \ - sa->async_op_data.crypto_async_dec_op_id = \ - VNET_CRYPTO_OP_##n##_TAG16_AAD8_DEC; - foreach_crypto_aead_alg +#define _(n, s, k) \ + if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \ + sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \ + if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \ + sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_DEC; + foreach_crypto_aead_alg #undef _ } -#define _(c, h, s, k ,d) \ - if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##c##_ENC && \ - sa->sync_op_data.integ_op_id == VNET_CRYPTO_OP_##h##_HMAC) \ - sa->async_op_data.crypto_async_enc_op_id = \ - VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC; \ - if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##c##_DEC && \ - sa->sync_op_data.integ_op_id == VNET_CRYPTO_OP_##h##_HMAC) \ - sa->async_op_data.crypto_async_dec_op_id = \ - VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC; +#define _(c, h, s, k, d) \ + if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##c##_ENC && \ + sa->integ_sync_op_id == VNET_CRYPTO_OP_##h##_HMAC) \ + sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC; \ + if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##c##_DEC && \ + sa->integ_sync_op_id == VNET_CRYPTO_OP_##h##_HMAC) \ + sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC; foreach_crypto_link_async_alg #undef _ - /* *INDENT-ON* */ +} + +int +ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun, + bool is_tun) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_sa_t *sa; + u32 sa_index; + uword *p; + int rv; + + p = hash_get (im->sa_index_by_sa_id, id); + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + sa = ipsec_sa_get (p[0]); + sa_index = sa - ipsec_sa_pool; + + if (is_tun && ipsec_sa_is_set_IS_TUNNEL (sa) && + (ip_address_cmp (&tun->t_src, 
&sa->tunnel.t_src) != 0 || + ip_address_cmp (&tun->t_dst, &sa->tunnel.t_dst) != 0)) + { + /* if the source IP is updated for an inbound SA under a tunnel protect, + we need to update the tun_protect DB with the new src IP */ + if (ipsec_sa_is_set_IS_INBOUND (sa) && + ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 && + !ip46_address_is_zero (&tun->t_src.ip)) + { + if (ip46_address_is_ip4 (&sa->tunnel.t_src.ip)) + { + ipsec4_tunnel_kv_t old_key, new_key; + clib_bihash_kv_8_16_t res, + *bkey = (clib_bihash_kv_8_16_t *) &old_key; + + ipsec4_tunnel_mk_key (&old_key, &sa->tunnel.t_src.ip.ip4, + clib_host_to_net_u32 (sa->spi)); + ipsec4_tunnel_mk_key (&new_key, &tun->t_src.ip.ip4, + clib_host_to_net_u32 (sa->spi)); + + if (!clib_bihash_search_8_16 (&im->tun4_protect_by_key, bkey, + &res)) + { + clib_bihash_add_del_8_16 (&im->tun4_protect_by_key, &res, 0); + res.key = new_key.key; + clib_bihash_add_del_8_16 (&im->tun4_protect_by_key, &res, 1); + } + } + else + { + ipsec6_tunnel_kv_t old_key = { + .key = { + .remote_ip = sa->tunnel.t_src.ip.ip6, + .spi = clib_host_to_net_u32 (sa->spi), + }, + }, new_key = { + .key = { + .remote_ip = tun->t_src.ip.ip6, + .spi = clib_host_to_net_u32 (sa->spi), + }}; + clib_bihash_kv_24_16_t res, + *bkey = (clib_bihash_kv_24_16_t *) &old_key; + + if (!clib_bihash_search_24_16 (&im->tun6_protect_by_key, bkey, + &res)) + { + clib_bihash_add_del_24_16 (&im->tun6_protect_by_key, &res, + 0); + clib_memcpy (&res.key, &new_key.key, 3); + clib_bihash_add_del_24_16 (&im->tun6_protect_by_key, &res, + 1); + } + } + } + tunnel_unresolve (&sa->tunnel); + tunnel_copy (tun, &sa->tunnel); + if (!ipsec_sa_is_set_IS_INBOUND (sa)) + { + dpo_reset (&sa->dpo); + + sa->tunnel_flags = sa->tunnel.t_encap_decap_flags; + + rv = tunnel_resolve (&sa->tunnel, FIB_NODE_TYPE_IPSEC_SA, sa_index); + + if (rv) + { + hash_unset (im->sa_index_by_sa_id, sa->id); + pool_put (ipsec_sa_pool, sa); + return rv; + } + ipsec_sa_stack (sa); + /* generate header templates */ + if 
(ipsec_sa_is_set_IS_TUNNEL_V6 (sa)) + { + tunnel_build_v6_hdr (&sa->tunnel, + (ipsec_sa_is_set_UDP_ENCAP (sa) ? + IP_PROTOCOL_UDP : + IP_PROTOCOL_IPSEC_ESP), + &sa->ip6_hdr); + } + else + { + tunnel_build_v4_hdr (&sa->tunnel, + (ipsec_sa_is_set_UDP_ENCAP (sa) ? + IP_PROTOCOL_UDP : + IP_PROTOCOL_IPSEC_ESP), + &sa->ip4_hdr); + } + } + } + + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + { + if (dst_port != IPSEC_UDP_PORT_NONE && + dst_port != clib_net_to_host_u16 (sa->udp_hdr.dst_port)) + { + if (ipsec_sa_is_set_IS_INBOUND (sa)) + { + ipsec_unregister_udp_port ( + clib_net_to_host_u16 (sa->udp_hdr.dst_port), + !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); + ipsec_register_udp_port (dst_port, + !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); + } + sa->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port); + } + if (src_port != IPSEC_UDP_PORT_NONE && + src_port != clib_net_to_host_u16 (sa->udp_hdr.src_port)) + sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port); + } + return (0); } int @@ -176,13 +327,15 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck, ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik, ipsec_sa_flags_t flags, u32 salt, u16 src_port, - u16 dst_port, const tunnel_t *tun, u32 *sa_out_index) + u16 dst_port, u32 anti_replay_window_size, + const tunnel_t *tun, u32 *sa_out_index) { vlib_main_t *vm = vlib_get_main (); ipsec_main_t *im = &ipsec_main; clib_error_t *err; ipsec_sa_t *sa; u32 sa_index; + u64 rand[2]; uword *p; int rv; @@ -190,16 +343,24 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, if (p) return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; + if (getrandom (rand, sizeof (rand), 0) != sizeof (rand)) + return VNET_API_ERROR_INIT_FAILED; + pool_get_aligned_zero (ipsec_sa_pool, sa, CLIB_CACHE_LINE_BYTES); + clib_pcg64i_srandom_r (&sa->iv_prng, rand[0], rand[1]); + fib_node_init (&sa->node, FIB_NODE_TYPE_IPSEC_SA); fib_node_lock (&sa->node); sa_index = sa - ipsec_sa_pool; 
vlib_validate_combined_counter (&ipsec_sa_counters, sa_index); vlib_zero_combined_counter (&ipsec_sa_counters, sa_index); - vlib_validate_simple_counter (&ipsec_sa_lost_counters, sa_index); - vlib_zero_simple_counter (&ipsec_sa_lost_counters, sa_index); + for (int i = 0; i < IPSEC_SA_N_ERRORS; i++) + { + vlib_validate_simple_counter (&ipsec_sa_err_counters[i], sa_index); + vlib_zero_simple_counter (&ipsec_sa_err_counters[i], sa_index); + } tunnel_copy (tun, &sa->tunnel); sa->id = id; @@ -217,12 +378,14 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, ipsec_sa_set_crypto_alg (sa, crypto_alg); ipsec_sa_set_async_op_ids (sa); + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64) + ipsec_sa_set_ANTI_REPLAY_HUGE (sa); + clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key)); - sa->crypto_key_index = vnet_crypto_key_add (vm, - im->crypto_algs[crypto_alg].alg, - (u8 *) ck->data, ck->len); - if (~0 == sa->crypto_key_index) + sa->crypto_sync_key_index = vnet_crypto_key_add ( + vm, im->crypto_algs[crypto_alg].alg, (u8 *) ck->data, ck->len); + if (~0 == sa->crypto_sync_key_index) { pool_put (ipsec_sa_pool, sa); return VNET_API_ERROR_KEY_LENGTH; @@ -230,42 +393,39 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, if (integ_alg != IPSEC_INTEG_ALG_NONE) { - sa->integ_key_index = vnet_crypto_key_add (vm, - im-> - integ_algs[integ_alg].alg, - (u8 *) ik->data, ik->len); - if (~0 == sa->integ_key_index) + sa->integ_sync_key_index = vnet_crypto_key_add ( + vm, im->integ_algs[integ_alg].alg, (u8 *) ik->data, ik->len); + if (~0 == sa->integ_sync_key_index) { pool_put (ipsec_sa_pool, sa); return VNET_API_ERROR_KEY_LENGTH; } } - if (sa->async_op_data.crypto_async_enc_op_id && - !ipsec_sa_is_set_IS_AEAD (sa)) - { //AES-CBC & HMAC - sa->async_op_data.linked_key_index = - vnet_crypto_key_add_linked (vm, sa->crypto_key_index, - sa->integ_key_index); - } + if (sa->crypto_async_enc_op_id && !ipsec_sa_is_set_IS_AEAD (sa)) + 
sa->crypto_async_key_index = + vnet_crypto_key_add_linked (vm, sa->crypto_sync_key_index, + sa->integ_sync_key_index); // AES-CBC & HMAC + else + sa->crypto_async_key_index = sa->crypto_sync_key_index; if (im->async_mode) - sa->crypto_op_data = sa->async_op_data.data; + { + ipsec_sa_set_async_mode (sa, 1); + } + else if (ipsec_sa_is_set_IS_ASYNC (sa)) + { + ipsec_sa_set_async_mode (sa, 1 /* is_enabled */); + } else { - if (ipsec_sa_is_set_IS_ASYNC (sa)) - { - vnet_crypto_request_async_mode (1); - sa->crypto_op_data = sa->async_op_data.data; - } - else - sa->crypto_op_data = sa->sync_op_data.data; + ipsec_sa_set_async_mode (sa, 0 /* is_enabled */); } err = ipsec_check_support_cb (im, sa); if (err) { - clib_warning ("%s", err->what); + clib_warning ("%v", err->what); pool_put (ipsec_sa_pool, sa); return VNET_API_ERROR_UNIMPLEMENTED; } @@ -330,6 +490,18 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); } + /* window size rounded up to next power of 2 */ + if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) + { + anti_replay_window_size = 1 << max_log2 (anti_replay_window_size); + sa->replay_window_huge = + clib_bitmap_set_region (0, 0, 1, anti_replay_window_size); + } + else + { + sa->replay_window = ~0; + } + hash_set (im->sa_index_by_sa_id, sa->id, sa_index); if (sa_out_index) @@ -353,19 +525,51 @@ ipsec_sa_del (ipsec_sa_t * sa) (void) ipsec_call_add_del_callbacks (im, sa, sa_index, 0); if (ipsec_sa_is_set_IS_ASYNC (sa)) - vnet_crypto_request_async_mode (0); + { + if (!ipsec_sa_is_set_IS_AEAD (sa)) + vnet_crypto_key_del (vm, sa->crypto_async_key_index); + } + if (ipsec_sa_is_set_UDP_ENCAP (sa) && ipsec_sa_is_set_IS_INBOUND (sa)) ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port), !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa)) dpo_reset (&sa->dpo); - vnet_crypto_key_del (vm, sa->crypto_key_index); + vnet_crypto_key_del (vm, 
sa->crypto_sync_key_index); if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) - vnet_crypto_key_del (vm, sa->integ_key_index); + vnet_crypto_key_del (vm, sa->integ_sync_key_index); + if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) + clib_bitmap_free (sa->replay_window_huge); pool_put (ipsec_sa_pool, sa); } +int +ipsec_sa_bind (u32 id, u32 worker, bool bind) +{ + ipsec_main_t *im = &ipsec_main; + uword *p; + ipsec_sa_t *sa; + + p = hash_get (im->sa_index_by_sa_id, id); + if (!p) + return VNET_API_ERROR_INVALID_VALUE; + + sa = ipsec_sa_get (p[0]); + + if (!bind) + { + sa->thread_index = ~0; + return 0; + } + + if (worker >= vlib_num_workers ()) + return VNET_API_ERROR_INVALID_WORKER; + + sa->thread_index = vlib_get_worker_thread_index (worker); + return 0; +} + void ipsec_sa_unlock (index_t sai) { @@ -431,7 +635,8 @@ void ipsec_sa_clear (index_t sai) { vlib_zero_combined_counter (&ipsec_sa_counters, sai); - vlib_zero_simple_counter (&ipsec_sa_lost_counters, sai); + for (int i = 0; i < IPSEC_SA_N_ERRORS; i++) + vlib_zero_simple_counter (&ipsec_sa_err_counters[i], sai); } void @@ -439,13 +644,11 @@ ipsec_sa_walk (ipsec_sa_walk_cb_t cb, void *ctx) { ipsec_sa_t *sa; - /* *INDENT-OFF* */ pool_foreach (sa, ipsec_sa_pool) { if (WALK_CONTINUE != cb (sa, ctx)) break; } - /* *INDENT-ON* */ } /** @@ -462,19 +665,18 @@ ipsec_sa_fib_node_get (fib_node_index_t index) } static ipsec_sa_t * -ipsec_sa_from_fib_node (fib_node_t * node) +ipsec_sa_from_fib_node (fib_node_t *node) { ASSERT (FIB_NODE_TYPE_IPSEC_SA == node->fn_type); - return ((ipsec_sa_t *) (((char *) node) - - STRUCT_OFFSET_OF (ipsec_sa_t, node))); - + return ( + (ipsec_sa_t *) (((char *) node) - STRUCT_OFFSET_OF (ipsec_sa_t, node))); } /** * Function definition to inform the FIB node that its last lock has gone. */ static void -ipsec_sa_last_lock_gone (fib_node_t * node) +ipsec_sa_last_lock_gone (fib_node_t *node) { /* * The ipsec SA is a root of the graph. 
As such @@ -487,7 +689,7 @@ ipsec_sa_last_lock_gone (fib_node_t * node) * Function definition to backwalk a FIB node */ static fib_node_back_walk_rc_t -ipsec_sa_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) +ipsec_sa_back_walk (fib_node_t *node, fib_node_back_walk_ctx_t *ctx) { ipsec_sa_stack (ipsec_sa_from_fib_node (node)); @@ -504,16 +706,24 @@ const static fib_node_vft_t ipsec_sa_vft = { .fnv_back_walk = ipsec_sa_back_walk, }; -/* force inclusion from application's main.c */ +/* Init per-SA error counters and node type */ clib_error_t * -ipsec_sa_interface_init (vlib_main_t * vm) +ipsec_sa_init (vlib_main_t *vm) { fib_node_register_type (FIB_NODE_TYPE_IPSEC_SA, &ipsec_sa_vft); - return 0; +#define _(index, val, err, desc) \ + ipsec_sa_err_counters[index].name = \ + (char *) format (0, "SA-" #err "%c", 0); \ + ipsec_sa_err_counters[index].stat_segment_name = \ + (char *) format (0, "/net/ipsec/sa/err/" #err "%c", 0); \ + ipsec_sa_err_counters[index].counters = 0; + foreach_ipsec_sa_err +#undef _ + return 0; } -VLIB_INIT_FUNCTION (ipsec_sa_interface_init); +VLIB_INIT_FUNCTION (ipsec_sa_init); /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h index 057e8cd9bff..4f73f1eab0f 100644 --- a/src/vnet/ipsec/ipsec_sa.h +++ b/src/vnet/ipsec/ipsec_sa.h @@ -16,11 +16,16 @@ #define __IPSEC_SPD_SA_H__ #include <vlib/vlib.h> +#include <vppinfra/pcg.h> #include <vnet/crypto/crypto.h> #include <vnet/ip/ip.h> #include <vnet/fib/fib_node.h> #include <vnet/tunnel/tunnel.h> +#define ESP_MAX_ICV_SIZE (32) +#define ESP_MAX_IV_SIZE (16) +#define ESP_MAX_BLOCK_SIZE (16) + #define foreach_ipsec_crypto_alg \ _ (0, NONE, "none") \ _ (1, AES_CBC_128, "aes-cbc-128") \ @@ -34,7 +39,10 @@ _ (9, AES_GCM_256, "aes-gcm-256") \ _ (10, DES_CBC, "des-cbc") \ _ (11, 3DES_CBC, "3des-cbc") \ - _ (12, CHACHA20_POLY1305, "chacha20-poly1305") + _ (12, CHACHA20_POLY1305, "chacha20-poly1305") \ + _ (13, AES_NULL_GMAC_128, 
"aes-null-gmac-128") \ + _ (14, AES_NULL_GMAC_192, "aes-null-gmac-192") \ + _ (15, AES_NULL_GMAC_256, "aes-null-gmac-256") typedef enum { @@ -44,6 +52,11 @@ typedef enum IPSEC_CRYPTO_N_ALG, } __clib_packed ipsec_crypto_alg_t; +#define IPSEC_CRYPTO_ALG_IS_NULL_GMAC(_alg) \ + ((_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128) || \ + (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192) || \ + (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256)) + #define IPSEC_CRYPTO_ALG_IS_GCM(_alg) \ (((_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) || \ (_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) || \ @@ -107,7 +120,9 @@ typedef struct ipsec_key_t_ _ (128, IS_AEAD, "aead") \ _ (256, IS_CTR, "ctr") \ _ (512, IS_ASYNC, "async") \ - _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") + _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \ + _ (2048, IS_NULL_GMAC, "null-gmac") \ + _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge") typedef enum ipsec_sad_flags_t_ { @@ -118,51 +133,64 @@ typedef enum ipsec_sad_flags_t_ STATIC_ASSERT (sizeof (ipsec_sa_flags_t) == 2, "IPSEC SA flags != 2 byte"); +#define foreach_ipsec_sa_err \ + _ (0, LOST, lost, "packets lost") \ + _ (1, HANDOFF, handoff, "hand-off") \ + _ (2, INTEG_ERROR, integ_error, "Integrity check failed") \ + _ (3, DECRYPTION_FAILED, decryption_failed, "Decryption failed") \ + _ (4, CRYPTO_ENGINE_ERROR, crypto_engine_error, \ + "crypto engine error (dropped)") \ + _ (5, REPLAY, replay, "SA replayed packet") \ + _ (6, RUNT, runt, "undersized packet") \ + _ (7, NO_BUFFERS, no_buffers, "no buffers (dropped)") \ + _ (8, OVERSIZED_HEADER, oversized_header, \ + "buffer with oversized header (dropped)") \ + _ (9, NO_TAIL_SPACE, no_tail_space, \ + "no enough buffer tail space (dropped)") \ + _ (10, TUN_NO_PROTO, tun_no_proto, "no tunnel protocol") \ + _ (11, UNSUP_PAYLOAD, unsup_payload, "unsupported payload") \ + _ (12, SEQ_CYCLED, seq_cycled, "sequence number cycled (dropped)") \ + _ (13, CRYPTO_QUEUE_FULL, crypto_queue_full, "crypto queue full (dropped)") \ + _ (14, NO_ENCRYPTION, 
no_encryption, "no Encrypting SA (dropped)") \ + _ (15, DROP_FRAGMENTS, drop_fragments, "IP fragments drop") + +typedef enum +{ +#define _(v, f, s, d) IPSEC_SA_ERROR_##f = v, + foreach_ipsec_sa_err +#undef _ + IPSEC_SA_N_ERRORS, +} __clib_packed ipsec_sa_err_t; + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - /* flags */ - ipsec_sa_flags_t flags; - - u8 crypto_iv_size; - u8 esp_block_align; - u8 integ_icv_size; + clib_pcg64i_random_t iv_prng; - u8 __pad1[3]; - - u32 thread_index; - - u32 spi; - u32 seq; - u32 seq_hi; - u64 replay_window; - u64 ctr_iv_counter; + union + { + u64 replay_window; + clib_bitmap_t *replay_window_huge; + }; dpo_id_t dpo; vnet_crypto_key_index_t crypto_key_index; vnet_crypto_key_index_t integ_key_index; - /* Union data shared by sync and async ops, updated when mode is - * changed. */ - union - { - struct - { - vnet_crypto_op_id_t crypto_enc_op_id:16; - vnet_crypto_op_id_t crypto_dec_op_id:16; - vnet_crypto_op_id_t integ_op_id:16; - }; + u32 spi; + u32 seq; + u32 seq_hi; - struct - { - vnet_crypto_async_op_id_t crypto_async_enc_op_id:16; - vnet_crypto_async_op_id_t crypto_async_dec_op_id:16; - vnet_crypto_key_index_t linked_key_index; - }; + u16 crypto_enc_op_id; + u16 crypto_dec_op_id; + u16 integ_op_id; + ipsec_sa_flags_t flags; + u16 thread_index; - u64 crypto_op_data; - }; + u16 integ_icv_size : 6; + u16 crypto_iv_size : 5; + u16 esp_block_align : 5; CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); @@ -184,30 +212,7 @@ typedef struct CLIB_CACHE_LINE_ALIGN_MARK (cacheline2); /* Elements with u64 size multiples */ - union - { - struct - { - vnet_crypto_op_id_t crypto_enc_op_id:16; - vnet_crypto_op_id_t crypto_dec_op_id:16; - vnet_crypto_op_id_t integ_op_id:16; - }; - u64 data; - } sync_op_data; - - union - { - struct - { - vnet_crypto_async_op_id_t crypto_async_enc_op_id:16; - vnet_crypto_async_op_id_t crypto_async_dec_op_id:16; - vnet_crypto_key_index_t linked_key_index; - }; - u64 data; - } async_op_data; - tunnel_t tunnel; - 
fib_node_t node; /* elements with u32 size */ @@ -215,6 +220,16 @@ typedef struct u32 stat_index; vnet_crypto_alg_t integ_calg; vnet_crypto_alg_t crypto_calg; + u32 crypto_sync_key_index; + u32 integ_sync_key_index; + u32 crypto_async_key_index; + + /* elements with u16 size */ + u16 crypto_sync_enc_op_id; + u16 crypto_sync_dec_op_id; + u16 integ_sync_op_id; + u16 crypto_async_enc_op_id; + u16 crypto_async_dec_op_id; /* else u8 packed */ ipsec_crypto_alg_t crypto_alg; @@ -224,6 +239,10 @@ typedef struct ipsec_key_t crypto_key; } ipsec_sa_t; +STATIC_ASSERT (VNET_CRYPTO_N_OP_IDS < (1 << 16), "crypto ops overflow"); +STATIC_ASSERT (ESP_MAX_ICV_SIZE < (1 << 6), "integer icv overflow"); +STATIC_ASSERT (ESP_MAX_IV_SIZE < (1 << 5), "esp iv overflow"); +STATIC_ASSERT (ESP_MAX_BLOCK_SIZE < (1 << 5), "esp alignment overflow"); STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline1, CLIB_CACHE_LINE_BYTES); STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline2, 2 * CLIB_CACHE_LINE_BYTES); @@ -240,90 +259,149 @@ STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ipsec.sad_index) == STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_protocol), "IPSec data is overlapping with IP data"); -#define _(a,v,s) \ - always_inline int \ - ipsec_sa_is_set_##v (const ipsec_sa_t *sa) { \ - return (sa->flags & IPSEC_SA_FLAG_##v); \ +#define _(a, v, s) \ + always_inline bool ipsec_sa_is_set_##v (const ipsec_sa_t *sa) \ + { \ + return (sa->flags & IPSEC_SA_FLAG_##v); \ } foreach_ipsec_sa_flags #undef _ -#define _(a,v,s) \ - always_inline int \ - ipsec_sa_set_##v (ipsec_sa_t *sa) { \ - return (sa->flags |= IPSEC_SA_FLAG_##v); \ +#define _(a, v, s) \ + always_inline void ipsec_sa_set_##v (ipsec_sa_t *sa) \ + { \ + sa->flags |= IPSEC_SA_FLAG_##v; \ } foreach_ipsec_sa_flags #undef _ -#define _(a,v,s) \ - always_inline int \ - ipsec_sa_unset_##v (ipsec_sa_t *sa) { \ - return (sa->flags &= ~IPSEC_SA_FLAG_##v); \ +#define _(a, v, s) \ + always_inline int ipsec_sa_unset_##v (ipsec_sa_t *sa) \ + { \ + return 
(sa->flags &= ~IPSEC_SA_FLAG_##v); \ } - foreach_ipsec_sa_flags + foreach_ipsec_sa_flags #undef _ -/** - * @brief - * SA packet & bytes counters - */ -extern vlib_combined_counter_main_t ipsec_sa_counters; -extern vlib_simple_counter_main_t ipsec_sa_lost_counters; - -extern void ipsec_mk_key (ipsec_key_t * key, const u8 * data, u8 len); - -extern int -ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, - ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck, - ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik, - ipsec_sa_flags_t flags, u32 salt, u16 src_port, - u16 dst_port, const tunnel_t *tun, u32 *sa_out_index); + /** + * @brief + * SA packet & bytes counters + */ + extern vlib_combined_counter_main_t ipsec_sa_counters; +extern vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS]; + +extern void ipsec_mk_key (ipsec_key_t *key, const u8 *data, u8 len); + +extern int ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, + const tunnel_t *tun, bool is_tun); +extern int ipsec_sa_add_and_lock ( + u32 id, u32 spi, ipsec_protocol_t proto, ipsec_crypto_alg_t crypto_alg, + const ipsec_key_t *ck, ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik, + ipsec_sa_flags_t flags, u32 salt, u16 src_port, u16 dst_port, + u32 anti_replay_window_size, const tunnel_t *tun, u32 *sa_out_index); +extern int ipsec_sa_bind (u32 id, u32 worker, bool bind); extern index_t ipsec_sa_find_and_lock (u32 id); extern int ipsec_sa_unlock_id (u32 id); extern void ipsec_sa_unlock (index_t sai); extern void ipsec_sa_lock (index_t sai); extern void ipsec_sa_clear (index_t sai); -extern void ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, +extern void ipsec_sa_set_crypto_alg (ipsec_sa_t *sa, ipsec_crypto_alg_t crypto_alg); -extern void ipsec_sa_set_integ_alg (ipsec_sa_t * sa, +extern void ipsec_sa_set_integ_alg (ipsec_sa_t *sa, ipsec_integ_alg_t integ_alg); +extern void ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled); -typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t * 
sa, void *ctx); +typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t *sa, void *ctx); extern void ipsec_sa_walk (ipsec_sa_walk_cb_t cd, void *ctx); extern u8 *format_ipsec_replay_window (u8 *s, va_list *args); -extern u8 *format_ipsec_crypto_alg (u8 * s, va_list * args); -extern u8 *format_ipsec_integ_alg (u8 * s, va_list * args); -extern u8 *format_ipsec_sa (u8 * s, va_list * args); -extern u8 *format_ipsec_key (u8 * s, va_list * args); -extern uword unformat_ipsec_crypto_alg (unformat_input_t * input, - va_list * args); -extern uword unformat_ipsec_integ_alg (unformat_input_t * input, - va_list * args); -extern uword unformat_ipsec_key (unformat_input_t * input, va_list * args); - -#define IPSEC_UDP_PORT_NONE ((u16)~0) +extern u8 *format_ipsec_crypto_alg (u8 *s, va_list *args); +extern u8 *format_ipsec_integ_alg (u8 *s, va_list *args); +extern u8 *format_ipsec_sa (u8 *s, va_list *args); +extern u8 *format_ipsec_key (u8 *s, va_list *args); +extern uword unformat_ipsec_crypto_alg (unformat_input_t *input, + va_list *args); +extern uword unformat_ipsec_integ_alg (unformat_input_t *input, va_list *args); +extern uword unformat_ipsec_key (unformat_input_t *input, va_list *args); + +#define IPSEC_UDP_PORT_NONE ((u16) ~0) /* * Anti Replay definitions */ -#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (64) -#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE-1) +#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE(_sa) \ + (u32) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \ + clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \ + BITS (_sa->replay_window)) + +#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN(_sa, _is_huge) \ + (u32) (_is_huge ? clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \ + BITS (_sa->replay_window)) + +#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN(_sa) \ + (u64) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? 
\ + clib_bitmap_count_set_bits (_sa->replay_window_huge) : \ + count_set_bits (_sa->replay_window)) + +#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN(_sa, _is_huge) \ + (u64) (_is_huge ? clib_bitmap_count_set_bits (_sa->replay_window_huge) : \ + count_set_bits (_sa->replay_window)) + +#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX(_sa) \ + (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) - 1) + +#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX_KNOWN_WIN(_sa, _is_huge) \ + (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa, _is_huge) - 1) /* * sequence number less than the lower bound are outside of the window * From RFC4303 Appendix A: * Bl = Tl - W + 1 */ -#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_tl) (_tl - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE + 1) +#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_sa) \ + (u32) (_sa->seq - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) + 1) + +#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN(_sa, _is_huge) \ + (u32) (_sa->seq - \ + IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (_sa, _is_huge) + 1) + +always_inline u64 +ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_t *sa) +{ + if (!ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) + return sa->replay_window; + + u64 w; + u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa); + u32 tl_win_index = sa->seq & (window_size - 1); + + if (PREDICT_TRUE (tl_win_index >= 63)) + return clib_bitmap_get_multiple (sa->replay_window_huge, tl_win_index - 63, + 64); + + w = clib_bitmap_get_multiple_no_check (sa->replay_window_huge, 0, + tl_win_index + 1) + << (63 - tl_win_index); + w |= clib_bitmap_get_multiple_no_check (sa->replay_window_huge, + window_size - 63 + tl_win_index, + 63 - tl_win_index); + + return w; +} always_inline int -ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq) +ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq, bool ar_huge) { - if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && - sa->replay_window & (1ULL << (sa->seq - seq))) - return 1; + u32 window_size = 
IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge); + + /* we assume that the packet is in the window. + * if the packet falls left (sa->seq - seq >= window size), + * the result is wrong */ + + if (ar_huge) + return clib_bitmap_get (sa->replay_window_huge, seq & (window_size - 1)); else - return 0; + return (sa->replay_window >> (window_size + seq - sa->seq - 1)) & 1; + + return 0; } /* @@ -343,10 +421,14 @@ ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq) always_inline int ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, u32 hi_seq_used, bool post_decrypt, - u32 *hi_seq_req) + u32 *hi_seq_req, bool ar_huge) { ASSERT ((post_decrypt == false) == (hi_seq_req != 0)); + u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge); + u32 window_lower_bound = + IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN (sa, ar_huge); + if (!ipsec_sa_is_set_USE_ESN (sa)) { if (hi_seq_req) @@ -359,14 +441,11 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, if (PREDICT_TRUE (seq > sa->seq)) return 0; - u32 diff = sa->seq - seq; - - if (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE > diff) - return ((sa->replay_window & (1ULL << diff)) ? 1 : 0); - else + /* does the packet fall out on the left of the window */ + if (sa->seq >= seq + window_size) return 1; - return 0; + return ipsec_sa_anti_replay_check (sa, seq, ar_huge); } if (!ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) @@ -406,14 +485,15 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, */ return 0; } - if (PREDICT_TRUE (sa->seq >= (IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX))) + + if (PREDICT_TRUE (sa->seq >= window_size - 1)) { /* - * the last sequence number VPP recieved is more than one + * the last sequence number VPP received is more than one * window size greater than zero. * Case A from RFC4303 Appendix A. 
*/ - if (seq < IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND (sa->seq)) + if (seq < window_lower_bound) { /* * the received sequence number is lower than the lower bound @@ -425,7 +505,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, { if (hi_seq_used == sa->seq_hi) /* the high sequence number used to succesfully decrypt this - * packet is the same as the last-sequnence number of the SA. + * packet is the same as the last-sequence number of the SA. * that means this packet did not cause a wrap. * this packet is thus out of window and should be dropped */ return 1; @@ -437,8 +517,8 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, } else { - /* pre-decrypt it might be the might that casues a wrap, we - * need to decrpyt to find out */ + /* pre-decrypt it might be the packet that causes a wrap, we + * need to decrypt it to find out */ if (hi_seq_req) *hi_seq_req = sa->seq_hi + 1; return 0; @@ -447,17 +527,17 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, else { /* - * the recieved sequence number greater than the low + * the received sequence number greater than the low * end of the window. */ if (hi_seq_req) *hi_seq_req = sa->seq_hi; if (seq <= sa->seq) /* - * The recieved seq number is within bounds of the window + * The received seq number is within bounds of the window * check if it's a duplicate */ - return (ipsec_sa_anti_replay_check (sa, seq)); + return ipsec_sa_anti_replay_check (sa, seq, ar_huge); else /* * The received sequence number is greater than the window @@ -470,14 +550,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, else { /* - * the last sequence number VPP recieved is within one window + * the last sequence number VPP received is within one window * size of zero, i.e. 0 < TL < WINDOW_SIZE, the lower bound is thus a * large sequence number. 
- * Note that the check below uses unsiged integer arthimetic, so the + * Note that the check below uses unsigned integer arithmetic, so the * RHS will be a larger number. * Case B from RFC4303 Appendix A. */ - if (seq < IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND (sa->seq)) + if (seq < window_lower_bound) { /* * the sequence number is less than the lower bound. @@ -490,7 +570,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, */ if (hi_seq_req) *hi_seq_req = sa->seq_hi; - return (ipsec_sa_anti_replay_check (sa, seq)); + return ipsec_sa_anti_replay_check (sa, seq, ar_huge); } else { @@ -498,7 +578,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, * the packet is less the window lower bound or greater than * the higher bound, depending on how you look at it... * We're assuming, given that the last sequence number received, - * TL < WINDOW_SIZE, that a largeer seq num is more likely to be + * TL < WINDOW_SIZE, that a larger seq num is more likely to be * a packet that moves the window forward, than a packet that has * wrapped the high sequence again. If it were the latter then * we've lost close to 2^32 packets. @@ -511,15 +591,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, else { /* - * the packet seq number is between the lower bound (a large nubmer) - * and MAX_SEQ_NUM. This is in the window since the window upper bound - * tl > 0. - * However, since TL is the other side of 0 to the received - * packet, the SA has moved on to a higher sequence number. + * the packet seq number is between the lower bound (a large number) + * and MAX_SEQ_NUM. This is in the window since the window upper + * bound tl > 0. However, since TL is the other side of 0 to the + * received packet, the SA has moved on to a higher sequence number. 
*/ if (hi_seq_req) *hi_seq_req = sa->seq_hi - 1; - return (ipsec_sa_anti_replay_check (sa, seq)); + return ipsec_sa_anti_replay_check (sa, seq, ar_huge); } } @@ -529,45 +608,149 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq, } always_inline u32 -ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc) +ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge) { u32 n_lost = 0; + u32 seen = 0; + u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge); - if (inc < IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE) + if (inc < window_size) { - if (sa->seq > IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE) + if (ar_huge) + { + /* the number of packets we saw in this section of the window */ + clib_bitmap_t *window = sa->replay_window_huge; + u32 window_lower_bound = (sa->seq + 1) & (window_size - 1); + u32 window_next_lower_bound = + (window_lower_bound + inc) & (window_size - 1); + + uword i_block, i_word_start, i_word_end, full_words; + uword n_blocks = window_size >> log2_uword_bits; + uword mask; + + i_block = window_lower_bound >> log2_uword_bits; + + i_word_start = window_lower_bound & (uword_bits - 1); + i_word_end = window_next_lower_bound & (uword_bits - 1); + + /* We stay in the same word */ + if (i_word_start + inc <= uword_bits) + { + mask = pow2_mask (inc) << i_word_start; + seen += count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; + } + else + { + full_words = (inc + i_word_start - uword_bits - i_word_end) >> + log2_uword_bits; + + /* count set bits in the first word */ + mask = (uword) ~0 << i_word_start; + seen += count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; + i_block = (i_block + 1) & (n_blocks - 1); + + /* count set bits in the next full words */ + /* even if the last word need to be fully counted, we treat it + * apart */ + while (full_words >= 8) + { + if (full_words >= 16) + { + /* prefect the next 8 blocks (64 bytes) */ + clib_prefetch_store ( + &window[(i_block + 8) & 
(n_blocks - 1)]); + } + + seen += count_set_bits (window[i_block]); + seen += + count_set_bits (window[(i_block + 1) & (n_blocks - 1)]); + seen += + count_set_bits (window[(i_block + 2) & (n_blocks - 1)]); + seen += + count_set_bits (window[(i_block + 3) & (n_blocks - 1)]); + seen += + count_set_bits (window[(i_block + 4) & (n_blocks - 1)]); + seen += + count_set_bits (window[(i_block + 5) & (n_blocks - 1)]); + seen += + count_set_bits (window[(i_block + 6) & (n_blocks - 1)]); + seen += + count_set_bits (window[(i_block + 7) & (n_blocks - 1)]); + window[i_block] = 0; + window[(i_block + 1) & (n_blocks - 1)] = 0; + window[(i_block + 2) & (n_blocks - 1)] = 0; + window[(i_block + 3) & (n_blocks - 1)] = 0; + window[(i_block + 4) & (n_blocks - 1)] = 0; + window[(i_block + 5) & (n_blocks - 1)] = 0; + window[(i_block + 6) & (n_blocks - 1)] = 0; + window[(i_block + 7) & (n_blocks - 1)] = 0; + + i_block = (i_block + 8) & (n_blocks - 1); + full_words -= 8; + } + while (full_words > 0) + { + // last word is treated after the loop + seen += count_set_bits (window[i_block]); + window[i_block] = 0; + i_block = (i_block + 1) & (n_blocks - 1); + full_words--; + } + + /* the last word */ + mask = pow2_mask (i_word_end); + seen += count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; + } + + clib_bitmap_set_no_check (window, + (sa->seq + inc) & (window_size - 1), 1); + } + else { /* * count how many holes there are in the portion * of the window that we will right shift of the end * as a result of this increments */ - u64 mask = (((u64) 1 << inc) - 1) << (BITS (u64) - inc); - u64 old = sa->replay_window & mask; + u64 old = sa->replay_window & pow2_mask (inc); /* the number of packets we saw in this section of the window */ - u64 seen = count_set_bits (old); - - /* - * the number we missed is the size of the window section - * minus the number we saw. 
- */ - n_lost = inc - seen; + seen = count_set_bits (old); + sa->replay_window = + ((sa->replay_window) >> inc) | (1ULL << (window_size - 1)); } - sa->replay_window = ((sa->replay_window) << inc) | 1; + + /* + * the number we missed is the size of the window section + * minus the number we saw. + */ + n_lost = inc - seen; } else { /* holes in the replay window are lost packets */ - n_lost = BITS (u64) - count_set_bits (sa->replay_window); + n_lost = window_size - + IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN (sa, ar_huge); /* any sequence numbers that now fall outside the window * are forever lost */ - n_lost += inc - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE; + n_lost += inc - window_size; - sa->replay_window = 1; + if (PREDICT_FALSE (ar_huge)) + { + clib_bitmap_zero (sa->replay_window_huge); + clib_bitmap_set_no_check (sa->replay_window_huge, + (sa->seq + inc) & (window_size - 1), 1); + } + else + { + sa->replay_window = 1ULL << (window_size - 1); + } } - return (n_lost); + return n_lost; } /* @@ -581,9 +764,10 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc) */ always_inline u64 ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq, - u32 hi_seq) + u32 hi_seq, bool ar_huge) { u64 n_lost = 0; + u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge); u32 pos; if (ipsec_sa_is_set_USE_ESN (sa)) @@ -593,25 +777,33 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq, if (wrap == 0 && seq > sa->seq) { pos = seq - sa->seq; - n_lost = ipsec_sa_anti_replay_window_shift (sa, pos); + n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge); sa->seq = seq; } else if (wrap > 0) { - pos = ~seq + sa->seq + 1; - n_lost = ipsec_sa_anti_replay_window_shift (sa, pos); + pos = seq + ~sa->seq + 1; + n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge); sa->seq = seq; sa->seq_hi = hi_seq; } else if (wrap < 0) { pos = ~seq + sa->seq + 1; - sa->replay_window |= (1ULL << pos); + if (ar_huge) + 
clib_bitmap_set_no_check (sa->replay_window_huge, + seq & (window_size - 1), 1); + else + sa->replay_window |= (1ULL << (window_size - 1 - pos)); } else { pos = sa->seq - seq; - sa->replay_window |= (1ULL << pos); + if (ar_huge) + clib_bitmap_set_no_check (sa->replay_window_huge, + seq & (window_size - 1), 1); + else + sa->replay_window |= (1ULL << (window_size - 1 - pos)); } } else @@ -619,13 +811,17 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq, if (seq > sa->seq) { pos = seq - sa->seq; - n_lost = ipsec_sa_anti_replay_window_shift (sa, pos); + n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge); sa->seq = seq; } else { pos = sa->seq - seq; - sa->replay_window |= (1ULL << pos); + if (ar_huge) + clib_bitmap_set_no_check (sa->replay_window_huge, + seq & (window_size - 1), 1); + else + sa->replay_window |= (1ULL << (window_size - 1 - pos)); } } @@ -637,8 +833,8 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq, * Makes choice for thread_id should be assigned. * if input ~0, gets random worker_id based on unix_time_now_nsec */ -always_inline u32 -ipsec_sa_assign_thread (u32 thread_id) +always_inline u16 +ipsec_sa_assign_thread (u16 thread_id) { return ((thread_id) ? 
thread_id : (unix_time_now_nsec () % vlib_num_workers ()) + 1); diff --git a/src/vnet/ipsec/ipsec_spd.c b/src/vnet/ipsec/ipsec_spd.c index 5d5d521dd72..7b9a0aea8ed 100644 --- a/src/vnet/ipsec/ipsec_spd.c +++ b/src/vnet/ipsec/ipsec_spd.c @@ -38,12 +38,10 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add) if (!spd) return VNET_API_ERROR_INVALID_VALUE; - /* *INDENT-OFF* */ hash_foreach (k, v, im->spd_index_by_sw_if_index, ({ if (v == spd_index) ipsec_set_interface_spd(vm, k, spd_id, 0); })); - /* *INDENT-ON* */ hash_unset (im->spd_index_by_spd_id, spd_id); #define _(s,v) vec_free(spd->policies[IPSEC_SPD_POLICY_##s]); foreach_ipsec_spd_policy_type @@ -165,9 +163,6 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add) pool_max_len (im->fp_ip6_lookup_hashes_pool)) { clib_bihash_40_8_t *bihash_table; - ipsec_spd_fp_t *fp_spd = &spd->fp_spd; - - fp_spd->name6_out = format (0, "spd_%u_fp_ip6_out", spd_id); fp_spd->name6_out = format (0, "spd_%u_fp_ip6_out", spd_id); pool_get (im->fp_ip6_lookup_hashes_pool, bihash_table); @@ -185,7 +180,6 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add) pool_max_len (im->fp_ip6_lookup_hashes_pool)) { clib_bihash_40_8_t *bihash_table; - ipsec_spd_fp_t *fp_spd = &spd->fp_spd; fp_spd->name6_in = format (0, "spd_%u_fp_ip6_in", spd_id); pool_get (im->fp_ip6_lookup_hashes_pool, bihash_table); diff --git a/src/vnet/ipsec/ipsec_spd.h b/src/vnet/ipsec/ipsec_spd.h index 3a4fd0ec91c..3b1e4b40747 100644 --- a/src/vnet/ipsec/ipsec_spd.h +++ b/src/vnet/ipsec/ipsec_spd.h @@ -55,8 +55,6 @@ typedef struct */ typedef struct { - /** vectors for each of the fast path policy types */ - u32 *fp_policies[IPSEC_SPD_POLICY_N_TYPES]; ipsec_fp_mask_id_t *fp_mask_ids[IPSEC_SPD_POLICY_N_TYPES]; /* names of bihash tables */ u8 *name4_out; diff --git a/src/vnet/ipsec/ipsec_spd_fp_lookup.h b/src/vnet/ipsec/ipsec_spd_fp_lookup.h index a372ac77a50..2bbd7c664f9 100644 --- a/src/vnet/ipsec/ipsec_spd_fp_lookup.h +++ 
b/src/vnet/ipsec/ipsec_spd_fp_lookup.h @@ -97,8 +97,8 @@ static_always_inline int single_rule_in_match_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *match) { - u32 sa = clib_net_to_host_u32 (match->laddr.as_u32); - u32 da = clib_net_to_host_u32 (match->raddr.as_u32); + u32 da = clib_net_to_host_u32 (match->laddr.as_u32); + u32 sa = clib_net_to_host_u32 (match->raddr.as_u32); if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) { @@ -118,16 +118,16 @@ single_rule_in_match_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *match) } else { - if (da < clib_net_to_host_u32 (policy->raddr.start.ip4.as_u32)) + if (sa < clib_net_to_host_u32 (policy->raddr.start.ip4.as_u32)) return (0); - if (da > clib_net_to_host_u32 (policy->raddr.stop.ip4.as_u32)) + if (sa > clib_net_to_host_u32 (policy->raddr.stop.ip4.as_u32)) return (0); - if (sa < clib_net_to_host_u32 (policy->laddr.start.ip4.as_u32)) + if (da < clib_net_to_host_u32 (policy->laddr.start.ip4.as_u32)) return (0); - if (sa > clib_net_to_host_u32 (policy->laddr.stop.ip4.as_u32)) + if (da > clib_net_to_host_u32 (policy->laddr.stop.ip4.as_u32)) return (0); } return (1); @@ -196,13 +196,16 @@ ipsec_fp_in_ip6_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples, { policy = im->policies + *policy_id; - if ((last_priority[i] < policy->priority) && - (single_rule_in_match_5tuple (policy, match))) + if (single_rule_in_match_5tuple (policy, match)) { - last_priority[i] = policy->priority; - if (policies[i] == 0) - counter++; - policies[i] = policy; + if (last_priority[i] < policy->priority) + { + last_priority[i] = policy->priority; + if (policies[i] == 0) + counter++; + policies[i] = policy; + } + break; } } } @@ -291,13 +294,16 @@ ipsec_fp_in_ip4_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples, { policy = im->policies + *policy_id; - if ((last_priority[i] < policy->priority) && - (single_rule_in_match_5tuple (policy, match))) + if (single_rule_in_match_5tuple (policy, match)) { - last_priority[i] = 
policy->priority; - if (policies[i] == 0) - counter++; - policies[i] = policy; + if (last_priority[i] < policy->priority) + { + last_priority[i] = policy->priority; + if (policies[i] == 0) + counter++; + policies[i] = policy; + } + break; } } } @@ -418,6 +424,7 @@ ipsec_fp_out_ip6_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples, policies[i] = policy; ids[i] = *policy_id; } + break; } } } @@ -511,14 +518,17 @@ ipsec_fp_out_ip4_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples, { policy = im->policies + *policy_id; - if ((last_priority[i] < policy->priority) && - (single_rule_out_match_5tuple (policy, match))) + if (single_rule_out_match_5tuple (policy, match)) { - last_priority[i] = policy->priority; - if (policies[i] == 0) - counter++; - policies[i] = policy; - ids[i] = *policy_id; + if (last_priority[i] < policy->priority) + { + last_priority[i] = policy->priority; + if (policies[i] == 0) + counter++; + policies[i] = policy; + ids[i] = *policy_id; + } + break; } } } diff --git a/src/vnet/ipsec/ipsec_spd_policy.c b/src/vnet/ipsec/ipsec_spd_policy.c index 5261621b64a..af087689941 100644 --- a/src/vnet/ipsec/ipsec_spd_policy.c +++ b/src/vnet/ipsec/ipsec_spd_policy.c @@ -24,22 +24,6 @@ vlib_combined_counter_main_t ipsec_spd_policy_counters = { .stat_segment_name = "/net/ipsec/policy", }; -static int -ipsec_spd_entry_sort (void *a1, void *a2) -{ - ipsec_main_t *im = &ipsec_main; - u32 *id1 = a1; - u32 *id2 = a2; - ipsec_policy_t *p1, *p2; - - p1 = pool_elt_at_index (im->policies, *id1); - p2 = pool_elt_at_index (im->policies, *id2); - if (p1 && p2) - return p2->priority - p1->priority; - - return 0; -} - int ipsec_policy_mk_type (bool is_outbound, bool is_ipv6, @@ -189,6 +173,7 @@ ipsec_add_del_policy (vlib_main_t * vm, if (is_add) { u32 policy_index; + u32 i; if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) { @@ -216,9 +201,20 @@ ipsec_add_del_policy (vlib_main_t * vm, vlib_validate_combined_counter (&ipsec_spd_policy_counters, policy_index); 
vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index); - vec_add1 (spd->policies[policy->type], policy_index); - vec_sort_with_function (spd->policies[policy->type], - ipsec_spd_entry_sort); + + vec_foreach_index (i, spd->policies[policy->type]) + { + ipsec_policy_t *p = + pool_elt_at_index (im->policies, spd->policies[policy->type][i]); + + if (p->priority <= vp->priority) + { + break; + } + } + + vec_insert_elts (spd->policies[policy->type], &policy_index, 1, i); + *stat_index = policy_index; } else @@ -382,7 +378,6 @@ ipsec_fp_get_policy_ports_mask (ipsec_policy_t *policy, } mask->protocol = (policy->protocol == IPSEC_POLICY_PROTOCOL_ANY) ? 0 : ~0; - mask->action = 0; } static_always_inline void @@ -399,6 +394,15 @@ ipsec_fp_ip4_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask, clib_memset_u8 (mask, 0xff, sizeof (ipsec_fp_5tuple_t)); clib_memset_u8 (&mask->l3_zero_pad, 0, sizeof (mask->l3_zero_pad)); + if (inbound && (policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT && + policy->sa_index != INDEX_INVALID)) + { + ipsec_sa_t *s = ipsec_sa_get (policy->sa_index); + + if (ipsec_sa_is_set_IS_TUNNEL (s)) + goto set_spi_mask; + } + /* find bits where start != stop */ *plmask = *pladdr_start ^ *pladdr_stop; *prmask = *praddr_start ^ *praddr_stop; @@ -413,6 +417,7 @@ ipsec_fp_ip4_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask, *prmask = clib_host_to_net_u32 ( mask_out_highest_set_bit_u32 (clib_net_to_host_u32 (*prmask))); +set_spi_mask: if (inbound) { if (policy->type != IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT) @@ -440,6 +445,15 @@ ipsec_fp_ip6_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask, clib_memset_u8 (mask, 0xff, sizeof (ipsec_fp_5tuple_t)); + if (inbound && (policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT && + policy->sa_index != INDEX_INVALID)) + { + ipsec_sa_t *s = ipsec_sa_get (policy->sa_index); + + if (ipsec_sa_is_set_IS_TUNNEL (s)) + goto set_spi_mask; + } + *plmask = (*pladdr_start++ 
^ *pladdr_stop++); *prmask = (*praddr_start++ ^ *praddr_stop++); @@ -466,16 +480,16 @@ ipsec_fp_ip6_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask, if (*prmask++ & clib_host_to_net_u64 (0x1)) { - *prmask = (*pladdr_start ^ *pladdr_stop); + *prmask = (*praddr_start ^ *praddr_stop); *prmask = clib_host_to_net_u64 ( mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*prmask))); } else *prmask = 0; - +set_spi_mask: if (inbound) { - if (policy->type != IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT) + if (policy->type != IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT) mask->spi = 0; mask->protocol = 0; @@ -512,7 +526,21 @@ ipsec_fp_get_policy_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *tuple, policy->sa_index != INDEX_INVALID) { ipsec_sa_t *s = ipsec_sa_get (policy->sa_index); + tuple->spi = s->spi; + if (ipsec_sa_is_set_IS_TUNNEL (s)) + { + if (tuple->is_ipv6) + { + tuple->ip6_laddr = s->tunnel.t_dst.ip.ip6; + tuple->ip6_raddr = s->tunnel.t_src.ip.ip6; + } + else + { + tuple->laddr = s->tunnel.t_dst.ip.ip4; + tuple->raddr = s->tunnel.t_src.ip.ip4; + } + } } else tuple->spi = INDEX_INVALID; @@ -521,7 +549,6 @@ ipsec_fp_get_policy_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *tuple, } tuple->protocol = policy->protocol; - tuple->lport = policy->lport.start; tuple->rport = policy->rport.start; } @@ -590,17 +617,24 @@ ipsec_fp_ip4_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd, } else { + u32 i; + u32 *old_fp_policies_ids = result_val->fp_policies_ids; - if (vec_max_len (result_val->fp_policies_ids) != - vec_len (result_val->fp_policies_ids)) + vec_foreach_index (i, result_val->fp_policies_ids) { - /* no need to resize */ - vec_add1 (result_val->fp_policies_ids, policy_index); + ipsec_policy_t *p = + pool_elt_at_index (im->policies, result_val->fp_policies_ids[i]); + + if (p->priority <= policy->priority) + { + break; + } } - else - { - vec_add1 (result_val->fp_policies_ids, policy_index); + vec_insert_elts (result_val->fp_policies_ids, &policy_index, 1, 
i); + + if (result_val->fp_policies_ids != old_fp_policies_ids) + { res = clib_bihash_add_del_16_8 (bihash_table, &result, 1); if (res != 0) @@ -626,7 +660,6 @@ ipsec_fp_ip4_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd, (fp_spd->fp_mask_ids[policy->type] + searched_idx)->refcount++; mte->refcount++; - vec_add1 (fp_spd->fp_policies[policy->type], policy_index); clib_memcpy (vp, policy, sizeof (*vp)); return 0; @@ -695,17 +728,24 @@ ipsec_fp_ip6_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd, } else { + u32 i; + u32 *old_fp_policies_ids = result_val->fp_policies_ids; - if (vec_max_len (result_val->fp_policies_ids) != - vec_len (result_val->fp_policies_ids)) + vec_foreach_index (i, result_val->fp_policies_ids) { - /* no need to resize */ - vec_add1 (result_val->fp_policies_ids, policy_index); + ipsec_policy_t *p = + pool_elt_at_index (im->policies, result_val->fp_policies_ids[i]); + + if (p->priority <= policy->priority) + { + break; + } } - else - { - vec_add1 (result_val->fp_policies_ids, policy_index); + vec_insert_elts (result_val->fp_policies_ids, &policy_index, 1, i); + + if (result_val->fp_policies_ids != old_fp_policies_ids) + { res = clib_bihash_add_del_40_8 (bihash_table, &result, 1); if (res != 0) @@ -731,7 +771,6 @@ ipsec_fp_ip6_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd, (fp_spd->fp_mask_ids[policy->type] + searched_idx)->refcount++; mte->refcount++; - vec_add1 (fp_spd->fp_policies[policy->type], policy_index); clib_memcpy (vp, policy, sizeof (*vp)); return 0; @@ -760,7 +799,7 @@ ipsec_fp_ip6_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd, fp_spd->ip6_out_lookup_hash_idx); ipsec_policy_t *vp; - u32 ii, iii, imt; + u32 ii, imt; ipsec_fp_ip6_get_policy_mask (policy, &mask, inbound); ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound); @@ -769,57 +808,38 @@ ipsec_fp_ip6_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd, if (res != 0) return -1; - res = -1; vec_foreach_index (ii, result_val->fp_policies_ids) { vp = 
pool_elt_at_index (im->policies, *(result_val->fp_policies_ids + ii)); if (ipsec_policy_is_equal (vp, policy)) { - vec_foreach_index (iii, fp_spd->fp_policies[policy->type]) + if (vec_len (result_val->fp_policies_ids) == 1) + { + vec_free (result_val->fp_policies_ids); + clib_bihash_add_del_40_8 (bihash_table, &result, 0); + } + else + vec_delete (result_val->fp_policies_ids, 1, ii); + + vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type]) { - if (*(fp_spd->fp_policies[policy->type] + iii) == - *(result_val->fp_policies_ids + ii)) + if ((fp_spd->fp_mask_ids[policy->type] + imt)->mask_type_idx == + vp->fp_mask_type_id) { - if (vec_len (result_val->fp_policies_ids) == 1) - { - vec_free (result_val->fp_policies_ids); - clib_bihash_add_del_40_8 (bihash_table, &result, 0); - } - else - { - vec_del1 (result_val->fp_policies_ids, ii); - } - vec_del1 (fp_spd->fp_policies[policy->type], iii); - - vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type]) - { - if ((fp_spd->fp_mask_ids[policy->type] + imt) - ->mask_type_idx == vp->fp_mask_type_id) - { - - if ((fp_spd->fp_mask_ids[policy->type] + imt) - ->refcount-- == 1) - vec_del1 (fp_spd->fp_mask_ids[policy->type], imt); - - break; - } - } - - res = 0; + + if ((fp_spd->fp_mask_ids[policy->type] + imt)->refcount-- == + 1) + vec_del1 (fp_spd->fp_mask_ids[policy->type], imt); + break; } } - if (res != 0) - continue; - else - { - ipsec_fp_release_mask_type (im, vp->fp_mask_type_id); - ipsec_sa_unlock (vp->sa_index); - pool_put (im->policies, vp); - return 0; - } + ipsec_fp_release_mask_type (im, vp->fp_mask_type_id); + ipsec_sa_unlock (vp->sa_index); + pool_put (im->policies, vp); + return 0; } } return -1; @@ -837,7 +857,7 @@ ipsec_fp_ip4_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd, (ipsec_fp_lookup_value_t *) &result.value; bool inbound = ipsec_is_policy_inbound (policy); ipsec_policy_t *vp; - u32 ii, iii, imt; + u32 ii, imt; clib_bihash_16_8_t *bihash_table = inbound ? 
pool_elt_at_index (im->fp_ip4_lookup_hashes_pool, fp_spd->ip4_in_lookup_hash_idx) : @@ -852,57 +872,37 @@ ipsec_fp_ip4_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd, if (res != 0) return -1; - res = -1; vec_foreach_index (ii, result_val->fp_policies_ids) { vp = pool_elt_at_index (im->policies, *(result_val->fp_policies_ids + ii)); if (ipsec_policy_is_equal (vp, policy)) { - vec_foreach_index (iii, fp_spd->fp_policies[policy->type]) + if (vec_len (result_val->fp_policies_ids) == 1) { - if (*(fp_spd->fp_policies[policy->type] + iii) == - *(result_val->fp_policies_ids + ii)) - { - if (vec_len (result_val->fp_policies_ids) == 1) - { - vec_free (result_val->fp_policies_ids); - clib_bihash_add_del_16_8 (bihash_table, &result, 0); - } - else - { - vec_del1 (result_val->fp_policies_ids, ii); - } - vec_del1 (fp_spd->fp_policies[policy->type], iii); - - vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type]) - { - if ((fp_spd->fp_mask_ids[policy->type] + imt) - ->mask_type_idx == vp->fp_mask_type_id) - { - - if ((fp_spd->fp_mask_ids[policy->type] + imt) - ->refcount-- == 1) - vec_del1 (fp_spd->fp_mask_ids[policy->type], imt); - - break; - } - } - - res = 0; - break; - } + vec_free (result_val->fp_policies_ids); + clib_bihash_add_del_16_8 (bihash_table, &result, 0); } - - if (res != 0) - continue; else + vec_delete (result_val->fp_policies_ids, 1, ii); + + vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type]) { - ipsec_fp_release_mask_type (im, vp->fp_mask_type_id); - ipsec_sa_unlock (vp->sa_index); - pool_put (im->policies, vp); - return 0; + if ((fp_spd->fp_mask_ids[policy->type] + imt)->mask_type_idx == + vp->fp_mask_type_id) + { + + if ((fp_spd->fp_mask_ids[policy->type] + imt)->refcount-- == + 1) + vec_del1 (fp_spd->fp_mask_ids[policy->type], imt); + + break; + } } + ipsec_fp_release_mask_type (im, vp->fp_mask_type_id); + ipsec_sa_unlock (vp->sa_index); + pool_put (im->policies, vp); + return 0; } } return -1; diff --git a/src/vnet/ipsec/ipsec_test.c 
b/src/vnet/ipsec/ipsec_test.c index f1436193636..86d09f18a5c 100644 --- a/src/vnet/ipsec/ipsec_test.c +++ b/src/vnet/ipsec/ipsec_test.c @@ -282,12 +282,30 @@ vl_api_ipsec_sad_entry_add_reply_t_handler ( { } +static void +vl_api_ipsec_sad_entry_add_v2_reply_t_handler ( + vl_api_ipsec_sad_entry_add_reply_t *mp) +{ +} + static int api_ipsec_sad_entry_del (vat_main_t *vat) { return -1; } +static int +api_ipsec_sad_bind (vat_main_t *vat) +{ + return -1; +} + +static int +api_ipsec_sad_unbind (vat_main_t *vat) +{ + return -1; +} + static void vl_api_ipsec_sad_entry_add_del_v2_reply_t_handler ( vl_api_ipsec_sad_entry_add_del_v2_reply_t *mp) @@ -307,6 +325,12 @@ api_ipsec_sad_entry_add_del_v3 (vat_main_t *vat) } static int +api_ipsec_sad_entry_update (vat_main_t *vat) +{ + return -1; +} + +static int api_ipsec_tunnel_protect_update (vat_main_t *vat) { return -1; @@ -324,6 +348,18 @@ api_ipsec_sa_v3_dump (vat_main_t *vat) } static int +api_ipsec_sa_v4_dump (vat_main_t *vat) +{ + return -1; +} + +static int +api_ipsec_sa_v5_dump (vat_main_t *vat) +{ + return -1; +} + +static int api_ipsec_tunnel_protect_dump (vat_main_t *vat) { return -1; @@ -347,6 +383,12 @@ api_ipsec_sad_entry_add (vat_main_t *vat) return -1; } +static int +api_ipsec_sad_entry_add_v2 (vat_main_t *vat) +{ + return -1; +} + static void vl_api_ipsec_spd_entry_add_del_reply_t_handler ( vl_api_ipsec_spd_entry_add_del_reply_t *mp) @@ -376,6 +418,16 @@ vl_api_ipsec_sa_v3_details_t_handler (vl_api_ipsec_sa_v3_details_t *mp) { } +static void +vl_api_ipsec_sa_v4_details_t_handler (vl_api_ipsec_sa_v4_details_t *mp) +{ +} + +static void +vl_api_ipsec_sa_v5_details_t_handler (vl_api_ipsec_sa_v5_details_t *mp) +{ +} + static int api_ipsec_spd_interface_dump (vat_main_t *vat) { diff --git a/src/vnet/ipsec/ipsec_tun.c b/src/vnet/ipsec/ipsec_tun.c index 82f5a11d26f..ecda291e985 100644 --- a/src/vnet/ipsec/ipsec_tun.c +++ b/src/vnet/ipsec/ipsec_tun.c @@ -236,7 +236,6 @@ ipsec_tun_protect_rx_db_add (ipsec_main_t * im, if 
(ip46_address_is_zero (&itp->itp_crypto.dst)) return; - /* *INDENT-OFF* */ FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai, ({ sa = ipsec_sa_get (sai); @@ -291,7 +290,6 @@ ipsec_tun_protect_rx_db_add (ipsec_main_t * im, ipsec_tun_register_nodes (AF_IP6); } })) - /* *INDENT-ON* */ } static adj_walk_rc_t @@ -371,7 +369,6 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im, { const ipsec_sa_t *sa; - /* *INDENT-OFF* */ FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa, ({ if (ip46_address_is_ip4 (&itp->itp_crypto.dst)) @@ -405,7 +402,6 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im, } } })); - /* *INDENT-ON* */ } static adj_walk_rc_t @@ -464,7 +460,6 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp) { ipsec_sa_t *sa; - /* *INDENT-OFF* */ FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa, ({ if (ipsec_sa_is_set_IS_TUNNEL (sa)) @@ -484,7 +479,6 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp) itp->itp_flags &= ~IPSEC_PROTECT_ENCAPED; } })); - /* *INDENT-ON* */ } static void @@ -504,13 +498,11 @@ ipsec_tun_protect_config (ipsec_main_t * im, if (itp->itp_flags & IPSEC_PROTECT_ITF) ipsec_sa_set_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa)); - /* *INDENT-OFF* */ FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai, ({ ipsec_sa_lock(sai); })); ipsec_tun_protect_set_crypto_addr(itp); - /* *INDENT-ON* */ /* * add to the DB against each SA @@ -527,7 +519,6 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp) ipsec_sa_t *sa; index_t sai; - /* *INDENT-OFF* */ FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa, ({ ipsec_sa_unset_IS_PROTECT (sa); @@ -543,7 +534,6 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp) ({ ipsec_sa_unlock(sai); })); - /* *INDENT-ON* */ ITP_DBG (itp, "unconfigured"); } @@ -751,12 +741,10 @@ ipsec_tun_protect_walk (ipsec_tun_protect_walk_cb_t fn, void *ctx) { index_t itpi; - /* *INDENT-OFF* */ pool_foreach_index (itpi, ipsec_tun_protect_pool) { fn (itpi, ctx); } - /* *INDENT-ON* */ } void @@ -772,12 +760,10 @@ 
ipsec_tun_protect_walk_itf (u32 sw_if_index, idi = &itp_db.id_itf[sw_if_index]; - /* *INDENT-OFF* */ hash_foreach(key, itpi, idi->id_hash, ({ fn (itpi, ctx); })); - /* *INDENT-ON* */ if (INDEX_INVALID != idi->id_itp) fn (idi->id_itp, ctx); } diff --git a/src/vnet/ipsec/ipsec_tun_in.c b/src/vnet/ipsec/ipsec_tun_in.c index 9f1e2d6c5a1..c82de3ebaff 100644 --- a/src/vnet/ipsec/ipsec_tun_in.c +++ b/src/vnet/ipsec/ipsec_tun_in.c @@ -278,6 +278,7 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, else { next[0] = ipsec_ip6_if_no_tunnel (node, b[0], esp0, ip60); + vlib_buffer_advance (b[0], -buf_rewind0); n_no_tunnel++; goto trace00; } @@ -410,7 +411,6 @@ VLIB_NODE_FN (ipsec4_tun_input_node) (vlib_main_t * vm, return ipsec_tun_protect_input_inline (vm, node, from_frame, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ipsec4_tun_input_node) = { .name = "ipsec4-tun-input", .vector_size = sizeof (u32), @@ -420,7 +420,6 @@ VLIB_REGISTER_NODE (ipsec4_tun_input_node) = { .error_counters = ipsec_tun_error_counters, .sibling_of = "device-input", }; -/* *INDENT-ON* */ VLIB_NODE_FN (ipsec6_tun_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -429,7 +428,6 @@ VLIB_NODE_FN (ipsec6_tun_input_node) (vlib_main_t * vm, return ipsec_tun_protect_input_inline (vm, node, from_frame, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ipsec6_tun_input_node) = { .name = "ipsec6-tun-input", .vector_size = sizeof (u32), @@ -439,7 +437,6 @@ VLIB_REGISTER_NODE (ipsec6_tun_input_node) = { .error_counters = ipsec_tun_error_counters, .sibling_of = "device-input", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ipsec/ipsec_types.api b/src/vnet/ipsec/ipsec_types.api index 3f894348bcb..37c1141ab46 100644 --- a/src/vnet/ipsec/ipsec_types.api +++ b/src/vnet/ipsec/ipsec_types.api @@ -37,6 +37,9 @@ enum ipsec_crypto_alg IPSEC_API_CRYPTO_ALG_DES_CBC, IPSEC_API_CRYPTO_ALG_3DES_CBC, IPSEC_API_CRYPTO_ALG_CHACHA20_POLY1305 
[backwards_compatible], + IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_128 [backwards_compatible], + IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_192 [backwards_compatible], + IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_256 [backwards_compatible], }; /* @@ -193,9 +196,6 @@ typedef ipsec_spd_entry_v2 /** \brief IPsec: Security Association Database entry - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - add SAD entry if non-zero, else delete @param sad_id - sad id @param spi - security parameter index @param protocol - 0 = AH, 1 = ESP @@ -203,6 +203,7 @@ typedef ipsec_spd_entry_v2 @param crypto_key - crypto keying material @param integrity_algorithm - one of the supported algorithms @param integrity_key - integrity keying material + @param flags - SA flags (see ipsec_sad_flags above) @param tunnel_src_address - IPsec tunnel source address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero @param tunnel_dst_address - IPsec tunnel destination address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero @param tx_table_id - the FIB id used for encapsulated packets @@ -287,6 +288,46 @@ typedef ipsec_sad_entry_v3 u16 udp_dst_port [default=4500]; }; +/** \brief IPsec: Security Association Database entry + @param sad_id - sad id + @param spi - security parameter index + @param protocol - 0 = AH, 1 = ESP + @param crypto_algorithm - a supported crypto algorithm + @param crypto_key - crypto keying material + @param integrity_algorithm - one of the supported algorithms + @param integrity_key - integrity keying material + @param flags - SA flags (see ipsec_sad_flags above) + @param tunnel - tunnel description (see vnet/tunnel/tunnel_types.api) + @param salt - for use with counter mode ciphers + @param udp_src_port - If using UDP Encapsulation, use this source port for + TX. It is ignored for RX. 
+ @param udp_dst_port - If using UDP Encapsulation, use this destination port + for TX. Expect traffic on this port for RX. + @param anti_replay_window_size - AR window size to use. The supplied value is round up to the nearest power of 2. + */ +typedef ipsec_sad_entry_v4 +{ + u32 sad_id; + u32 spi; + + vl_api_ipsec_proto_t protocol; + + vl_api_ipsec_crypto_alg_t crypto_algorithm; + vl_api_key_t crypto_key; + + vl_api_ipsec_integ_alg_t integrity_algorithm; + vl_api_key_t integrity_key; + + vl_api_ipsec_sad_flags_t flags; + + vl_api_tunnel_t tunnel; + + u32 salt; + u16 udp_src_port [default=4500]; + u16 udp_dst_port [default=4500]; + + u32 anti_replay_window_size [default=64]; +}; /* * Local Variables: diff --git a/src/vnet/l2/feat_bitmap.c b/src/vnet/l2/feat_bitmap.c index 349ec67462b..507fe365f07 100644 --- a/src/vnet/l2/feat_bitmap.c +++ b/src/vnet/l2/feat_bitmap.c @@ -155,7 +155,6 @@ feat_bitmap_drop_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (feat_bitmap_drop_init); -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = { .function = feat_bitmap_drop_node_fn, .name = "feature-bitmap-drop", @@ -173,7 +172,6 @@ VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = { [FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index b0ac23f705a..ccba9aa3df1 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -1,6 +1,7 @@ /* Hey Emacs use -*- mode: C -*- */ /* * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2022 Nordix Foundation. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -14,7 +15,7 @@ * limitations under the License. 
*/ -option version = "3.1.0"; +option version = "3.2.0"; import "vnet/ip/ip_types.api"; import "vnet/ethernet/ethernet_types.api"; @@ -304,7 +305,7 @@ autoreply define bridge_domain_set_learn_limit u32 learn_limit; }; -/** \brief L2 bridge domain add or delete request +/** \brief L2 bridge domain add or delete request - will be deprecated @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the bridge domain to create @@ -319,6 +320,7 @@ autoreply define bridge_domain_set_learn_limit */ autoreply define bridge_domain_add_del { + option deprecated; u32 client_index; u32 context; u32 bd_id; @@ -333,6 +335,49 @@ autoreply define bridge_domain_add_del bool is_add [default=true]; }; +/** \brief L2 bridge domain add delete request version 2 + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - if the id == ~0 creates a bridge domain with an unused id + if the id != ~0 the id of the bridge domain to create/delete + @param flood - enable/disable bcast/mcast flooding in the bd + @param uu_flood - enable/disable unknown unicast flood in the bd + @param forward - enable/disable forwarding on all interfaces in the bd + @param learn - enable/disable learning on all interfaces in the bd + @param arp_term - enable/disable arp termination in the bd + @param arp_ufwd - enable/disable arp unicast forwarding in the bd + @param mac_age - mac aging time in min, 0 for disabled + @param is_add - add or delete flag +*/ +define bridge_domain_add_del_v2 +{ + u32 client_index; + u32 context; + u32 bd_id; + bool flood; + bool uu_flood; + bool forward; + bool learn; + bool arp_term; + bool arp_ufwd; + u8 mac_age; + string bd_tag[64]; + bool is_add [default=true]; +}; + +/** \brief L2 bridge domain add delete version 2 response + @param context - sender context, to match reply w/ request + @param retval - return code for the set 
bridge flags request + @param resulting_id - the id for the new bridge domain +*/ +define bridge_domain_add_del_v2_reply +{ + u32 context; + i32 retval; + u32 bd_id; +}; + + /** \brief L2 bridge domain request operational state details @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index c555a17d5ea..035542d298d 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -3,6 +3,7 @@ * l2_api.c - layer 2 forwarding api * * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2022 Nordix Foundation. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -67,7 +68,6 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ vec_foreach_index (sw_if_index, l2im->configs) { config = vec_elt_at_index (l2im->configs, sw_if_index); @@ -75,7 +75,6 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp) send_l2_xconnect_details (reg, mp->context, sw_if_index, config->output_sw_if_index); } - /* *INDENT-ON* */ } static void @@ -413,12 +412,10 @@ vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) BAD_SW_IF_INDEX_LABEL; - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_L2_FLAGS_REPLY, ({ rmp->resulting_feature_bitmap = ntohl(rbm); })); - /* *INDENT-ON* */ } static void @@ -511,6 +508,37 @@ vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) } static void +vl_api_bridge_domain_add_del_v2_t_handler ( + vl_api_bridge_domain_add_del_v2_t *mp) +{ + vl_api_bridge_domain_add_del_v2_reply_t *rmp; + u32 bd_id = ntohl (mp->bd_id); + int rv = 0; + + if ((~0 == bd_id) && (mp->is_add)) + bd_id = bd_get_unused_id (); + + if ((~0 == bd_id) && (mp->is_add)) + rv = VNET_API_ERROR_EAGAIN; + else + { + l2_bridge_domain_add_del_args_t a = { .is_add = 
mp->is_add, + .flood = mp->flood, + .uu_flood = mp->uu_flood, + .forward = mp->forward, + .learn = mp->learn, + .arp_term = mp->arp_term, + .arp_ufwd = mp->arp_ufwd, + .mac_age = mp->mac_age, + .bd_id = bd_id, + .bd_tag = mp->bd_tag }; + rv = bd_add_del (&a); + } + REPLY_MACRO2 (VL_API_BRIDGE_DOMAIN_ADD_DEL_V2_REPLY, + ({ rmp->bd_id = htonl (bd_id); })); +} + +static void send_bridge_domain_details (l2input_main_t * l2im, vl_api_registration_t * reg, l2_bridge_domain_t * bd_config, @@ -651,12 +679,10 @@ vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp) bitmap = bd_set_flags (vm, bd_index, flags, mp->is_set); out: - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY, ({ rmp->resulting_feature_bitmap = ntohl(bitmap); })); - /* *INDENT-ON* */ } static void @@ -918,7 +944,6 @@ vl_api_bd_ip_mac_dump_t_handler (vl_api_bd_ip_mac_dump_t * mp) u64 mac64; bd_id = bd_config->bd_id; - /* *INDENT-OFF* */ hash_foreach (ip4_addr.as_u32, mac64, bd_config->mac_by_ip4, ({ ip46_address_t ip = { @@ -940,7 +965,6 @@ vl_api_bd_ip_mac_dump_t_handler (vl_api_bd_ip_mac_dump_t * mp) send_bd_ip_mac_entry (am, reg, bd_id, &ip, IP46_TYPE_IP6, &mac, mp->context); })); - /* *INDENT-ON* */ } } } @@ -1094,12 +1118,10 @@ vl_api_bvi_create_t_handler (vl_api_bvi_create_t * mp) rv = l2_bvi_create (ntohl (mp->user_instance), &mac, &sw_if_index); - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_BVI_CREATE_REPLY, ({ rmp->sw_if_index = ntohl (sw_if_index); })); - /* *INDENT-ON* */ } static void @@ -1193,13 +1215,11 @@ l2_arp_term_process (vlib_main_t * vm, vlib_node_runtime_t * rt, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_arp_term_process_node) = { .function = l2_arp_term_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "l2-arp-term-publisher", }; -/* *INDENT-ON* */ static void vl_api_want_l2_arp_term_events_t_handler (vl_api_want_l2_arp_term_events_t * @@ -1280,14 +1300,15 @@ l2_api_hookup (vlib_main_t * vm) { api_main_t *am = vlibapi_get_main (); - /* Mark VL_API_BRIDGE_DOMAIN_DUMP 
as mp safe */ - vl_api_set_msg_thread_safe (am, VL_API_BRIDGE_DOMAIN_DUMP, 1); - /* * Set up the (msg_name, crc, message-id) table */ REPLY_MSG_ID_BASE = setup_message_id_table (); + /* Mark VL_API_BRIDGE_DOMAIN_DUMP as mp safe */ + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_BRIDGE_DOMAIN_DUMP, 1); + return 0; } diff --git a/src/vnet/l2/l2_arp_term.c b/src/vnet/l2/l2_arp_term.c index 594ee8e3622..eed9b7af7c3 100644 --- a/src/vnet/l2/l2_arp_term.c +++ b/src/vnet/l2/l2_arp_term.c @@ -449,7 +449,6 @@ arp_term_l2bd (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = { .function = arp_term_l2bd, .name = "arp-term-l2bd", @@ -464,7 +463,6 @@ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = { .format_buffer = format_ethernet_arp_header, .format_trace = format_arp_term_input_trace, }; -/* *INDENT-ON* */ clib_error_t * arp_term_init (vlib_main_t * vm) diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 7e6ea60b440..c7392c03b58 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -102,12 +102,10 @@ bd_free_ip_mac_tables (l2_bridge_domain_t * bd) ip6_address_t *ip6_addr_key; hash_free (bd->mac_by_ip4); - /* *INDENT-OFF* */ hash_foreach_mem (ip6_addr_key, mac_addr, bd->mac_by_ip6, ({ clib_mem_free (ip6_addr_key); /* free memory used for ip6 addr key */ })); - /* *INDENT-ON* */ hash_free (bd->mac_by_ip6); } @@ -454,13 +452,11 @@ done: * Example of how to disable learning (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain learn 200 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_learn_cli, static) = { .path = "set bridge-domain learn", .short_help = "set bridge-domain learn <bridge-domain-id> [disable]", .function = bd_learn, }; -/* *INDENT-ON* */ static clib_error_t * bd_default_learn_limit (vlib_main_t *vm, unformat_input_t *input, @@ -547,13 +543,11 @@ done: * Example of how to disable forwarding (where 200 is the bridge-domain-id): * @cliexcmd{set 
bridge-domain forward 200 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_fwd_cli, static) = { .path = "set bridge-domain forward", .short_help = "set bridge-domain forward <bridge-domain-id> [disable]", .function = bd_fwd, }; -/* *INDENT-ON* */ /** Set bridge-domain flood enable/disable. @@ -612,13 +606,11 @@ done: * Example of how to disable flooding (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain flood 200 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_flood_cli, static) = { .path = "set bridge-domain flood", .short_help = "set bridge-domain flood <bridge-domain-id> [disable]", .function = bd_flood, }; -/* *INDENT-ON* */ /** Set bridge-domain unknown-unicast flood enable/disable. @@ -677,13 +669,11 @@ done: * Example of how to disable unknown-unicast flooding (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain uu-flood 200 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = { .path = "set bridge-domain uu-flood", .short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]", .function = bd_uu_flood, }; -/* *INDENT-ON* */ /** Set bridge-domain arp-unicast forward enable/disable. @@ -742,13 +732,11 @@ done: * Example of how to disable arp-unicast forwarding (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain arp-ufwd 200 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_arp_ufwd_cli, static) = { .path = "set bridge-domain arp-ufwd", .short_help = "set bridge-domain arp-ufwd <bridge-domain-id> [disable]", .function = bd_arp_ufwd, }; -/* *INDENT-ON* */ /** Set bridge-domain arp term enable/disable. 
@@ -854,13 +842,11 @@ done: * Example of how to disable mac aging (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain flood 200 0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_mac_age_cli, static) = { .path = "set bridge-domain mac-age", .short_help = "set bridge-domain mac-age <bridge-domain-id> <mins>", .function = bd_mac_age, }; -/* *INDENT-ON* */ static clib_error_t * bd_learn_limit (vlib_main_t *vm, unformat_input_t *input, @@ -921,13 +907,11 @@ VLIB_CLI_COMMAND (bd_learn_limit_cli, static) = { * Example of how to disable ARP termination (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain arp term 200 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_arp_term_cli, static) = { .path = "set bridge-domain arp term", .short_help = "set bridge-domain arp term <bridge-domain-id> [disable]", .function = bd_arp_term, }; -/* *INDENT-ON* */ /** @@ -1119,13 +1103,11 @@ done: * Example of how to delete an ARP entry (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a del} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = { .path = "set bridge-domain arp entry", .short_help = "set bridge-domain arp entry <bridge-domain-id> [<ip-addr> <mac-addr> [del] | del-all]", .function = bd_arp_entry, }; -/* *INDENT-ON* */ static u8 * format_uu_cfg (u8 * s, va_list * args) @@ -1289,7 +1271,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) vlib_cli_output (vm, "\n IP4/IP6 to MAC table for ARP Termination"); - /* *INDENT-OFF* */ hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4, ({ vlib_cli_output (vm, "%=40U => %=20U", @@ -1303,7 +1284,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) format_ip6_address, ip6_addr, format_ethernet_address, &mac_addr); })); - /* *INDENT-ON* */ } if ((detail || bd_tag) && (bd_config->bd_tag)) @@ -1349,13 +1329,11 @@ done: * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ 
VLIB_CLI_COMMAND (bd_show_cli, static) = { .path = "show bridge-domain", .short_help = "show bridge-domain [bridge-domain-id [detail|int|arp|bd-tag]]", .function = bd_show, }; -/* *INDENT-ON* */ int bd_add_del (l2_bridge_domain_add_del_args_t * a) @@ -1493,8 +1471,15 @@ bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input, if (bd_id == ~0) { - error = clib_error_return (0, "bridge-domain-id not specified"); - goto done; + if (is_add) + { + bd_id = bd_get_unused_id (); + } + else + { + error = clib_error_return (0, "bridge-domain-id not specified"); + goto done; + } } if (bd_id == 0) @@ -1587,7 +1572,6 @@ done: * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_create_cli, static) = { .path = "create bridge-domain", .short_help = "create bridge-domain <bridge-domain-id>" @@ -1595,9 +1579,38 @@ VLIB_CLI_COMMAND (bd_create_cli, static) = { " [arp-ufwd <0|1>] [mac-age <nn>] [bd-tag <tag>] [del]", .function = bd_add_del_command_fn, }; -/* *INDENT-ON* */ +/* + * Returns an unused bridge domain id, and ~0 if it can't find one. + */ +u32 +bd_get_unused_id (void) +{ + bd_main_t *bdm = &bd_main; + int i, j; + static u32 seed = 0; + /* limit to 1M tries */ + for (j = 0; j < 1 << 10; j++) + { + seed = random_u32 (&seed); + for (i = 0; i < 1 << 10; i++) + { + /* + * iterate seed+0, seed+1, seed-1, seed+2, seed-2, ... to generate id + */ + seed += (2 * (i % 2) - 1) * i; + /* bd_id must be (1 <= bd_id <= L2_BD_ID_MAX) */ + seed &= L2_BD_ID_MAX; + if (seed == 0) + continue; + if (bd_find_index (bdm, seed) == ~0) + return seed; + } + } + + return ~0; +} /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h index 0d77292519d..082d210b972 100644 --- a/src/vnet/l2/l2_bd.h +++ b/src/vnet/l2/l2_bd.h @@ -2,6 +2,7 @@ * l2_bd.h : layer 2 bridge domain * * Copyright (c) 2013 Cisco and/or its affiliates. + * Copyright (c) 2022 Nordix Foundation. 
* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -166,7 +167,7 @@ u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, bd_flags_t flags, void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age); void bd_set_learn_limit (vlib_main_t *vm, u32 bd_index, u32 learn_limit); int bd_add_del (l2_bridge_domain_add_del_args_t * args); - +u32 bd_get_unused_id (void); /** * \brief Get a bridge domain. * diff --git a/src/vnet/l2/l2_bvi.c b/src/vnet/l2/l2_bvi.c index 9cfff55fb45..e39c4aae39d 100644 --- a/src/vnet/l2/l2_bvi.c +++ b/src/vnet/l2/l2_bvi.c @@ -58,14 +58,12 @@ bvi_mac_change (vnet_hw_interface_t * hi, return (NULL); } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (bvi_device_class) = { .name = "BVI", .format_device_name = format_bvi_name, .admin_up_down_function = bvi_admin_up_down, .mac_addr_change_function = bvi_mac_change, }; -/* *INDENT-ON* */ /* * Maintain a bitmap of allocated bvi instance numbers. 
@@ -273,13 +271,11 @@ l2_bvi_create_cli (vlib_main_t * vm, * Example of how to create a bvi interface: * @cliexcmd{bvi create} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2_bvi_create_command, static) = { .path = "bvi create", .short_help = "bvi create [mac <mac-addr>] [instance <instance>]", .function = l2_bvi_create_cli, }; -/* *INDENT-ON* */ static clib_error_t * l2_bvi_delete_cli (vlib_main_t * vm, @@ -324,13 +320,11 @@ l2_bvi_delete_cli (vlib_main_t * vm, * Example of how to create a bvi interface: * @cliexcmd{bvi delete bvi0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2_bvi_delete_command, static) = { .path = "bvi delete", .short_help = "bvi delete <interface>", .function = l2_bvi_delete_cli, }; -/* *INDENT-ON* */ /* diff --git a/src/vnet/l2/l2_classify.h b/src/vnet/l2/l2_classify.h index 68a2bb98e64..3c86fb5ca86 100644 --- a/src/vnet/l2/l2_classify.h +++ b/src/vnet/l2/l2_classify.h @@ -39,7 +39,6 @@ typedef enum L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT, L2_INPUT_CLASSIFY_NEXT_IP4_INPUT, L2_INPUT_CLASSIFY_NEXT_IP6_INPUT, - L2_INPUT_CLASSIFY_NEXT_LI, L2_INPUT_CLASSIFY_N_NEXT, } l2_input_classify_next_t; diff --git a/src/vnet/l2/l2_efp_filter.c b/src/vnet/l2/l2_efp_filter.c index ad325b83df2..47256ffa5d3 100644 --- a/src/vnet/l2/l2_efp_filter.c +++ b/src/vnet/l2/l2_efp_filter.c @@ -461,7 +461,6 @@ VLIB_NODE_FN (l2_efp_filter_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_efp_filter_node) = { .name = "l2-efp-filter", .vector_size = sizeof (u32), @@ -478,7 +477,6 @@ VLIB_REGISTER_NODE (l2_efp_filter_node) = { [L2_EFP_FILTER_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT clib_error_t * @@ -559,13 +557,11 @@ done: * Example of how to disable a Layer 2 efp-filter on a sub-interface: * @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = { .path = "set interface l2 efp-filter", .short_help = "set interface l2 efp-filter 
<interface> [disable]", .function = int_l2_efp_filter, }; -/* *INDENT-ON* */ #endif /* CLIB_MARCH_VARIANT */ diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index d9d6710fd15..3dcd1e7ae26 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -95,8 +95,7 @@ format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args) if (!swif) return format (s, "Stale"); - return format (s, "%U", format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface_or_null (vnm, sw_if_index)); + return format (s, "%U", format_vnet_sw_if_index_name, vnm, sw_if_index); } typedef struct l2fib_dump_walk_ctx_t_ @@ -353,13 +352,11 @@ show_l2fib (vlib_main_t * vm, * 3 l2fib entries * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_l2fib_cli, static) = { .path = "show l2fib", .short_help = "show l2fib [all] | [bd_id <nn> | bd_index <nn>] [learn | add] | [raw]", .function = show_l2fib, }; -/* *INDENT-ON* */ void l2fib_table_init (void) @@ -416,13 +413,11 @@ clear_l2fib (vlib_main_t * vm, * no l2fib entries * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_l2fib_cli, static) = { .path = "clear l2fib", .short_help = "clear l2fib", .function = clear_l2fib, }; -/* *INDENT-ON* */ static l2fib_seq_num_t l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index) @@ -593,13 +588,11 @@ done: * 3 l2fib entries * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2fib_add_cli, static) = { .path = "l2fib add", .short_help = "l2fib add <mac> <bridge-domain-id> filter | <intf> [static | bvi]", .function = l2fib_add, }; -/* *INDENT-ON* */ static clib_error_t * @@ -724,13 +717,11 @@ l2fib_test_command_fn (vlib_main_t * vm, * @cliexcmd{test l2fib del mac 52:54:00:53:00:00 count 4} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2fib_test_command, static) = { .path = "test l2fib", .short_help = "test l2fib [add|del|check] mac <base-addr> count <nn>", .function = l2fib_test_command_fn, }; -/* *INDENT-ON* */ /** @@ -833,13 +824,11 @@ done: * Example of how to delete a MAC 
Address entry from the L2 FIB table of a bridge-domain (where 200 is the bridge-domain-id): * @cliexcmd{l2fib del 52:54:00:53:18:33 200} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2fib_del_cli, static) = { .path = "l2fib del", .short_help = "l2fib del <mac> <bridge-domain-id> []", .function = l2fib_del, }; -/* *INDENT-ON* */ static clib_error_t * l2fib_set_scan_delay (vlib_main_t *vm, unformat_input_t *input, @@ -977,13 +966,11 @@ l2fib_flush_mac_all (vlib_main_t * vm, * Example of how to flush MAC Address entries learned on an interface from the L2 FIB table: * @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = { .path = "l2fib flush-mac all", .short_help = "l2fib flush-mac all", .function = l2fib_flush_mac_all, }; -/* *INDENT-ON* */ /*? * This command kick off ager to delete all existing MAC Address entries, @@ -993,13 +980,11 @@ VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = { * Example of how to flush MAC Address entries learned on an interface from the L2 FIB table: * @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2fib_flush_mac_int_cli, static) = { .path = "l2fib flush-mac interface", .short_help = "l2fib flush-mac interface <if-name>", .function = l2fib_flush_mac_int, }; -/* *INDENT-ON* */ /** Flush bridge-domain MACs except static ones. 
@@ -1042,13 +1027,11 @@ done: * Example of how to flush MAC Address entries learned in a bridge domain from the L2 FIB table: * @cliexcmd{l2fib flush-mac bridge-domain 1000} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2fib_flush_mac_bd_cli, static) = { .path = "l2fib flush-mac bridge-domain", .short_help = "l2fib flush-mac bridge-domain <bd-id>", .function = l2fib_flush_mac_bd, }; -/* *INDENT-ON* */ clib_error_t * l2fib_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) @@ -1149,7 +1132,7 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) { for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) { - if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) + if (BV (clib_bihash_is_free) (&v->kvp[k])) continue; l2fib_entry_key_t key = {.raw = v->kvp[k].key }; @@ -1366,13 +1349,11 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2fib_mac_age_scanner_process_node) = { .function = l2fib_mac_age_scanner_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "l2fib-mac-age-scanner-process", }; -/* *INDENT-ON* */ clib_error_t * l2fib_init (vlib_main_t * vm) diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c index c0d7bf8dfab..f8cb3cb5687 100644 --- a/src/vnet/l2/l2_flood.c +++ b/src/vnet/l2/l2_flood.c @@ -362,7 +362,6 @@ VLIB_NODE_FN (l2flood_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2flood_node) = { .name = "l2-flood", .vector_size = sizeof (u32), @@ -380,7 +379,6 @@ VLIB_REGISTER_NODE (l2flood_node) = { [L2FLOOD_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT clib_error_t * @@ -468,13 +466,11 @@ done: * Example of how to disable flooding: * @cliexcmd{set interface l2 flood GigabitEthernet0/8/0 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_flood_cli, static) = { .path = "set interface l2 flood", .short_help = "set interface l2 flood <interface> [disable]", .function = int_flood, }; -/* *INDENT-ON* */ /* * 
fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c index 1ee3a534cd7..503dfc27957 100644 --- a/src/vnet/l2/l2_fwd.c +++ b/src/vnet/l2/l2_fwd.c @@ -288,7 +288,6 @@ l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, #ifdef COUNTERS em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 4; #endif - /* *INDENT-OFF* */ l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result, h0->dst_address, h1->dst_address, h2->dst_address, h3->dst_address, @@ -304,7 +303,6 @@ l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, &result1, &result2, &result3); - /* *INDENT-ON* */ l2fwd_process (vm, node, msm, em, b[0], sw_if_index0, &result0, next); l2fwd_process (vm, node, msm, em, b[1], sw_if_index1, &result1, next + 1); @@ -414,7 +412,6 @@ VLIB_NODE_FN (l2fwd_node) (vlib_main_t * vm, return l2fwd_node_inline (vm, node, frame, 0 /* do_trace */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2fwd_node) = { .name = "l2-fwd", .vector_size = sizeof (u32), @@ -432,7 +429,6 @@ VLIB_REGISTER_NODE (l2fwd_node) = { [L2FWD_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT clib_error_t * @@ -527,13 +523,11 @@ done: * Example of how to disable forwarding: * @cliexcmd{set interface l2 forward GigabitEthernet0/8/0 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_fwd_cli, static) = { .path = "set interface l2 forward", .short_help = "set interface l2 forward <interface> [disable]", .function = int_fwd, }; -/* *INDENT-ON* */ #endif diff --git a/src/vnet/l2/l2_in_out_acl.c b/src/vnet/l2/l2_in_out_acl.c index 7307a6802a2..2e2cb1e7f36 100644 --- a/src/vnet/l2/l2_in_out_acl.c +++ b/src/vnet/l2/l2_in_out_acl.c @@ -464,7 +464,6 @@ VLIB_NODE_FN (l2_outacl_node) (vlib_main_t * vm, IN_OUT_ACL_OUTPUT_TABLE_GROUP); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_inacl_node) = { .name = "l2-input-acl", .vector_size = sizeof (u32), @@ -498,7 +497,6 @@ VLIB_REGISTER_NODE (l2_outacl_node) = { 
[ACL_NEXT_INDEX_DENY] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT diff --git a/src/vnet/l2/l2_in_out_feat_arc.c b/src/vnet/l2/l2_in_out_feat_arc.c index 94c4c1bb713..26fbd3eb776 100644 --- a/src/vnet/l2/l2_in_out_feat_arc.c +++ b/src/vnet/l2/l2_in_out_feat_arc.c @@ -396,7 +396,6 @@ vnet_l2_in_out_feat_arc_enable_disable (u32 sw_if_index, int is_output, } #endif /* CLIB_MARCH_VARIANT */ -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (l2_in_ip4_arc, static) = { .arc_name = "l2-input-ip4", @@ -438,10 +437,8 @@ VNET_FEATURE_ARC_INIT (l2_in_nonip_arc, static) = }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_in_feat_arc_node) = { .name = "l2-input-feat-arc", .vector_size = sizeof (u32), @@ -521,7 +518,6 @@ VNET_FEATURE_INIT (l2_out_nonip_arc_end, static) = .node_name = "l2-output-feat-arc-end", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index b09555aa6ed..23bd5cc9958 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -646,13 +646,11 @@ done: * Example of how to remove an interface from a Layer2 bridge-domain: * @cliexcmd{set interface l3 GigabitEthernet0/a/0.200} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = { .path = "set interface l2 bridge", .short_help = "set interface l2 bridge <interface> <bridge-domain-id> [bvi|uu-fwd] [shg]", .function = int_l2_bridge, }; -/* *INDENT-ON* */ /** * Set subinterface in xconnect mode with another interface. @@ -712,13 +710,11 @@ done: * @cliexcmd{set interface l3 GigabitEthernet0/8/0.300} * @cliexcmd{set interface l3 GigabitEthernet0/9/0.300} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_l2_xc_cli, static) = { .path = "set interface l2 xconnect", .short_help = "set interface l2 xconnect <interface> <peer interface>", .function = int_l2_xc, }; -/* *INDENT-ON* */ /** * Set subinterface in L3 mode. 
@@ -762,13 +758,11 @@ done: * Example of how to set the mode of an interface to Layer 3: * @cliexcmd{set interface l3 GigabitEthernet0/8/0.200} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_l3_cli, static) = { .path = "set interface l3", .short_help = "set interface l3 <interface>", .function = int_l3, }; -/* *INDENT-ON* */ /** * Show interface mode. @@ -810,9 +804,7 @@ show_int_mode (vlib_main_t * vm, /* Gather interfaces. */ sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); vec_set_len (sis, 0); - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { vec_add1 (sis, si[0]); } - /* *INDENT-ON* */ } vec_foreach (si, sis) @@ -878,13 +870,11 @@ done: * l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_l2_mode, static) = { .path = "show mode", .short_help = "show mode [<if-name1> <if-name2> ...]", .function = show_int_mode, }; -/* *INDENT-ON* */ #define foreach_l2_init_function \ _(feat_bitmap_drop_init) \ diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index 7d1dc9c1d05..3de1537b45e 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -27,6 +27,7 @@ #include <vnet/ethernet/packet.h> #include <vnet/ip/ip4_inlines.h> #include <vnet/ip/ip6_inlines.h> +#include <vnet/mpls/mpls_lookup.h> /* l2 connection type */ typedef enum l2_input_flags_t_ @@ -327,7 +328,7 @@ vnet_update_l2_len (vlib_buffer_t *b) /* * Compute flow hash of an ethernet packet, use 5-tuple hash if L3 packet - * is ip4 or ip6. Otherwise hash on smac/dmac/etype. + * is ip4, ip6, or mpls. Otherwise hash on smac/dmac/etype. * The vlib buffer current pointer is expected to be at ethernet header * and vnet l2.l2_len is expected to be setup already. 
*/ @@ -342,6 +343,9 @@ vnet_l2_compute_flow_hash (vlib_buffer_t * b) return ip4_compute_flow_hash ((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT); else if (ethertype == ETHERNET_TYPE_IP6) return ip6_compute_flow_hash ((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT); + else if (ethertype == ETHERNET_TYPE_MPLS) + return mpls_compute_flow_hash ((mpls_unicast_header_t *) l3h, + IP_FLOW_HASH_DEFAULT); else { u32 a, b, c; diff --git a/src/vnet/l2/l2_input_classify.c b/src/vnet/l2/l2_input_classify.c index d33a0810d28..cc031bd46a5 100644 --- a/src/vnet/l2/l2_input_classify.c +++ b/src/vnet/l2/l2_input_classify.c @@ -442,7 +442,6 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_input_classify_node) = { .name = "l2-input-classify", .vector_size = sizeof (u32), @@ -462,10 +461,8 @@ VLIB_REGISTER_NODE (l2_input_classify_node) = { [L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2", [L2_INPUT_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input", [L2_INPUT_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input", - [L2_INPUT_CLASSIFY_NEXT_LI] = "li-hit", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT /** l2 input classsifier feature initialization. */ @@ -642,7 +639,6 @@ int_l2_input_classify_command_fn (vlib_main_t * vm, * @todo This is incomplete. This needs a detailed description and a * practical example. 
?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = { .path = "set interface l2 input classify", .short_help = @@ -650,7 +646,6 @@ VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = { " [ip6-table <n>] [other-table <n>]", .function = int_l2_input_classify_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_input_node.c b/src/vnet/l2/l2_input_node.c index f8dfa3641b3..76b94809eb3 100644 --- a/src/vnet/l2/l2_input_node.c +++ b/src/vnet/l2/l2_input_node.c @@ -251,11 +251,11 @@ l2input_node_inline (vlib_main_t * vm, /* Prefetch next iteration. */ { - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - clib_prefetch_store (b + 4); - clib_prefetch_store (b + 5); - clib_prefetch_store (b + 6); - clib_prefetch_store (b + 7); + /* Prefetch the buffer header for the N+2 loop iteration */ + clib_prefetch_store (b[4]); + clib_prefetch_store (b[5]); + clib_prefetch_store (b[6]); + clib_prefetch_store (b[7]); clib_prefetch_store (b[4]->data); clib_prefetch_store (b[5]->data); @@ -365,7 +365,6 @@ VLIB_NODE_FN (l2input_node) (vlib_main_t * vm, return l2input_node_inline (vm, node, frame, 0 /* do_trace */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2input_node) = { .name = "l2-input", .vector_size = sizeof (u32), @@ -385,7 +384,6 @@ VLIB_REGISTER_NODE (l2input_node) = { [L2INPUT_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_input_vtr.c b/src/vnet/l2/l2_input_vtr.c index 3c1235bfa32..ccf3efa2390 100644 --- a/src/vnet/l2/l2_input_vtr.c +++ b/src/vnet/l2/l2_input_vtr.c @@ -319,7 +319,6 @@ VLIB_NODE_FN (l2_invtr_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_invtr_node) = { .name = "l2-input-vtr", .vector_size = sizeof (u32), @@ -336,7 +335,6 @@ VLIB_REGISTER_NODE (l2_invtr_node) = { [L2_INVTR_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT 
clib_error_t * diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index 6d90cee62a7..24b5389e55a 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@ -439,7 +439,6 @@ VLIB_NODE_FN (l2learn_node) (vlib_main_t * vm, return l2learn_node_inline (vm, node, frame, 0 /* do_trace */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2learn_node) = { .name = "l2-learn", .vector_size = sizeof (u32), @@ -457,7 +456,6 @@ VLIB_REGISTER_NODE (l2learn_node) = { [L2LEARN_NEXT_L2FWD] = "l2-fwd", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT clib_error_t * @@ -540,13 +538,11 @@ done: * Example of how to disable learning: * @cliexcmd{set interface l2 learn GigabitEthernet0/8/0 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_learn_cli, static) = { .path = "set interface l2 learn", .short_help = "set interface l2 learn <interface> [disable]", .function = int_learn, }; -/* *INDENT-ON* */ static clib_error_t * diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c index 74ca868e535..7c70cf9f4c7 100644 --- a/src/vnet/l2/l2_output.c +++ b/src/vnet/l2/l2_output.c @@ -443,7 +443,6 @@ VLIB_NODE_FN (l2output_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2output_node) = { .name = "l2-output", .vector_size = sizeof (u32), @@ -461,7 +460,6 @@ VLIB_REGISTER_NODE (l2output_node) = { [L2OUTPUT_NEXT_BAD_INTF] = "l2-output-bad-intf", }, }; -/* *INDENT-ON* */ #define foreach_l2output_bad_intf_error \ @@ -549,7 +547,6 @@ VLIB_NODE_FN (l2output_bad_intf_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2output_bad_intf_node) = { .name = "l2-output-bad-intf", .vector_size = sizeof (u32), @@ -565,7 +562,6 @@ VLIB_REGISTER_NODE (l2output_bad_intf_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ static clib_error_t * l2output_init (vlib_main_t * vm) diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c index 97beb37f351..33a7c927386 100644 --- 
a/src/vnet/l2/l2_output_classify.c +++ b/src/vnet/l2/l2_output_classify.c @@ -435,7 +435,6 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_output_classify_node) = { .name = "l2-output-classify", .vector_size = sizeof (u32), @@ -454,7 +453,6 @@ VLIB_REGISTER_NODE (l2_output_classify_node) = { [L2_OUTPUT_CLASSIFY_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT /** l2 output classsifier feature initialization. */ @@ -634,7 +632,6 @@ int_l2_output_classify_command_fn (vlib_main_t * vm, * @todo This is incomplete. This needs a detailed description and a * practical example. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = { .path = "set interface l2 output classify", .short_help = @@ -642,7 +639,6 @@ VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = { " [ip6-table <n>] [other-table <n>]", .function = int_l2_output_classify_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c index 6de4e50a298..f85938ed799 100644 --- a/src/vnet/l2/l2_patch.c +++ b/src/vnet/l2/l2_patch.c @@ -206,7 +206,6 @@ VLIB_NODE_FN (l2_patch_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_patch_node) = { .name = "l2-patch", .vector_size = sizeof (u32), @@ -223,7 +222,6 @@ VLIB_REGISTER_NODE (l2_patch_node) = { [L2_PATCH_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ extern int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add); @@ -270,6 +268,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add) vnet_feature_enable_disable ("device-input", "l2-patch", rxhi->sw_if_index, 1, 0, 0); + vnet_feature_enable_disable ("port-rx-eth", "l2-patch", + rxhi->sw_if_index, 1, 0, 0); } else { @@ -278,6 +278,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int 
is_add) vnet_feature_enable_disable ("device-input", "l2-patch", rxhi->sw_if_index, 0, 0, 0); + vnet_feature_enable_disable ("port-rx-eth", "l2-patch", + rxhi->sw_if_index, 0, 0, 0); if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index) { l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0; @@ -369,13 +371,11 @@ done: * @todo This is incomplete. This needs a detailed description and a * practical example. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (test_patch_command, static) = { .path = "test l2patch", .short_help = "test l2patch rx <intfc> tx <intfc> [del]", .function = test_patch_command_fn, }; -/* *INDENT-ON* */ /** Display the contents of the l2patch table. */ static clib_error_t * @@ -421,13 +421,11 @@ show_l2patch (vlib_main_t * vm, * @todo This is incomplete. This needs a detailed description and a * practical example. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_l2patch_cli, static) = { .path = "show l2patch", .short_help = "Show l2 interface cross-connect entries", .function = show_l2patch, }; -/* *INDENT-ON* */ static clib_error_t * l2_patch_init (vlib_main_t * vm) diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c index 2c008794c1b..c0e8ec489fc 100644 --- a/src/vnet/l2/l2_rw.c +++ b/src/vnet/l2/l2_rw.c @@ -109,6 +109,7 @@ l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h) /* FALLTHROUGH */ case 1: d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0]; + rwe->hit_count++; break; default: abort (); @@ -332,6 +333,7 @@ l2_rw_mod_entry (u32 * index, return 0; } + e->hit_count = 0; e->skip_n_vectors = skip / sizeof (u32x4); skip -= e->skip_n_vectors * sizeof (u32x4); e->rewrite_n_vectors = (skip + len - 1) / sizeof (u32x4) + 1; @@ -398,17 +400,19 @@ l2_rw_entry_cli_fn (vlib_main_t * vm, * the provisioned mask and value, modifies the packet header. * * @cliexpar - * @todo This is incomplete. This needs a detailed description and a - * practical example. 
+ * Example of how to add an l2 rewrite entry to change the destination mac of + * the packet to 00:8a:00:0d:0e:02 (where parameter mask is Ethernet header's +mask, + * parameter value is Ethernet header's value): + * @cliexcmd{l2 rewrite entry mask ffffffffffff00000000000000000000 value +008a000d0e0200000000000000000000} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2_rw_entry_cli, static) = { .path = "l2 rewrite entry", .short_help = "l2 rewrite entry [index <index>] [mask <hex-mask>] [value <hex-value>] [skip <n_bytes>] [del]", .function = l2_rw_entry_cli_fn, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int @@ -468,21 +472,36 @@ l2_rw_interface_cli_fn (vlib_main_t * vm, } /*? - * Layer 2-Rewrite node uses classify tables to match packets. Then, using - * the provisioned mask and value, modifies the packet header. + * Apply the rule to the interface. The following example shows how to use +classify + * entry and Layer 2-Rewrite entry to modify the packet ethernet header on the + * interface. * * @cliexpar - * @todo This is incomplete. This needs a detailed description and a - * practical example. + * Example use the classify to filter packets that do not need to be modified +(where + * 192.168.68.34 is the destination ip of the data packet, 8080 is the +destination port + * of the packet): + * @cliexcmd{classify table mask l3 ip4 dst l4 dst_port} + * @cliexcmd{classify session acl-hit-next permit table-index 0 match l3 ip4 +dst 192.168.68.34 l4 dst_port 8080} + * + * @cliexpar + * Example apply classify and l2 rewrite rules to the interface (where +YusurK2Eth6/0/1/3 + * is interface, \"table 0\" means Table Id is 0, \"miss 0\" means the packet +that matches + * the classify. 
miss will be modified according to the l2 rewrite entry with +index 0): + * @cliexcmd{set interface l2 rewrite YusurK2Eth6/0/1/3 table 0 miss-index 0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2_rw_interface_cli, static) = { .path = "set interface l2 rewrite", .short_help = "set interface l2 rewrite <interface> [table <table index>] [miss-index <entry-index>]", .function = l2_rw_interface_cli_fn, }; -/* *INDENT-ON* */ static clib_error_t * l2_rw_show_interfaces_cli_fn (vlib_main_t * vm, @@ -494,30 +513,27 @@ l2_rw_show_interfaces_cli_fn (vlib_main_t * vm, vlib_cli_output (vm, "No interface is currently using l2 rewrite\n"); uword i; - /* *INDENT-OFF* */ clib_bitmap_foreach (i, rw->configs_bitmap) { vlib_cli_output (vm, "sw_if_index:%d %U\n", i, format_l2_rw_config, &rw->configs[i]); } - /* *INDENT-ON* */ return 0; } /*? - * Layer 2-Rewrite node uses classify tables to match packets. Then, using - * the provisioned mask and value, modifies the packet header. + * This command displays the l2 rewrite entries of the interfaces. * * @cliexpar - * @todo This is incomplete. This needs a detailed description and a - * practical example. + * Example of how to display the l2 rewrite rules on the interface: + * @cliexstart{show l2 rewrite interfaces} + * sw_if_index:4 table-index:0 miss-index:0 + * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2_rw_show_interfaces_cli, static) = { .path = "show l2 rewrite interfaces", .short_help = "show l2 rewrite interfaces", .function = l2_rw_show_interfaces_cli_fn, }; -/* *INDENT-ON* */ static clib_error_t * l2_rw_show_entries_cli_fn (vlib_main_t * vm, @@ -528,30 +544,29 @@ l2_rw_show_entries_cli_fn (vlib_main_t * vm, if (pool_elts (rw->entries) == 0) vlib_cli_output (vm, "No entries\n"); - /* *INDENT-OFF* */ pool_foreach (e, rw->entries) { vlib_cli_output (vm, "%U\n", format_l2_rw_entry, e); } - /* *INDENT-ON* */ return 0; } /*? - * Layer 2-Rewrite node uses classify tables to match packets. 
Then, using - * the provisioned mask and value, modifies the packet header. + * This command displays all l2 rewrite entries. * * @cliexpar - * @todo This is incomplete. This needs a detailed description and a - * practical example. + * Example of how to display all l2 rewrite entries: + * @cliexstart{show l2 rewrite entries} + * 0 - mask:ffffffffffff00000000000000000000 +value:aabbccddeeff00000000000000000000 + * hits:0 skip_bytes:0 + * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2_rw_show_entries_cli, static) = { .path = "show l2 rewrite entries", .short_help = "show l2 rewrite entries", .function = l2_rw_show_entries_cli_fn, }; -/* *INDENT-ON* */ static int l2_rw_enable_disable (u32 bridge_domain, u8 disable) @@ -587,21 +602,22 @@ l2_rw_set_cli_fn (vlib_main_t * vm, } /*? - * Layer 2-Rewrite node uses classify tables to match packets. Then, using - * the provisioned mask and value, modifies the packet header. + * Layer 2 rewrite can be enabled and disabled on each interface and on each +bridge-domain. + * Use this command to manage l2 rewrite on bridge-domain. * * @cliexpar - * @todo This is incomplete. This needs a detailed description and a - * practical example. 
+ * Example of how to enable rewrite (where 100 is the bridge-domain-id): + * @cliexcmd{set bridge-domain rewrite 100} + * Example of how to disable rewrite (where 100 is the bridge-domain-id): + * @cliexcmd{set bridge-domain rewrite 100 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (l2_rw_set_cli, static) = { .path = "set bridge-domain rewrite", .short_help = "set bridge-domain rewrite <bridge-domain> [disable]", .function = l2_rw_set_cli_fn, }; -/* *INDENT-ON* */ static clib_error_t * l2_rw_init (vlib_main_t * vm) @@ -643,7 +659,6 @@ static char *l2_rw_error_strings[] = { #undef _ }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_rw_node) = { .name = "l2-rw", .vector_size = sizeof (u32), @@ -655,7 +670,6 @@ VLIB_REGISTER_NODE (l2_rw_node) = { .n_next_nodes = L2_RW_N_NEXT, .next_nodes = { [L2_RW_NEXT_DROP] = "error-drop"}, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_rw.h b/src/vnet/l2/l2_rw.h index f9b10333f43..6d12a21fe55 100644 --- a/src/vnet/l2/l2_rw.h +++ b/src/vnet/l2/l2_rw.h @@ -27,7 +27,6 @@ #include <vnet/l2/l2_input.h> -/* *INDENT-OFF* */ typedef CLIB_PACKED(struct _l2_rw_entry { u16 skip_n_vectors; u16 rewrite_n_vectors; @@ -35,15 +34,12 @@ typedef CLIB_PACKED(struct _l2_rw_entry { u32x4 *mask; u32x4 *value; }) l2_rw_entry_t; -/* *INDENT-ON* */ /* l2_rw configuration for one interface */ -/* *INDENT-OFF* */ typedef CLIB_PACKED(struct _l2_rw_config { u32 table_index; /* Which classify table to use */ u32 miss_index; /* Rewrite entry to use if table does not match */ }) l2_rw_config_t; -/* *INDENT-ON* */ typedef struct { diff --git a/src/vnet/l2/l2_test.c b/src/vnet/l2/l2_test.c index 3be4a46223d..b78e388a9f1 100644 --- a/src/vnet/l2/l2_test.c +++ b/src/vnet/l2/l2_test.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright(c) 2021 Cisco Systems, Inc. + * Copyright(c) 2022 Nordix Foundation. 
*/ #include <vat/vat.h> @@ -634,6 +635,18 @@ done: return ret; } +static int +api_bridge_domain_add_del_v2 (vat_main_t *vam) +{ + return -1; +} + +static void +vl_api_bridge_domain_add_del_v2_reply_t_handler ( + vl_api_bridge_domain_add_del_v2_reply_t *mp) +{ +} + #define foreach_pbb_vtr_op \ _ ("disable", L2_VTR_DISABLED) \ _ ("pop", L2_VTR_POP_2) \ diff --git a/src/vnet/l2/l2_uu_fwd.c b/src/vnet/l2/l2_uu_fwd.c index fb3571d159c..4a510b658d7 100644 --- a/src/vnet/l2/l2_uu_fwd.c +++ b/src/vnet/l2/l2_uu_fwd.c @@ -211,7 +211,6 @@ VLIB_NODE_FN (l2_uu_fwd_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_uu_fwd_node) = { .name = "l2-uu-fwd", .vector_size = sizeof (u32), @@ -228,7 +227,6 @@ VLIB_REGISTER_NODE (l2_uu_fwd_node) = { [L2_UU_FWD_NEXT_L2_OUTPUT] = "l2-output", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_vtr.c b/src/vnet/l2/l2_vtr.c index bfd1dcb9280..4053c0fc1cb 100644 --- a/src/vnet/l2/l2_vtr.c +++ b/src/vnet/l2/l2_vtr.c @@ -670,13 +670,11 @@ done: * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = { .path = "set interface l2 tag-rewrite", .short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]", .function = int_l2_vtr, }; -/* *INDENT-ON* */ /** * Get pbb tag rewrite on the given interface. 
@@ -816,13 +814,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (int_l2_pbb_vtr_cli, static) = { .path = "set interface l2 pbb-tag-rewrite", .short_help = "set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]", .function = int_l2_pbb_vtr, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c index d848fac6b72..9edd8b6ba57 100644 --- a/src/vnet/l2/l2_xcrw.c +++ b/src/vnet/l2/l2_xcrw.c @@ -238,7 +238,6 @@ VLIB_NODE_FN (l2_xcrw_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_xcrw_node) = { .name = "l2-xcrw", .vector_size = sizeof (u32), @@ -255,7 +254,6 @@ VLIB_REGISTER_NODE (l2_xcrw_node) = { [L2_XCRW_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT clib_error_t * @@ -279,12 +277,10 @@ format_xcrw_name (u8 * s, va_list * args) return format (s, "xcrw%d", dev_instance); } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (xcrw_device_class,static) = { .name = "Xcrw", .format_device_name = format_xcrw_name, }; -/* *INDENT-ON* */ /* Create a sham tunnel interface and return its sw_if_index */ static u32 @@ -496,7 +492,6 @@ done: * @todo This is incomplete. This needs a detailed description and a * practical example. 
?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = { .path = "set interface l2 xcrw", .short_help = @@ -504,7 +499,6 @@ VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = { " [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>", .function = set_l2_xcrw_command_fn, }; -/* *INDENT-ON* */ #endif /* CLIB_MARCH_VARIANT */ @@ -568,12 +562,10 @@ show_l2xcrw_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0); - /* *INDENT-OFF* */ pool_foreach (t, xcm->tunnels) { vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t); } - /* *INDENT-ON* */ return 0; } @@ -585,13 +577,11 @@ show_l2xcrw_command_fn (vlib_main_t * vm, * @todo This is incomplete. This needs a detailed description and a * practical example. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_l2xcrw_command, static) = { .path = "show l2xcrw", .short_help = "show l2xcrw", .function = show_l2xcrw_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/lawful-intercept/lawful_intercept.c b/src/vnet/lawful-intercept/lawful_intercept.c deleted file mode 100644 index fff44fc3a67..00000000000 --- a/src/vnet/lawful-intercept/lawful_intercept.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <vnet/lawful-intercept/lawful_intercept.h> - -li_main_t li_main; - -static clib_error_t * -set_li_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - li_main_t *lm = &li_main; - ip4_address_t collector; - u8 collector_set = 0; - ip4_address_t src; - u8 src_set = 0; - u32 tmp; - u16 udp_port = 0; - u8 is_add = 1; - int i; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "collector %U", unformat_ip4_address, &collector)) - collector_set = 1; - if (unformat (input, "src %U", unformat_ip4_address, &src)) - src_set = 1; - else if (unformat (input, "udp-port %d", &tmp)) - udp_port = tmp; - else if (unformat (input, "del")) - is_add = 0; - else - break; - } - - if (collector_set == 0) - return clib_error_return (0, "collector must be set..."); - if (src_set == 0) - return clib_error_return (0, "src must be set..."); - if (udp_port == 0) - return clib_error_return (0, "udp-port must be set..."); - - if (is_add == 1) - { - for (i = 0; i < vec_len (lm->collectors); i++) - { - if (lm->collectors[i].as_u32 == collector.as_u32) - { - if (lm->ports[i] == udp_port) - return clib_error_return ( - 0, "collector %U:%d already configured", format_ip4_address, - &collector, udp_port); - else - return clib_error_return ( - 0, "collector %U already configured with port %d", - format_ip4_address, &collector, (int) (lm->ports[i])); - } - } - vec_add1 (lm->collectors, collector); - vec_add1 (lm->ports, udp_port); - vec_add1 (lm->src_addrs, src); - return 0; - } - else - { - for (i = 0; i < vec_len (lm->collectors); i++) - { - if ((lm->collectors[i].as_u32 == collector.as_u32) - && lm->ports[i] == udp_port) - { - vec_delete (lm->collectors, 1, i); - vec_delete (lm->ports, 1, i); - vec_delete (lm->src_addrs, 1, i); - return 0; - } - } - return clib_error_return (0, "collector %U:%d not configured", - &collector, udp_port); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND 
(set_li_command, static) = { - .path = "set li", - .short_help = - "set li src <ip4-address> collector <ip4-address> udp-port <nnnn>", - .function = set_li_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -li_init (vlib_main_t * vm) -{ - li_main_t *lm = &li_main; - - lm->vlib_main = vm; - lm->vnet_main = vnet_get_main (); - lm->hit_node_index = li_hit_node.index; - return 0; -} - -VLIB_INIT_FUNCTION (li_init); - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/lawful-intercept/lawful_intercept.h b/src/vnet/lawful-intercept/lawful_intercept.h deleted file mode 100644 index e39fa0d0752..00000000000 --- a/src/vnet/lawful-intercept/lawful_intercept.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __lawful_intercept_h__ -#define __lawful_intercept_h__ - -#include <vnet/vnet.h> -#include <vnet/ip/ip.h> - -typedef struct -{ - /* LI collector info */ - ip4_address_t *src_addrs; - ip4_address_t *collectors; - u16 *ports; - - /* Hit node index */ - u32 hit_node_index; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; -} li_main_t; - -extern li_main_t li_main; - -/* *INDENT-OFF* */ -typedef CLIB_PACKED(struct { - ip4_header_t ip4; - udp_header_t udp; -}) ip4_udp_header_t; -/* *INDENT-ON* */ - -extern vlib_node_registration_t li_hit_node; - -#endif /* __lawful_intercept_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/lawful-intercept/node.c b/src/vnet/lawful-intercept/node.c deleted file mode 100644 index c5328e672d0..00000000000 --- a/src/vnet/lawful-intercept/node.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <vlib/vlib.h> -#include <vnet/vnet.h> -#include <vppinfra/error.h> - -#include <vnet/lawful-intercept/lawful_intercept.h> - -#include <vppinfra/error.h> -#include <vppinfra/elog.h> - -extern vlib_node_registration_t li_hit_node; - -typedef struct -{ - u32 next_index; -} li_hit_trace_t; - -/* packet trace format function */ -static u8 * -format_li_hit_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - li_hit_trace_t *t = va_arg (*args, li_hit_trace_t *); - - s = format (s, "LI_HIT: next index %d", t->next_index); - - return s; -} - -#define foreach_li_hit_error \ -_(HITS, "LI packets processed") \ -_(NO_COLLECTOR, "No collector configured") \ -_(BUFFER_ALLOCATION_FAILURE, "Buffer allocation failure") - -typedef enum -{ -#define _(sym,str) LI_HIT_ERROR_##sym, - foreach_li_hit_error -#undef _ - LI_HIT_N_ERROR, -} li_hit_error_t; - -static char *li_hit_error_strings[] = { -#define _(sym,string) string, - foreach_li_hit_error -#undef _ -}; - -typedef enum -{ - LI_HIT_NEXT_ETHERNET, - LI_HIT_N_NEXT, -} li_hit_next_t; - -VLIB_NODE_FN (li_hit_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, *to_next; - li_hit_next_t next_index; - vlib_frame_t *int_frame = 0; - u32 *to_int_next = 0; - li_main_t *lm = &li_main; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - if (PREDICT_FALSE (vec_len (lm->collectors) == 0)) - { - vlib_node_increment_counter (vm, li_hit_node.index, - LI_HIT_ERROR_NO_COLLECTOR, n_left_from); - } - else - { - /* The intercept frame... 
*/ - int_frame = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - to_int_next = vlib_frame_vector_args (int_frame); - } - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - -#if 0 - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 next0 = LI_HIT_NEXT_INTERFACE_OUTPUT; - u32 next1 = LI_HIT_NEXT_INTERFACE_OUTPUT; - u32 sw_if_index0, sw_if_index1; - u8 tmp0[6], tmp1[6]; - ethernet_header_t *en0, *en1; - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - clib_prefetch_store (p2->data); - clib_prefetch_store (p3->data); - } - - /* speculatively enqueue b0 and b1 to the current next frame */ - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* $$$$$ Dual loop: process 2 x packets here $$$$$ */ - ASSERT (b0->current_data == 0); - ASSERT (b1->current_data == 0); - - en0 = vlib_buffer_get_current (b0); - en1 = vlib_buffer_get_current (b1); - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; - - /* Send pkt back out the RX interface */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = sw_if_index1; - - /* $$$$$ End of processing 2 x packets $$$$$ */ - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (b0->flags & VLIB_BUFFER_IS_TRACED) - { - li_hit_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - if (b1->flags & VLIB_BUFFER_IS_TRACED) - { - li_hit_trace_t *t = - vlib_add_trace (vm, node, b1, sizeof (*t)); - 
t->sw_if_index = sw_if_index1; - t->next_index = next1; - } - } - - /* verify speculative enqueues, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } -#endif /* $$$ dual-loop off */ - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - vlib_buffer_t *c0; - ip4_udp_header_t *iu0; - ip4_header_t *ip0; - udp_header_t *udp0; - u32 next0 = LI_HIT_NEXT_ETHERNET; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - if (PREDICT_TRUE (to_int_next != 0)) - { - /* Make an intercept copy. This can fail. */ - c0 = vlib_buffer_copy (vm, b0); - - if (PREDICT_FALSE (c0 == 0)) - { - vlib_node_increment_counter - (vm, node->node_index, - LI_HIT_ERROR_BUFFER_ALLOCATION_FAILURE, 1); - goto skip; - } - - vlib_buffer_advance (c0, -sizeof (*iu0)); - - iu0 = vlib_buffer_get_current (c0); - ip0 = &iu0->ip4; - - ip0->ip_version_and_header_length = 0x45; - ip0->ttl = 254; - ip0->protocol = IP_PROTOCOL_UDP; - - ip0->src_address.as_u32 = lm->src_addrs[0].as_u32; - ip0->dst_address.as_u32 = lm->collectors[0].as_u32; - ip0->length = vlib_buffer_length_in_chain (vm, c0); - ip0->checksum = ip4_header_checksum (ip0); - - udp0 = &iu0->udp; - udp0->src_port = udp0->dst_port = - clib_host_to_net_u16 (lm->ports[0]); - udp0->checksum = 0; - udp0->length = - clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, b0)); - - to_int_next[0] = vlib_get_buffer_index (vm, c0); - to_int_next++; - } - - skip: - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - li_hit_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - t->next_index = next0; - } - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, 
- to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - if (int_frame) - { - int_frame->n_vectors = frame->n_vectors; - vlib_put_frame_to_node (vm, ip4_lookup_node.index, int_frame); - } - - vlib_node_increment_counter (vm, li_hit_node.index, - LI_HIT_ERROR_HITS, frame->n_vectors); - return frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (li_hit_node) = { - .name = "li-hit", - .vector_size = sizeof (u32), - .format_trace = format_li_hit_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(li_hit_error_strings), - .error_strings = li_hit_error_strings, - - .n_next_nodes = LI_HIT_N_NEXT, - - /* edit / add dispositions here */ - .next_nodes = { - [LI_HIT_NEXT_ETHERNET] = "ethernet-input-not-l2", - }, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/llc/llc.c b/src/vnet/llc/llc.c index 4a7fdf9d9ba..4cbf17d48df 100644 --- a/src/vnet/llc/llc.c +++ b/src/vnet/llc/llc.c @@ -181,14 +181,12 @@ llc_build_rewrite (vnet_main_t * vnm, return (rewrite); } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = { .name = "LLC", .format_header = format_llc_header_with_length, .unformat_header = unformat_llc_header, .build_rewrite = llc_build_rewrite, }; -/* *INDENT-ON* */ static void add_protocol (llc_main_t * pm, llc_protocol_t protocol, char *protocol_name) diff --git a/src/vnet/llc/node.c b/src/vnet/llc/node.c index 086925bd305..d1ee6948269 100644 --- a/src/vnet/llc/node.c +++ b/src/vnet/llc/node.c @@ -246,7 +246,6 @@ static char *llc_error_strings[] = { #undef _ }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (llc_input_node) = { .function = llc_input, .name = "llc-input", @@ -267,7 +266,6 @@ VLIB_REGISTER_NODE (llc_input_node) = { .format_trace = format_llc_input_trace, .unformat_buffer = unformat_llc_header, }; -/* *INDENT-ON* */ static void llc_setup_node 
(vlib_main_t *vm, u32 node_index) diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c index affedb0ef00..3befce041bb 100644 --- a/src/vnet/mfib/mfib_forward.c +++ b/src/vnet/mfib/mfib_forward.c @@ -74,7 +74,7 @@ mfib_forward_lookup_trace (vlib_main_t * vm, t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index, - vnet_buffer(b1)->sw_if_index[VLIB_RX]); + vnet_buffer(b0)->sw_if_index[VLIB_RX]); } if (b1->flags & VLIB_BUFFER_IS_TRACED) { diff --git a/src/vnet/mfib/mfib_itf.c b/src/vnet/mfib/mfib_itf.c index b323d3e4a96..e65a6d733cf 100644 --- a/src/vnet/mfib/mfib_itf.c +++ b/src/vnet/mfib/mfib_itf.c @@ -206,10 +206,8 @@ format_mfib_itf (u8 * s, va_list * args) if (~0 != mfib_itf->mfi_sw_if_index) { return (format(s, " %U: %U", - format_vnet_sw_interface_name, - vnm, - vnet_get_sw_interface(vnm, - mfib_itf->mfi_sw_if_index), + format_vnet_sw_if_index_name, + vnm, mfib_itf->mfi_sw_if_index, format_mfib_itf_flags, mfib_itf->mfi_flags)); } else diff --git a/src/vnet/mfib/mfib_types.c b/src/vnet/mfib/mfib_types.c index 19583ea18f4..755f656a7b2 100644 --- a/src/vnet/mfib/mfib_types.c +++ b/src/vnet/mfib/mfib_types.c @@ -253,7 +253,6 @@ mfib_show_route_flags (vlib_main_t * vm, /*? * This command displays the set of supported flags applicable to an MFIB route */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (mfib_route_flags_command, static) = { .path = "show mfib route flags", @@ -261,7 +260,6 @@ VLIB_CLI_COMMAND (mfib_route_flags_command, static) = .function = mfib_show_route_flags, .is_mp_safe = 1, }; -/* *INDENT-ON* */ clib_error_t * mfib_show_itf_flags (vlib_main_t * vm, @@ -282,7 +280,6 @@ mfib_show_itf_flags (vlib_main_t * vm, /*? 
* This command displays the set of supported flags applicable to an MFIB interface */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (mfib_itf_flags_command, static) = { .path = "show mfib itf flags", @@ -290,4 +287,3 @@ VLIB_CLI_COMMAND (mfib_itf_flags_command, static) = .function = mfib_show_itf_flags, .is_mp_safe = 1, }; -/* *INDENT-ON* */ diff --git a/src/vnet/misc.c b/src/vnet/misc.c index 18d4651cff3..ea816615a50 100644 --- a/src/vnet/misc.c +++ b/src/vnet/misc.c @@ -56,18 +56,14 @@ vnet_local_interface_tx (vlib_main_t * vm, return f->n_vectors; } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (vnet_local_interface_device_class) = { .name = "local", .tx_function = vnet_local_interface_tx, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (vnet_local_interface_hw_class,static) = { .name = "local", }; -/* *INDENT-ON* */ clib_error_t * vnet_main_init (vlib_main_t * vm) @@ -86,10 +82,12 @@ vnet_main_init (vlib_main_t * vm) vnm->local_interface_hw_if_index = hw_if_index; vnm->local_interface_sw_if_index = hw->sw_if_index; + vnm->pcap.current_filter_function = + vlib_is_packet_traced_default_function (); + return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (vnet_main_init)= { .init_order = VLIB_INITS("vnet_interface_init", @@ -102,7 +100,6 @@ VLIB_INIT_FUNCTION (vnet_main_init)= "mpls_init", "vnet_main_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c index 5e80b9d0532..fd654dca891 100644 --- a/src/vnet/mpls/interface.c +++ b/src/vnet/mpls/interface.c @@ -22,6 +22,14 @@ #include <vnet/adj/adj_midchain.h> #include <vnet/dpo/classify_dpo.h> +typedef struct +{ + mpls_interface_state_change_function_t *function; + uword function_opaque; +} mpls_interface_state_change_callback_t; + +/** Functions to call when interface becomes MPLS enabled/disabled. 
*/ +static mpls_interface_state_change_callback_t *state_change_callbacks; u8 mpls_sw_interface_is_enabled (u32 sw_if_index) @@ -34,6 +42,17 @@ mpls_sw_interface_is_enabled (u32 sw_if_index) return (mm->mpls_enabled_by_sw_if_index[sw_if_index]); } +void +mpls_interface_state_change_add_callback ( + mpls_interface_state_change_function_t *function, uword opaque) +{ + mpls_interface_state_change_callback_t cb = { + .function = function, + .function_opaque = opaque, + }; + vec_add1 (state_change_callbacks, cb); +} + int mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index, u8 is_enable) @@ -81,6 +100,12 @@ mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index, else if (hi->l3_if_count) hi->l3_if_count--; + { + mpls_interface_state_change_callback_t *cb; + vec_foreach (cb, state_change_callbacks) + cb->function (mm, cb->function_opaque, sw_if_index, is_enable); + } + return (0); } diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index 4c164bb2bf2..5d775dafdfc 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -92,6 +92,26 @@ define mpls_tunnel_details vl_api_mpls_tunnel_t mt_tunnel; }; +/** \brief Dump mpls enabled interface(s) + @param client_index - opaque cookie to identify the sender + @param sw_if_index - sw_if_index of a specific interface, or -1 (default) + to return all MPLS enabled interfaces +*/ +define mpls_interface_dump +{ + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index [default=0xffffffff]; +}; + +/** \brief mpls enabled interface details +*/ +define mpls_interface_details +{ + u32 context; + vl_api_interface_index_t sw_if_index; +}; + /** \brief MPLS Route Add / del route @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 0d01010feea..7d922b003cc 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -370,7 +370,13 @@ done: 
VLIB_CLI_COMMAND (mpls_local_label_command, static) = { .path = "mpls local-label", .function = vnet_mpls_local_label, - .short_help = "mpls local-label [add|del] <label-value> [eos|non-eos] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-attached] [rx-ip4 <interface>] [out-labels <value value value>]", + .short_help = + "mpls local-label [add|del] <label-value> [eos|non-eos] via " + "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight " + "<value>] [preference <value>] [udp-encap-id <value>] " + "[ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] " + "[mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-attached] " + "[rx-ip4|rx-ip6 <interface>] [out-labels <value value value>]", }; clib_error_t * @@ -430,13 +436,11 @@ vnet_mpls_table_cmd (vlib_main_t * vm, return error; } -/* *INDENT-ON* */ /*? * This command is used to add or delete MPLS Tables. All * Tables must be explicitly added before that can be used, * Including the default table. 
?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (mpls_table_command, static) = { .path = "mpls table", .short_help = "mpls table [add|del] <table-id>", diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h index 885901f89a4..6baaaad95ba 100644 --- a/src/vnet/mpls/mpls.h +++ b/src/vnet/mpls/mpls.h @@ -23,14 +23,18 @@ #include <vnet/fib/fib_node.h> #include <vnet/adj/adj.h> +struct mpls_main_t; + /** * @brief Definition of a callback for receiving MPLS interface state change * notifications */ -typedef void (*mpls_interface_state_change_callback_t) (u32 sw_if_index, - u32 is_enable); +typedef void (mpls_interface_state_change_function_t) (struct mpls_main_t *mm, + uword opaque, + u32 sw_if_index, + u32 is_enable); -typedef struct +typedef struct mpls_main_t { /* MPLS FIB index for each software interface */ u32 *fib_index_by_sw_if_index; @@ -77,11 +81,14 @@ unformat_function_t unformat_mpls_unicast_label; unformat_function_t unformat_mpls_header; unformat_function_t unformat_pg_mpls_header; +u8 mpls_sw_interface_is_enabled (u32 sw_if_index); + +void mpls_interface_state_change_add_callback ( + mpls_interface_state_change_function_t *function, uword opaque); + int mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index, u8 is_enable); -u8 mpls_sw_interface_is_enabled (u32 sw_if_index); - int mpls_dest_cmp (void *a1, void *a2); int mpls_fib_index_cmp (void *a1, void *a2); diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index fac52827e1d..58998a6576c 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -199,12 +199,10 @@ vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp) rv = mpls_route_add_del_t_handler (vnm, mp, &stats_index); - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_MPLS_ROUTE_ADD_DEL_REPLY, ({ rmp->stats_index = htonl (stats_index); })); - /* *INDENT-ON* */ } void @@ -270,13 +268,11 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) vec_free (rpaths); out: - /* *INDENT-OFF* */ 
REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY, ({ rmp->sw_if_index = ntohl(tunnel_sw_if_index); rmp->tunnel_index = ntohl(tunnel_index); })); - /* *INDENT-ON* */ } static void @@ -401,12 +397,58 @@ vl_api_mpls_table_dump_t_handler (vl_api_mpls_table_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (fib_table, mm->fibs) { send_mpls_table_details(am, reg, mp->context, fib_table); } - /* *INDENT-ON* */ +} + +static void +send_mpls_interface_details (vpe_api_main_t *am, vl_api_registration_t *reg, + u32 context, const u32 sw_if_index) +{ + vl_api_mpls_interface_details_t *mp; + + mp = vl_msg_api_alloc_zero (sizeof (*mp)); + mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_MPLS_INTERFACE_DETAILS); + mp->context = context; + + mp->sw_if_index = htonl (sw_if_index); + vl_api_send_msg (reg, (u8 *) mp); +} + +static void +vl_api_mpls_interface_dump_t_handler (vl_api_mpls_interface_dump_t *mp) +{ + vpe_api_main_t *am = &vpe_api_main; + vl_api_registration_t *reg; + vnet_interface_main_t *im = &vnet_main.interface_main; + vnet_sw_interface_t *si; + u32 sw_if_index = ~0; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + sw_if_index = ntohl (mp->sw_if_index); + + if (sw_if_index == ~0) + { + pool_foreach (si, im->sw_interfaces) + { + if (mpls_sw_interface_is_enabled (si->sw_if_index)) + { + send_mpls_interface_details (am, reg, mp->context, + si->sw_if_index); + } + } + } + else + { + if (mpls_sw_interface_is_enabled (sw_if_index)) + { + send_mpls_interface_details (am, reg, mp->context, sw_if_index); + } + } } static void diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c index c18cbda6315..0505d9a1829 100644 --- a/src/vnet/mpls/mpls_input.c +++ b/src/vnet/mpls/mpls_input.c @@ -278,10 +278,8 @@ static clib_error_t * mpls_input_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (mpls_input_init) = { .runs_after = VLIB_INITS("mpls_init"), }; -/* *INDENT-ON* */ #endif /* 
CLIB_MARCH_VARIANT */ diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index db423392c03..a5ac56534a5 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -44,13 +44,13 @@ format_mpls_lookup_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *); - s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %x " - "label %d eos %d", - t->next_index, t->lfib_index, t->lb_index, t->hash, - vnet_mpls_uc_get_label( - clib_net_to_host_u32(t->label_net_byte_order)), - vnet_mpls_uc_get_s( - clib_net_to_host_u32(t->label_net_byte_order))); + s = format ( + s, + "MPLS: next [%d], lookup fib index %d, LB index %d hash 0x%08x " + "label %d eos %d", + t->next_index, t->lfib_index, t->lb_index, t->hash, + vnet_mpls_uc_get_label (clib_net_to_host_u32 (t->label_net_byte_order)), + vnet_mpls_uc_get_s (clib_net_to_host_u32 (t->label_net_byte_order))); return s; } @@ -482,8 +482,8 @@ format_mpls_load_balance_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *); - s = format (s, "MPLS: next [%d], LB index %d hash %d", - t->next_index, t->lb_index, t->hash); + s = format (s, "MPLS: next [%d], LB index %d hash 0x%08x", t->next_index, + t->lb_index, t->hash); return s; } @@ -553,75 +553,77 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm, * We don't want to use the same hash value at each level in the recursion * graph as that would lead to polarisation */ - hc0 = vnet_buffer (p0)->ip.flow_hash = 0; - hc1 = vnet_buffer (p1)->ip.flow_hash = 0; - - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) - { - if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash)) - { - hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1; - } - else - { - hc0 = vnet_buffer(p0)->ip.flow_hash = 
mpls_compute_flow_hash(mpls0, hc0); - } - dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1)); - } - else - { - dpo0 = load_balance_get_bucket_i (lb0, 0); - } - if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) - { - if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash)) - { - hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1; - } - else - { - hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1); - } - dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1)); - } - else - { - dpo1 = load_balance_get_bucket_i (lb1, 0); - } - - next0 = dpo0->dpoi_next_node; - next1 = dpo1->dpoi_next_node; - - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, p0)); - vlib_increment_combined_counter - (cm, thread_index, lbi1, 1, - vlib_buffer_length_in_chain (vm, p1)); - - if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node, - p0, sizeof (*tr)); - tr->next_index = next0; - tr->lb_index = lbi0; - tr->hash = hc0; - } - if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node, - p1, sizeof (*tr)); - tr->next_index = next1; - tr->lb_index = lbi1; - tr->hash = hc1; - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next, - to_next, n_left_to_next, - pi0, pi1, next0, next1); + hc0 = hc1 = 0; + + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash)) + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + vnet_buffer (p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + mpls_compute_flow_hash (mpls0, lb0->lb_hash_config); + } + dpo0 = load_balance_get_fwd_bucket ( + lb0, (hc0 & lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); 
+ } + if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash)) + { + hc1 = vnet_buffer (p1)->ip.flow_hash = + vnet_buffer (p1)->ip.flow_hash >> 1; + } + else + { + hc1 = vnet_buffer (p1)->ip.flow_hash = + mpls_compute_flow_hash (mpls1, lb1->lb_hash_config); + } + dpo1 = load_balance_get_fwd_bucket ( + lb1, (hc1 & lb1->lb_n_buckets_minus_1)); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); + } + + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter ( + cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + vlib_increment_combined_counter ( + cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_load_balance_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->hash = hc0; + } + if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_load_balance_trace_t *tr = + vlib_add_trace (vm, node, p1, sizeof (*tr)); + tr->next_index = next1; + tr->lb_index = lbi1; + tr->hash = hc1; + } + + vlib_validate_buffer_enqueue_x2 ( + vm, node, next, to_next, n_left_to_next, pi0, pi1, next0, next1); } while (n_left_from > 0 && n_left_to_next > 0) @@ -646,44 +648,45 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm, lb0 = load_balance_get(lbi0); - hc0 = vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) - { - if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash)) - { - hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1; - } - else - { - hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0); - } - dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1)); - } - else - { - dpo0 = load_balance_get_bucket_i (lb0, 
0); - } - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - - if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node, - p0, sizeof (*tr)); - tr->next_index = next0; - tr->lb_index = lbi0; - tr->hash = hc0; - } - - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, p0)); - - vlib_validate_buffer_enqueue_x1 (vm, node, next, - to_next, n_left_to_next, - pi0, next0); - } + hc0 = 0; + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash)) + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + vnet_buffer (p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + mpls_compute_flow_hash (mpls0, lb0->lb_hash_config); + } + dpo0 = load_balance_get_fwd_bucket ( + lb0, (hc0 & lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_load_balance_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->hash = hc0; + } + + vlib_increment_combined_counter ( + cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, + n_left_to_next, pi0, next0); + } vlib_put_next_frame (vm, node, next, n_left_to_next); } diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index 5f7bf8c3b25..b03a4a57f68 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -932,7 +932,12 @@ done: VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { .path = "mpls tunnel", .short_help = - "mpls tunnel [multicast] [l2-only] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] 
[udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]", + "mpls tunnel [multicast] [l2-only] via [next-hop-address] " + "[next-hop-interface] [next-hop-table <value>] [weight <value>] " + "[preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table " + "<value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] " + "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] " + "[out-labels <value value value>]", .function = vnet_create_mpls_tunnel_command_fn, }; diff --git a/src/vnet/osi/node.c b/src/vnet/osi/node.c index 4eb3e461139..9edc354cda7 100644 --- a/src/vnet/osi/node.c +++ b/src/vnet/osi/node.c @@ -239,7 +239,6 @@ static char *osi_error_strings[] = { #undef _ }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (osi_input_node) = { .function = osi_input, .name = "osi-input", @@ -260,7 +259,6 @@ VLIB_REGISTER_NODE (osi_input_node) = { .format_trace = format_osi_input_trace, .unformat_buffer = unformat_osi_header, }; -/* *INDENT-ON* */ static void osi_setup_node (vlib_main_t *vm, u32 node_index) diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c index ac225094391..3f2de2604b2 100644 --- a/src/vnet/pg/cli.c +++ b/src/vnet/pg/cli.c @@ -47,12 +47,10 @@ /* Root of all packet generator cli commands. */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_pg_command, static) = { .path = "packet-generator", .short_help = "Packet generator commands", }; -/* *INDENT-ON* */ void pg_enable_disable (u32 stream_index, int is_enable) @@ -63,11 +61,9 @@ pg_enable_disable (u32 stream_index, int is_enable) if (stream_index == ~0) { /* No stream specified: enable/disable all streams. 
*/ - /* *INDENT-OFF* */ pool_foreach (s, pg->streams) { pg_stream_enable_disable (pg, s, is_enable); } - /* *INDENT-ON* */ } else { @@ -138,23 +134,19 @@ doit: return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (enable_streams_cli, static) = { .path = "packet-generator enable-stream", .short_help = "Enable packet generator streams", .function = enable_disable_stream, .function_arg = 1, /* is_enable */ }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (disable_streams_cli, static) = { .path = "packet-generator disable-stream", .short_help = "Disable packet generator streams", .function = enable_disable_stream, .function_arg = 0, /* is_enable */ }; -/* *INDENT-ON* */ static u8 * format_pg_edit_group (u8 * s, va_list * va) @@ -210,12 +202,10 @@ format_pg_stream (u8 * s, va_list * va) if (verbose) { pg_edit_group_t *g; - /* *INDENT-OFF* */ vec_foreach (g, t->edit_groups) { s = format (s, "\n%U%U", format_white_space, indent, format_pg_edit_group, g); } - /* *INDENT-ON* */ } return s; @@ -244,23 +234,19 @@ show_streams (vlib_main_t * vm, } vlib_cli_output (vm, "%U", format_pg_stream, 0, 0); - /* *INDENT-OFF* */ pool_foreach (s, pg->streams) { vlib_cli_output (vm, "%U", format_pg_stream, s, verbose); } - /* *INDENT-ON* */ done: return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_streams_cli, static) = { .path = "show packet-generator ", .short_help = "show packet-generator [verbose]", .function = show_streams, }; -/* *INDENT-ON* */ static clib_error_t * pg_pcap_read (pg_stream_t * s, char *file_name) @@ -505,7 +491,6 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (new_stream_cli, static) = { .path = "packet-generator new", .function = new_stream, @@ -523,7 +508,6 @@ VLIB_CLI_COMMAND (new_stream_cli, static) = { "rate PPS rate to transfer packet data\n" "maxframe NPKTS maximum number of packets per frame\n", }; -/* *INDENT-ON* */ static clib_error_t * del_stream (vlib_main_t * vm, @@ -541,13 +525,11 @@ del_stream (vlib_main_t * vm, return 0; } -/* 
*INDENT-OFF* */ VLIB_CLI_COMMAND (del_stream_cli, static) = { .path = "packet-generator delete", .function = del_stream, .short_help = "Delete stream with given name", }; -/* *INDENT-ON* */ static clib_error_t * change_stream_parameters (vlib_main_t * vm, @@ -588,13 +570,11 @@ change_stream_parameters (vlib_main_t * vm, return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (change_stream_parameters_cli, static) = { .path = "packet-generator configure", .short_help = "Change packet generator stream parameters", .function = change_stream_parameters, }; -/* *INDENT-ON* */ static clib_error_t * pg_capture_cmd_fn (vlib_main_t * vm, @@ -671,13 +651,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (pg_capture_cmd, static) = { .path = "packet-generator capture", .short_help = "packet-generator capture <interface name> pcap <filename> [count <n>]", .function = pg_capture_cmd_fn, }; -/* *INDENT-ON* */ static clib_error_t * create_pg_if_cmd_fn (vlib_main_t * vm, @@ -685,7 +663,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm, { pg_main_t *pg = &pg_main; unformat_input_t _line_input, *line_input = &_line_input; - u32 if_id, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0; + u32 if_id = ~0, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0; clib_error_t *error = NULL; pg_interface_mode_t mode = PG_MODE_ETHERNET; @@ -730,7 +708,6 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_pg_if_cmd, static) = { .path = "create packet-generator", .short_help = "create packet-generator interface <interface name>" @@ -738,7 +715,6 @@ VLIB_CLI_COMMAND (create_pg_if_cmd, static) = { " [mode <ethernet | ip4 | ip6>]", .function = create_pg_if_cmd_fn, }; -/* *INDENT-ON* */ /* Dummy init function so that we can be linked in. 
*/ static clib_error_t * diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index 6f38ed0869a..321472c4d85 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -1578,7 +1578,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers, (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID | VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID); - if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) + if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM || gso_enabled) oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM; } else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6)) @@ -1596,7 +1596,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers, if (l4_proto == IP_PROTOCOL_TCP) { - if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) + if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM || gso_enabled) oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM; /* only set GSO flag for chained buffers */ @@ -1639,8 +1639,8 @@ pg_generate_packets (vlib_node_runtime_t * node, pg_interface_t *pi; int i; - pi = pool_elt_at_index (pg->interfaces, - pg->if_id_by_sw_if_index[s->sw_if_index[VLIB_RX]]); + pi = pool_elt_at_index ( + pg->interfaces, pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]]); bi0 = s->buffer_indices; n_packets_in_fifo = pg_stream_fill (pg, s, n_packets_to_generate); @@ -1816,17 +1816,14 @@ pg_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (vlib_num_workers ()) worker_index = vlib_get_current_worker_index (); - /* *INDENT-OFF* */ clib_bitmap_foreach (i, pg->enabled_streams[worker_index]) { pg_stream_t *s = vec_elt_at_index (pg->streams, i); n_packets += pg_input_stream (node, pg, s); } - /* *INDENT-ON* */ return n_packets; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (pg_input_node) = { .function = pg_input, .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, @@ -1839,7 +1836,6 @@ VLIB_REGISTER_NODE (pg_input_node) = { /* Input node will be left disabled until a stream is active. 
*/ .state = VLIB_NODE_STATE_DISABLED, }; -/* *INDENT-ON* */ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1864,9 +1860,9 @@ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm, pg_interface_t *pi; mac_address_t in; - pi = pool_elt_at_index - (pg->interfaces, - pg->if_id_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]); + pi = pool_elt_at_index ( + pg->interfaces, + pg->if_index_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]); eth = vlib_buffer_get_current (b[0]); mac_address_from_bytes (&in, eth->dst_address); @@ -1898,7 +1894,6 @@ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm, return (frame->n_vectors); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (pg_input_mac_filter) = { .name = "pg-input-mac-filter", .vector_size = sizeof (u32), @@ -1912,7 +1907,6 @@ VNET_FEATURE_INIT (pg_input_mac_filter_feat, static) = { .arc_name = "device-input", .node_name = "pg-input-mac-filter", }; -/* *INDENT-ON* */ static clib_error_t * pg_input_mac_filter_cfg (vlib_main_t * vm, @@ -1950,13 +1944,11 @@ pg_input_mac_filter_cfg (vlib_main_t * vm, return NULL; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (enable_streams_cli, static) = { .path = "packet-generator mac-filter", .short_help = "packet-generator mac-filter <INTERFACE> <on|off>", .function = pg_input_mac_filter_cfg, }; -/* *INDENT-ON* */ /* diff --git a/src/vnet/pg/pg.api b/src/vnet/pg/pg.api index 3630e0c2f0d..4f531fb1f5e 100644 --- a/src/vnet/pg/pg.api +++ b/src/vnet/pg/pg.api @@ -38,6 +38,8 @@ enum pg_interface_mode : u8 */ define pg_create_interface { + option deprecated; + u32 client_index; u32 context; vl_api_interface_index_t interface_id; @@ -60,6 +62,8 @@ define pg_create_interface_v2 */ define pg_create_interface_reply { + option deprecated; + u32 context; i32 retval; vl_api_interface_index_t sw_if_index; diff --git a/src/vnet/pg/pg.h b/src/vnet/pg/pg.h index e69ee6458e7..6d5b25ba25a 100644 --- a/src/vnet/pg/pg.h +++ b/src/vnet/pg/pg.h @@ -349,7 +349,7 @@ typedef 
struct pg_main_t /* Pool of interfaces. */ pg_interface_t *interfaces; uword *if_index_by_if_id; - uword *if_id_by_sw_if_index; + uword *if_index_by_sw_if_index; /* Vector of buffer indices for use in pg_stream_fill_replay, per thread */ u32 **replay_buffers_by_thread; @@ -383,7 +383,7 @@ void pg_interface_enable_disable_coalesce (pg_interface_t * pi, u8 enable, u32 tx_node_index); /* Find/create free packet-generator interface index. */ -u32 pg_interface_add_or_get (pg_main_t *pg, uword stream_index, u8 gso_enabled, +u32 pg_interface_add_or_get (pg_main_t *pg, u32 stream_index, u8 gso_enabled, u32 gso_size, u8 coalesce_enabled, pg_interface_mode_t mode); diff --git a/src/vnet/pg/pg_api.c b/src/vnet/pg/pg_api.c index 468c88ee8bb..e5d0a08a527 100644 --- a/src/vnet/pg/pg_api.c +++ b/src/vnet/pg/pg_api.c @@ -40,12 +40,10 @@ vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp) ntohl (mp->gso_size), 0, PG_MODE_ETHERNET); pg_interface_t *pi = pool_elt_at_index (pg->interfaces, pg_if_id); - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_PG_CREATE_INTERFACE_REPLY, ({ rmp->sw_if_index = ntohl(pi->sw_if_index); })); - /* *INDENT-ON* */ } static void diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c index 112cc09ae97..cf3d37d5e9e 100644 --- a/src/vnet/pg/stream.c +++ b/src/vnet/pg/stream.c @@ -171,7 +171,6 @@ pg_add_del_mac_address (vnet_hw_interface_t * hi, return (NULL); } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (pg_dev_class) = { .name = "pg", .tx_function = pg_output, @@ -180,7 +179,6 @@ VNET_DEVICE_CLASS (pg_dev_class) = { .admin_up_down_function = pg_interface_admin_up_down, .mac_addr_add_del_function = pg_add_del_mac_address, }; -/* *INDENT-ON* */ static u8 * pg_build_rewrite (vnet_main_t * vnm, @@ -197,12 +195,10 @@ pg_build_rewrite (vnet_main_t * vnm, return (rewrite); } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = { .name = "Packet generator", .build_rewrite = pg_build_rewrite, }; -/* *INDENT-ON* */ static u32 
pg_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) @@ -249,7 +245,7 @@ VNET_HW_INTERFACE_CLASS (pg_tun_hw_interface_class) = { }; u32 -pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled, +pg_interface_add_or_get (pg_main_t *pg, u32 if_id, u8 gso_enabled, u32 gso_size, u8 coalesce_enabled, pg_interface_mode_t mode) { @@ -315,8 +311,8 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled, hash_set (pg->if_index_by_if_id, if_id, i); - vec_validate (pg->if_id_by_sw_if_index, hi->sw_if_index); - pg->if_id_by_sw_if_index[hi->sw_if_index] = i; + vec_validate (pg->if_index_by_sw_if_index, hi->sw_if_index); + pg->if_index_by_sw_if_index[hi->sw_if_index] = i; if (vlib_num_workers ()) { @@ -560,6 +556,11 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init) */ s->sw_if_index[VLIB_RX] = pi->sw_if_index; } + else if (vec_len (pg->if_index_by_sw_if_index) <= s->sw_if_index[VLIB_RX]) + { + vec_validate (pg->if_index_by_sw_if_index, s->sw_if_index[VLIB_RX]); + pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]] = s->pg_if_index; + } /* Connect the graph. 
*/ s->next_index = vlib_node_add_next (vm, device_input_node.index, diff --git a/src/vnet/policer/node_funcs.c b/src/vnet/policer/node_funcs.c index efa2f830f8c..2d2252d247a 100644 --- a/src/vnet/policer/node_funcs.c +++ b/src/vnet/policer/node_funcs.c @@ -670,7 +670,6 @@ VLIB_NODE_FN (ip4_policer_classify_node) (vlib_main_t * vm, POLICER_CLASSIFY_TABLE_IP4); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_policer_classify_node) = { .name = "ip4-policer-classify", .vector_size = sizeof (u32), @@ -682,7 +681,6 @@ VLIB_REGISTER_NODE (ip4_policer_classify_node) = { [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_policer_classify_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -692,7 +690,6 @@ VLIB_NODE_FN (ip6_policer_classify_node) (vlib_main_t * vm, POLICER_CLASSIFY_TABLE_IP6); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_policer_classify_node) = { .name = "ip6-policer-classify", .vector_size = sizeof (u32), @@ -704,7 +701,6 @@ VLIB_REGISTER_NODE (ip6_policer_classify_node) = { [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (l2_policer_classify_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -713,7 +709,6 @@ VLIB_NODE_FN (l2_policer_classify_node) (vlib_main_t * vm, return policer_classify_inline (vm, node, frame, POLICER_CLASSIFY_TABLE_L2); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (l2_policer_classify_node) = { .name = "l2-policer-classify", .vector_size = sizeof (u32), @@ -725,7 +720,6 @@ VLIB_REGISTER_NODE (l2_policer_classify_node) = { [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT static clib_error_t * diff --git a/src/vnet/policer/police.h b/src/vnet/policer/police.h index 5ad249ef40e..8f126e22175 100644 --- a/src/vnet/policer/police.h +++ b/src/vnet/policer/police.h @@ -73,8 +73,6 @@ typedef enum typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 lock; // for exclusive access to the struct 
- u32 single_rate; // 1 = single rate policer, 0 = two rate policer u32 color_aware; // for hierarchical policing u32 scale; // power-of-2 shift amount for lower rates @@ -93,11 +91,9 @@ typedef struct u32 current_bucket; // MOD u32 extended_limit; u32 extended_bucket; // MOD - - u64 last_update_time; // MOD u32 thread_index; // Tie policer to a thread, rather than lock - u32 pad32; - + u64 last_update_time; // MOD + u8 *name; } policer_t; STATIC_ASSERT_SIZEOF (policer_t, CLIB_CACHE_LINE_BYTES); diff --git a/src/vnet/policer/police_inlines.h b/src/vnet/policer/police_inlines.h index 6b0c0ecf725..08000b9a303 100644 --- a/src/vnet/policer/police_inlines.h +++ b/src/vnet/policer/police_inlines.h @@ -123,7 +123,7 @@ policer_handoff (vlib_main_t *vm, vlib_node_runtime_t *node, u32 n_enq, n_left_from, *from; vnet_policer_main_t *pm; policer_t *policer; - u32 this_thread, policer_thread; + u32 this_thread, policer_thread = 0; bool single_policer_node = (policer_index != ~0); pm = &vnet_policer_main; diff --git a/src/vnet/policer/policer.api b/src/vnet/policer/policer.api index f4bf9384f10..a5a60b35c6b 100644 --- a/src/vnet/policer/policer.api +++ b/src/vnet/policer/policer.api @@ -13,7 +13,7 @@ * limitations under the License. */ -option version = "2.0.0"; +option version = "3.0.0"; import "vnet/interface_types.api"; import "vnet/policer/policer_types.api"; @@ -35,6 +35,16 @@ autoreply define policer_bind bool bind_enable; }; +autoreply define policer_bind_v2 +{ + u32 client_index; + u32 context; + + u32 policer_index; + u32 worker_index; + bool bind_enable; +}; + /** \brief policer input: Apply policer as an input feature. 
@param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -52,6 +62,16 @@ autoreply define policer_input bool apply; }; +autoreply define policer_input_v2 +{ + u32 client_index; + u32 context; + + u32 policer_index; + vl_api_interface_index_t sw_if_index; + bool apply; +}; + /** \brief policer output: Apply policer as an output feature. @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -69,6 +89,16 @@ autoreply define policer_output bool apply; }; +autoreply define policer_output_v2 +{ + u32 client_index; + u32 context; + + u32 policer_index; + vl_api_interface_index_t sw_if_index; + bool apply; +}; + /** \brief Add/del policer @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,6 +136,40 @@ define policer_add_del vl_api_sse2_qos_action_t violate_action; }; +define policer_add +{ + u32 client_index; + u32 context; + + string name[64]; + vl_api_policer_config_t infos; +}; + +autoreply define policer_del +{ + u32 client_index; + u32 context; + + u32 policer_index; +}; + +autoreply define policer_update +{ + u32 client_index; + u32 context; + + u32 policer_index; + vl_api_policer_config_t infos; +}; + +autoreply define policer_reset +{ + u32 client_index; + u32 context; + + u32 policer_index; +}; + /** \brief Add/del policer response @param context - sender context, to match reply w/ request @param retval - return value for request @@ -118,6 +182,13 @@ define policer_add_del_reply u32 policer_index; }; +define policer_add_reply +{ + u32 context; + i32 retval; + u32 policer_index; +}; + /** \brief Get list of policers @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -133,6 +204,23 @@ define policer_dump string match_name[64]; }; +/** \brief Get list of policers + @param client_index - opaque 
cookie to identify the sender + @param context - sender context, to match reply w/ request + @param policer_index - index of policer in the pool, ~0 to request all +*/ +define policer_dump_v2 +{ + u32 client_index; + u32 context; + + u32 policer_index; +}; + +service { + rpc policer_dump_v2 returns stream policer_details; +}; + /** \brief Policer operational state response. @param context - sender context, to match reply w/ request @param name - policer name diff --git a/src/vnet/policer/policer.c b/src/vnet/policer/policer.c index 0513563e1ec..eb7d40a340a 100644 --- a/src/vnet/policer/policer.c +++ b/src/vnet/policer/policer.c @@ -49,105 +49,161 @@ vlib_combined_counter_main_t policer_counters[] = { }, }; -clib_error_t * -policer_add_del (vlib_main_t *vm, u8 *name, qos_pol_cfg_params_st *cfg, - u32 *policer_index, u8 is_add) +int +policer_add (vlib_main_t *vm, const u8 *name, const qos_pol_cfg_params_st *cfg, + u32 *policer_index) { vnet_policer_main_t *pm = &vnet_policer_main; policer_t test_policer; policer_t *policer; + policer_t *pp; + qos_pol_cfg_params_st *cp; uword *p; u32 pi; int rv; + int i; p = hash_get_mem (pm->policer_config_by_name, name); - if (is_add == 0) - { - /* free policer config and template */ - if (p == 0) - { - vec_free (name); - return clib_error_return (0, "No such policer configuration"); - } - pool_put_index (pm->configs, p[0]); - pool_put_index (pm->policer_templates, p[0]); - hash_unset_mem (pm->policer_config_by_name, name); + if (p != NULL) + return VNET_API_ERROR_VALUE_EXIST; - /* free policer */ - p = hash_get_mem (pm->policer_index_by_name, name); - if (p == 0) - { - vec_free (name); - return clib_error_return (0, "No such policer"); - } - pool_put_index (pm->policers, p[0]); - hash_unset_mem (pm->policer_index_by_name, name); + /* Vet the configuration before adding it to the table */ + rv = pol_logical_2_physical (cfg, &test_policer); - vec_free (name); - return 0; - } + if (rv != 0) + return VNET_API_ERROR_INVALID_VALUE; - if 
(p != 0) + pool_get (pm->configs, cp); + pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES); + + clib_memcpy (cp, cfg, sizeof (*cp)); + clib_memcpy (policer, &test_policer, sizeof (*pp)); + + policer->name = format (0, "%s%c", name, 0); + pi = policer - pm->policers; + + hash_set_mem (pm->policer_config_by_name, policer->name, cp - pm->configs); + hash_set_mem (pm->policer_index_by_name, policer->name, pi); + *policer_index = pi; + policer->thread_index = ~0; + + for (i = 0; i < NUM_POLICE_RESULTS; i++) { - vec_free (name); - return clib_error_return (0, "Policer already exists"); + vlib_validate_combined_counter (&policer_counters[i], pi); + vlib_zero_combined_counter (&policer_counters[i], pi); } - /* Vet the configuration before adding it to the table */ - rv = pol_logical_2_physical (cfg, &test_policer); + return 0; +} + +int +policer_del (vlib_main_t *vm, u32 policer_index) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + policer_t *policer; + uword *p; + + if (pool_is_free_index (pm->policers, policer_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + policer = &pm->policers[policer_index]; + + p = hash_get_mem (pm->policer_config_by_name, policer->name); - if (rv == 0) + /* free policer config */ + if (p != NULL) { - policer_t *pp; - qos_pol_cfg_params_st *cp; - int i; + pool_put_index (pm->configs, p[0]); + hash_unset_mem (pm->policer_config_by_name, policer->name); + } - pool_get (pm->configs, cp); - pool_get (pm->policer_templates, pp); + /* free policer */ + hash_unset_mem (pm->policer_index_by_name, policer->name); + vec_free (policer->name); + pool_put_index (pm->policers, policer_index); + + return 0; +} + +int +policer_update (vlib_main_t *vm, u32 policer_index, + const qos_pol_cfg_params_st *cfg) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + policer_t test_policer; + policer_t *policer; + qos_pol_cfg_params_st *cp; + uword *p; + u8 *name; + int rv; + int i; - ASSERT (cp - pm->configs == pp - pm->policer_templates); + if 
(pool_is_free_index (pm->policers, policer_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; - clib_memcpy (cp, cfg, sizeof (*cp)); - clib_memcpy (pp, &test_policer, sizeof (*pp)); + policer = &pm->policers[policer_index]; - hash_set_mem (pm->policer_config_by_name, name, cp - pm->configs); - pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES); - policer[0] = pp[0]; - pi = policer - pm->policers; - hash_set_mem (pm->policer_index_by_name, name, pi); - *policer_index = pi; - policer->thread_index = ~0; + /* Vet the configuration before adding it to the table */ + rv = pol_logical_2_physical (cfg, &test_policer); + if (rv != 0) + return VNET_API_ERROR_INVALID_VALUE; - for (i = 0; i < NUM_POLICE_RESULTS; i++) - { - vlib_validate_combined_counter (&policer_counters[i], pi); - vlib_zero_combined_counter (&policer_counters[i], pi); - } + p = hash_get_mem (pm->policer_config_by_name, policer->name); + + if (PREDICT_TRUE (p != NULL)) + { + cp = &pm->configs[p[0]]; } else { - vec_free (name); - return clib_error_return (0, "Config failed sanity check"); + /* recover from a missing configuration */ + pool_get (pm->configs, cp); + hash_set_mem (pm->policer_config_by_name, policer->name, + cp - pm->configs); } + name = policer->name; + + clib_memcpy (cp, cfg, sizeof (*cp)); + clib_memcpy (policer, &test_policer, sizeof (*policer)); + + policer->name = name; + policer->thread_index = ~0; + + for (i = 0; i < NUM_POLICE_RESULTS; i++) + vlib_zero_combined_counter (&policer_counters[i], policer_index); + return 0; } int -policer_bind_worker (u8 *name, u32 worker, bool bind) +policer_reset (vlib_main_t *vm, u32 policer_index) { vnet_policer_main_t *pm = &vnet_policer_main; policer_t *policer; - uword *p; - p = hash_get_mem (pm->policer_index_by_name, name); - if (p == 0) - { - return VNET_API_ERROR_NO_SUCH_ENTRY; - } + if (pool_is_free_index (pm->policers, policer_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + policer = &pm->policers[policer_index]; + + 
policer->current_bucket = policer->current_limit; + policer->extended_bucket = policer->extended_limit; + + return 0; +} + +int +policer_bind_worker (u32 policer_index, u32 worker, bool bind) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + policer_t *policer; + + if (pool_is_free_index (pm->policers, policer_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; - policer = &pm->policers[p[0]]; + policer = &pm->policers[policer_index]; if (bind) { @@ -166,21 +222,9 @@ policer_bind_worker (u8 *name, u32 worker, bool bind) } int -policer_input (u8 *name, u32 sw_if_index, vlib_dir_t dir, bool apply) +policer_input (u32 policer_index, u32 sw_if_index, vlib_dir_t dir, bool apply) { vnet_policer_main_t *pm = &vnet_policer_main; - policer_t *policer; - u32 policer_index; - uword *p; - - p = hash_get_mem (pm->policer_index_by_name, name); - if (p == 0) - { - return VNET_API_ERROR_NO_SUCH_ENTRY; - } - - policer = &pm->policers[p[0]]; - policer_index = policer - pm->policers; if (apply) { @@ -210,20 +254,21 @@ policer_input (u8 *name, u32 sw_if_index, vlib_dir_t dir, bool apply) u8 * format_policer_instance (u8 * s, va_list * va) { + vnet_policer_main_t *pm = &vnet_policer_main; policer_t *i = va_arg (*va, policer_t *); - uword pi = va_arg (*va, uword); + u32 policer_index = i - pm->policers; int result; vlib_counter_t counts[NUM_POLICE_RESULTS]; for (result = 0; result < NUM_POLICE_RESULTS; result++) { - vlib_get_combined_counter (&policer_counters[result], pi, + vlib_get_combined_counter (&policer_counters[result], policer_index, &counts[result]); } - s = format (s, "policer at %llx: %s rate, %s color-aware\n", - i, i->single_rate ? "single" : "dual", - i->color_aware ? "is" : "not"); + s = + format (s, "Policer at index %d: %s rate, %s color-aware\n", policer_index, + i->single_rate ? "single" : "dual", i->color_aware ? 
"is" : "not"); s = format (s, "cir %u tok/period, pir %u tok/period, scale %u\n", i->cir_tokens_per_period, i->pir_tokens_per_period, i->scale); s = format (s, "cur lim %u, cur bkt %u, ext lim %u, ext bkt %u\n", @@ -475,6 +520,7 @@ unformat_policer_classify_next_index (unformat_input_t * input, va_list * va) return 0; p = hash_get_mem (pm->policer_index_by_name, match_name); + vec_free (match_name); if (p == 0) return 0; @@ -513,12 +559,16 @@ static clib_error_t * policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { + vnet_policer_main_t *pm = &vnet_policer_main; qos_pol_cfg_params_st c; unformat_input_t _line_input, *line_input = &_line_input; - u8 is_add = 1; u8 *name = 0; + uword *p; u32 pi; + u32 policer_index = ~0; + int rv = 0; clib_error_t *error = NULL; + u8 is_update = cmd->function_arg; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -528,9 +578,9 @@ policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "del")) - is_add = 0; - else if (unformat (line_input, "name %s", &name)) + if (unformat (line_input, "name %s", &name)) + ; + else if (is_update && unformat (line_input, "index %u", &policer_index)) ; else if (unformat (line_input, "color-aware")) c.color_aware = 1; @@ -546,10 +596,41 @@ policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input, } } - error = policer_add_del (vm, name, &c, &pi, is_add); + if (is_update) + { + if (~0 == policer_index && 0 != name) + { + p = hash_get_mem (pm->policer_index_by_name, name); + if (p != NULL) + policer_index = p[0]; + } + + if (~0 != policer_index) + { + rv = policer_update (vm, policer_index, &c); + } + } + else + { + rv = policer_add (vm, name, &c, &pi); + } + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "No such policer"); + break; + case VNET_API_ERROR_VALUE_EXIST: + 
error = clib_error_return (0, "Policer already exists"); + break; + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "Config failed sanity check"); + break; + } done: unformat_free (line_input); + vec_free (name); return error; } @@ -560,6 +641,10 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input, { unformat_input_t _line_input, *line_input = &_line_input; clib_error_t *error = NULL; + vnet_policer_main_t *pm = &vnet_policer_main; + int rv; + u32 policer_index = ~0; + uword *p; u8 *name = 0; /* Get a line of input. */ @@ -570,6 +655,8 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input, { if (unformat (line_input, "name %s", &name)) ; + else if (unformat (line_input, "index %u", &policer_index)) + ; else { error = clib_error_return (0, "unknown input `%U'", @@ -578,10 +665,30 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input, } } - error = policer_add_del (vm, name, NULL, NULL, 0); + if (~0 == policer_index && 0 != name) + { + p = hash_get_mem (pm->policer_index_by_name, name); + if (p != NULL) + policer_index = p[0]; + } + + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + if (~0 != policer_index) + rv = policer_del (vm, policer_index); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "No such policer configuration"); + break; + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "No such policer"); + break; + } done: unformat_free (line_input); + vec_free (name); return error; } @@ -592,13 +699,14 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input, { unformat_input_t _line_input, *line_input = &_line_input; clib_error_t *error = NULL; - u8 bind, *name = 0; - u32 worker; + vnet_policer_main_t *pm = &vnet_policer_main; + u8 bind = 1; + u8 *name = 0; + u32 worker = ~0; + u32 policer_index = ~0; + uword *p; int rv; - bind = 1; - worker = ~0; - /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -607,6 +715,8 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input, { if (unformat (line_input, "name %s", &name)) ; + else if (unformat (line_input, "index %u", &policer_index)) + ; else if (unformat (line_input, "unbind")) bind = 0; else if (unformat (line_input, "%d", &worker)) @@ -626,7 +736,16 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input, } else { - rv = policer_bind_worker (name, worker, bind); + if (~0 == policer_index && 0 != name) + { + p = hash_get_mem (pm->policer_index_by_name, name); + if (p != NULL) + policer_index = p[0]; + } + + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + if (~0 != policer_index) + rv = policer_bind_worker (policer_index, worker, bind); if (rv) error = clib_error_return (0, "failed: `%d'", rv); @@ -634,6 +753,7 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input, done: unformat_free (line_input); + vec_free (name); return error; } @@ -644,14 +764,15 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input, { unformat_input_t _line_input, *line_input = &_line_input; clib_error_t *error = NULL; - u8 apply, *name = 0; - u32 sw_if_index; + vnet_policer_main_t *pm = &vnet_policer_main; + u8 apply = 1; + u8 *name = 0; + u32 sw_if_index = ~0; + u32 policer_index = ~0; + uword *p; int rv; vlib_dir_t dir = cmd->function_arg; - apply = 1; - sw_if_index = ~0; - /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -660,6 +781,8 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input, { if (unformat (line_input, "name %s", &name)) ; + else if (unformat (line_input, "index %u", &policer_index)) + ; else if (unformat (line_input, "unapply")) apply = 0; else if (unformat (line_input, "%U", unformat_vnet_sw_interface, @@ -680,7 +803,16 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input, } else { - rv = policer_input (name, sw_if_index, dir, apply); + if (~0 == policer_index && 0 != name) + { + p = hash_get_mem (pm->policer_index_by_name, name); + if (p != NULL) + policer_index = p[0]; + } + + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + if (~0 != policer_index) + rv = policer_input (policer_index, sw_if_index, dir, apply); if (rv) error = clib_error_return (0, "failed: `%d'", rv); @@ -688,101 +820,199 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input, done: unformat_free (line_input); + vec_free (name); + + return error; +} + +static clib_error_t * +policer_reset_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + vnet_policer_main_t *pm = &vnet_policer_main; + int rv; + u32 policer_index = ~0; + uword *p; + u8 *name = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &name)) + ; + else if (unformat (line_input, "index %u", &policer_index)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (~0 == policer_index && 0 != name) + { + p = hash_get_mem (pm->policer_index_by_name, name); + if (p != NULL) + policer_index = p[0]; + } + + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + if (~0 != policer_index) + rv = policer_reset (vm, policer_index); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "No such policer"); + break; + } + +done: + unformat_free (line_input); + vec_free (name); return error; } VLIB_CLI_COMMAND (configure_policer_command, static) = { .path = "configure policer", - .short_help = "configure policer name <name> <params> ", + .short_help = "configure policer [name <name> | index <index>] [type 1r2c | " + "1r3c | 2r3c-2698 " + "| 2r3c-4115] [color-aware] [cir <cir>] [cb <cb>] [eir <eir>] " + "[eb <eb>] [rate kbps | pps] [round closest | up | down] " + "[conform-action drop | transmit | mark-and-transmit <dscp>] " + "[exceed-action drop | transmit | mark-and-transmit <dscp>] " + "[violate-action drop | transmit | mark-and-transmit <dscp>]", .function = policer_add_command_fn, + .function_arg = 1 }; VLIB_CLI_COMMAND (policer_add_command, static) = { .path = "policer add", - .short_help = "policer name <name> <params> ", + .short_help = "policer add name <name> [type 1r2c | 1r3c | 2r3c-2698 | " + "2r3c-4115] [color-aware] [cir <cir>] [cb <cb>] [eir <eir>] " + "[eb <eb>] [rate kbps | pps] [round closest | up | down] " + "[conform-action drop | transmit | mark-and-transmit <dscp>] " + "[exceed-action drop | transmit | mark-and-transmit <dscp>] " + "[violate-action drop | transmit | mark-and-transmit <dscp>]", .function = 
policer_add_command_fn, + .function_arg = 0 }; VLIB_CLI_COMMAND (policer_del_command, static) = { .path = "policer del", - .short_help = "policer del name <name> ", + .short_help = "policer del [name <name> | index <index>]", .function = policer_del_command_fn, }; VLIB_CLI_COMMAND (policer_bind_command, static) = { .path = "policer bind", - .short_help = "policer bind [unbind] name <name> <worker>", + .short_help = "policer bind [unbind] [name <name> | index <index>] <worker>", .function = policer_bind_command_fn, }; VLIB_CLI_COMMAND (policer_input_command, static) = { .path = "policer input", - .short_help = "policer input [unapply] name <name> <interfac>", + .short_help = + "policer input [unapply] [name <name> | index <index>] <interface>", .function = policer_input_command_fn, .function_arg = VLIB_RX, }; VLIB_CLI_COMMAND (policer_output_command, static) = { .path = "policer output", - .short_help = "policer output [unapply] name <name> <interfac>", + .short_help = + "policer output [unapply] [name <name> | index <index>] <interface>", .function = policer_input_command_fn, .function_arg = VLIB_TX, }; +VLIB_CLI_COMMAND (policer_reset_command, static) = { + .path = "policer reset", + .short_help = "policer reset [name <name> | index <index>]", + .function = policer_reset_command_fn +}; + static clib_error_t * show_policer_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { vnet_policer_main_t *pm = &vnet_policer_main; - hash_pair_t *p; - u32 pool_index; - u8 *match_name = 0; - u8 *name; - uword *pi; + unformat_input_t _line_input, *line_input = &_line_input; + policer_t *policer; + u32 policer_index = ~0; + u8 *name = 0; + uword *ci, *pi; qos_pol_cfg_params_st *config; - policer_t *templ; - - (void) unformat (input, "name %s", &match_name); - - /* *INDENT-OFF* */ - hash_foreach_pair (p, pm->policer_config_by_name, - ({ - name = (u8 *) p->key; - if (match_name == 0 || !strcmp((char *) name, (char *) match_name)) - { - pi = 
hash_get_mem (pm->policer_index_by_name, name); - - pool_index = p->value[0]; - config = pool_elt_at_index (pm->configs, pool_index); - templ = pool_elt_at_index (pm->policer_templates, pool_index); - vlib_cli_output (vm, "Name \"%s\" %U ", name, format_policer_config, - config); - if (pi) - { - vlib_cli_output (vm, "Template %U", format_policer_instance, templ, - pi[0]); - } - else - { - vlib_cli_output ( - vm, "Cannot print template - policer index hash lookup failed"); - } - vlib_cli_output (vm, "-----------"); - } - })); - /* *INDENT-ON* */ - return 0; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + { + pool_foreach (policer, pm->policers) + { + ci = hash_get_mem (pm->policer_config_by_name, policer->name); + config = pool_elt_at_index (pm->configs, ci[0]); + + vlib_cli_output (vm, "Name \"%s\" %U ", policer->name, + format_policer_config, config); + vlib_cli_output (vm, "%U", format_policer_instance, policer); + vlib_cli_output (vm, "-----------"); + } + return 0; + } + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &name)) + ; + else if (unformat (line_input, "index %u", &policer_index)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (~0 == policer_index && 0 != name) + { + pi = hash_get_mem (pm->policer_index_by_name, name); + if (pi != NULL) + policer_index = pi[0]; + } + + if (~0 == policer_index || pool_is_free_index (pm->policers, policer_index)) + goto done; + + policer = &pm->policers[policer_index]; + ci = hash_get_mem (pm->policer_config_by_name, policer->name); + config = pool_elt_at_index (pm->configs, ci[0]); + vlib_cli_output (vm, "Name \"%s\" %U ", policer->name, format_policer_config, + config); + vlib_cli_output (vm, "%U", format_policer_instance, policer); + vlib_cli_output (vm, "-----------"); + +done: + unformat_free 
(line_input); + vec_free (name); + + return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_policer_command, static) = { - .path = "show policer", - .short_help = "show policer [name]", - .function = show_policer_command_fn, + .path = "show policer", + .short_help = "show policer [name <name> | index <index>]", + .function = show_policer_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_policer_pools_command_fn (vlib_main_t * vm, @@ -791,19 +1021,15 @@ show_policer_pools_command_fn (vlib_main_t * vm, { vnet_policer_main_t *pm = &vnet_policer_main; - vlib_cli_output (vm, "pool sizes: configs=%d templates=%d policers=%d", - pool_elts (pm->configs), - pool_elts (pm->policer_templates), - pool_elts (pm->policers)); + vlib_cli_output (vm, "pool sizes: configs=%d policers=%d", + pool_elts (pm->configs), pool_elts (pm->policers)); return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_policer_pools_command, static) = { .path = "show policer pools", .short_help = "show policer pools", .function = show_policer_pools_command_fn, }; -/* *INDENT-ON* */ clib_error_t * policer_init (vlib_main_t * vm) diff --git a/src/vnet/policer/policer.h b/src/vnet/policer/policer.h index f5b6c0d3b31..7ce7fc79d47 100644 --- a/src/vnet/policer/policer.h +++ b/src/vnet/policer/policer.h @@ -32,7 +32,7 @@ typedef struct qos_pol_cfg_params_st *configs; policer_t *policer_templates; - /* Config by name hash */ + /* Config by policer name hash */ uword *policer_config_by_name; /* Policer by name hash */ @@ -68,11 +68,16 @@ typedef enum } vnet_policer_next_t; u8 *format_policer_instance (u8 * s, va_list * va); -clib_error_t *policer_add_del (vlib_main_t *vm, u8 *name, - qos_pol_cfg_params_st *cfg, u32 *policer_index, - u8 is_add); -int policer_bind_worker (u8 *name, u32 worker, bool bind); -int policer_input (u8 *name, u32 sw_if_index, vlib_dir_t dir, bool apply); +int policer_add (vlib_main_t *vm, const u8 *name, + const qos_pol_cfg_params_st *cfg, u32 *policer_index); + +int 
policer_update (vlib_main_t *vm, u32 policer_index, + const qos_pol_cfg_params_st *cfg); +int policer_del (vlib_main_t *vm, u32 policer_index); +int policer_reset (vlib_main_t *vm, u32 policer_index); +int policer_bind_worker (u32 policer_index, u32 worker, bool bind); +int policer_input (u32 policer_index, u32 sw_if_index, vlib_dir_t dir, + bool apply); #endif /* __included_policer_h__ */ diff --git a/src/vnet/policer/policer.rst b/src/vnet/policer/policer.rst new file mode 100644 index 00000000000..0e7369e373b --- /dev/null +++ b/src/vnet/policer/policer.rst @@ -0,0 +1,217 @@ +.. _policer: + +Policing +======== + +VPP implements several policer types that do not always conform +to the related RFCs [#rfc2697]_ [#rfc2698]_ [#rfc4115]_. +Only policers implemented in VPP will be presented, along with +the differences they have compared to RFCs. + +.. contents:: :local: + :depth: 1 + + +1 rate 2 color (1r2c) +--------------------- + +This is the most straightforward policer. There is no RFC describing it; +however, its description can be found in several documents [#juniper]_ [#cisco]_. + +A 1r2c policer is well suited to classifying incoming packets into two categories: +conforming packets (marked green), and violating ones (marked red). + +Parameters +~~~~~~~~~~ + +To set-up such a policer, only two parameters are needed: + +Committed Information Rate (CIR) + Given in bytes per second, this parameter is the average + throughput allowed by the policer. + + It sets the limit between conforming arriving packets (those making the + traffic fall below the CIR), and violating arriving packets + (those making the traffic exceed the CIR). + +Committed Burst Size (CBS) + It represents the size (in bytes) of a token bucket used + to allow some burstiness from the incoming traffic. + +.. 
figure:: /_images/policer-1r2c-bucket.png + :align: center + :scale: 25% + + Figure 1: 1r2c bucket filling logic + +The committed token bucket (C) is filling up at CIR tokens (bytes) +per second, up to CBS tokens. All overflowing tokens are lost. + +Color-Blind algorithm +~~~~~~~~~~~~~~~~~~~~~ + +.. image:: /_images/policer-1r2c-blind.png + :align: center + :scale: 75% + +| + +Color-Aware algorithm +~~~~~~~~~~~~~~~~~~~~~ + +In online documentation, there is no trace of a color-aware 1r2c policer. +However, the VPP implementation allows such a thing. + +.. image:: /_images/policer-1r2c-aware.png + :align: center + :scale: 75% + +| + + +1 rate 3 color (1r3c) RFC 2697 [#rfc2697]_ +------------------------------------------ + +As for the `1 rate 2 color (1r2c)`_ policer, only one rate parameter is required +to set up a 1r3c policer. However, such a policer adds another kind of packet category: +exceeding ones (marked yellow). + +Parameters +~~~~~~~~~~ + +To set-up such a policer, three parameters are needed: + +Committed Information Rate (CIR) + As in the `1 rate 2 color (1r2c)`_ policer. + +Committed Burst Size (CBS) + As in the `1 rate 2 color (1r2c)`_ policer. + +Excess Burst Size (EBS) + It represents the size (in bytes) of a second token bucket used + to allow additional burstiness from the incoming traffic, when + traffic has been below the CIR for some time. + +.. figure:: /_images/policer-1r3c-bucket.png + :align: center + :scale: 25% + + Figure 2: 1r3c buckets filling logic + +The committed token bucket (C) is filling up at CIR tokens (bytes) +per second, up to CBS tokens. When C is full, tokens are overflowing +into the excess token bucket (E), up to EBS tokens. Only overflowing +tokens from E are lost. + +Color-Blind algorithm +~~~~~~~~~~~~~~~~~~~~~ + +.. image:: /_images/policer-1r3c-blind.png + :align: center + :scale: 75% + +| + +Color-Aware algorithm +~~~~~~~~~~~~~~~~~~~~~ + +.. 
image:: /_images/policer-1r3c-aware.png + :align: center + :scale: 75% + +| + +Notes +~~~~~ + +In RFC 2697 [#rfc2697]_, which describes the 1r3c policer, conforming (green) packets +only consume tokens from the token bucket C, whereas in VPP they also consume tokens from E. + +One way to stick to the RFC is then to set the EBS parameter to be greater than CBS, so that +EBS - CBS corresponds to the EBS from the RFC. + +However, VPP does not enforce setting EBS > CBS, which could result in undesired behavior. + +2 rate 3 color (2r3c) RFC 2698 [#rfc2698]_ +------------------------------------------ + +Instead of setting the limit between yellow and red packets in terms of bursts, +as it is done by `1 rate 3 color (1r3c) RFC 2697`_ policers, two rate policers introduce +another rate parameter to discriminate between those two kinds of packets. + +Parameters +~~~~~~~~~~ + +To set-up such a policer, four parameters are needed: + +Committed Information Rate (CIR) + As in the `1 rate 2 color (1r2c)`_ policer. + +Committed Burst Size (CBS) + As in the `1 rate 2 color (1r2c)`_ policer. + +Peak Information Rate (PIR) + Given in bytes per second, this parameter is the average + throughput allowed by the policer when there is a peak in + traffic. + + It sets a second limit between exceeding arriving packets + (those making the traffic fall below the PIR, but above CIR), + and violating arriving packets (those making the traffic exceed the PIR). + +Peak Burst Size (PBS) + It represents the size (in bytes) of a second token bucket used + to allow additional peak traffic. + +.. figure:: /_images/policer-2r3c-bucket.png + :align: center + :scale: 25% + + Figure 3: 2r3c-rfc2698 buckets filling logic + +The committed token bucket (C) is filling up at CIR tokens (bytes) +per second, up to CBS tokens. In the meantime, the peak token bucket (P) +is filling up at PIR tokens per second, up to PBS. All overflowing tokens +from C and P are lost. 
+ +Color-Blind algorithm +~~~~~~~~~~~~~~~~~~~~~ + +.. image:: /_images/policer-2r3c-blind.png + :align: center + :scale: 75% + +| + +Color-Aware algorithm +~~~~~~~~~~~~~~~~~~~~~ + +.. image:: /_images/policer-2r3c-aware.png + :align: center + :scale: 50% + +| + +Notes +~~~~~ + +To have a working policer, the condition PIR >= CIR needs to hold. +Indeed, peak traffic is assumed to have a greater +rate than committed traffic. + + +2 rate 3 color (2r3c) RFC 4115 [#rfc4115]_ +------------------------------------------ + +The 2r3c-RFC4115 type is accepted by VPP. However, there is currently +no implementation of such a policer. Hence, the only two-rate policer VPP +implements is the `2 rate 3 color (2r3c) RFC 2698`_ policer. + + +.. rubric:: References: + +.. [#juniper] https://www.juniper.net/documentation/us/en/software/junos/traffic-mgmt-nfx/routing-policy/topics/concept/tcm-overview-cos-qfx-series-understanding.html +.. [#cisco] https://www.cisco.com/c/en/us/td/docs/ios-xml/ios/qos_mqc/configuration/xe-16-8/qos-mqc-xe-16-8-book/qos-pkt-policing.html +.. [#rfc2697] https://www.rfc-editor.org/rfc/rfc2697.html +.. [#rfc2698] https://www.rfc-editor.org/rfc/rfc2698.html +.. 
[#rfc4115] https://www.rfc-editor.org/rfc/rfc4115.html diff --git a/src/vnet/policer/policer_api.c b/src/vnet/policer/policer_api.c index 4f9baa09feb..df35b472a89 100644 --- a/src/vnet/policer/policer_api.c +++ b/src/vnet/policer/policer_api.c @@ -35,126 +35,293 @@ static void vl_api_policer_add_del_t_handler (vl_api_policer_add_del_t * mp) { vlib_main_t *vm = vlib_get_main (); + vnet_policer_main_t *pm = &vnet_policer_main; vl_api_policer_add_del_reply_t *rmp; int rv = 0; - u8 *name = NULL; + uword *p; + char name[sizeof (mp->name) + 1]; qos_pol_cfg_params_st cfg; - clib_error_t *error; u32 policer_index; - name = format (0, "%s", mp->name); - vec_terminate_c_string (name); - - clib_memset (&cfg, 0, sizeof (cfg)); - cfg.rfc = (qos_policer_type_en) mp->type; - cfg.rnd_type = (qos_round_type_en) mp->round_type; - cfg.rate_type = (qos_rate_type_en) mp->rate_type; - cfg.rb.kbps.cir_kbps = ntohl (mp->cir); - cfg.rb.kbps.eir_kbps = ntohl (mp->eir); - cfg.rb.kbps.cb_bytes = clib_net_to_host_u64 (mp->cb); - cfg.rb.kbps.eb_bytes = clib_net_to_host_u64 (mp->eb); - cfg.conform_action.action_type = - (qos_action_type_en) mp->conform_action.type; - cfg.conform_action.dscp = mp->conform_action.dscp; - cfg.exceed_action.action_type = (qos_action_type_en) mp->exceed_action.type; - cfg.exceed_action.dscp = mp->exceed_action.dscp; - cfg.violate_action.action_type = - (qos_action_type_en) mp->violate_action.type; - cfg.violate_action.dscp = mp->violate_action.dscp; - - cfg.color_aware = mp->color_aware; - - error = policer_add_del (vm, name, &cfg, &policer_index, mp->is_add); - - if (error) + snprintf (name, sizeof (name), "%s", mp->name); + + if (mp->is_add) { - rv = VNET_API_ERROR_UNSPECIFIED; - clib_error_free (error); + clib_memset (&cfg, 0, sizeof (cfg)); + cfg.rfc = (qos_policer_type_en) mp->type; + cfg.rnd_type = (qos_round_type_en) mp->round_type; + cfg.rate_type = (qos_rate_type_en) mp->rate_type; + cfg.rb.kbps.cir_kbps = ntohl (mp->cir); + cfg.rb.kbps.eir_kbps = ntohl 
(mp->eir); + cfg.rb.kbps.cb_bytes = clib_net_to_host_u64 (mp->cb); + cfg.rb.kbps.eb_bytes = clib_net_to_host_u64 (mp->eb); + cfg.conform_action.action_type = + (qos_action_type_en) mp->conform_action.type; + cfg.conform_action.dscp = mp->conform_action.dscp; + cfg.exceed_action.action_type = + (qos_action_type_en) mp->exceed_action.type; + cfg.exceed_action.dscp = mp->exceed_action.dscp; + cfg.violate_action.action_type = + (qos_action_type_en) mp->violate_action.type; + cfg.violate_action.dscp = mp->violate_action.dscp; + cfg.color_aware = mp->color_aware; + + rv = policer_add (vm, (u8 *) name, &cfg, &policer_index); } + else + { + p = hash_get_mem (pm->policer_index_by_name, name); + + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + if (p != NULL) + rv = policer_del (vm, p[0]); + } + + REPLY_MACRO2 (VL_API_POLICER_ADD_DEL_REPLY, ({ + if (rv == 0 && mp->is_add) + rmp->policer_index = htonl (policer_index); + else + rmp->policer_index = ~0; + })); +} + +static_always_inline void +policer_set_configuration (qos_pol_cfg_params_st *cfg, + vl_api_policer_config_t *infos) +{ + clib_memset (cfg, 0, sizeof (*cfg)); + cfg->rfc = (qos_policer_type_en) infos->type; + cfg->rnd_type = (qos_round_type_en) infos->round_type; + cfg->rate_type = (qos_rate_type_en) infos->rate_type; + cfg->rb.kbps.cir_kbps = ntohl (infos->cir); + cfg->rb.kbps.eir_kbps = ntohl (infos->eir); + cfg->rb.kbps.cb_bytes = clib_net_to_host_u64 (infos->cb); + cfg->rb.kbps.eb_bytes = clib_net_to_host_u64 (infos->eb); + cfg->conform_action.action_type = + (qos_action_type_en) infos->conform_action.type; + cfg->conform_action.dscp = infos->conform_action.dscp; + cfg->exceed_action.action_type = + (qos_action_type_en) infos->exceed_action.type; + cfg->exceed_action.dscp = infos->exceed_action.dscp; + cfg->violate_action.action_type = + (qos_action_type_en) infos->violate_action.type; + cfg->violate_action.dscp = infos->violate_action.dscp; + cfg->color_aware = infos->color_aware; +} + +static void 
+vl_api_policer_add_t_handler (vl_api_policer_add_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_policer_add_reply_t *rmp; + int rv = 0; + char name[sizeof (mp->name) + 1]; + qos_pol_cfg_params_st cfg; + u32 policer_index; + + snprintf (name, sizeof (name), "%s", mp->name); + + policer_set_configuration (&cfg, &mp->infos); + + rv = policer_add (vm, (u8 *) name, &cfg, &policer_index); - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_POLICER_ADD_DEL_REPLY, - ({ - if (rv == 0 && mp->is_add) - rmp->policer_index = ntohl(policer_index); - else - rmp->policer_index = ~0; - })); - /* *INDENT-ON* */ + REPLY_MACRO2 (VL_API_POLICER_ADD_REPLY, ({ + if (rv == 0) + rmp->policer_index = htonl (policer_index); + else + rmp->policer_index = ~0; + })); +} + +static void +vl_api_policer_del_t_handler (vl_api_policer_del_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_policer_del_reply_t *rmp; + u32 policer_index; + int rv = 0; + + policer_index = ntohl (mp->policer_index); + rv = policer_del (vm, policer_index); + + REPLY_MACRO (VL_API_POLICER_DEL_REPLY); +} + +static void +vl_api_policer_update_t_handler (vl_api_policer_update_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_policer_update_reply_t *rmp; + int rv = 0; + qos_pol_cfg_params_st cfg; + u32 policer_index; + + policer_set_configuration (&cfg, &mp->infos); + + policer_index = ntohl (mp->policer_index); + rv = policer_update (vm, policer_index, &cfg); + + REPLY_MACRO (VL_API_POLICER_UPDATE_REPLY); +} + +static void +vl_api_policer_reset_t_handler (vl_api_policer_reset_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_policer_reset_reply_t *rmp; + u32 policer_index; + int rv = 0; + + policer_index = ntohl (mp->policer_index); + rv = policer_reset (vm, policer_index); + + REPLY_MACRO (VL_API_POLICER_RESET_REPLY); } static void vl_api_policer_bind_t_handler (vl_api_policer_bind_t *mp) { vl_api_policer_bind_reply_t *rmp; - u8 *name; + vnet_policer_main_t *pm = &vnet_policer_main; + char name[sizeof 
(mp->name) + 1]; + uword *p; u32 worker_index; u8 bind_enable; int rv; - name = format (0, "%s", mp->name); - vec_terminate_c_string (name); + snprintf (name, sizeof (name), "%s", mp->name); worker_index = ntohl (mp->worker_index); bind_enable = mp->bind_enable; - rv = policer_bind_worker (name, worker_index, bind_enable); - vec_free (name); + p = hash_get_mem (pm->policer_index_by_name, name); + + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + if (p != NULL) + rv = policer_bind_worker (p[0], worker_index, bind_enable); + REPLY_MACRO (VL_API_POLICER_BIND_REPLY); } static void +vl_api_policer_bind_v2_t_handler (vl_api_policer_bind_v2_t *mp) +{ + vl_api_policer_bind_v2_reply_t *rmp; + u32 policer_index; + u32 worker_index; + u8 bind_enable; + int rv; + + policer_index = ntohl (mp->policer_index); + worker_index = ntohl (mp->worker_index); + bind_enable = mp->bind_enable; + + rv = policer_bind_worker (policer_index, worker_index, bind_enable); + + REPLY_MACRO (VL_API_POLICER_BIND_V2_REPLY); +} + +static void vl_api_policer_input_t_handler (vl_api_policer_input_t *mp) { - vl_api_policer_bind_reply_t *rmp; - u8 *name; + vl_api_policer_input_reply_t *rmp; + vnet_policer_main_t *pm = &vnet_policer_main; + char name[sizeof (mp->name) + 1]; + uword *p; u32 sw_if_index; u8 apply; int rv; VALIDATE_SW_IF_INDEX (mp); - name = format (0, "%s", mp->name); - vec_terminate_c_string (name); + snprintf (name, sizeof (name), "%s", mp->name); sw_if_index = ntohl (mp->sw_if_index); apply = mp->apply; - rv = policer_input (name, sw_if_index, VLIB_RX, apply); - vec_free (name); + p = hash_get_mem (pm->policer_index_by_name, name); + + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + if (p != NULL) + rv = policer_input (p[0], sw_if_index, VLIB_RX, apply); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_POLICER_INPUT_REPLY); } static void -vl_api_policer_output_t_handler (vl_api_policer_input_t *mp) +vl_api_policer_input_v2_t_handler (vl_api_policer_input_v2_t *mp) { - vl_api_policer_bind_reply_t *rmp; - u8 *name; + 
vl_api_policer_input_v2_reply_t *rmp; + u32 policer_index; + u32 sw_if_index; + u8 apply; + int rv; + + VALIDATE_SW_IF_INDEX (mp); + + policer_index = ntohl (mp->policer_index); + sw_if_index = ntohl (mp->sw_if_index); + apply = mp->apply; + + rv = policer_input (policer_index, sw_if_index, VLIB_RX, apply); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_POLICER_INPUT_REPLY); +} + +static void +vl_api_policer_output_t_handler (vl_api_policer_output_t *mp) +{ + vl_api_policer_output_reply_t *rmp; + vnet_policer_main_t *pm = &vnet_policer_main; + char name[sizeof (mp->name) + 1]; + uword *p; u32 sw_if_index; u8 apply; int rv; VALIDATE_SW_IF_INDEX (mp); - name = format (0, "%s", mp->name); - vec_terminate_c_string (name); + snprintf (name, sizeof (name), "%s", mp->name); sw_if_index = ntohl (mp->sw_if_index); apply = mp->apply; - rv = policer_input (name, sw_if_index, VLIB_TX, apply); - vec_free (name); + p = hash_get_mem (pm->policer_index_by_name, name); + + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + if (p != NULL) + rv = policer_input (p[0], sw_if_index, VLIB_TX, apply); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_POLICER_OUTPUT_REPLY); } static void -send_policer_details (u8 *name, qos_pol_cfg_params_st *config, - policer_t *templ, vl_api_registration_t *reg, - u32 context) +vl_api_policer_output_v2_t_handler (vl_api_policer_output_v2_t *mp) +{ + vl_api_policer_output_reply_t *rmp; + u32 policer_index; + u32 sw_if_index; + u8 apply; + int rv; + + VALIDATE_SW_IF_INDEX (mp); + + policer_index = ntohl (mp->policer_index); + sw_if_index = ntohl (mp->sw_if_index); + apply = mp->apply; + + rv = policer_input (policer_index, sw_if_index, VLIB_TX, apply); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_POLICER_OUTPUT_REPLY); +} + +static void +send_policer_details (qos_pol_cfg_params_st *config, policer_t *policer, + vl_api_registration_t *reg, u32 context) { vl_api_policer_details_t *mp; @@ -170,26 +337,27 @@ send_policer_details (u8 *name, qos_pol_cfg_params_st *config, 
mp->round_type = (vl_api_sse2_qos_round_type_t) config->rnd_type; mp->type = (vl_api_sse2_qos_policer_type_t) config->rfc; mp->conform_action.type = - (vl_api_sse2_qos_action_type_t) config->conform_action.action_type; - mp->conform_action.dscp = config->conform_action.dscp; + (vl_api_sse2_qos_action_type_t) policer->action[POLICE_CONFORM]; + mp->conform_action.dscp = policer->mark_dscp[POLICE_CONFORM]; mp->exceed_action.type = - (vl_api_sse2_qos_action_type_t) config->exceed_action.action_type; - mp->exceed_action.dscp = config->exceed_action.dscp; + (vl_api_sse2_qos_action_type_t) policer->action[POLICE_EXCEED]; + mp->exceed_action.dscp = policer->mark_dscp[POLICE_EXCEED]; mp->violate_action.type = - (vl_api_sse2_qos_action_type_t) config->violate_action.action_type; - mp->violate_action.dscp = config->violate_action.dscp; - mp->single_rate = templ->single_rate ? 1 : 0; - mp->color_aware = templ->color_aware ? 1 : 0; - mp->scale = htonl (templ->scale); - mp->cir_tokens_per_period = htonl (templ->cir_tokens_per_period); - mp->pir_tokens_per_period = htonl (templ->pir_tokens_per_period); - mp->current_limit = htonl (templ->current_limit); - mp->current_bucket = htonl (templ->current_bucket); - mp->extended_limit = htonl (templ->extended_limit); - mp->extended_bucket = htonl (templ->extended_bucket); - mp->last_update_time = clib_host_to_net_u64 (templ->last_update_time); - - strncpy ((char *) mp->name, (char *) name, ARRAY_LEN (mp->name) - 1); + (vl_api_sse2_qos_action_type_t) policer->action[POLICE_VIOLATE]; + mp->violate_action.dscp = policer->mark_dscp[POLICE_VIOLATE]; + mp->single_rate = policer->single_rate ? 1 : 0; + mp->color_aware = policer->color_aware ? 
1 : 0; + mp->scale = htonl (policer->scale); + mp->cir_tokens_per_period = htonl (policer->cir_tokens_per_period); + mp->pir_tokens_per_period = htonl (policer->pir_tokens_per_period); + mp->current_limit = htonl (policer->current_limit); + mp->current_bucket = htonl (policer->current_bucket); + mp->extended_limit = htonl (policer->extended_limit); + mp->extended_bucket = htonl (policer->extended_bucket); + mp->last_update_time = clib_host_to_net_u64 (policer->last_update_time); + + strncpy ((char *) mp->name, (char *) policer->name, + ARRAY_LEN (mp->name) - 1); vl_api_send_msg (reg, (u8 *) mp); } @@ -199,13 +367,11 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp) { vl_api_registration_t *reg; vnet_policer_main_t *pm = &vnet_policer_main; - hash_pair_t *hp; - uword *p; - u32 pool_index; + uword *p, *pi; + u32 pool_index, policer_index; u8 *match_name = 0; - u8 *name; qos_pol_cfg_params_st *config; - policer_t *templ; + policer_t *policer; reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) @@ -220,26 +386,67 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp) if (mp->match_name_valid) { p = hash_get_mem (pm->policer_config_by_name, match_name); - if (p) + pi = hash_get_mem (pm->policer_index_by_name, match_name); + if (0 == p || 0 == pi) + return; + + pool_index = p[0]; + policer_index = pi[0]; + config = pool_elt_at_index (pm->configs, pool_index); + policer = pool_elt_at_index (pm->policers, policer_index); + send_policer_details (config, policer, reg, mp->context); + } + else + { + pool_foreach (policer, pm->policers) + { + p = hash_get_mem (pm->policer_config_by_name, policer->name); + if (0 == p) + continue; + + pool_index = p[0]; + config = pool_elt_at_index (pm->configs, pool_index); + send_policer_details (config, policer, reg, mp->context); + }; + } +} + +static void +vl_api_policer_dump_v2_t_handler (vl_api_policer_dump_v2_t *mp) +{ + vl_api_registration_t *reg; + vnet_policer_main_t *pm = &vnet_policer_main; + 
qos_pol_cfg_params_st *config; + u32 policer_index, pool_index; + policer_t *policer; + uword *p; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + policer_index = ntohl (mp->policer_index); + + if (~0 == policer_index) + { + pool_foreach (policer, pm->policers) { + p = hash_get_mem (pm->policer_config_by_name, policer->name); pool_index = p[0]; config = pool_elt_at_index (pm->configs, pool_index); - templ = pool_elt_at_index (pm->policer_templates, pool_index); - send_policer_details (match_name, config, templ, reg, mp->context); - } + send_policer_details (config, policer, reg, mp->context); + }; } else { - /* *INDENT-OFF* */ - hash_foreach_pair (hp, pm->policer_config_by_name, - ({ - name = (u8 *) hp->key; - pool_index = hp->value[0]; - config = pool_elt_at_index (pm->configs, pool_index); - templ = pool_elt_at_index (pm->policer_templates, pool_index); - send_policer_details(name, config, templ, reg, mp->context); - })); - /* *INDENT-ON* */ + if (pool_is_free_index (pm->policers, policer_index)) + return; + + policer = &pm->policers[policer_index]; + p = hash_get_mem (pm->policer_config_by_name, policer->name); + pool_index = p[0]; + config = pool_elt_at_index (pm->configs, pool_index); + send_policer_details (config, policer, reg, mp->context); } } diff --git a/src/vnet/policer/policer_types.api b/src/vnet/policer/policer_types.api index 3e21b7d707c..9d4c6447f69 100644 --- a/src/vnet/policer/policer_types.api +++ b/src/vnet/policer/policer_types.api @@ -56,6 +56,34 @@ typedef sse2_qos_action u8 dscp; }; +/** \brief Policer configuration + @param cir - CIR + @param eir - EIR + @param cb - Committed Burst + @param eb - Excess or Peak Burst + @param rate_type - rate type + @param round_type - rounding type + @param type - policer algorithm + @param color_aware - 0=color-blind, 1=color-aware + @param conform_action - conform action + @param exceed_action - exceed action type + @param violate_action - violate action type 
+*/ +typedef policer_config +{ + u32 cir; + u32 eir; + u64 cb; + u64 eb; + vl_api_sse2_qos_rate_type_t rate_type; + vl_api_sse2_qos_round_type_t round_type; + vl_api_sse2_qos_policer_type_t type; + bool color_aware; + vl_api_sse2_qos_action_t conform_action; + vl_api_sse2_qos_action_t exceed_action; + vl_api_sse2_qos_action_t violate_action; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/policer/xlate.c b/src/vnet/policer/xlate.c index 9c4d76fd990..bffd208716d 100644 --- a/src/vnet/policer/xlate.c +++ b/src/vnet/policer/xlate.c @@ -1058,7 +1058,7 @@ x86_pol_compute_hw_params (qos_pol_cfg_params_st *cfg, policer_t *hw) * Return: Status, success or failure code. */ int -pol_logical_2_physical (qos_pol_cfg_params_st *cfg, policer_t *phys) +pol_logical_2_physical (const qos_pol_cfg_params_st *cfg, policer_t *phys) { int rc; qos_pol_cfg_params_st kbps_cfg; diff --git a/src/vnet/policer/xlate.h b/src/vnet/policer/xlate.h index 722ac2fb777..7f6ebe7b65d 100644 --- a/src/vnet/policer/xlate.h +++ b/src/vnet/policer/xlate.h @@ -158,7 +158,7 @@ typedef struct qos_pol_hw_params_st_ u32 extd_bkt; } qos_pol_hw_params_st; -int pol_logical_2_physical (qos_pol_cfg_params_st *cfg, policer_t *phys); +int pol_logical_2_physical (const qos_pol_cfg_params_st *cfg, policer_t *phys); #endif /* __included_xlate_h__ */ diff --git a/src/vnet/ppp/node.c b/src/vnet/ppp/node.c index eead2b2f0c1..fa056bfb99f 100644 --- a/src/vnet/ppp/node.c +++ b/src/vnet/ppp/node.c @@ -265,7 +265,6 @@ static char *ppp_error_strings[] = { #undef ppp_error }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ppp_input_node) = { .function = ppp_input, .name = "ppp-input", @@ -288,7 +287,6 @@ VLIB_REGISTER_NODE (ppp_input_node) = { .format_trace = format_ppp_input_trace, .unformat_buffer = unformat_ppp_header, }; -/* *INDENT-ON* */ static clib_error_t * ppp_input_runtime_init (vlib_main_t * vm) diff --git a/src/vnet/ppp/ppp.c b/src/vnet/ppp/ppp.c index b1fafa13145..8aa8504fcdd 100644 --- 
a/src/vnet/ppp/ppp.c +++ b/src/vnet/ppp/ppp.c @@ -197,7 +197,6 @@ ppp_build_rewrite (vnet_main_t * vnm, return (rewrite); } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = { .name = "PPP", .format_header = format_ppp_header_with_length, @@ -205,7 +204,6 @@ VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = { .build_rewrite = ppp_build_rewrite, .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; -/* *INDENT-ON* */ static void add_protocol (ppp_main_t * pm, ppp_protocol_t protocol, char *protocol_name) diff --git a/src/vnet/qos/qos_egress_map.c b/src/vnet/qos/qos_egress_map.c index 7985579d3cf..43c0c55df07 100644 --- a/src/vnet/qos/qos_egress_map.c +++ b/src/vnet/qos/qos_egress_map.c @@ -47,13 +47,11 @@ qos_egress_map_get_id (index_t qemi) qos_egress_map_id_t qid; index_t qmi; - /* *INDENT-OFF* */ hash_foreach(qid, qmi, qem_db, ({ if (qmi == qemi) return (qid); })); - /* *INDENT-OFF* */ return (~0); } @@ -129,12 +127,10 @@ qos_egress_map_walk (qos_egress_map_walk_cb_t fn, void *c) qos_egress_map_id_t qid; index_t qmi; - /* *INDENT-OFF* */ hash_foreach(qid, qmi, qem_db, ({ fn(qid, pool_elt_at_index(qem_pool, qmi), c); })); - /* *INDENT-OFF* */ } static clib_error_t * @@ -181,14 +177,12 @@ qos_egress_map_update_cli (vlib_main_t * vm, * @cliexpar * @cliexcmd{qos egress map id 0 [ip][4]=4} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = { .path = "qos egress map", .short_help = "qos egress map id %d [delete] {[SOURCE][INPUT]=OUTPUT}", .function = qos_egress_map_update_cli, .is_mp_safe = 1, }; -/* *INDENT-ON* */ u8 *format_qos_egress_map (u8 * s, va_list * args) { @@ -239,7 +233,6 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = { { index_t qemi; - /* *INDENT-OFF* */ hash_foreach(map_id, qemi, qem_db, ({ vlib_cli_output (vm, " Map-ID:%d\n%U", @@ -247,7 +240,6 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = { format_qos_egress_map, pool_elt_at_index(qem_pool, qemi), 2); })); - /* *INDENT-ON* */ 
} else { @@ -274,14 +266,12 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = { * @cliexpar * @cliexcmd{show qos egress map} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (qos_egress_map_show_command, static) = { .path = "show qos egress map", .short_help = "show qos egress map id %d", .function = qos_egress_map_show, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/qos/qos_mark.c b/src/vnet/qos/qos_mark.c index 44bb34bd010..3817c89a009 100644 --- a/src/vnet/qos/qos_mark.c +++ b/src/vnet/qos/qos_mark.c @@ -187,14 +187,12 @@ qos_mark_cli (vlib_main_t * vm, * @cliexpar * @cliexcmd{qos egress interface GigEthernet0/9/0 id 0 output ip} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (qos_egress_map_interface_command, static) = { .path = "qos mark", .short_help = "qos mark <SOURCE> <INTERFACE> id <MAP>", .function = qos_mark_cli, .is_mp_safe = 1, }; -/* *INDENT-ON* */ static void qos_mark_show_one_interface (vlib_main_t * vm, u32 sw_if_index) @@ -271,14 +269,12 @@ qos_mark_show (vlib_main_t * vm, * @cliexpar * @cliexcmd{show qos egress map} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (qos_mark_show_command, static) = { .path = "show qos mark", .short_help = "show qos mark [interface]", .function = qos_mark_show, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/qos/qos_mark_node.c b/src/vnet/qos/qos_mark_node.c index f12e66b4fa0..16a487aede8 100644 --- a/src/vnet/qos/qos_mark_node.c +++ b/src/vnet/qos/qos_mark_node.c @@ -212,7 +212,6 @@ VLIB_NODE_FN (vlan_ip6_qos_mark_node) (vlib_main_t * vm, return (qos_mark_inline (vm, node, frame, QOS_SOURCE_VLAN, 0)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_qos_mark_node) = { .name = "ip4-qos-mark", .vector_size = sizeof (u32), @@ -330,7 +329,6 @@ VNET_FEATURE_INIT (vlan_mpls_qos_mark_node, static) = { .runs_after = VNET_FEATURES ("mpls-qos-mark"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: 
ON diff --git a/src/vnet/qos/qos_record.c b/src/vnet/qos/qos_record.c index d52c1442d8d..fdf79766471 100644 --- a/src/vnet/qos/qos_record.c +++ b/src/vnet/qos/qos_record.c @@ -203,14 +203,12 @@ qos_record_cli (vlib_main_t * vm, * @cliexpar * @cliexcmd{qos record ip GigEthernet0/1/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (qos_record_command, static) = { .path = "qos record", .short_help = "qos record <record-source> <INTERFACE> [disable]", .function = qos_record_cli, .is_mp_safe = 1, }; -/* *INDENT-ON* */ static void qos_record_show_one_interface (vlib_main_t * vm, u32 sw_if_index) @@ -285,14 +283,12 @@ qos_record_show (vlib_main_t * vm, * @cliexpar * @cliexcmd{show qos egress map} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (qos_record_show_command, static) = { .path = "show qos record", .short_help = "show qos record [interface]", .function = qos_record_show, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/qos/qos_record_node.c b/src/vnet/qos/qos_record_node.c index 75e1421dc08..1a34891f85d 100644 --- a/src/vnet/qos/qos_record_node.c +++ b/src/vnet/qos/qos_record_node.c @@ -222,7 +222,6 @@ VLIB_NODE_FN (l2_ip_qos_record_node) (vlib_main_t * vm, return (qos_record_inline (vm, node, frame, QOS_SOURCE_VLAN, 0, 1)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_qos_record_node) = { .name = "ip4-qos-record", .vector_size = sizeof (u32), @@ -372,7 +371,6 @@ VLIB_REGISTER_NODE (l2_ip_qos_record_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/qos/qos_store.c b/src/vnet/qos/qos_store.c index 1e8a53bbdfc..3424a914e35 100644 --- a/src/vnet/qos/qos_store.c +++ b/src/vnet/qos/qos_store.c @@ -211,14 +211,12 @@ qos_store_cli (vlib_main_t * vm, * @cliexpar * @cliexcmd{qos store ip GigEthernet0/1/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (qos_store_command, static) = { .path = "qos store", .short_help = "qos store <store-source> <INTERFACE> 
[disable]", .function = qos_store_cli, .is_mp_safe = 1, }; -/* *INDENT-ON* */ static void qos_store_show_one_interface (vlib_main_t * vm, u32 sw_if_index) @@ -295,14 +293,12 @@ qos_store_show (vlib_main_t * vm, * @cliexpar * @cliexcmd{show qos egress map} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (qos_store_show_command, static) = { .path = "show qos store", .short_help = "show qos store [interface]", .function = qos_store_show, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/qos/qos_store_node.c b/src/vnet/qos/qos_store_node.c index 2273b2eac77..6a5ad24453d 100644 --- a/src/vnet/qos/qos_store_node.c +++ b/src/vnet/qos/qos_store_node.c @@ -121,7 +121,6 @@ VLIB_NODE_FN (ip6_qos_store_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_qos_store_node) = { .name = "ip4-qos-store", .vector_size = sizeof (u32), @@ -168,7 +167,6 @@ VNET_FEATURE_INIT (ip6m_qos_store_node, static) = { .node_name = "ip6-qos-store", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index 3b2c7cdb35a..c66548507e5 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -31,10 +31,12 @@ static app_main_t app_main; static app_listener_t * app_listener_alloc (application_t * app) { + app_main_t *am = &app_main; app_listener_t *app_listener; - pool_get (app->listeners, app_listener); + + pool_get (am->listeners, app_listener); clib_memset (app_listener, 0, sizeof (*app_listener)); - app_listener->al_index = app_listener - app->listeners; + app_listener->al_index = app_listener - am->listeners; app_listener->app_index = app->app_index; app_listener->session_index = SESSION_INVALID_INDEX; app_listener->local_index = SESSION_INVALID_INDEX; @@ -43,18 +45,23 @@ app_listener_alloc (application_t * app) } app_listener_t * -app_listener_get (application_t * app, u32 app_listener_index) +app_listener_get 
(u32 app_listener_index) { - return pool_elt_at_index (app->listeners, app_listener_index); + app_main_t *am = &app_main; + + return pool_elt_at_index (am->listeners, app_listener_index); } static void app_listener_free (application_t * app, app_listener_t * app_listener) { + app_main_t *am = &app_main; + clib_bitmap_free (app_listener->workers); + vec_free (app_listener->cl_listeners); if (CLIB_DEBUG) clib_memset (app_listener, 0xfa, sizeof (*app_listener)); - pool_put (app->listeners, app_listener); + pool_put (am->listeners, app_listener); } session_handle_t @@ -63,24 +70,14 @@ app_listener_handle (app_listener_t * al) return al->ls_handle; } -app_listener_t * -app_listener_get_w_session (session_t * ls) -{ - application_t *app; - - app = application_get_if_valid (ls->app_index); - if (!app) - return 0; - return app_listener_get (app, ls->al_index); -} - session_handle_t app_listen_session_handle (session_t * ls) { app_listener_t *al; - al = app_listener_get_w_session (ls); - if (!al) + /* TODO(fcoras): quic session handles */ + if (ls->al_index == SESSION_INVALID_INDEX) return listen_session_get_handle (ls); + al = app_listener_get (ls->al_index); return al->ls_handle; } @@ -91,7 +88,7 @@ app_listener_get_w_handle (session_handle_t handle) ls = session_get_from_handle_if_valid (handle); if (!ls) return 0; - return app_listener_get_w_session (ls); + return app_listener_get (ls->al_index); } app_listener_t * @@ -112,7 +109,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext) if (handle != SESSION_INVALID_HANDLE) { ls = listen_session_get_from_handle (handle); - return app_listener_get_w_session (ls); + return app_listener_get (ls->al_index); } } @@ -122,7 +119,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext) if (handle != SESSION_INVALID_HANDLE) { ls = listen_session_get_from_handle (handle); - return app_listener_get_w_session ((session_t *) ls); + return app_listener_get (ls->al_index); } /* @@ -144,7 
+141,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext) if (handle != SESSION_INVALID_HANDLE) { ls = listen_session_get_from_handle (handle); - return app_listener_get_w_session ((session_t *) ls); + return app_listener_get (ls->al_index); } } } @@ -181,7 +178,6 @@ app_listener_alloc_and_init (application_t * app, local_st = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, sep->is_ip4); ls = listen_session_alloc (0, local_st); - ls->app_index = app->app_index; ls->app_wrk_index = sep->app_wrk_index; lh = session_handle (ls); @@ -189,11 +185,12 @@ app_listener_alloc_and_init (application_t * app, { ls = session_get_from_handle (lh); session_free (ls); + app_listener_free (app, app_listener); return rv; } ls = session_get_from_handle (lh); - app_listener = app_listener_get (app, al_index); + app_listener = app_listener_get (al_index); app_listener->local_index = ls->session_index; app_listener->ls_handle = lh; ls->al_index = al_index; @@ -212,7 +209,6 @@ app_listener_alloc_and_init (application_t * app, * build it's own specific listening connection. 
*/ ls = listen_session_alloc (0, st); - ls->app_index = app->app_index; ls->app_wrk_index = sep->app_wrk_index; /* Listen pool can be reallocated if the transport is @@ -223,10 +219,11 @@ app_listener_alloc_and_init (application_t * app, { ls = listen_session_get_from_handle (lh); session_free (ls); + app_listener_free (app, app_listener); return rv; } ls = listen_session_get_from_handle (lh); - app_listener = app_listener_get (app, al_index); + app_listener = app_listener_get (al_index); app_listener->session_index = ls->session_index; app_listener->ls_handle = lh; ls->al_index = al_index; @@ -288,8 +285,9 @@ app_listener_cleanup (app_listener_t * al) } static app_worker_t * -app_listener_select_worker (application_t * app, app_listener_t * al) +app_listener_select_worker (app_listener_t *al) { + application_t *app; u32 wrk_index; app = application_get (al->app_index); @@ -319,6 +317,13 @@ app_listener_get_local_session (app_listener_t * al) return listen_session_get (al->local_index); } +session_t * +app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_map_index) +{ + u32 si = vec_elt (al->cl_listeners, wrk_map_index); + return session_get (si, 0 /* listener thread */); +} + static app_worker_map_t * app_worker_map_alloc (application_t * app) { @@ -723,6 +728,12 @@ application_get_if_valid (u32 app_index) return pool_elt_at_index (app_main.app_pool, app_index); } +static int +_null_app_tx_callback (session_t *s) +{ + return 0; +} + static void application_verify_cb_fns (session_cb_vft_t * cb_fns) { @@ -734,6 +745,8 @@ application_verify_cb_fns (session_cb_vft_t * cb_fns) clib_warning ("No session disconnect callback function provided"); if (cb_fns->session_reset_callback == 0) clib_warning ("No session reset callback function provided"); + if (!cb_fns->builtin_app_tx_callback) + cb_fns->builtin_app_tx_callback = _null_app_tx_callback; } /** @@ -763,8 +776,8 @@ application_verify_cfg (ssvm_segment_type_t st) return 1; } -static int 
-application_alloc_and_init (app_init_args_t * a) +static session_error_t +application_alloc_and_init (app_init_args_t *a) { ssvm_segment_type_t seg_type = SSVM_SEGMENT_MEMFD; segment_manager_props_t *props; @@ -785,15 +798,15 @@ application_alloc_and_init (app_init_args_t * a) { clib_warning ("mq eventfds can only be used if socket transport is " "used for binary api"); - return VNET_API_ERROR_APP_UNSUPPORTED_CFG; + return SESSION_E_NOSUPPORT; } if (!application_verify_cfg (seg_type)) - return VNET_API_ERROR_APP_UNSUPPORTED_CFG; + return SESSION_E_NOSUPPORT; if (opts[APP_OPTIONS_PREALLOC_FIFO_PAIRS] && opts[APP_OPTIONS_PREALLOC_FIFO_HDRS]) - return VNET_API_ERROR_APP_UNSUPPORTED_CFG; + return SESSION_E_NOSUPPORT; /* Check that the obvious things are properly set up */ application_verify_cb_fns (a->session_cb_vft); @@ -874,12 +887,10 @@ application_free (application_t * app) * Free workers */ - /* *INDENT-OFF* */ pool_flush (wrk_map, app->worker_maps, ({ app_wrk = app_worker_get (wrk_map->wrk_index); app_worker_free (app_wrk); })); - /* *INDENT-ON* */ pool_free (app->worker_maps); /* @@ -922,13 +933,11 @@ application_detach_process (application_t * app, u32 api_client_index) APP_DBG ("Detaching for app %v index %u api client index %u", app->name, app->app_index, api_client_index); - /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps) { app_wrk = app_worker_get (wrk_map->wrk_index); if (app_wrk->api_client_index == api_client_index) vec_add1 (wrks, app_wrk->wrk_index); } - /* *INDENT-ON* */ if (!vec_len (wrks)) { @@ -999,12 +1008,55 @@ application_n_workers (application_t * app) app_worker_t * application_listener_select_worker (session_t * ls) { - application_t *app; app_listener_t *al; - app = application_get (ls->app_index); - al = app_listener_get (app, ls->al_index); - return app_listener_select_worker (app, al); + al = app_listener_get (ls->al_index); + return app_listener_select_worker (al); +} + +always_inline u32 +app_listener_cl_flow_hash 
(session_dgram_hdr_t *hdr) +{ + u32 hash = 0; + + if (hdr->is_ip4) + { + hash = clib_crc32c_u32 (hash, hdr->rmt_ip.ip4.as_u32); + hash = clib_crc32c_u32 (hash, hdr->lcl_ip.ip4.as_u32); + hash = clib_crc32c_u16 (hash, hdr->rmt_port); + hash = clib_crc32c_u16 (hash, hdr->lcl_port); + } + else + { + hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[0]); + hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[1]); + hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[0]); + hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[1]); + hash = clib_crc32c_u16 (hash, hdr->rmt_port); + hash = clib_crc32c_u16 (hash, hdr->lcl_port); + } + + return hash; +} + +session_t * +app_listener_select_wrk_cl_session (session_t *ls, session_dgram_hdr_t *hdr) +{ + u32 wrk_map_index = 0; + app_listener_t *al; + + al = app_listener_get (ls->al_index); + /* Crude test to check if only worker 0 is set */ + if (al->workers[0] != 1) + { + u32 hash = app_listener_cl_flow_hash (hdr); + hash %= vec_len (al->workers) * sizeof (uword); + wrk_map_index = clib_bitmap_next_set (al->workers, hash); + if (wrk_map_index == ~0) + wrk_map_index = clib_bitmap_first_set (al->workers); + } + + return app_listener_get_wrk_cl_session (al, wrk_map_index); } int @@ -1046,8 +1098,8 @@ application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk) return 0; } -int -vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) +session_error_t +vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a) { fifo_segment_t *fs; app_worker_map_t *wrk_map; @@ -1058,7 +1110,7 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) app = application_get (a->app_index); if (!app) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if (a->is_add) { @@ -1081,11 +1133,11 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) { wrk_map = app_worker_map_get (app, a->wrk_map_index); if (!wrk_map) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; app_wrk = 
app_worker_get (wrk_map->wrk_index); if (!app_wrk) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; application_api_table_del (app_wrk->api_client_index); if (appns_sapi_enabled ()) @@ -1098,8 +1150,8 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) return 0; } -static int -app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index) +static session_error_t +app_validate_namespace (u8 *namespace_id, u64 secret, u32 *app_ns_index) { app_namespace_t *app_ns; if (vec_len (namespace_id) == 0) @@ -1111,12 +1163,12 @@ app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index) *app_ns_index = app_namespace_index_from_id (namespace_id); if (*app_ns_index == APP_NAMESPACE_INVALID_INDEX) - return VNET_API_ERROR_APP_INVALID_NS; + return SESSION_E_INVALID_NS; app_ns = app_namespace_get (*app_ns_index); if (!app_ns) - return VNET_API_ERROR_APP_INVALID_NS; + return SESSION_E_INVALID_NS; if (app_ns->ns_secret != secret) - return VNET_API_ERROR_APP_WRONG_NS_SECRET; + return SESSION_E_WRONG_NS_SECRET; return 0; } @@ -1140,8 +1192,8 @@ app_name_from_api_index (u32 api_client_index) * to external app and a segment manager for shared memory fifo based * communication with the external app. 
*/ -int -vnet_application_attach (vnet_app_attach_args_t * a) +session_error_t +vnet_application_attach (vnet_app_attach_args_t *a) { fifo_segment_t *fs; application_t *app = 0; @@ -1150,17 +1202,17 @@ vnet_application_attach (vnet_app_attach_args_t * a) u32 app_ns_index = 0; u8 *app_name = 0; u64 secret; - int rv; + session_error_t rv; if (a->api_client_index != APP_INVALID_INDEX) app = application_lookup (a->api_client_index); else if (a->name) app = application_lookup_name (a->name); else - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if (app) - return VNET_API_ERROR_APP_ALREADY_ATTACHED; + return SESSION_E_APP_ATTACHED; /* Socket api sets the name and validates namespace prior to attach */ if (!a->use_sock_api) @@ -1214,8 +1266,8 @@ vnet_application_attach (vnet_app_attach_args_t * a) /** * Detach application from vpp */ -int -vnet_application_detach (vnet_app_detach_args_t * a) +session_error_t +vnet_application_detach (vnet_app_detach_args_t *a) { application_t *app; @@ -1223,7 +1275,7 @@ vnet_application_detach (vnet_app_detach_args_t * a) if (!app) { clib_warning ("app not attached"); - return VNET_API_ERROR_APPLICATION_NOT_ATTACHED; + return SESSION_E_NOAPP; } app_interface_check_thread_and_barrier (vnet_application_detach, a); @@ -1297,8 +1349,8 @@ session_endpoint_update_for_app (session_endpoint_cfg_t * sep, } } -int -vnet_listen (vnet_listen_args_t * a) +session_error_t +vnet_listen (vnet_listen_args_t *a) { app_listener_t *app_listener; app_worker_t *app_wrk; @@ -1351,13 +1403,13 @@ vnet_listen (vnet_listen_args_t * a) return 0; } -int -vnet_connect (vnet_connect_args_t * a) +session_error_t +vnet_connect (vnet_connect_args_t *a) { app_worker_t *client_wrk; application_t *client; - ASSERT (vlib_thread_is_main_w_barrier ()); + ASSERT (session_vlib_thread_is_cl_thread ()); if (session_endpoint_is_zero (&a->sep)) return SESSION_E_INVALID_RMT_IP; @@ -1375,7 +1427,7 @@ vnet_connect (vnet_connect_args_t * a) */ if 
(application_has_local_scope (client)) { - int rv; + session_error_t rv; a->sep_ext.original_tp = a->sep_ext.transport_proto; a->sep_ext.transport_proto = TRANSPORT_PROTO_NONE; @@ -1390,8 +1442,8 @@ vnet_connect (vnet_connect_args_t * a) return app_worker_connect_session (client_wrk, &a->sep_ext, &a->sh); } -int -vnet_unlisten (vnet_unlisten_args_t * a) +session_error_t +vnet_unlisten (vnet_unlisten_args_t *a) { app_worker_t *app_wrk; app_listener_t *al; @@ -1421,7 +1473,7 @@ vnet_unlisten (vnet_unlisten_args_t * a) return app_worker_stop_listen (app_wrk, al); } -int +session_error_t vnet_shutdown_session (vnet_shutdown_args_t *a) { app_worker_t *app_wrk; @@ -1442,8 +1494,8 @@ vnet_shutdown_session (vnet_shutdown_args_t *a) return 0; } -int -vnet_disconnect_session (vnet_disconnect_args_t * a) +session_error_t +vnet_disconnect_session (vnet_disconnect_args_t *a) { app_worker_t *app_wrk; session_t *s; @@ -1483,7 +1535,7 @@ application_change_listener_owner (session_t * s, app_worker_t * app_wrk) if (!app) return SESSION_E_NOAPP; - app_listener = app_listener_get (app, s->al_index); + app_listener = app_listener_get (s->al_index); /* Only remove from lb for now */ app_listener->workers = clib_bitmap_set (app_listener->workers, @@ -1527,6 +1579,12 @@ application_has_global_scope (application_t * app) return app->flags & APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE; } +int +application_original_dst_is_enabled (application_t *app) +{ + return app->flags & APP_OPTIONS_FLAGS_GET_ORIGINAL_DST; +} + static clib_error_t * application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto, u8 transport_proto, u8 is_start) @@ -1688,7 +1746,6 @@ application_format_listeners (application_t * app, int verbose) return; } - /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps) { app_wrk = app_worker_get (wrk_map->wrk_index); if (hash_elts (app_wrk->listeners_table) == 0) @@ -1698,7 +1755,6 @@ application_format_listeners (application_t * app, int verbose) handle, sm_index, 
verbose); })); } - /* *INDENT-ON* */ } static void @@ -1713,12 +1769,10 @@ application_format_connects (application_t * app, int verbose) return; } - /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps) { app_wrk = app_worker_get (wrk_map->wrk_index); app_worker_format_connects (app_wrk, verbose); } - /* *INDENT-ON* */ } u8 * @@ -1819,12 +1873,10 @@ format_application (u8 * s, va_list * args) format_memory_size, props->rx_fifo_size, format_memory_size, props->tx_fifo_size); - /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps) { app_wrk = app_worker_get (wrk_map->wrk_index); s = format (s, "%U", format_app_worker, app_wrk); } - /* *INDENT-ON* */ return s; } @@ -1842,11 +1894,9 @@ application_format_all_listeners (vlib_main_t * vm, int verbose) application_format_listeners (0, verbose); - /* *INDENT-OFF* */ pool_foreach (app, app_main.app_pool) { application_format_listeners (app, verbose); } - /* *INDENT-ON* */ } void @@ -1862,11 +1912,9 @@ application_format_all_clients (vlib_main_t * vm, int verbose) application_format_connects (0, verbose); - /* *INDENT-OFF* */ pool_foreach (app, app_main.app_pool) { application_format_connects (app, verbose); } - /* *INDENT-ON* */ } static clib_error_t * @@ -1876,11 +1924,9 @@ show_certificate_command_fn (vlib_main_t * vm, unformat_input_t * input, app_cert_key_pair_t *ckpair; session_cli_return_if_not_enabled (); - /* *INDENT-OFF* */ pool_foreach (ckpair, app_main.cert_key_pair_store) { vlib_cli_output (vm, "%U", format_cert_key_pair, ckpair); } - /* *INDENT-ON* */ return 0; } @@ -1891,14 +1937,12 @@ appliction_format_app_mq (vlib_main_t * vm, application_t * app) app_worker_t *wrk; int i; - /* *INDENT-OFF* */ pool_foreach (map, app->worker_maps) { wrk = app_worker_get (map->wrk_index); vlib_cli_output (vm, "[A%d][%d]%U", app->app_index, map->wrk_index, format_svm_msg_q, wrk->event_queue); } - /* *INDENT-ON* */ for (i = 0; i < vec_len (app->rx_mqs); i++) vlib_cli_output (vm, "[A%d][R%d]%U", app->app_index, i, 
format_svm_msg_q, @@ -1919,11 +1963,9 @@ appliction_format_all_app_mq (vlib_main_t * vm) session_main_get_vpp_event_queue (i)); } - /* *INDENT-OFF* */ pool_foreach (app, app_main.app_pool) { appliction_format_app_mq (vm, app); } - /* *INDENT-ON* */ return 0; } @@ -2074,7 +2116,7 @@ vnet_app_del_cert_key_pair (u32 index) u32 *app_index; if (!(ckpair = app_cert_key_pair_get_if_valid (index))) - return (VNET_API_ERROR_INVALID_VALUE); + return SESSION_E_INVALID; vec_foreach (app_index, ckpair->app_interests) { diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h index 09737a6752d..c68a911230f 100644 --- a/src/vnet/session/application.h +++ b/src/vnet/session/application.h @@ -77,17 +77,17 @@ typedef struct app_worker_ /** Pool of half-open session handles. Tracked in case worker detaches */ session_handle_t *half_open_table; + /* Per vpp worker fifos of events for app worker */ + session_event_t **wrk_evts; + + /* Vector of vpp workers mq congestion flags */ + u8 *wrk_mq_congested; + /** Protects detached seg managers */ clib_spinlock_t detached_seg_managers_lock; /** Vector of detached listener segment managers */ u32 *detached_seg_managers; - - /** Fifo of messages postponed because of mq congestion */ - app_wrk_postponed_msg_t *postponed_mq_msgs; - - /** Lock to add/sub message from ref @postponed_mq_msgs */ - clib_spinlock_t postponed_mq_msgs_lock; } app_worker_t; typedef struct app_worker_map_ @@ -106,6 +106,8 @@ typedef struct app_listener_ session_handle_t ls_handle; /**< session handle of the local or global listening session that also identifies the app listener */ + u32 *cl_listeners; /**< vector that maps app workers to their + cl sessions with fifos */ } app_listener_t; typedef enum app_rx_mq_flags_ @@ -149,9 +151,6 @@ typedef struct application_ u16 proxied_transports; - /** Pool of listeners for the app */ - app_listener_t *listeners; - /** Preferred tls engine */ u8 tls_engine; @@ -198,6 +197,9 @@ typedef struct app_main_ */ 
application_t *app_pool; + /** Pool of app listeners */ + app_listener_t *listeners; + /** * Hash table of apps by api client index */ @@ -246,7 +248,7 @@ typedef struct _vnet_app_worker_add_del_args #define APP_NS_INVALID_INDEX ((u32)~0) #define APP_INVALID_SEGMENT_MANAGER_INDEX ((u32) ~0) -app_listener_t *app_listener_get (application_t * app, u32 al_index); +app_listener_t *app_listener_get (u32 al_index); int app_listener_alloc_and_init (application_t * app, session_endpoint_cfg_t * sep, app_listener_t ** listener); @@ -254,6 +256,8 @@ void app_listener_cleanup (app_listener_t * app_listener); session_handle_t app_listener_handle (app_listener_t * app_listener); app_listener_t *app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep); +session_t *app_listener_select_wrk_cl_session (session_t *ls, + session_dgram_hdr_t *hdr); /** * Get app listener handle for listening session @@ -277,9 +281,9 @@ session_handle_t app_listen_session_handle (session_t * ls); * @return pointer to app listener or 0 */ app_listener_t *app_listener_get_w_handle (session_handle_t handle); -app_listener_t *app_listener_get_w_session (session_t * ls); session_t *app_listener_get_session (app_listener_t * al); session_t *app_listener_get_local_session (app_listener_t * al); +session_t *app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_index); application_t *application_get (u32 index); application_t *application_get_if_valid (u32 index); @@ -300,6 +304,7 @@ u8 application_has_global_scope (application_t * app); void application_setup_proxy (application_t * app); void application_remove_proxy (application_t * app); void application_namespace_cleanup (app_namespace_t *app_ns); +int application_original_dst_is_enabled (application_t *app); segment_manager_props_t *application_get_segment_manager_properties (u32 app_index); @@ -316,6 +321,12 @@ void application_enable_rx_mqs_nodes (u8 is_en); * App worker */ +always_inline u8 +app_worker_mq_is_congested (app_worker_t 
*app_wrk) +{ + return app_wrk->mq_congested > 0; +} + app_worker_t *app_worker_alloc (application_t * app); int application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk); @@ -326,9 +337,14 @@ int app_worker_own_session (app_worker_t * app_wrk, session_t * s); void app_worker_free (app_worker_t * app_wrk); int app_worker_connect_session (app_worker_t *app, session_endpoint_cfg_t *sep, session_handle_t *rsh); -int app_worker_start_listen (app_worker_t * app_wrk, app_listener_t * lstnr); +session_error_t app_worker_start_listen (app_worker_t *app_wrk, + app_listener_t *lstnr); int app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al); int app_worker_init_accepted (session_t * s); +int app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh, + u32 opaque, session_error_t err); +int app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh, + u32 opaque, session_error_t err); int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s); int app_worker_init_connected (app_worker_t * app_wrk, session_t * s); int app_worker_connect_notify (app_worker_t * app_wrk, session_t * s, @@ -341,13 +357,21 @@ int app_worker_transport_closed_notify (app_worker_t * app_wrk, int app_worker_reset_notify (app_worker_t * app_wrk, session_t * s); int app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s, session_cleanup_ntf_t ntf); +int app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s, + session_cleanup_ntf_t ntf, + void (*cleanup_cb) (session_t *s)); int app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s, session_handle_t new_sh); -int app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s); -int app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s); +int app_worker_rx_notify (app_worker_t *app_wrk, session_t *s); int app_worker_session_fifo_tuning (app_worker_t * app_wrk, session_t * s, svm_fifo_t * f, session_ft_action_t act, u32 len); +void 
app_worker_add_event (app_worker_t *app_wrk, session_t *s, + session_evt_type_t evt_type); +void app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index, + session_event_t *evt); +int app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index); +void app_worker_del_all_events (app_worker_t *app_wrk); segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *, session_t *); segment_manager_t *app_worker_get_connect_segment_manager (app_worker_t *); @@ -362,9 +386,10 @@ void app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg, u32 msg_len, int fd); void app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg, u32 msg_len); -int app_worker_send_event (app_worker_t * app, session_t * s, u8 evt); -int app_worker_lock_and_send_event (app_worker_t * app, session_t * s, - u8 evt_type); +u8 app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index); +void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index); +void app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, + u32 thread_index); session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto, u8 transport_proto); void app_worker_del_detached_sm (app_worker_t * app_wrk, u32 sm_index); @@ -373,7 +398,7 @@ u8 *format_app_worker_listener (u8 * s, va_list * args); u8 *format_crypto_engine (u8 * s, va_list * args); u8 *format_crypto_context (u8 * s, va_list * args); void app_worker_format_connects (app_worker_t * app_wrk, int verbose); -int vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a); +session_error_t vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a); uword unformat_application_proto (unformat_input_t * input, va_list * args); @@ -381,18 +406,17 @@ app_cert_key_pair_t *app_cert_key_pair_get (u32 index); app_cert_key_pair_t *app_cert_key_pair_get_if_valid (u32 index); app_cert_key_pair_t *app_cert_key_pair_get_default (); -/* Needed while we support both bapi and mq ctrl messages */ 
-int mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, - session_handle_t handle, int rv); -int mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, - session_t * s, session_error_t err); -void mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh, - u32 context, int rv); void sapi_socket_close_w_handle (u32 api_handle); crypto_engine_type_t app_crypto_engine_type_add (void); u8 app_crypto_engine_n_types (void); +static inline u8 +app_worker_application_is_builtin (app_worker_t *app_wrk) +{ + return app_wrk->app_is_builtin; +} + #endif /* SRC_VNET_SESSION_APPLICATION_H_ */ /* diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index 74f456a1eab..a62f914d43a 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -73,8 +73,8 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args) static u8 *cache_uri; static session_endpoint_cfg_t *cache_sep; -int -parse_uri (char *uri, session_endpoint_cfg_t * sep) +session_error_t +parse_uri (char *uri, session_endpoint_cfg_t *sep) { unformat_input_t _input, *input = &_input; @@ -92,7 +92,7 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep) if (!unformat (input, "%U", unformat_vnet_uri, sep)) { unformat_free (input); - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; } unformat_free (input); @@ -106,8 +106,8 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep) return 0; } -int -vnet_bind_uri (vnet_listen_args_t * a) +session_error_t +vnet_bind_uri (vnet_listen_args_t *a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; int rv; @@ -120,36 +120,36 @@ vnet_bind_uri (vnet_listen_args_t * a) return vnet_listen (a); } -int -vnet_unbind_uri (vnet_unlisten_args_t * a) +session_error_t +vnet_unbind_uri (vnet_unlisten_args_t *a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; application_t *app; session_t *listener; u32 table_index; - int rv; + session_error_t 
rv; if ((rv = parse_uri (a->uri, &sep))) return rv; app = application_get (a->app_index); if (!app) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; table_index = application_session_table (app, fib_ip_proto (!sep.is_ip4)); listener = session_lookup_listener (table_index, (session_endpoint_t *) & sep); if (!listener) - return VNET_API_ERROR_ADDRESS_NOT_IN_USE; + return SESSION_E_ADDR_NOT_IN_USE; a->handle = listen_session_get_handle (listener); return vnet_unlisten (a); } -int -vnet_connect_uri (vnet_connect_args_t * a) +session_error_t +vnet_connect_uri (vnet_connect_args_t *a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; - int rv; + session_error_t rv; if ((rv = parse_uri (a->uri, &sep))) return rv; diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index 9fc03a0e97a..f175e4a58c6 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -62,6 +62,13 @@ typedef struct session_cb_vft_ /** Notify app that session pool migration happened */ void (*session_migrate_callback) (session_t * s, session_handle_t new_sh); + /** Notify app (external only) that listen was processed */ + int (*session_listened_callback) (u32 app_wrk_index, u32 api_context, + session_handle_t handle, int rv); + /** Notify app (external only) that unlisten was processed */ + void (*session_unlistened_callback) (u32 app_wrk_index, session_handle_t sh, + u32 context, int rv); + /** Direct RX callback for built-in application */ int (*builtin_app_rx_callback) (session_t * session); @@ -74,6 +81,8 @@ typedef struct session_cb_vft_ /** Delegate fifo-tuning-logic to application */ int (*fifo_tuning_callback) (session_t * s, svm_fifo_t * f, session_ft_action_t act, u32 bytes); + /** Custom fifo allocation for proxy */ + int (*proxy_alloc_session_fifos) (session_t *s); } session_cb_vft_t; @@ -117,7 +126,7 @@ typedef struct _vnet_bind_args_t /* * Results */ - u64 handle; + 
session_handle_t handle; } vnet_listen_args_t; typedef struct _vnet_unlisten_args_t @@ -125,7 +134,7 @@ typedef struct _vnet_unlisten_args_t union { char *uri; - u64 handle; /**< Session handle */ + session_handle_t handle; /**< Session handle */ }; u32 app_index; /**< Owning application index */ u32 wrk_map_index; /**< App's local pool worker index */ @@ -233,7 +242,8 @@ typedef enum _ (USE_LOCAL_SCOPE, "App can use local session scope") \ _ (EVT_MQ_USE_EVENTFD, "Use eventfds for signaling") \ _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs") \ - _ (USE_HUGE_PAGE, "Use huge page for FIFO") + _ (USE_HUGE_PAGE, "Use huge page for FIFO") \ + _ (GET_ORIGINAL_DST, "Get original dst enabled") typedef enum _app_options { @@ -270,24 +280,26 @@ typedef enum session_fd_flag_ #undef _ } session_fd_flag_t; -int parse_uri (char *uri, session_endpoint_cfg_t * sep); -int vnet_bind_uri (vnet_listen_args_t *); -int vnet_unbind_uri (vnet_unlisten_args_t * a); -int vnet_connect_uri (vnet_connect_args_t * a); +session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep); +session_error_t vnet_bind_uri (vnet_listen_args_t *); +session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a); +session_error_t vnet_connect_uri (vnet_connect_args_t *a); -int vnet_application_attach (vnet_app_attach_args_t * a); -int vnet_application_detach (vnet_app_detach_args_t * a); -int vnet_listen (vnet_listen_args_t * a); -int vnet_connect (vnet_connect_args_t * a); -int vnet_unlisten (vnet_unlisten_args_t * a); -int vnet_shutdown_session (vnet_shutdown_args_t *a); -int vnet_disconnect_session (vnet_disconnect_args_t * a); +session_error_t vnet_application_attach (vnet_app_attach_args_t *a); +session_error_t vnet_application_detach (vnet_app_detach_args_t *a); +session_error_t vnet_listen (vnet_listen_args_t *a); +session_error_t vnet_connect (vnet_connect_args_t *a); +session_error_t vnet_unlisten (vnet_unlisten_args_t *a); +session_error_t vnet_shutdown_session (vnet_shutdown_args_t *a); 
+session_error_t vnet_disconnect_session (vnet_disconnect_args_t *a); int vnet_app_add_cert_key_pair (vnet_app_add_cert_key_pair_args_t * a); int vnet_app_del_cert_key_pair (u32 index); /** Ask for app cb on pair deletion */ int vnet_app_add_cert_key_interest (u32 index, u32 app_index); +uword unformat_vnet_uri (unformat_input_t *input, va_list *args); + typedef struct app_session_transport_ { ip46_address_t rmt_ip; /**< remote ip */ @@ -297,15 +309,15 @@ typedef struct app_session_transport_ u8 is_ip4; /**< set if uses ip4 networking */ } app_session_transport_t; -#define foreach_app_session_field \ - _(svm_fifo_t, *rx_fifo) /**< rx fifo */ \ - _(svm_fifo_t, *tx_fifo) /**< tx fifo */ \ - _(session_type_t, session_type) /**< session type */ \ - _(volatile u8, session_state) /**< session state */ \ - _(u32, session_index) /**< index in owning pool */ \ - _(app_session_transport_t, transport) /**< transport info */ \ - _(svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \ - _(u8, is_dgram) /**< flag for dgram mode */ \ +#define foreach_app_session_field \ + _ (svm_fifo_t, *rx_fifo) /**< rx fifo */ \ + _ (svm_fifo_t, *tx_fifo) /**< tx fifo */ \ + _ (session_type_t, session_type) /**< session type */ \ + _ (volatile u8, session_state) /**< session state */ \ + _ (u32, session_index) /**< index in owning pool */ \ + _ (app_session_transport_t, transport) /**< transport info */ \ + _ (svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \ + _ (u8, is_dgram) /**< flag for dgram mode */ typedef struct { @@ -344,7 +356,7 @@ STATIC_ASSERT (sizeof (session_listen_uri_msg_t) <= SESSION_CTRL_MSG_MAX_SIZE, typedef struct session_bound_msg_ { u32 context; - u64 handle; + session_handle_t handle; i32 retval; u8 lcl_is_ip4; u8 lcl_ip[16]; @@ -367,15 +379,15 @@ typedef struct session_unlisten_msg_ typedef struct session_unlisten_reply_msg_ { u32 context; - u64 handle; + session_handle_t handle; i32 retval; } __clib_packed session_unlisten_reply_msg_t; typedef struct session_accepted_msg_ 
{ u32 context; - u64 listener_handle; - u64 handle; + session_handle_t listener_handle; + session_handle_t handle; uword server_rx_fifo; uword server_tx_fifo; u64 segment_handle; @@ -384,13 +396,15 @@ typedef struct session_accepted_msg_ transport_endpoint_t lcl; transport_endpoint_t rmt; u8 flags; + u32 original_dst_ip4; + u16 original_dst_port; } __clib_packed session_accepted_msg_t; typedef struct session_accepted_reply_msg_ { u32 context; i32 retval; - u64 handle; + session_handle_t handle; } __clib_packed session_accepted_reply_msg_t; typedef struct session_connect_msg_ @@ -430,7 +444,7 @@ typedef struct session_connected_msg_ { u32 context; i32 retval; - u64 handle; + session_handle_t handle; uword server_rx_fifo; uword server_tx_fifo; u64 segment_handle; @@ -460,33 +474,33 @@ typedef struct session_disconnected_msg_ { u32 client_index; u32 context; - u64 handle; + session_handle_t handle; } __clib_packed session_disconnected_msg_t; typedef struct session_disconnected_reply_msg_ { u32 context; i32 retval; - u64 handle; + session_handle_t handle; } __clib_packed session_disconnected_reply_msg_t; typedef struct session_reset_msg_ { u32 client_index; u32 context; - u64 handle; + session_handle_t handle; } __clib_packed session_reset_msg_t; typedef struct session_reset_reply_msg_ { u32 context; i32 retval; - u64 handle; + session_handle_t handle; } __clib_packed session_reset_reply_msg_t; typedef struct session_req_worker_update_msg_ { - u64 session_handle; + session_handle_t session_handle; } __clib_packed session_req_worker_update_msg_t; /* NOTE: using u16 for wrk indices because message needs to fit in 18B */ @@ -495,12 +509,12 @@ typedef struct session_worker_update_msg_ u32 client_index; u16 wrk_index; u16 req_wrk_index; - u64 handle; + session_handle_t handle; } __clib_packed session_worker_update_msg_t; typedef struct session_worker_update_reply_msg_ { - u64 handle; + session_handle_t handle; uword rx_fifo; uword tx_fifo; u64 segment_handle; @@ -641,14 
+655,18 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type, } } +#define app_send_dgram_raw(f, at, vpp_evt_q, data, len, evt_type, do_evt, \ + noblock) \ + app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt, \ + noblock) + always_inline int -app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at, - svm_msg_q_t * vpp_evt_q, u8 * data, u32 len, u8 evt_type, - u8 do_evt, u8 noblock) +app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at, + svm_msg_q_t *vpp_evt_q, u8 *data, u32 len, + u16 gso_size, u8 evt_type, u8 do_evt, u8 noblock) { session_dgram_hdr_t hdr; int rv; - if (svm_fifo_max_enqueue_prod (f) < (sizeof (session_dgram_hdr_t) + len)) return 0; @@ -659,10 +677,8 @@ app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at, hdr.rmt_port = at->rmt_port; clib_memcpy_fast (&hdr.lcl_ip, &at->lcl_ip, sizeof (ip46_address_t)); hdr.lcl_port = at->lcl_port; - - /* *INDENT-OFF* */ + hdr.gso_size = gso_size; svm_fifo_seg_t segs[2] = {{ (u8 *) &hdr, sizeof (hdr) }, { data, len }}; - /* *INDENT-ON* */ rv = svm_fifo_enqueue_segments (f, segs, 2, 0 /* allow partial */ ); if (PREDICT_FALSE (rv < 0)) @@ -787,13 +803,11 @@ app_recv (app_session_t * s, u8 * data, u32 len) return app_recv_stream (s, data, len); } -/* *INDENT-OFF* */ static char *session_error_str[] = { #define _(sym, str) str, foreach_session_error #undef _ }; -/* *INDENT-ON* */ static inline u8 * format_session_error (u8 * s, va_list * args) diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c index 8590d041600..3cb743d10e0 100644 --- a/src/vnet/session/application_local.c +++ b/src/vnet/session/application_local.c @@ -53,6 +53,8 @@ typedef struct ct_worker_ ct_cleanup_req_t *pending_cleanups; /**< Fifo of pending indices */ u8 have_connects; /**< Set if connect rpc pending */ u8 have_cleanups; /**< Set if cleanup rpc pending */ + clib_spinlock_t pending_connects_lock; /**< Lock for pending connects */ 
+ u32 *new_connects; /**< Burst of connects to be done */ } ct_worker_t; typedef struct ct_main_ @@ -65,6 +67,9 @@ typedef struct ct_main_ clib_rwlock_t app_segs_lock; /**< RW lock for seg contexts */ uword *app_segs_ctxs_table; /**< App handle to segment pool map */ ct_segments_ctx_t *app_seg_ctxs; /**< Pool of ct segment contexts */ + u32 **fwrk_pending_connects; /**< First wrk pending half-opens */ + u32 fwrk_thread; /**< First worker thread */ + u8 fwrk_have_flush; /**< Flag for connect flush rpc */ } ct_main_t; static ct_main_t ct_main; @@ -81,7 +86,8 @@ ct_connection_alloc (u32 thread_index) ct_worker_t *wrk = ct_worker_get (thread_index); ct_connection_t *ct; - pool_get_zero (wrk->connections, ct); + pool_get_aligned_safe (wrk->connections, ct, CLIB_CACHE_LINE_BYTES); + clib_memset (ct, 0, sizeof (*ct)); ct->c_c_index = ct - wrk->connections; ct->c_thread_index = thread_index; ct->client_wrk = ~0; @@ -123,11 +129,18 @@ ct_half_open_alloc (void) clib_spinlock_lock (&cm->ho_reuseable_lock); vec_foreach (hip, cm->ho_reusable) - pool_put_index (cm->wrk[0].connections, *hip); + pool_put_index (cm->wrk[cm->fwrk_thread].connections, *hip); vec_reset_length (cm->ho_reusable); clib_spinlock_unlock (&cm->ho_reuseable_lock); - return ct_connection_alloc (0); + return ct_connection_alloc (cm->fwrk_thread); +} + +static ct_connection_t * +ct_half_open_get (u32 ho_index) +{ + ct_main_t *cm = &ct_main; + return ct_connection_get (ho_index, cm->fwrk_thread); } void @@ -181,6 +194,12 @@ ct_set_invalid_app_wrk (ct_connection_t *ct, u8 is_client) } } +static inline u64 +ct_client_seg_handle (u64 server_sh, u32 client_wrk_index) +{ + return (((u64) client_wrk_index << 56) | server_sh); +} + static void ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo, svm_fifo_t *tx_fifo) @@ -301,7 +320,8 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo, segment_manager_t *csm; csm = app_worker_get_connect_segment_manager (app_wrk); if 
(!segment_manager_app_detached (csm)) - app_worker_del_segment_notify (app_wrk, ct->segment_handle); + app_worker_del_segment_notify ( + app_wrk, ct_client_seg_handle (ct->segment_handle, ct->client_wrk)); } /* Notify server app and free segment */ @@ -363,9 +383,10 @@ ct_session_connect_notify (session_t *ss, session_error_t err) ss = session_get (ss_index, thread_index); cs->session_type = ss->session_type; cs->listener_handle = SESSION_INVALID_HANDLE; - cs->session_state = SESSION_STATE_CONNECTING; + session_set_state (cs, SESSION_STATE_CONNECTING); cs->app_wrk_index = client_wrk->wrk_index; cs->connection_index = cct->c_c_index; + cs->opaque = opaque; cct->c_s_index = cs->session_index; /* This will allocate fifos for the session. They won't be used for @@ -379,7 +400,7 @@ ct_session_connect_notify (session_t *ss, session_error_t err) goto connect_error; } - cs->session_state = SESSION_STATE_CONNECTING; + session_set_state (cs, SESSION_STATE_CONNECTING); if (app_worker_connect_notify (client_wrk, cs, 0, opaque)) { @@ -390,7 +411,7 @@ ct_session_connect_notify (session_t *ss, session_error_t err) } cs = session_get (cct->c_s_index, cct->c_thread_index); - cs->session_state = SESSION_STATE_READY; + session_set_state (cs, SESSION_STATE_READY); return 0; @@ -441,11 +462,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle, segment_manager_t *sm, u32 client_wrk_index) { u32 seg_ctx_index = ~0, sm_index, pair_bytes; + u64 seg_size, seg_handle, client_seg_handle; segment_manager_props_t *props; const u32 margin = 16 << 10; ct_segments_ctx_t *seg_ctx; app_worker_t *client_wrk; - u64 seg_size, seg_handle; application_t *server; ct_segment_t *ct_seg; uword *spp; @@ -507,7 +528,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle, goto error; client_wrk = app_worker_get (client_wrk_index); - if (app_worker_add_segment_notify (client_wrk, seg_handle)) + /* Make sure client workers do not have overlapping segment 
handles. + * Ideally, we should attach fs to client worker segment manager and + * create a new handle but that's not currently possible. */ + client_seg_handle = ct_client_seg_handle (seg_handle, client_wrk_index); + if (app_worker_add_segment_notify (client_wrk, client_seg_handle)) { app_worker_del_segment_notify (server_wrk, seg_handle); goto error; @@ -645,7 +670,7 @@ ct_accept_one (u32 thread_index, u32 ho_index) cct = ct_connection_alloc (thread_index); cct_index = cct->c_c_index; - ho = ct_connection_get (ho_index, 0); + ho = ct_half_open_get (ho_index); /* Unlikely but half-open session and transport could have been freed */ if (PREDICT_FALSE (!ho)) @@ -701,7 +726,7 @@ ct_accept_one (u32 thread_index, u32 ho_index) sct->c_is_ip4); ss->connection_index = sct->c_c_index; ss->listener_handle = listen_session_get_handle (ll); - ss->session_state = SESSION_STATE_CREATED; + session_set_state (ss, SESSION_STATE_CREATED); server_wrk = application_listener_select_worker (ll); ss->app_wrk_index = server_wrk->wrk_index; @@ -724,9 +749,10 @@ ct_accept_one (u32 thread_index, u32 ho_index) cct->client_tx_fifo = ss->rx_fifo; cct->client_rx_fifo->refcnt++; cct->client_tx_fifo->refcnt++; - cct->segment_handle = sct->segment_handle; + cct->segment_handle = + ct_client_seg_handle (sct->segment_handle, cct->client_wrk); - ss->session_state = SESSION_STATE_ACCEPTING; + session_set_state (ss, SESSION_STATE_ACCEPTING); if (app_worker_accept_notify (server_wrk, ss)) { ct_session_connect_notify (ss, SESSION_E_REFUSED); @@ -739,39 +765,90 @@ ct_accept_one (u32 thread_index, u32 ho_index) static void ct_accept_rpc_wrk_handler (void *rpc_args) { - u32 thread_index, ho_index, n_connects, i, n_pending; + u32 thread_index, n_connects, i, n_pending; const u32 max_connects = 32; ct_worker_t *wrk; + u8 need_rpc = 0; thread_index = pointer_to_uword (rpc_args); wrk = ct_worker_get (thread_index); - /* Sub without lock as main enqueues with worker barrier */ + /* Connects could be handled 
without worker barrier so grab lock */ + clib_spinlock_lock (&wrk->pending_connects_lock); + n_pending = clib_fifo_elts (wrk->pending_connects); n_connects = clib_min (n_pending, max_connects); + vec_validate (wrk->new_connects, n_connects); for (i = 0; i < n_connects; i++) - { - clib_fifo_sub1 (wrk->pending_connects, ho_index); - ct_accept_one (thread_index, ho_index); - } + clib_fifo_sub1 (wrk->pending_connects, wrk->new_connects[i]); if (n_pending == n_connects) wrk->have_connects = 0; else + need_rpc = 1; + + clib_spinlock_unlock (&wrk->pending_connects_lock); + + for (i = 0; i < n_connects; i++) + ct_accept_one (thread_index, wrk->new_connects[i]); + + if (need_rpc) session_send_rpc_evt_to_thread_force ( thread_index, ct_accept_rpc_wrk_handler, uword_to_pointer (thread_index, void *)); } -static int -ct_connect (app_worker_t * client_wrk, session_t * ll, - session_endpoint_cfg_t * sep) +static void +ct_fwrk_flush_connects (void *rpc_args) { - u32 thread_index, ho_index; + u32 thread_index, fwrk_index, n_workers; ct_main_t *cm = &ct_main; - ct_connection_t *ho; ct_worker_t *wrk; + u8 need_rpc; + + fwrk_index = cm->fwrk_thread; + n_workers = vec_len (cm->fwrk_pending_connects); + + for (thread_index = fwrk_index; thread_index < n_workers; thread_index++) + { + if (!vec_len (cm->fwrk_pending_connects[thread_index])) + continue; + + wrk = ct_worker_get (thread_index); + + /* Connects can be done without worker barrier, grab dst worker lock */ + if (thread_index != fwrk_index) + clib_spinlock_lock (&wrk->pending_connects_lock); + + clib_fifo_add (wrk->pending_connects, + cm->fwrk_pending_connects[thread_index], + vec_len (cm->fwrk_pending_connects[thread_index])); + if (!wrk->have_connects) + { + wrk->have_connects = 1; + need_rpc = 1; + } + + if (thread_index != fwrk_index) + clib_spinlock_unlock (&wrk->pending_connects_lock); + + vec_reset_length (cm->fwrk_pending_connects[thread_index]); + + if (need_rpc) + session_send_rpc_evt_to_thread_force ( + thread_index, 
ct_accept_rpc_wrk_handler, + uword_to_pointer (thread_index, void *)); + } + + cm->fwrk_have_flush = 0; +} + +static void +ct_program_connect_to_wrk (u32 ho_index) +{ + ct_main_t *cm = &ct_main; + u32 thread_index; /* Simple round-robin policy for spreading sessions over workers. We skip * thread index 0, i.e., offset the index by 1, when we have workers as it @@ -780,6 +857,25 @@ ct_connect (app_worker_t * client_wrk, session_t * ll, cm->n_sessions += 1; thread_index = cm->n_workers ? (cm->n_sessions % cm->n_workers) + 1 : 0; + /* Pospone flushing of connect request to dst worker until after session + * layer fully initializes the half-open session. */ + vec_add1 (cm->fwrk_pending_connects[thread_index], ho_index); + if (!cm->fwrk_have_flush) + { + session_send_rpc_evt_to_thread_force ( + cm->fwrk_thread, ct_fwrk_flush_connects, + uword_to_pointer (thread_index, void *)); + cm->fwrk_have_flush = 1; + } +} + +static int +ct_connect (app_worker_t *client_wrk, session_t *ll, + session_endpoint_cfg_t *sep) +{ + ct_connection_t *ho; + u32 ho_index; + /* * Alloc and init client half-open transport */ @@ -800,21 +896,10 @@ ct_connect (app_worker_t * client_wrk, session_t * ll, ho->actual_tp = sep->original_tp; /* - * Accept connection on thread selected above. Connected reply comes + * Program connect on a worker, connected reply comes * after server accepts the connection. 
*/ - - wrk = ct_worker_get (thread_index); - - /* Worker barrier held, add without additional lock */ - clib_fifo_add1 (wrk->pending_connects, ho_index); - if (!wrk->have_connects) - { - wrk->have_connects = 1; - session_send_rpc_evt_to_thread_force ( - thread_index, ct_accept_rpc_wrk_handler, - uword_to_pointer (thread_index, void *)); - } + ct_program_connect_to_wrk (ho_index); return ho_index; } @@ -852,9 +937,9 @@ ct_listener_get (u32 ct_index) } static transport_connection_t * -ct_half_open_get (u32 ct_index) +ct_session_half_open_get (u32 ct_index) { - return (transport_connection_t *) ct_connection_get (ct_index, 0); + return (transport_connection_t *) ct_half_open_get (ct_index); } static void @@ -876,7 +961,10 @@ ct_session_cleanup (u32 conn_index, u32 thread_index) static void ct_cleanup_ho (u32 ho_index) { - ct_connection_free (ct_connection_get (ho_index, 0)); + ct_connection_t *ho; + + ho = ct_half_open_get (ho_index); + ct_connection_free (ho); } static int @@ -907,7 +995,7 @@ ct_session_connect (transport_endpoint_cfg_t * tep) goto global_scope; ll = listen_session_get_from_handle (lh); - al = app_listener_get_w_session (ll); + al = app_listener_get (ll->al_index); /* * Break loop if rule in local table points to connecting app. 
This @@ -936,8 +1024,12 @@ global_scope: ll = session_lookup_listener_wildcard (table_index, sep); /* Avoid connecting app to own listener */ - if (ll && ll->app_index != app->app_index) - return ct_connect (app_wrk, ll, sep_ext); + if (ll) + { + al = app_listener_get (ll->al_index); + if (al->app_index != app->app_index) + return ct_connect (app_wrk, ll, sep_ext); + } /* Failed to connect but no error */ return SESSION_E_LOCAL_CONNECT; @@ -946,6 +1038,8 @@ global_scope: static inline int ct_close_is_reset (ct_connection_t *ct, session_t *s) { + if (ct->flags & CT_CONN_F_RESET) + return 1; if (ct->flags & CT_CONN_F_CLIENT) return (svm_fifo_max_dequeue (ct->client_rx_fifo) > 0); else @@ -953,6 +1047,17 @@ ct_close_is_reset (ct_connection_t *ct, session_t *s) } static void +ct_session_cleanup_server_session (session_t *s) +{ + ct_connection_t *ct; + + ct = (ct_connection_t *) session_get_transport (s); + ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo); + session_free (s); + ct_connection_free (ct); +} + +static void ct_session_postponed_cleanup (ct_connection_t *ct) { ct_connection_t *peer_ct; @@ -972,33 +1077,38 @@ ct_session_postponed_cleanup (ct_connection_t *ct) } session_transport_closed_notify (&ct->connection); + /* It would be cleaner to call session_transport_delete_notify + * but then we can't control session cleanup lower */ + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); + if (app_wrk) + app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT); + if (ct->flags & CT_CONN_F_CLIENT) { - if (app_wrk) - app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT); - /* Normal free for client session as the fifos are allocated through * the connects segment manager in a segment that's not shared with * the server */ ct_session_dealloc_fifos (ct, ct->client_rx_fifo, ct->client_tx_fifo); - session_free_w_fifos (s); + session_program_cleanup (s); + ct_connection_free (ct); } else { /* Manual session and fifo segment cleanup to avoid 
implicit * segment manager cleanups and notifications */ - app_wrk = app_worker_get_if_valid (s->app_wrk_index); if (app_wrk) { - app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT); - app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_SESSION); + /* Remove custom cleanup notify infra when/if switching to normal + * session cleanup. Note that ct is freed in the cb function */ + app_worker_cleanup_notify_custom (app_wrk, s, + SESSION_CLEANUP_SESSION, + ct_session_cleanup_server_session); + } + else + { + ct_connection_free (ct); } - - ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo); - session_free (s); } - - ct_connection_free (ct); } static void @@ -1022,10 +1132,10 @@ ct_handle_cleanups (void *args) clib_fifo_sub2 (wrk->pending_cleanups, req); ct = ct_connection_get (req->ct_index, thread_index); s = session_get (ct->c_s_index, ct->c_thread_index); - if (!svm_fifo_has_event (s->tx_fifo)) - ct_session_postponed_cleanup (ct); - else + if (svm_fifo_has_event (s->tx_fifo) || (s->flags & SESSION_F_RX_EVT)) clib_fifo_add1 (wrk->pending_cleanups, *req); + else + ct_session_postponed_cleanup (ct); n_to_handle -= 1; } @@ -1090,6 +1200,15 @@ ct_session_close (u32 ct_index, u32 thread_index) ct_program_cleanup (ct); } +static void +ct_session_reset (u32 ct_index, u32 thread_index) +{ + ct_connection_t *ct; + ct = ct_connection_get (ct_index, thread_index); + ct->flags |= CT_CONN_F_RESET; + ct_session_close (ct_index, thread_index); +} + static transport_connection_t * ct_session_get (u32 ct_index, u32 thread_index) { @@ -1178,7 +1297,7 @@ format_ct_half_open (u8 *s, va_list *args) { u32 ho_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); - ct_connection_t *ct = ct_connection_get (ho_index, 0); + ct_connection_t *ct = ct_half_open_get (ho_index); s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_ct_connection_id, ct); if (verbose) s = format (s, "%-" SESSION_CLI_STATE_LEN "s", "HALF-OPEN"); @@ -1229,26 +1348,31 @@ ct_enable_disable 
(vlib_main_t * vm, u8 is_en) { vlib_thread_main_t *vtm = &vlib_thread_main; ct_main_t *cm = &ct_main; + ct_worker_t *wrk; cm->n_workers = vlib_num_workers (); + cm->fwrk_thread = transport_cl_thread (); vec_validate (cm->wrk, vtm->n_vlib_mains); + vec_foreach (wrk, cm->wrk) + clib_spinlock_init (&wrk->pending_connects_lock); clib_spinlock_init (&cm->ho_reuseable_lock); clib_rwlock_init (&cm->app_segs_lock); + vec_validate (cm->fwrk_pending_connects, cm->n_workers); return 0; } -/* *INDENT-OFF* */ static const transport_proto_vft_t cut_thru_proto = { .enable = ct_enable_disable, .start_listen = ct_start_listen, .stop_listen = ct_stop_listen, .get_connection = ct_session_get, .get_listener = ct_listener_get, - .get_half_open = ct_half_open_get, + .get_half_open = ct_session_half_open_get, .cleanup = ct_session_cleanup, .cleanup_ho = ct_cleanup_ho, .connect = ct_session_connect, .close = ct_session_close, + .reset = ct_session_reset, .custom_tx = ct_custom_tx, .app_rx_evt = ct_app_rx_evt, .format_listener = format_ct_listener, @@ -1261,7 +1385,6 @@ static const transport_proto_vft_t cut_thru_proto = { .service_type = TRANSPORT_SERVICE_VC, }, }; -/* *INDENT-ON* */ static inline int ct_session_can_tx (session_t *s) @@ -1286,6 +1409,7 @@ ct_session_tx (session_t * s) peer_s = session_get (peer_ct->c_s_index, peer_ct->c_thread_index); if (peer_s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return 0; + peer_s->flags |= SESSION_F_RX_EVT; return session_enqueue_notify (peer_s); } diff --git a/src/vnet/session/application_local.h b/src/vnet/session/application_local.h index 86edf243b22..fd2804c7baf 100644 --- a/src/vnet/session/application_local.h +++ b/src/vnet/session/application_local.h @@ -22,7 +22,8 @@ #define foreach_ct_flags \ _ (CLIENT, "client") \ - _ (HALF_OPEN, "half-open") + _ (HALF_OPEN, "half-open") \ + _ (RESET, "reset") enum { diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c index cd2636cff32..f547dcfc031 
100644 --- a/src/vnet/session/application_namespace.c +++ b/src/vnet/session/application_namespace.c @@ -81,21 +81,20 @@ app_namespace_alloc (const u8 *ns_id) return app_ns; } -int -vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) +session_error_t +vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a) { app_namespace_t *app_ns; session_table_t *st; u32 ns_index; - int rv; + session_error_t rv; if (a->is_add) { if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX && !vnet_get_sw_interface_or_null (vnet_get_main (), a->sw_if_index)) - return VNET_API_ERROR_INVALID_SW_IF_INDEX; - + return SESSION_E_INVALID; if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX) { @@ -108,7 +107,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) } if (a->sw_if_index == APP_NAMESPACE_INVALID_INDEX && a->ip4_fib_id == APP_NAMESPACE_INVALID_INDEX) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; app_ns = app_namespace_get_from_id (a->ns_id); if (!app_ns) @@ -119,11 +118,6 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) st->is_local = 1; st->appns_index = app_namespace_index (app_ns); app_ns->local_table_index = session_table_index (st); - if (a->netns) - { - app_ns->netns = vec_dup (a->netns); - vec_terminate_c_string (app_ns->netns); - } if (a->sock_name) { app_ns->sock_name = vec_dup (a->sock_name); @@ -153,11 +147,11 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) { ns_index = app_namespace_index_from_id (a->ns_id); if (ns_index == APP_NAMESPACE_INVALID_INDEX) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; app_ns = app_namespace_get (ns_index); if (!app_ns) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; application_namespace_cleanup (app_ns); @@ -167,8 +161,6 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) st = session_table_get (app_ns->local_table_index); session_table_free (st, FIB_PROTOCOL_MAX); - if (app_ns->netns) 
- vec_free (app_ns->netns); if (app_ns->sock_name) vec_free (app_ns->sock_name); @@ -255,7 +247,6 @@ app_namespaces_init (void) /* clang-format off */ vnet_app_namespace_add_del_args_t a = { .ns_id = ns_id, - .netns = 0, .sock_name = 0, .secret = 0, .sw_if_index = APP_NAMESPACE_INVALID_INDEX, @@ -272,7 +263,7 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0; - u8 *netns = 0, *sock_name = 0; + u8 *sock_name = 0; unformat_input_t _line_input, *line_input = &_line_input; u32 sw_if_index, fib_id = APP_NAMESPACE_INVALID_INDEX; vnet_main_t *vnm = vnet_get_main (); @@ -302,8 +293,6 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input, sw_if_index_set = 1; else if (unformat (line_input, "fib_id", &fib_id)) ; - else if (unformat (line_input, "netns %_%v%_", &netns)) - ; else if (unformat (line_input, "sock-name %_%v%_", &sock_name)) ; else @@ -329,7 +318,6 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input, /* clang-format off */ vnet_app_namespace_add_del_args_t args = { .ns_id = ns_id, - .netns = netns, .secret = secret, .sw_if_index = sw_if_index, .sock_name = sock_name, @@ -344,21 +332,18 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input, done: vec_free (ns_id); - vec_free (netns); vec_free (sock_name); unformat_free (line_input); return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (app_ns_command, static) = { .path = "app ns", .short_help = "app ns [add|del] id <namespace-id> secret <secret> " - "sw_if_index <sw_if_index> if <interface> [netns <ns>]", + "sw_if_index <sw_if_index> if <interface>", .function = app_ns_fn, }; -/* *INDENT-ON* */ u8 * format_app_namespace (u8 * s, va_list * args) @@ -371,8 +356,6 @@ format_app_namespace (u8 * s, va_list * args) if (app_ns->sw_if_index != (u32) ~0) s = format (s, "\nInterface: %U", format_vnet_sw_if_index_name, vnm, app_ns->sw_if_index); - if (app_ns->netns) - s = format (s, "\nNetns: %s", app_ns->netns); if 
(app_ns->sock_name) s = format (s, "\nSocket: %s", app_ns->sock_name); @@ -401,7 +384,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns) vlib_cli_output (vm, "%12s%12s%5s", "app index", "wrk index", "fd"); - /* *INDENT-OFF* */ pool_foreach (cs, app_ns->app_sockets) { handle = (app_ns_api_handle_t *) &cs->private_data; cf = clib_file_get (&file_main, handle->aah_file_index); @@ -414,7 +396,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns) vlib_cli_output (vm, "%12d%12d%5u", app_wrk->app_index, app_wrk->wrk_map_index, cf->file_descriptor); } - /* *INDENT-ON* */ } static clib_error_t * @@ -482,8 +463,7 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input, } do_ns_list: - table_add_header_col (t, 6, "Index", "Secret", "Interface", "Id", "Netns", - "Socket"); + table_add_header_col (t, 5, "Index", "Secret", "Interface", "Id", "Socket"); int i = 0; pool_foreach (app_ns, app_namespace_pool) { @@ -493,7 +473,6 @@ do_ns_list: table_format_cell (t, i, j++, "%U", format_vnet_sw_if_index_name, vnm, app_ns->sw_if_index); table_format_cell (t, i, j++, "%s", app_ns->ns_id); - table_format_cell (t, i, j++, "%s", app_ns->netns); table_format_cell (t, i++, j++, "%s", app_ns->sock_name); } @@ -510,13 +489,11 @@ done: return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_app_ns_command, static) = { .path = "show app ns", .short_help = "show app ns [id <id> [api-clients]]", .function = show_app_ns_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h index 1750d41fff8..261325cbe0e 100644 --- a/src/vnet/session/application_namespace.h +++ b/src/vnet/session/application_namespace.h @@ -51,11 +51,6 @@ typedef struct _app_namespace u8 *ns_id; /** - * Linux netns if one was provided - */ - u8 *netns; - - /** * Name of socket applications can use to attach to session layer */ u8 *sock_name; @@ -69,7 +64,6 @@ typedef 
struct _app_namespace typedef struct _vnet_app_namespace_add_del_args { u8 *ns_id; - u8 *netns; u8 *sock_name; u64 secret; u32 sw_if_index; @@ -88,7 +82,8 @@ const u8 *app_namespace_id (app_namespace_t * app_ns); const u8 *app_namespace_id_from_index (u32 index); u32 app_namespace_index_from_id (const u8 *ns_id); void app_namespaces_init (void); -int vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a); +session_error_t +vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a); u32 app_namespace_get_fib_index (app_namespace_t * app_ns, u8 fib_proto); session_table_t *app_namespace_get_local_table (app_namespace_t * app_ns); diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c index 844e78f7fa9..befdb7c7002 100644 --- a/src/vnet/session/application_worker.c +++ b/src/vnet/session/application_worker.c @@ -26,6 +26,7 @@ app_worker_t * app_worker_alloc (application_t * app) { app_worker_t *app_wrk; + pool_get (app_workers, app_wrk); clib_memset (app_wrk, 0, sizeof (*app_wrk)); app_wrk->wrk_index = app_wrk - app_workers; @@ -33,7 +34,8 @@ app_worker_alloc (application_t * app) app_wrk->wrk_map_index = ~0; app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX; clib_spinlock_init (&app_wrk->detached_seg_managers_lock); - clib_spinlock_init (&app_wrk->postponed_mq_msgs_lock); + vec_validate (app_wrk->wrk_evts, vlib_num_workers ()); + vec_validate (app_wrk->wrk_mq_congested, vlib_num_workers ()); APP_DBG ("New app %v worker %u", app->name, app_wrk->wrk_index); return app_wrk; } @@ -56,26 +58,34 @@ void app_worker_free (app_worker_t * app_wrk) { application_t *app = application_get (app_wrk->app_index); + session_handle_t handle, *handles = 0, *sh; vnet_unlisten_args_t _a, *a = &_a; - u64 handle, *handles = 0, *sm_indices = 0; segment_manager_t *sm; - session_handle_t *sh; + u64 *sm_indices = 0; session_t *ls; u32 sm_index; int i; /* + * Cleanup vpp wrk events + */ + app_worker_del_all_events (app_wrk); 
+ for (i = 0; i < vec_len (app_wrk->wrk_evts); i++) + clib_fifo_free (app_wrk->wrk_evts[i]); + + vec_free (app_wrk->wrk_evts); + vec_free (app_wrk->wrk_mq_congested); + + /* * Listener cleanup */ - /* *INDENT-OFF* */ hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ ls = listen_session_get_from_handle (handle); vec_add1 (handles, app_listen_session_handle (ls)); vec_add1 (sm_indices, sm_index); sm = segment_manager_get (sm_index); })); - /* *INDENT-ON* */ for (i = 0; i < vec_len (handles); i++) { @@ -92,7 +102,7 @@ app_worker_free (app_worker_t * app_wrk) segment_manager_init_free (sm); } } - vec_reset_length (handles); + vec_free (handles); vec_free (sm_indices); hash_free (app_wrk->listeners_table); @@ -127,7 +137,6 @@ app_worker_free (app_worker_t * app_wrk) } vec_free (app_wrk->detached_seg_managers); clib_spinlock_free (&app_wrk->detached_seg_managers_lock); - clib_spinlock_free (&app_wrk->postponed_mq_msgs_lock); if (CLIB_DEBUG) clib_memset (app_wrk, 0xfe, sizeof (*app_wrk)); @@ -177,12 +186,67 @@ app_worker_alloc_session_fifos (segment_manager_t * sm, session_t * s) } int +app_worker_alloc_wrk_cl_session (app_worker_t *app_wrk, session_t *ls) +{ + svm_fifo_t *rx_fifo = 0, *tx_fifo = 0; + segment_manager_t *sm; + session_handle_t lsh; + app_listener_t *al; + session_t *s; + + al = app_listener_get (ls->al_index); + sm = app_worker_get_listen_segment_manager (app_wrk, ls); + lsh = session_handle (ls); + + s = session_alloc (0 /* listener on main worker */); + session_set_state (s, SESSION_STATE_LISTENING); + s->flags |= SESSION_F_IS_CLESS; + s->app_wrk_index = app_wrk->wrk_index; + ls = session_get_from_handle (lsh); + s->session_type = ls->session_type; + s->connection_index = ls->connection_index; + + segment_manager_alloc_session_fifos (sm, s->thread_index, &rx_fifo, + &tx_fifo); + + rx_fifo->shr->master_session_index = s->session_index; + rx_fifo->master_thread_index = s->thread_index; + + tx_fifo->shr->master_session_index = s->session_index; + 
tx_fifo->master_thread_index = s->thread_index; + + s->rx_fifo = rx_fifo; + s->tx_fifo = tx_fifo; + + vec_validate (al->cl_listeners, app_wrk->wrk_map_index); + al->cl_listeners[app_wrk->wrk_map_index] = s->session_index; + + return 0; +} + +void +app_worker_free_wrk_cl_session (app_worker_t *app_wrk, session_t *ls) +{ + app_listener_t *al; + session_t *s; + + al = app_listener_get (ls->al_index); + + s = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index); + segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); + session_free (s); + + al->cl_listeners[app_wrk->wrk_map_index] = SESSION_INVALID_INDEX; +} + +int app_worker_init_listener (app_worker_t * app_wrk, session_t * ls) { segment_manager_t *sm; /* Allocate segment manager. All sessions derived out of a listen session - * have fifos allocated by the same segment manager. */ + * have fifos allocated by the same segment manager. + * TODO(fcoras): limit memory consumption by cless listeners */ if (!(sm = app_worker_alloc_segment_manager (app_wrk))) return SESSION_E_ALLOC; @@ -193,18 +257,14 @@ app_worker_init_listener (app_worker_t * app_wrk, session_t * ls) hash_set (app_wrk->listeners_table, listen_session_get_handle (ls), segment_manager_index (sm)); - if (transport_connection_is_cless (session_get_transport (ls))) - { - if (ls->rx_fifo) - return SESSION_E_NOSUPPORT; - return app_worker_alloc_session_fifos (sm, ls); - } + if (ls->flags & SESSION_F_IS_CLESS) + return app_worker_alloc_wrk_cl_session (app_wrk, ls); + return 0; } -int -app_worker_start_listen (app_worker_t * app_wrk, - app_listener_t * app_listener) +session_error_t +app_worker_start_listen (app_worker_t *app_wrk, app_listener_t *app_listener) { session_t *ls; int rv; @@ -268,12 +328,8 @@ app_worker_stop_listen_session (app_worker_t * app_wrk, session_t * ls) if (PREDICT_FALSE (!sm_indexp)) return; - /* Dealloc fifos, if any (dgram listeners) */ - if (ls->rx_fifo) - { - segment_manager_dealloc_fifos (ls->rx_fifo, ls->tx_fifo); - 
ls->tx_fifo = ls->rx_fifo = 0; - } + if (ls->flags & SESSION_F_IS_CLESS) + app_worker_free_wrk_cl_session (app_wrk, ls); /* Try to cleanup segment manager */ sm = segment_manager_get (*sm_indexp); @@ -340,7 +396,7 @@ app_worker_init_accepted (session_t * s) listener = listen_session_get_from_handle (s->listener_handle); app_wrk = application_listener_select_worker (listener); - if (PREDICT_FALSE (app_wrk->mq_congested)) + if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk))) return -1; s->app_wrk_index = app_wrk->wrk_index; @@ -356,10 +412,35 @@ app_worker_init_accepted (session_t * s) } int +app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh, + u32 opaque, session_error_t err) +{ + session_event_t evt = { .event_type = SESSION_CTRL_EVT_BOUND, + .as_u64[0] = alsh, + .as_u64[1] = (u64) opaque << 32 | (u32) err }; + + app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt); + + return 0; +} + +int +app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh, + u32 opaque, session_error_t err) +{ + session_event_t evt = { .event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY, + .as_u64[0] = sh, + .as_u64[1] = (u64) opaque << 32 | (u32) err }; + + app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt); + return 0; +} + +int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s) { - application_t *app = application_get (app_wrk->app_index); - return app->cb_fns.session_accept_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_ACCEPTED); + return 0; } int @@ -373,7 +454,7 @@ app_worker_init_connected (app_worker_t * app_wrk, session_t * s) /* Allocate fifos for session, unless the app is a builtin proxy */ if (application_is_builtin_proxy (app)) - return 0; + return app->cb_fns.proxy_alloc_session_fifos (s); sm = app_worker_get_connect_segment_manager (app_wrk); return app_worker_alloc_session_fifos (sm, s); @@ -383,9 +464,13 @@ int app_worker_connect_notify (app_worker_t * app_wrk, session_t * s, 
session_error_t err, u32 opaque) { - application_t *app = application_get (app_wrk->app_index); - return app->cb_fns.session_connected_callback (app_wrk->wrk_index, opaque, - s, err); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_CONNECTED, + .as_u64[0] = s ? s->session_index : ~0, + .as_u64[1] = (u64) opaque << 32 | (u32) err }; + u32 thread_index = s ? s->thread_index : vlib_get_thread_index (); + + app_worker_add_event_custom (app_wrk, thread_index, &evt); + return 0; } int @@ -393,7 +478,7 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh) { session_handle_t *shp; - ASSERT (vlib_get_thread_index () == 0); + ASSERT (session_vlib_thread_is_cl_thread ()); pool_get (app_wrk->half_open_table, shp); *shp = sh; @@ -403,36 +488,28 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh) int app_worker_del_half_open (app_worker_t *app_wrk, session_t *s) { - application_t *app = application_get (app_wrk->app_index); - ASSERT (vlib_get_thread_index () <= 1); - pool_put_index (app_wrk->half_open_table, s->ho_index); - if (app->cb_fns.half_open_cleanup_callback) - app->cb_fns.half_open_cleanup_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_HALF_CLEANUP); return 0; } int app_worker_close_notify (app_worker_t * app_wrk, session_t * s) { - application_t *app = application_get (app_wrk->app_index); - app->cb_fns.session_disconnect_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_DISCONNECTED); return 0; } int app_worker_transport_closed_notify (app_worker_t * app_wrk, session_t * s) { - application_t *app = application_get (app_wrk->app_index); - if (app->cb_fns.session_transport_closed_callback) - app->cb_fns.session_transport_closed_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_TRANSPORT_CLOSED); return 0; } int app_worker_reset_notify (app_worker_t * app_wrk, session_t * s) { - application_t *app = application_get (app_wrk->app_index); - 
app->cb_fns.session_reset_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_RESET); return 0; } @@ -440,29 +517,33 @@ int app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s, session_cleanup_ntf_t ntf) { - application_t *app = application_get (app_wrk->app_index); - if (app->cb_fns.session_cleanup_callback) - app->cb_fns.session_cleanup_callback (s, ntf); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP, + .as_u64[0] = (u64) ntf << 32 | s->session_index, + .as_u64[1] = pointer_to_uword (session_cleanup) }; + + app_worker_add_event_custom (app_wrk, s->thread_index, &evt); + return 0; } int -app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s) +app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s, + session_cleanup_ntf_t ntf, + void (*cleanup_cb) (session_t *s)) { - application_t *app = application_get (app_wrk->app_index); - app->cb_fns.builtin_app_rx_callback (s); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP, + .as_u64[0] = (u64) ntf << 32 | s->session_index, + .as_u64[1] = pointer_to_uword (cleanup_cb) }; + + app_worker_add_event_custom (app_wrk, s->thread_index, &evt); + return 0; } int -app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s) +app_worker_rx_notify (app_worker_t *app_wrk, session_t *s) { - application_t *app = application_get (app_wrk->app_index); - - if (!app->cb_fns.builtin_app_tx_callback) - return 0; - - app->cb_fns.builtin_app_tx_callback (s); + app_worker_add_event (app_wrk, s, SESSION_IO_EVT_RX); return 0; } @@ -470,8 +551,11 @@ int app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s, session_handle_t new_sh) { - application_t *app = application_get (app_wrk->app_index); - app->cb_fns.session_migrate_callback (s, new_sh); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_MIGRATED, + .as_u64[0] = s->session_index, + .as_u64[1] = new_sh }; + + app_worker_add_event_custom (app_wrk, s->thread_index, &evt); return 0; } @@ -480,6 
+564,7 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s) { segment_manager_t *sm; svm_fifo_t *rxf, *txf; + int rv; if (s->session_state == SESSION_STATE_LISTENING) return application_change_listener_owner (s, app_wrk); @@ -496,8 +581,8 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s) s->tx_fifo = 0; sm = app_worker_get_connect_segment_manager (app_wrk); - if (app_worker_alloc_session_fifos (sm, s)) - return -1; + if ((rv = app_worker_alloc_session_fifos (sm, s))) + return rv; if (!svm_fifo_is_empty_cons (rxf)) svm_fifo_clone (s->rx_fifo, rxf); @@ -514,7 +599,7 @@ int app_worker_connect_session (app_worker_t *app_wrk, session_endpoint_cfg_t *sep, session_handle_t *rsh) { - if (PREDICT_FALSE (app_wrk->mq_congested)) + if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk))) return SESSION_E_REFUSED; sep->app_wrk_index = app_wrk->wrk_index; @@ -560,14 +645,12 @@ app_worker_first_listener (app_worker_t * app_wrk, u8 fib_proto, sst = session_type_from_proto_and_ip (transport_proto, fib_proto == FIB_PROTOCOL_IP4); - /* *INDENT-OFF* */ hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ listener = listen_session_get_from_handle (handle); if (listener->session_type == sst && !(listener->flags & SESSION_F_PROXY)) return listener; })); - /* *INDENT-ON* */ return 0; } @@ -584,13 +667,11 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto, sst = session_type_from_proto_and_ip (transport_proto, fib_proto == FIB_PROTOCOL_IP4); - /* *INDENT-OFF* */ hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ listener = listen_session_get_from_handle (handle); if (listener->session_type == sst && (listener->flags & SESSION_F_PROXY)) return listener; })); - /* *INDENT-ON* */ return 0; } @@ -601,24 +682,23 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto, int app_worker_add_segment_notify (app_worker_t * app_wrk, u64 segment_handle) { - application_t *app = application_get (app_wrk->app_index); + 
session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT, + .as_u64[1] = segment_handle }; - return app->cb_fns.add_segment_callback (app_wrk->wrk_index, - segment_handle); + app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt); + + return 0; } int app_worker_del_segment_notify (app_worker_t * app_wrk, u64 segment_handle) { - application_t *app = application_get (app_wrk->app_index); - return app->cb_fns.del_segment_callback (app_wrk->wrk_index, - segment_handle); -} + session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT, + .as_u64[1] = segment_handle }; -static inline u8 -app_worker_application_is_builtin (app_worker_t * app_wrk) -{ - return app_wrk->app_is_builtin; + app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt); + + return 0; } static int @@ -677,126 +757,38 @@ app_wrk_send_fd (app_worker_t *app_wrk, int fd) return 0; } -static int -mq_try_lock_and_alloc_msg (svm_msg_q_t *mq, session_mq_rings_e ring, - svm_msg_q_msg_t *msg) -{ - int rv, n_try = 0; - - while (n_try < 75) - { - rv = svm_msg_q_lock_and_alloc_msg_w_ring (mq, ring, SVM_Q_NOWAIT, msg); - if (!rv) - return 0; - /* - * Break the loop if mq is full, usually this is because the - * app has crashed or is hanging on somewhere. 
- */ - if (rv != -1) - break; - n_try += 1; - usleep (1); - } - - return -1; -} - -typedef union app_wrk_mq_rpc_args_ -{ - struct - { - u32 thread_index; - u32 app_wrk_index; - }; - uword as_uword; -} app_wrk_mq_rpc_ags_t; - -static int -app_wrk_handle_mq_postponed_msgs (void *arg) +void +app_worker_add_event (app_worker_t *app_wrk, session_t *s, + session_evt_type_t evt_type) { - svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg; - app_wrk_postponed_msg_t *pm; - app_wrk_mq_rpc_ags_t args; - u32 max_msg, n_msg = 0; - app_worker_t *app_wrk; session_event_t *evt; - svm_msg_q_t *mq; - - args.as_uword = pointer_to_uword (arg); - app_wrk = app_worker_get_if_valid (args.app_wrk_index); - if (!app_wrk) - return 0; - - mq = app_wrk->event_queue; - - clib_spinlock_lock (&app_wrk->postponed_mq_msgs_lock); - - max_msg = clib_min (32, clib_fifo_elts (app_wrk->postponed_mq_msgs)); - while (n_msg < max_msg) - { - pm = clib_fifo_head (app_wrk->postponed_mq_msgs); - if (mq_try_lock_and_alloc_msg (mq, pm->ring, mq_msg)) - break; - - evt = svm_msg_q_msg_data (mq, mq_msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = pm->event_type; - clib_memcpy_fast (evt->data, pm->data, pm->len); - - if (pm->fd != -1) - app_wrk_send_fd (app_wrk, pm->fd); - - svm_msg_q_add_and_unlock (mq, mq_msg); - - clib_fifo_advance_head (app_wrk->postponed_mq_msgs, 1); - n_msg += 1; - } + ASSERT (s->thread_index == vlib_get_thread_index ()); + clib_fifo_add2 (app_wrk->wrk_evts[s->thread_index], evt); + evt->session_index = s->session_index; + evt->event_type = evt_type; + evt->postponed = 0; - if (!clib_fifo_elts (app_wrk->postponed_mq_msgs)) + /* First event for this app_wrk. 
Schedule it for handling in session input */ + if (clib_fifo_elts (app_wrk->wrk_evts[s->thread_index]) == 1) { - app_wrk->mq_congested = 0; + session_worker_t *wrk = session_main_get_worker (s->thread_index); + session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index); } - else - { - session_send_rpc_evt_to_thread_force ( - args.thread_index, app_wrk_handle_mq_postponed_msgs, - uword_to_pointer (args.as_uword, void *)); - } - - clib_spinlock_unlock (&app_wrk->postponed_mq_msgs_lock); - - return 0; } -static void -app_wrk_add_mq_postponed_msg (app_worker_t *app_wrk, session_mq_rings_e ring, - u8 evt_type, void *msg, u32 msg_len, int fd) +void +app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index, + session_event_t *evt) { - app_wrk_postponed_msg_t *pm; - - clib_spinlock_lock (&app_wrk->postponed_mq_msgs_lock); + clib_fifo_add1 (app_wrk->wrk_evts[thread_index], *evt); - app_wrk->mq_congested = 1; - - clib_fifo_add2 (app_wrk->postponed_mq_msgs, pm); - clib_memcpy_fast (pm->data, msg, msg_len); - pm->event_type = evt_type; - pm->ring = ring; - pm->len = msg_len; - pm->fd = fd; - - if (clib_fifo_elts (app_wrk->postponed_mq_msgs) == 1) + /* First event for this app_wrk. 
Schedule it for handling in session input */ + if (clib_fifo_elts (app_wrk->wrk_evts[thread_index]) == 1) { - app_wrk_mq_rpc_ags_t args = { .thread_index = vlib_get_thread_index (), - .app_wrk_index = app_wrk->wrk_index }; - - session_send_rpc_evt_to_thread_force ( - args.thread_index, app_wrk_handle_mq_postponed_msgs, - uword_to_pointer (args.as_uword, void *)); + session_worker_t *wrk = session_main_get_worker (thread_index); + session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index); } - - clib_spinlock_unlock (&app_wrk->postponed_mq_msgs_lock); } always_inline void @@ -806,14 +798,9 @@ app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg, svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg; svm_msg_q_t *mq = app_wrk->event_queue; session_event_t *evt; - int rv; - if (PREDICT_FALSE (app_wrk->mq_congested)) - goto handle_congestion; - - rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_CTRL_EVT_RING, mq_msg); - if (PREDICT_FALSE (rv)) - goto handle_congestion; + ASSERT (!svm_msg_q_or_ring_is_full (mq, SESSION_MQ_CTRL_EVT_RING)); + *mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_CTRL_EVT_RING); evt = svm_msg_q_msg_data (mq, mq_msg); clib_memset (evt, 0, sizeof (*evt)); @@ -823,14 +810,7 @@ app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg, if (fd != -1) app_wrk_send_fd (app_wrk, fd); - svm_msg_q_add_and_unlock (mq, mq_msg); - - return; - -handle_congestion: - - app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_CTRL_EVT_RING, evt_type, - msg, msg_len, fd); + svm_msg_q_add_raw (mq, mq_msg); } void @@ -847,116 +827,33 @@ app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg, app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, -1); } -static inline int -app_send_io_evt_rx (app_worker_t * app_wrk, session_t * s) +u8 +app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index) { - svm_msg_q_msg_t _mq_msg = { 0 }, *mq_msg = &_mq_msg; - session_event_t *evt; - svm_msg_q_t *mq; - u32 
app_session; - int rv; - - if (app_worker_application_is_builtin (app_wrk)) - return app_worker_builtin_rx (app_wrk, s); - - if (svm_fifo_has_event (s->rx_fifo)) - return 0; - - app_session = s->rx_fifo->shr->client_session_index; - mq = app_wrk->event_queue; - - if (PREDICT_FALSE (app_wrk->mq_congested)) - goto handle_congestion; - - rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg); - - if (PREDICT_FALSE (rv)) - goto handle_congestion; - - evt = svm_msg_q_msg_data (mq, mq_msg); - evt->event_type = SESSION_IO_EVT_RX; - evt->session_index = app_session; - - (void) svm_fifo_set_event (s->rx_fifo); - - svm_msg_q_add_and_unlock (mq, mq_msg); - - return 0; - -handle_congestion: - - app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_IO_EVT_RING, - SESSION_IO_EVT_RX, &app_session, - sizeof (app_session), -1); - return -1; + return app_wrk->wrk_mq_congested[thread_index] > 0; } -static inline int -app_send_io_evt_tx (app_worker_t * app_wrk, session_t * s) +void +app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index) { - svm_msg_q_msg_t _mq_msg = { 0 }, *mq_msg = &_mq_msg; - session_event_t *evt; - svm_msg_q_t *mq; - u32 app_session; - int rv; - - if (app_worker_application_is_builtin (app_wrk)) - return app_worker_builtin_tx (app_wrk, s); - - app_session = s->tx_fifo->shr->client_session_index; - mq = app_wrk->event_queue; - - if (PREDICT_FALSE (app_wrk->mq_congested)) - goto handle_congestion; - - rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg); - - if (PREDICT_FALSE (rv)) - goto handle_congestion; - - evt = svm_msg_q_msg_data (mq, mq_msg); - evt->event_type = SESSION_IO_EVT_TX; - evt->session_index = app_session; - - svm_msg_q_add_and_unlock (mq, mq_msg); - - return 0; - -handle_congestion: - - app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_IO_EVT_RING, - SESSION_IO_EVT_TX, &app_session, - sizeof (app_session), -1); - return -1; + clib_atomic_fetch_add_relax (&app_wrk->mq_congested, 1); + ASSERT (thread_index == 
vlib_get_thread_index ()); + app_wrk->wrk_mq_congested[thread_index] = 1; } -/* *INDENT-OFF* */ -typedef int (app_send_evt_handler_fn) (app_worker_t *app, - session_t *s); -static app_send_evt_handler_fn * const app_send_evt_handler_fns[2] = { - app_send_io_evt_rx, - app_send_io_evt_tx, -}; -/* *INDENT-ON* */ - -/** - * Send event to application - * - * Logic from queue perspective is blocking. However, if queue is full, - * we return. - */ -int -app_worker_lock_and_send_event (app_worker_t * app, session_t * s, - u8 evt_type) +void +app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, u32 thread_index) { - return app_send_evt_handler_fns[evt_type] (app, s); + clib_atomic_fetch_sub_relax (&app_wrk->mq_congested, 1); + ASSERT (thread_index == vlib_get_thread_index ()); + app_wrk->wrk_mq_congested[thread_index] = 0; } u8 * format_app_worker_listener (u8 * s, va_list * args) { app_worker_t *app_wrk = va_arg (*args, app_worker_t *); - u64 handle = va_arg (*args, u64); + session_handle_t handle = va_arg (*args, u64); u32 sm_index = va_arg (*args, u32); int verbose = va_arg (*args, int); session_t *listener; diff --git a/src/vnet/session/mma_template.h b/src/vnet/session/mma_template.h index dc3545a4ffe..2c0230c2869 100644 --- a/src/vnet/session/mma_template.h +++ b/src/vnet/session/mma_template.h @@ -41,11 +41,9 @@ typedef struct { u32 action_index; u32 *next_indices; - /* *INDENT-OFF* */ RTT (mma_mask_or_match) mask; RTT (mma_mask_or_match) match; RTT (mma_mask_or_match) max_match; - /* *INDENT-ON* */ } RTT (mma_rule); typedef int (*RTT (rule_cmp_fn)) (RTT (mma_rule) * rule1, diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c index c14cffa66a2..80bebdca9b5 100644 --- a/src/vnet/session/segment_manager.c +++ b/src/vnet/session/segment_manager.c @@ -105,8 +105,8 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size, /* Not configured for addition of new segments and not first */ if (!props->add_segment && 
!segment_size) { - clib_warning ("cannot allocate new segment"); - return VNET_API_ERROR_INVALID_VALUE; + SESSION_DBG ("cannot allocate new segment"); + return SESSION_E_INVALID; } /* @@ -418,7 +418,7 @@ segment_manager_init_first (segment_manager_t * sm) fs_index = segment_manager_add_segment (sm, max_seg_size, 0); if (fs_index < 0) { - clib_warning ("Failed to preallocate segment %d", i); + SESSION_DBG ("Failed to preallocate segment %d", i); return fs_index; } @@ -440,7 +440,7 @@ segment_manager_init_first (segment_manager_t * sm) fs_index = segment_manager_add_segment (sm, first_seg_size, 0); if (fs_index < 0) { - clib_warning ("Failed to allocate segment"); + SESSION_DBG ("Failed to allocate segment"); return fs_index; } @@ -458,7 +458,7 @@ segment_manager_init_first (segment_manager_t * sm) for (; i < fs->n_slices; i++) { if (fifo_segment_prealloc_fifo_hdrs (fs, i, hdrs_per_slice)) - return VNET_API_ERROR_SVM_SEGMENT_CREATE_FAIL; + return SESSION_E_SEG_CREATE; } } @@ -499,11 +499,9 @@ segment_manager_free (segment_manager_t * sm) * the manager is explicitly deleted/detached by the app. 
*/ clib_rwlock_writer_lock (&sm->segments_rwlock); - /* *INDENT-OFF* */ pool_foreach (fifo_segment, sm->segments) { segment_manager_del_segment (sm, fifo_segment); } - /* *INDENT-ON* */ pool_free (sm->segments); clib_rwlock_writer_unlock (&sm->segments_rwlock); @@ -582,7 +580,6 @@ segment_manager_has_fifos (segment_manager_t * sm) fifo_segment_t *seg; u8 first = 1; - /* *INDENT-OFF* */ segment_manager_foreach_segment_w_lock (seg, sm, ({ if (CLIB_DEBUG && !first && !fifo_segment_has_fifos (seg) && !(fifo_segment_flags (seg) & FIFO_SEGMENT_F_IS_PREALLOCATED)) @@ -597,7 +594,6 @@ segment_manager_has_fifos (segment_manager_t * sm) return 1; } })); - /* *INDENT-ON* */ return 0; } @@ -617,7 +613,6 @@ segment_manager_del_sessions (segment_manager_t * sm) ASSERT (pool_elts (sm->segments) != 0); /* Across all fifo segments used by the server */ - /* *INDENT-OFF* */ segment_manager_foreach_segment_w_lock (fs, sm, ({ for (slice_index = 0; slice_index < fs->n_slices; slice_index++) { @@ -642,7 +637,6 @@ segment_manager_del_sessions (segment_manager_t * sm) * sessions if the segment can be removed. */ })); - /* *INDENT-ON* */ vec_foreach (handle, handles) { @@ -807,7 +801,7 @@ sm_lock_and_alloc_segment_and_fifos (segment_manager_t *sm, props->tx_fifo_size, rx_fifo, tx_fifo); if (rv) { - clib_warning ("Added a segment, still can't allocate a fifo"); + SESSION_DBG ("Added a segment, still can't allocate a fifo"); rv = SESSION_E_SEG_NO_SPACE2; goto done; } @@ -866,7 +860,7 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo) /* Thread that allocated the fifos must be the one to clean them up */ ASSERT (rx_fifo->master_thread_index == vlib_get_thread_index () || - rx_fifo->refcnt > 1); + rx_fifo->refcnt > 1 || vlib_thread_is_main_w_barrier ()); /* It's possible to have no segment manager if the session was removed * as result of a detach. 
*/ @@ -961,12 +955,10 @@ segment_manager_alloc_queue (fifo_segment_t * segment, fifo_evt_size = sizeof (session_event_t); notif_q_size = clib_max (16, props->evt_q_size >> 4); - /* *INDENT-OFF* */ svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { {props->evt_q_size, fifo_evt_size, 0}, {notif_q_size, session_evt_size, 0} }; - /* *INDENT-ON* */ cfg->consumer_pid = 0; cfg->n_rings = 2; cfg->q_nitems = props->evt_q_size; @@ -1125,13 +1117,11 @@ done: return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (segment_manager_show_command, static) = { .path = "show segment-manager", .short_help = "show segment-manager [segments][verbose][index <nn>]", .function = segment_manager_show_fn, }; -/* *INDENT-ON* */ void segment_manager_format_sessions (segment_manager_t * sm, int verbose) @@ -1160,7 +1150,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose) clib_rwlock_reader_lock (&sm->segments_rwlock); - /* *INDENT-OFF* */ pool_foreach (fs, sm->segments) { for (slice_index = 0; slice_index < fs->n_slices; slice_index++) { @@ -1192,7 +1181,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose) vec_free (s); } } - /* *INDENT-ON* */ clib_rwlock_reader_unlock (&sm->segments_rwlock); } diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h index e786b3144c2..1e99c4605a6 100644 --- a/src/vnet/session/segment_manager.h +++ b/src/vnet/session/segment_manager.h @@ -190,7 +190,9 @@ static inline void segment_manager_parse_segment_handle (u64 segment_handle, u32 * sm_index, u32 * segment_index) { - *sm_index = segment_handle >> 32; + /* Upper 8 bits zeroed out as they may be used for cut-through segments. 
+ * See @ref ct_alloc_segment */ + *sm_index = (segment_handle >> 32) & 0xFFFFFF; *segment_index = segment_handle & 0xFFFFFFFF; } diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index d2a942fb68b..6affae4112d 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -117,38 +117,6 @@ autoreply define app_del_cert_key_pair { u32 index; }; -/** \brief Application add TLS certificate - ### WILL BE DEPRECATED POST 20.01 ### - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param cert_len - certificate length - @param cert - certificate as a string -*/ -autoreply define application_tls_cert_add { - option deprecated="to be removed post 21.06"; - u32 client_index; - u32 context; - u32 app_index; - u16 cert_len; - u8 cert[cert_len]; -}; - -/** \brief Application add TLS key - ### WILL BE DEPRECATED POST 20.01 ### - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param key_len - certificate length - @param key - PEM encoded key as a string -*/ -autoreply define application_tls_key_add { - option deprecated="to be removed post 21.06"; - u32 client_index; - u32 context; - u32 app_index; - u16 key_len; - u8 key[key_len]; -}; - /** \brief add/del application worker @param client_index - opaque cookie to identify the sender client to vpp direction only @@ -251,9 +219,49 @@ define app_namespace_add_del { @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored if sw_if_index set. 
@param namespace_id - namespace id + @param sock_name - socket name (path, abstract socket name) +*/ +define app_namespace_add_del_v4 { + option deprecated; + u32 client_index; + u32 context; + u64 secret; + bool is_add [default=true]; + vl_api_interface_index_t sw_if_index [default=0xffffffff]; + u32 ip4_fib_id; + u32 ip6_fib_id; + string namespace_id[64]; + string sock_name[]; +}; + +/** \brief Reply for app namespace add/del + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param appns_index - app namespace index +*/ +define app_namespace_add_del_v4_reply +{ + u32 context; + i32 retval; + u32 appns_index; +}; + +/** \brief add/del application namespace + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param secret - secret shared between app and vpp + @param sw_if_index - local interface that "supports" namespace. Set to + ~0 if no preference + @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored + if sw_if_index set. + @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored + if sw_if_index set. 
+ @param namespace_id - namespace id @param netns - linux net namespace */ define app_namespace_add_del_v2 { + option deprecated; u32 client_index; u32 context; u64 secret; @@ -280,6 +288,7 @@ define app_namespace_add_del_v2 { @param sock_name - socket name (path, abstract socket name) */ define app_namespace_add_del_v3 { + option deprecated; u32 client_index; u32 context; u64 secret; @@ -312,6 +321,7 @@ define app_namespace_add_del_reply */ define app_namespace_add_del_v2_reply { + option deprecated; u32 context; i32 retval; u32 appns_index; @@ -319,6 +329,7 @@ define app_namespace_add_del_v2_reply define app_namespace_add_del_v3_reply { + option deprecated; u32 context; i32 retval; u32 appns_index; diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 3643e91a33a..67e7ee39001 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -17,6 +17,7 @@ * @brief Session and session manager */ +#include <vnet/plugin/plugin.h> #include <vnet/session/session.h> #include <vnet/session/application.h> #include <vnet/dpo/load_balance.h> @@ -59,7 +60,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->session_index = *(u32 *) data; break; - case SESSION_IO_EVT_BUILTIN_TX: + case SESSION_IO_EVT_TX_MAIN: case SESSION_CTRL_EVT_CLOSE: case SESSION_CTRL_EVT_RESET: msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); @@ -96,6 +97,13 @@ session_send_io_evt_to_thread_custom (void *data, u32 thread_index, } int +session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type) +{ + return session_send_evt_to_thread ((void *) &sh.session_index, 0, + (u32) sh.thread_index, evt_type); +} + +int session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type) { /* only events supported are disconnect, shutdown and reset */ @@ -208,7 +216,7 @@ session_alloc (u32 thread_index) clib_memset (s, 0, sizeof (*s)); s->session_index = s - 
wrk->sessions; s->thread_index = thread_index; - s->app_index = APP_INVALID_INDEX; + s->al_index = APP_INVALID_INDEX; return s; } @@ -216,15 +224,12 @@ session_alloc (u32 thread_index) void session_free (session_t * s) { - if (CLIB_DEBUG) - { - u8 thread_index = s->thread_index; - clib_memset (s, 0xFA, sizeof (*s)); - pool_put (session_main.wrk[thread_index].sessions, s); - return; - } + session_worker_t *wrk = &session_main.wrk[s->thread_index]; + SESSION_EVT (SESSION_EVT_FREE, s); - pool_put (session_main.wrk[s->thread_index].sessions, s); + if (CLIB_DEBUG) + clib_memset (s, 0xFA, sizeof (*s)); + pool_put (wrk->sessions, s); } u8 @@ -242,35 +247,48 @@ session_is_valid (u32 si, u8 thread_index) || s->session_state <= SESSION_STATE_LISTENING) return 1; - if (s->session_state == SESSION_STATE_CONNECTING && + if ((s->session_state == SESSION_STATE_CONNECTING || + s->session_state == SESSION_STATE_TRANSPORT_CLOSED) && (s->flags & SESSION_F_HALF_OPEN)) return 1; tc = session_get_transport (s); - if (s->connection_index != tc->c_index - || s->thread_index != tc->thread_index || tc->s_index != si) + if (s->connection_index != tc->c_index || + s->thread_index != tc->thread_index || tc->s_index != si) return 0; return 1; } +void +session_cleanup (session_t *s) +{ + segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); + session_free (s); +} + static void session_cleanup_notify (session_t * s, session_cleanup_ntf_t ntf) { app_worker_t *app_wrk; app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (!app_wrk) - return; + if (PREDICT_FALSE (!app_wrk)) + { + if (ntf == SESSION_CLEANUP_TRANSPORT) + return; + + session_cleanup (s); + return; + } app_worker_cleanup_notify (app_wrk, s, ntf); } void -session_free_w_fifos (session_t * s) +session_program_cleanup (session_t *s) { + ASSERT (s->session_state == SESSION_STATE_TRANSPORT_DELETED); session_cleanup_notify (s, SESSION_CLEANUP_SESSION); - segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); - session_free (s); } /** 
@@ -287,7 +305,7 @@ session_delete (session_t * s) if ((rv = session_lookup_del_session (s))) clib_warning ("session %u hash delete rv %d", s->session_index, rv); - session_free_w_fifos (s); + session_program_cleanup (s); } void @@ -302,16 +320,27 @@ session_cleanup_half_open (session_handle_t ho_handle) * session should be removed. */ if (ho->connection_index == ~0) { - ho->session_state = SESSION_STATE_CLOSED; + session_set_state (ho, SESSION_STATE_CLOSED); return; } /* Migrated transports are no longer half-opens */ transport_cleanup (session_get_transport_proto (ho), - ho->connection_index, ho->app_index /* overloaded */); + ho->connection_index, ho->al_index /* overloaded */); + } + else if (ho->session_state != SESSION_STATE_TRANSPORT_DELETED) + { + /* Cleanup half-open session lookup table if need be */ + if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED) + { + transport_connection_t *tc; + tc = transport_get_half_open (session_get_transport_proto (ho), + ho->connection_index); + if (tc && !(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP)) + session_lookup_del_half_open (tc); + } + transport_cleanup_half_open (session_get_transport_proto (ho), + ho->connection_index); } - else - transport_cleanup_half_open (session_get_transport_proto (ho), - ho->connection_index); session_free (ho); } @@ -320,10 +349,12 @@ session_half_open_free (session_t *ho) { app_worker_t *app_wrk; - ASSERT (vlib_get_thread_index () <= 1); - app_wrk = app_worker_get (ho->app_wrk_index); - app_worker_del_half_open (app_wrk, ho); - session_free (ho); + ASSERT (vlib_get_thread_index () <= transport_cl_thread ()); + app_wrk = app_worker_get_if_valid (ho->app_wrk_index); + if (app_wrk) + app_worker_del_half_open (app_wrk, ho); + else + session_free (ho); } static void @@ -336,16 +367,26 @@ session_half_open_free_rpc (void *args) void session_half_open_delete_notify (transport_connection_t *tc) { + session_t *ho = ho_session_get (tc->s_index); + + /* Cleanup half-open lookup table if need 
be */ + if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED) + { + if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP)) + session_lookup_del_half_open (tc); + } + session_set_state (ho, SESSION_STATE_TRANSPORT_DELETED); + /* Notification from ctrl thread accepted without rpc */ - if (!tc->thread_index) + if (tc->thread_index == transport_cl_thread ()) { - session_half_open_free (ho_session_get (tc->s_index)); + session_half_open_free (ho); } else { void *args = uword_to_pointer ((uword) tc->s_index, void *); - session_send_rpc_evt_to_thread_force (0, session_half_open_free_rpc, - args); + session_send_rpc_evt_to_thread_force (transport_cl_thread (), + session_half_open_free_rpc, args); } } @@ -354,6 +395,9 @@ session_half_open_migrate_notify (transport_connection_t *tc) { session_t *ho; + /* Support half-open migrations only for transports with no lookup */ + ASSERT (tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP); + ho = ho_session_get (tc->s_index); ho->flags |= SESSION_F_IS_MIGRATING; ho->connection_index = ~0; @@ -373,8 +417,8 @@ session_half_open_migrated_notify (transport_connection_t *tc) return -1; } ho->connection_index = tc->c_index; - /* Overload app index for half-open with new thread */ - ho->app_index = tc->thread_index; + /* Overload al_index for half-open with new thread */ + ho->al_index = tc->thread_index; return 0; } @@ -389,7 +433,7 @@ session_alloc_for_connection (transport_connection_t * tc) s = session_alloc (thread_index); s->session_type = session_type_from_proto_and_ip (tc->proto, tc->is_ip4); - s->session_state = SESSION_STATE_CLOSED; + session_set_state (s, SESSION_STATE_CLOSED); /* Attach transport to session and vice versa */ s->connection_index = tc->c_index; @@ -536,10 +580,162 @@ session_fifo_tuning (session_t * s, svm_fifo_t * f, } } +void +session_wrk_program_app_wrk_evts (session_worker_t *wrk, u32 app_wrk_index) +{ + u8 need_interrupt; + + ASSERT ((wrk - session_main.wrk) == vlib_get_thread_index ()); + need_interrupt = 
clib_bitmap_is_zero (wrk->app_wrks_pending_ntf); + wrk->app_wrks_pending_ntf = + clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk_index, 1); + + if (need_interrupt) + vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index); +} + +always_inline void +session_program_io_event (app_worker_t *app_wrk, session_t *s, + session_evt_type_t et, u8 is_cl) +{ + if (is_cl) + { + /* Special events for connectionless sessions */ + et += SESSION_IO_EVT_BUILTIN_RX - SESSION_IO_EVT_RX; + + ASSERT (s->thread_index == 0 || et == SESSION_IO_EVT_TX_MAIN); + session_event_t evt = { + .event_type = et, + .session_handle = session_handle (s), + }; + + app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt); + } + else + { + app_worker_add_event (app_wrk, s, et); + } +} + +static inline int +session_notify_subscribers (u32 app_index, session_t *s, svm_fifo_t *f, + session_evt_type_t evt_type) +{ + app_worker_t *app_wrk; + application_t *app; + u8 is_cl; + int i; + + app = application_get (app_index); + if (!app) + return -1; + + is_cl = s->thread_index != vlib_get_thread_index (); + for (i = 0; i < f->shr->n_subscribers; i++) + { + app_wrk = application_get_worker (app, f->shr->subscribers[i]); + if (!app_wrk) + continue; + session_program_io_event (app_wrk, s, evt_type, is_cl ? 
1 : 0); + } + + return 0; +} + +always_inline int +session_enqueue_notify_inline (session_t *s, u8 is_cl) +{ + app_worker_t *app_wrk; + + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + if (PREDICT_FALSE (!app_wrk)) + return -1; + + session_program_io_event (app_wrk, s, SESSION_IO_EVT_RX, is_cl); + + if (PREDICT_FALSE (svm_fifo_n_subscribers (s->rx_fifo))) + return session_notify_subscribers (app_wrk->app_index, s, s->rx_fifo, + SESSION_IO_EVT_RX); + + return 0; +} + +int +session_enqueue_notify (session_t *s) +{ + return session_enqueue_notify_inline (s, 0 /* is_cl */); +} + +int +session_enqueue_notify_cl (session_t *s) +{ + return session_enqueue_notify_inline (s, 1 /* is_cl */); +} + +int +session_dequeue_notify (session_t *s) +{ + app_worker_t *app_wrk; + u8 is_cl; + + /* Unset as soon as event is requested */ + svm_fifo_clear_deq_ntf (s->tx_fifo); + + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + if (PREDICT_FALSE (!app_wrk)) + return -1; + + is_cl = s->session_state == SESSION_STATE_LISTENING || + s->session_state == SESSION_STATE_OPENED; + session_program_io_event (app_wrk, s, SESSION_IO_EVT_TX, is_cl ? 1 : 0); + + if (PREDICT_FALSE (svm_fifo_n_subscribers (s->tx_fifo))) + return session_notify_subscribers (app_wrk->app_index, s, s->tx_fifo, + SESSION_IO_EVT_TX); + + return 0; +} + +/** + * Flushes queue of sessions that are to be notified of new data + * enqueued events. + * + * @param transport_proto transport protocol for which queue to be flushed + * @param thread_index Thread index for which the flush is to be performed. + * @return 0 on success or a positive number indicating the number of + * failures due to API queue being full. 
+ */ +void +session_main_flush_enqueue_events (transport_proto_t transport_proto, + u32 thread_index) +{ + session_worker_t *wrk = session_main_get_worker (thread_index); + session_handle_t *handles; + session_t *s; + u32 i, is_cl; + + handles = wrk->session_to_enqueue[transport_proto]; + + for (i = 0; i < vec_len (handles); i++) + { + s = session_get_from_handle (handles[i]); + session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, + 0 /* TODO/not needed */); + is_cl = + s->thread_index != thread_index || (s->flags & SESSION_F_IS_CLESS); + if (!is_cl) + session_enqueue_notify_inline (s, 0); + else + session_enqueue_notify_inline (s, 1); + } + + vec_reset_length (handles); + wrk->session_to_enqueue[transport_proto] = handles; +} + /* - * Enqueue data for delivery to session peer. Does not notify peer of enqueue - * event but on request can queue notification events for later delivery by - * calling stream_server_flush_enqueue_events(). + * Enqueue data for delivery to app. If requested, it queues app notification + * event for later delivery. * * @param tc Transport connection which is to be enqueued data * @param b Buffer to be enqueued @@ -588,15 +784,14 @@ session_enqueue_stream_connection (transport_connection_t * tc, if (queue_event) { - /* Queue RX event on this fifo. Eventually these will need to be flushed - * by calling stream_server_flush_enqueue_events () */ - session_worker_t *wrk; - - wrk = session_main_get_worker (s->thread_index); + /* Queue RX event on this fifo. 
Eventually these will need to be + * flushed by calling @ref session_main_flush_enqueue_events () */ if (!(s->flags & SESSION_F_RX_EVT)) { + session_worker_t *wrk = session_main_get_worker (s->thread_index); + ASSERT (s->thread_index == vlib_get_thread_index ()); s->flags |= SESSION_F_RX_EVT; - vec_add1 (wrk->session_to_enqueue[tc->proto], s->session_index); + vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s)); } session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); @@ -605,10 +800,11 @@ session_enqueue_stream_connection (transport_connection_t * tc, return enqueued; } -int -session_enqueue_dgram_connection (session_t * s, - session_dgram_hdr_t * hdr, - vlib_buffer_t * b, u8 proto, u8 queue_event) +always_inline int +session_enqueue_dgram_connection_inline (session_t *s, + session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event, u32 is_cl) { int rv; @@ -617,12 +813,10 @@ session_enqueue_dgram_connection (session_t * s, if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) { - /* *INDENT-OFF* */ svm_fifo_seg_t segs[2] = { { (u8 *) hdr, sizeof (*hdr) }, { vlib_buffer_get_current (b), b->current_length } }; - /* *INDENT-ON* */ rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, 0 /* allow_partial */ ); @@ -654,15 +848,16 @@ session_enqueue_dgram_connection (session_t * s, if (queue_event && rv > 0) { - /* Queue RX event on this fifo. Eventually these will need to be flushed - * by calling stream_server_flush_enqueue_events () */ - session_worker_t *wrk; - - wrk = session_main_get_worker (s->thread_index); + /* Queue RX event on this fifo. Eventually these will need to be + * flushed by calling @ref session_main_flush_enqueue_events () */ if (!(s->flags & SESSION_F_RX_EVT)) { + u32 thread_index = + is_cl ? 
vlib_get_thread_index () : s->thread_index; + session_worker_t *wrk = session_main_get_worker (thread_index); + ASSERT (s->thread_index == vlib_get_thread_index () || is_cl); s->flags |= SESSION_F_RX_EVT; - vec_add1 (wrk->session_to_enqueue[proto], s->session_index); + vec_add1 (wrk->session_to_enqueue[proto], session_handle (s)); } session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); @@ -671,6 +866,34 @@ session_enqueue_dgram_connection (session_t * s, } int +session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, u8 queue_event) +{ + return session_enqueue_dgram_connection_inline (s, hdr, b, proto, + queue_event, 0 /* is_cl */); +} + +int +session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, u8 queue_event) +{ + return session_enqueue_dgram_connection_inline (s, hdr, b, proto, + queue_event, 1 /* is_cl */); +} + +int +session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event) +{ + session_t *awls; + + awls = app_listener_select_wrk_cl_session (s, hdr); + return session_enqueue_dgram_connection_inline (awls, hdr, b, proto, + queue_event, 1 /* is_cl */); +} + +int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes) { @@ -693,187 +916,6 @@ session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes) return rv; } -static inline int -session_notify_subscribers (u32 app_index, session_t * s, - svm_fifo_t * f, session_evt_type_t evt_type) -{ - app_worker_t *app_wrk; - application_t *app; - int i; - - app = application_get (app_index); - if (!app) - return -1; - - for (i = 0; i < f->shr->n_subscribers; i++) - { - app_wrk = application_get_worker (app, f->shr->subscribers[i]); - if (!app_wrk) - continue; - if (app_worker_lock_and_send_event (app_wrk, s, evt_type)) - return -1; - } - - return 0; -} - -/** - * Notify session peer that new 
data has been enqueued. - * - * @param s Stream session for which the event is to be generated. - * @param lock Flag to indicate if call should lock message queue. - * - * @return 0 on success or negative number if failed to send notification. - */ -static inline int -session_enqueue_notify_inline (session_t * s) -{ - app_worker_t *app_wrk; - u32 session_index; - u8 n_subscribers; - - session_index = s->session_index; - n_subscribers = svm_fifo_n_subscribers (s->rx_fifo); - - app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (PREDICT_FALSE (!app_wrk)) - { - SESSION_DBG ("invalid s->app_index = %d", s->app_wrk_index); - return 0; - } - - SESSION_EVT (SESSION_EVT_ENQ, s, svm_fifo_max_dequeue_prod (s->rx_fifo)); - - s->flags &= ~SESSION_F_RX_EVT; - - /* Application didn't confirm accept yet */ - if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING)) - return 0; - - if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s, - SESSION_IO_EVT_RX))) - return -1; - - if (PREDICT_FALSE (n_subscribers)) - { - s = session_get (session_index, vlib_get_thread_index ()); - return session_notify_subscribers (app_wrk->app_index, s, - s->rx_fifo, SESSION_IO_EVT_RX); - } - - return 0; -} - -int -session_enqueue_notify (session_t * s) -{ - return session_enqueue_notify_inline (s); -} - -static void -session_enqueue_notify_rpc (void *arg) -{ - u32 session_index = pointer_to_uword (arg); - session_t *s; - - s = session_get_if_valid (session_index, vlib_get_thread_index ()); - if (!s) - return; - - session_enqueue_notify (s); -} - -/** - * Like session_enqueue_notify, but can be called from a thread that does not - * own the session. - */ -void -session_enqueue_notify_thread (session_handle_t sh) -{ - u32 thread_index = session_thread_from_handle (sh); - u32 session_index = session_index_from_handle (sh); - - /* - * Pass session index (u32) as opposed to handle (u64) in case pointers - * are not 64-bit. 
- */ - session_send_rpc_evt_to_thread (thread_index, - session_enqueue_notify_rpc, - uword_to_pointer (session_index, void *)); -} - -int -session_dequeue_notify (session_t * s) -{ - app_worker_t *app_wrk; - - svm_fifo_clear_deq_ntf (s->tx_fifo); - - app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (PREDICT_FALSE (!app_wrk)) - return -1; - - if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s, - SESSION_IO_EVT_TX))) - return -1; - - if (PREDICT_FALSE (s->tx_fifo->shr->n_subscribers)) - return session_notify_subscribers (app_wrk->app_index, s, - s->tx_fifo, SESSION_IO_EVT_TX); - - return 0; -} - -/** - * Flushes queue of sessions that are to be notified of new data - * enqueued events. - * - * @param thread_index Thread index for which the flush is to be performed. - * @return 0 on success or a positive number indicating the number of - * failures due to API queue being full. - */ -int -session_main_flush_enqueue_events (u8 transport_proto, u32 thread_index) -{ - session_worker_t *wrk = session_main_get_worker (thread_index); - session_t *s; - int i, errors = 0; - u32 *indices; - - indices = wrk->session_to_enqueue[transport_proto]; - - for (i = 0; i < vec_len (indices); i++) - { - s = session_get_if_valid (indices[i], thread_index); - if (PREDICT_FALSE (!s)) - { - errors++; - continue; - } - - session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, - 0 /* TODO/not needed */ ); - - if (PREDICT_FALSE (session_enqueue_notify_inline (s))) - errors++; - } - - vec_reset_length (indices); - wrk->session_to_enqueue[transport_proto] = indices; - - return errors; -} - -int -session_main_flush_all_enqueue_events (u8 transport_proto) -{ - vlib_thread_main_t *vtm = vlib_get_thread_main (); - int i, errors = 0; - for (i = 0; i < 1 + vtm->n_threads; i++) - errors += session_main_flush_enqueue_events (transport_proto, i); - return errors; -} - int session_stream_connect_notify (transport_connection_t * tc, session_error_t err) @@ -888,6 +930,7 @@ 
session_stream_connect_notify (transport_connection_t * tc, session_lookup_del_half_open (tc); ho = ho_session_get (tc->s_index); + session_set_state (ho, SESSION_STATE_TRANSPORT_CLOSED); opaque = ho->opaque; app_wrk = app_worker_get_if_valid (ho->app_wrk_index); if (!app_wrk) @@ -897,8 +940,9 @@ session_stream_connect_notify (transport_connection_t * tc, return app_worker_connect_notify (app_wrk, s, err, opaque); s = session_alloc_for_connection (tc); - s->session_state = SESSION_STATE_CONNECTING; + session_set_state (s, SESSION_STATE_CONNECTING); s->app_wrk_index = app_wrk->wrk_index; + s->opaque = opaque; new_si = s->session_index; new_ti = s->thread_index; @@ -910,7 +954,7 @@ session_stream_connect_notify (transport_connection_t * tc, } s = session_get (new_si, new_ti); - s->session_state = SESSION_STATE_READY; + session_set_state (s, SESSION_STATE_READY); session_lookup_add_connection (tc, session_handle (s)); if (app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, opaque)) @@ -926,43 +970,20 @@ session_stream_connect_notify (transport_connection_t * tc, return 0; } -typedef union session_switch_pool_reply_args_ -{ - struct - { - u32 session_index; - u16 thread_index; - u8 is_closed; - }; - u64 as_u64; -} session_switch_pool_reply_args_t; - -STATIC_ASSERT (sizeof (session_switch_pool_reply_args_t) <= sizeof (uword), - "switch pool reply args size"); - static void -session_switch_pool_reply (void *arg) +session_switch_pool_closed_rpc (void *arg) { - session_switch_pool_reply_args_t rargs; + session_handle_t sh; session_t *s; - rargs.as_u64 = pointer_to_uword (arg); - s = session_get_if_valid (rargs.session_index, rargs.thread_index); + sh = pointer_to_uword (arg); + s = session_get_from_handle_if_valid (sh); if (!s) return; - /* Session closed during migration. 
Clean everything up */ - if (rargs.is_closed) - { - transport_cleanup (session_get_transport_proto (s), s->connection_index, - s->thread_index); - segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); - session_free (s); - return; - } - - /* Notify app that it has data on the new session */ - session_enqueue_notify (s); + transport_cleanup (session_get_transport_proto (s), s->connection_index, + s->thread_index); + session_cleanup (s); } typedef struct _session_switch_pool_args @@ -980,8 +1001,7 @@ static void session_switch_pool (void *cb_args) { session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args; - session_switch_pool_reply_args_t rargs; - session_handle_t new_sh; + session_handle_t sh, new_sh; segment_manager_t *sm; app_worker_t *app_wrk; session_t *s; @@ -989,37 +1009,32 @@ session_switch_pool (void *cb_args) ASSERT (args->thread_index == vlib_get_thread_index ()); s = session_get (args->session_index, args->thread_index); - /* Check if session closed during migration */ - rargs.is_closed = s->session_state >= SESSION_STATE_TRANSPORT_CLOSING; + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + if (!app_wrk) + goto app_closed; - transport_cleanup (session_get_transport_proto (s), s->connection_index, - s->thread_index); + /* Cleanup fifo segment slice state for fifos */ + sm = app_worker_get_connect_segment_manager (app_wrk); + segment_manager_detach_fifo (sm, &s->rx_fifo); + segment_manager_detach_fifo (sm, &s->tx_fifo); - app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (app_wrk) - { - /* Cleanup fifo segment slice state for fifos */ - sm = app_worker_get_connect_segment_manager (app_wrk); - segment_manager_detach_fifo (sm, &s->rx_fifo); - segment_manager_detach_fifo (sm, &s->tx_fifo); + /* Check if session closed during migration */ + if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) + goto app_closed; - /* Notify app, using old session, about the migration event */ - if (!rargs.is_closed) - { - new_sh = 
session_make_handle (args->new_session_index, - args->new_thread_index); - app_worker_migrate_notify (app_wrk, s, new_sh); - } - } + new_sh = + session_make_handle (args->new_session_index, args->new_thread_index); + app_worker_migrate_notify (app_wrk, s, new_sh); - /* Trigger app read and fifo updates on the new thread */ - rargs.session_index = args->new_session_index; - rargs.thread_index = args->new_thread_index; - session_send_rpc_evt_to_thread (args->new_thread_index, - session_switch_pool_reply, - uword_to_pointer (rargs.as_u64, void *)); + clib_mem_free (cb_args); + return; - session_free (s); +app_closed: + /* Session closed during migration. Clean everything up */ + sh = session_handle (s); + session_send_rpc_evt_to_thread (args->new_thread_index, + session_switch_pool_closed_rpc, + uword_to_pointer (sh, void *)); clib_mem_free (cb_args); } @@ -1040,7 +1055,7 @@ session_dgram_connect_notify (transport_connection_t * tc, */ new_s = session_clone_safe (tc->s_index, old_thread_index); new_s->connection_index = tc->c_index; - new_s->session_state = SESSION_STATE_READY; + session_set_state (new_s, SESSION_STATE_READY); new_s->flags |= SESSION_F_IS_MIGRATING; if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP)) @@ -1094,11 +1109,11 @@ session_transport_closing_notify (transport_connection_t * tc) * accept might be rejected */ if (s->session_state == SESSION_STATE_ACCEPTING) { - s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING); return; } - s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING); app_wrk = app_worker_get (s->app_wrk_index); app_worker_close_notify (app_wrk, s); } @@ -1139,7 +1154,7 @@ session_transport_delete_notify (transport_connection_t * tc) * because transport will soon be closed and closed sessions * are assumed to have been removed from the lookup table */ session_lookup_del_session (s); - s->session_state = 
SESSION_STATE_TRANSPORT_DELETED; + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT); svm_fifo_dequeue_drop_all (s->tx_fifo); break; @@ -1150,7 +1165,7 @@ session_transport_delete_notify (transport_connection_t * tc) * session is just removed because both transport and app have * confirmed the close*/ session_lookup_del_session (s); - s->session_state = SESSION_STATE_TRANSPORT_DELETED; + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT); svm_fifo_dequeue_drop_all (s->tx_fifo); session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE); @@ -1159,6 +1174,7 @@ session_transport_delete_notify (transport_connection_t * tc) break; case SESSION_STATE_CLOSED: session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT); + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); session_delete (s); break; default: @@ -1186,6 +1202,9 @@ session_transport_closed_notify (transport_connection_t * tc) if (!(s = session_get_if_valid (tc->s_index, tc->thread_index))) return; + if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) + return; + /* Transport thinks that app requested close but it actually didn't. * Can happen for tcp: * 1)if fin and rst are received in close succession. 
@@ -1194,17 +1213,15 @@ session_transport_closed_notify (transport_connection_t * tc) { session_transport_closing_notify (tc); svm_fifo_dequeue_drop_all (s->tx_fifo); - s->session_state = SESSION_STATE_TRANSPORT_CLOSED; + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED); } /* If app close has not been received or has not yet resulted in * a transport close, only mark the session transport as closed */ else if (s->session_state <= SESSION_STATE_CLOSING) - { - s->session_state = SESSION_STATE_TRANSPORT_CLOSED; - } + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED); /* If app also closed, switch to closed */ else if (s->session_state == SESSION_STATE_APP_CLOSED) - s->session_state = SESSION_STATE_CLOSED; + session_set_state (s, SESSION_STATE_CLOSED); app_wrk = app_worker_get_if_valid (s->app_wrk_index); if (app_wrk) @@ -1226,10 +1243,10 @@ session_transport_reset_notify (transport_connection_t * tc) return; if (s->session_state == SESSION_STATE_ACCEPTING) { - s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING); return; } - s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING); app_wrk = app_worker_get (s->app_wrk_index); app_worker_reset_notify (app_wrk, s); } @@ -1246,12 +1263,12 @@ session_stream_accept_notify (transport_connection_t * tc) return -1; if (s->session_state != SESSION_STATE_CREATED) return 0; - s->session_state = SESSION_STATE_ACCEPTING; + session_set_state (s, SESSION_STATE_ACCEPTING); if (app_worker_accept_notify (app_wrk, s)) { /* On transport delete, no notifications should be sent. Unless, the * accept is retried and successful. 
*/ - s->session_state = SESSION_STATE_CREATED; + session_set_state (s, SESSION_STATE_CREATED); return -1; } return 0; @@ -1269,7 +1286,7 @@ session_stream_accept (transport_connection_t * tc, u32 listener_index, s = session_alloc_for_connection (tc); s->listener_handle = ((u64) thread_index << 32) | (u64) listener_index; - s->session_state = SESSION_STATE_CREATED; + session_set_state (s, SESSION_STATE_CREATED); if ((rv = app_worker_init_accepted (s))) { @@ -1313,7 +1330,7 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index, } session_lookup_add_connection (tc, session_handle (s)); - s->session_state = SESSION_STATE_ACCEPTING; + session_set_state (s, SESSION_STATE_ACCEPTING); app_wrk = app_worker_get (s->app_wrk_index); if ((rv = app_worker_accept_notify (app_wrk, s))) @@ -1351,7 +1368,10 @@ session_open_cl (session_endpoint_cfg_t *rmt, session_handle_t *rsh) app_wrk = app_worker_get (rmt->app_wrk_index); s = session_alloc_for_connection (tc); s->app_wrk_index = app_wrk->wrk_index; - s->session_state = SESSION_STATE_OPENED; + s->opaque = rmt->opaque; + session_set_state (s, SESSION_STATE_OPENED); + if (transport_connection_is_cless (tc)) + s->flags |= SESSION_F_IS_CLESS; if (app_worker_init_connected (app_wrk, s)) { session_free (s); @@ -1419,13 +1439,11 @@ session_open_app (session_endpoint_cfg_t *rmt, session_handle_t *rsh) typedef int (*session_open_service_fn) (session_endpoint_cfg_t *, session_handle_t *); -/* *INDENT-OFF* */ static session_open_service_fn session_open_srv_fns[TRANSPORT_N_SERVICES] = { session_open_vc, session_open_cl, session_open_app, }; -/* *INDENT-ON* */ /** * Ask transport to open connection to remote transport endpoint. 
@@ -1476,6 +1494,9 @@ session_listen (session_t * ls, session_endpoint_cfg_t * sep) * worker because local tables (for ct sessions) are not backed by a fib */ ls = listen_session_get (s_index); ls->connection_index = tc_index; + ls->opaque = sep->opaque; + if (transport_connection_is_cless (session_get_transport (ls))) + ls->flags |= SESSION_F_IS_CLESS; return 0; } @@ -1530,9 +1551,15 @@ session_half_close (session_t *s) void session_close (session_t * s) { - if (!s) + if (!s || (s->flags & SESSION_F_APP_CLOSED)) return; + /* Transports can close and delete their state independent of app closes + * and transport initiated state transitions can hide app closes. Instead + * of extending the state machine to support separate tracking of app and + * transport initiated closes, use a flag. */ + s->flags |= SESSION_F_APP_CLOSED; + if (s->session_state >= SESSION_STATE_CLOSING) { /* Session will only be removed once both app and transport @@ -1543,9 +1570,12 @@ session_close (session_t * s) return; } - /* App closed so stop propagating dequeue notifications */ - svm_fifo_clear_deq_ntf (s->tx_fifo); - s->session_state = SESSION_STATE_CLOSING; + /* App closed so stop propagating dequeue notifications. + * App might disconnect session before connected, in this case, + * tx_fifo may not be setup yet, so clear only it's inited. */ + if (s->tx_fifo) + svm_fifo_clear_deq_ntf (s->tx_fifo); + session_set_state (s, SESSION_STATE_CLOSING); session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE); } @@ -1557,12 +1587,46 @@ session_reset (session_t * s) { if (s->session_state >= SESSION_STATE_CLOSING) return; - /* Drop all outstanding tx data */ - svm_fifo_dequeue_drop_all (s->tx_fifo); - s->session_state = SESSION_STATE_CLOSING; + /* Drop all outstanding tx data + * App might disconnect session before connected, in this case, + * tx_fifo may not be setup yet, so clear only it's inited. 
*/ + if (s->tx_fifo) + svm_fifo_dequeue_drop_all (s->tx_fifo); + session_set_state (s, SESSION_STATE_CLOSING); session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_RESET); } +void +session_detach_app (session_t *s) +{ + if (s->session_state < SESSION_STATE_TRANSPORT_CLOSING) + { + session_close (s); + } + else if (s->session_state < SESSION_STATE_TRANSPORT_DELETED) + { + transport_connection_t *tc; + + /* Transport is closing but it's not yet deleted. Confirm close and + * subsequently detach transport from session and enqueue a session + * cleanup notification. Transport closed and cleanup notifications are + * going to be dropped by session layer apis */ + transport_close (session_get_transport_proto (s), s->connection_index, + s->thread_index); + tc = session_get_transport (s); + tc->s_index = SESSION_INVALID_INDEX; + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); + session_cleanup_notify (s, SESSION_CLEANUP_SESSION); + } + else + { + session_cleanup_notify (s, SESSION_CLEANUP_SESSION); + } + + s->flags |= SESSION_F_APP_CLOSED; + s->app_wrk_index = APP_INVALID_INDEX; +} + /** * Notify transport the session can be half-disconnected. * @@ -1594,10 +1658,10 @@ session_transport_close (session_t * s) if (s->session_state >= SESSION_STATE_APP_CLOSED) { if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED) - s->session_state = SESSION_STATE_CLOSED; + session_set_state (s, SESSION_STATE_CLOSED); /* If transport is already deleted, just free the session */ else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED) - session_free_w_fifos (s); + session_program_cleanup (s); return; } @@ -1607,7 +1671,7 @@ session_transport_close (session_t * s) * delete notify. This will finally lead to the complete cleanup of the * session. 
*/ - s->session_state = SESSION_STATE_APP_CLOSED; + session_set_state (s, SESSION_STATE_APP_CLOSED); transport_close (session_get_transport_proto (s), s->connection_index, s->thread_index); @@ -1622,13 +1686,13 @@ session_transport_reset (session_t * s) if (s->session_state >= SESSION_STATE_APP_CLOSED) { if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED) - s->session_state = SESSION_STATE_CLOSED; + session_set_state (s, SESSION_STATE_CLOSED); else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED) - session_free_w_fifos (s); + session_program_cleanup (s); return; } - s->session_state = SESSION_STATE_APP_CLOSED; + session_set_state (s, SESSION_STATE_APP_CLOSED); transport_reset (session_get_transport_proto (s), s->connection_index, s->thread_index); } @@ -1727,14 +1791,28 @@ session_segment_handle (session_t * s) f->segment_index); } -/* *INDENT-OFF* */ +void +session_get_original_dst (transport_endpoint_t *i2o_src, + transport_endpoint_t *i2o_dst, + transport_proto_t transport_proto, u32 *original_dst, + u16 *original_dst_port) +{ + session_main_t *smm = vnet_get_session_main (); + ip_protocol_t proto = + (transport_proto == TRANSPORT_PROTO_TCP ? 
IPPROTO_TCP : IPPROTO_UDP); + if (!smm->original_dst_lookup || !i2o_dst->is_ip4) + return; + smm->original_dst_lookup (&i2o_src->ip.ip4, i2o_src->port, &i2o_dst->ip.ip4, + i2o_dst->port, proto, original_dst, + original_dst_port); +} + static session_fifo_rx_fn *session_tx_fns[TRANSPORT_TX_N_FNS] = { session_tx_fifo_peek_and_snd, session_tx_fifo_dequeue_and_snd, session_tx_fifo_dequeue_internal, session_tx_fifo_dequeue_and_snd }; -/* *INDENT-ON* */ void session_register_transport (transport_proto_t transport_proto, @@ -2013,6 +2091,7 @@ session_dma_completion_cb (vlib_main_t *vm, struct vlib_dma_batch *batch) static void session_prepare_dma_args (vlib_dma_config_t *args) { + args->max_batches = 16; args->max_transfers = DMA_TRANS_SIZE; args->max_transfer_size = 65536; args->features = 0; @@ -2104,6 +2183,7 @@ session_node_enable_disable (u8 is_en) if (!sm->poll_main) continue; } + vlib_node_set_state (vm, session_input_node.index, mstate); vlib_node_set_state (vm, session_queue_node.index, state); } @@ -2147,6 +2227,8 @@ session_main_init (vlib_main_t * vm) smm->use_private_rx_mqs = 0; smm->no_adaptive = 0; smm->last_transport_proto_type = TRANSPORT_PROTO_HTTP; + smm->port_allocator_min_src_port = 1024; + smm->port_allocator_max_src_port = 65535; return 0; } @@ -2244,6 +2326,10 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "local-endpoints-table-buckets %d", &smm->local_endpoints_table_buckets)) ; + else if (unformat (input, "min-src-port %d", &tmp)) + smm->port_allocator_min_src_port = tmp; + else if (unformat (input, "max-src-port %d", &tmp)) + smm->port_allocator_max_src_port = tmp; else if (unformat (input, "enable")) smm->session_enable_asap = 1; else if (unformat (input, "use-app-socket-api")) @@ -2256,6 +2342,11 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) smm->no_adaptive = 1; else if (unformat (input, "use-dma")) smm->dma_enabled = 1; + else if (unformat (input, "nat44-original-dst-enable")) 
+ { + smm->original_dst_lookup = vlib_get_plugin_symbol ( + "nat_plugin.so", "nat44_original_dst_lookup"); + } /* * Deprecated but maintained for compatibility */ diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index a68e51239bd..a5604bf8725 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -23,22 +23,10 @@ #include <svm/fifo_segment.h> #include <vlib/dma/dma.h> -#define foreach_session_input_error \ -_(NO_SESSION, "No session drops") \ -_(NO_LISTENER, "No listener for dst port drops") \ -_(ENQUEUED, "Packets pushed into rx fifo") \ -_(NOT_READY, "Session not ready packets") \ -_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \ -_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \ -_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \ - -typedef enum -{ -#define _(sym,str) SESSION_ERROR_##sym, - foreach_session_input_error -#undef _ - SESSION_N_ERROR, -} session_input_error_t; +typedef struct session_wrk_stats_ +{ + u32 errors[SESSION_N_ERRORS]; +} session_wrk_stats_t; typedef struct session_tx_context_ { @@ -112,8 +100,8 @@ typedef struct session_worker_ /** Convenience pointer to this worker's vlib_main */ vlib_main_t *vm; - /** Per-proto vector of sessions to enqueue */ - u32 **session_to_enqueue; + /** Per-proto vector of session handles to enqueue */ + session_handle_t **session_to_enqueue; /** Timerfd used to periodically signal wrk session queue node */ int timerfd; @@ -157,12 +145,12 @@ typedef struct session_worker_ /** Flag that is set if main thread signaled to handle connects */ u32 n_pending_connects; - /** Main thread loops in poll mode without a connect */ - u32 no_connect_loops; - /** List head for first worker evts pending handling on main */ clib_llist_index_t evts_pending_main; + /** Per-app-worker bitmap of pending notifications */ + uword *app_wrks_pending_ntf; + int config_index; u8 dma_enabled; session_dma_transfer *dma_trans; @@ -172,6 +160,8 @@ typedef 
struct session_worker_ u16 batch_num; vlib_dma_batch_t *batch; + session_wrk_stats_t stats; + #if SESSION_DEBUG /** last event poll time by thread */ clib_time_type_t last_event_poll; @@ -189,6 +179,10 @@ extern session_fifo_rx_fn session_tx_fifo_dequeue_internal; u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e); typedef void (*session_update_time_fn) (f64 time_now, u8 thread_index); +typedef void (*nat44_original_dst_lookup_fn) ( + ip4_address_t *i2o_src, u16 i2o_src_port, ip4_address_t *i2o_dst, + u16 i2o_dst_port, ip_protocol_t proto, u32 *original_dst, + u16 *original_dst_port); typedef struct session_main_ { @@ -212,7 +206,9 @@ typedef struct session_main_ * Trade memory for speed, for now */ u32 *session_type_to_next; - /** Thread for cl and ho that rely on cl allocs */ + /** Thread used for allocating active open connections, i.e., half-opens + * for transports like tcp, and sessions that will be migrated for cl + * transports like udp. If vpp has workers, this will be first worker. */ u32 transport_cl_thread; transport_proto_t last_transport_proto_type; @@ -271,14 +267,22 @@ typedef struct session_main_ u32 local_endpoints_table_memory; u32 local_endpoints_table_buckets; + /** Transport source port allocation range */ + u16 port_allocator_min_src_port; + u16 port_allocator_max_src_port; + /** Preallocate session config parameter */ u32 preallocated_sessions; u16 msg_id_base; + + /** Query nat44-ed session to get original dst ip4 & dst port. 
*/ + nat44_original_dst_lookup_fn original_dst_lookup; } session_main_t; extern session_main_t session_main; extern vlib_node_registration_t session_queue_node; +extern vlib_node_registration_t session_input_node; extern vlib_node_registration_t session_queue_process_node; extern vlib_node_registration_t session_queue_pre_input_node; @@ -334,7 +338,7 @@ session_evt_ctrl_data (session_worker_t * wrk, session_evt_elt_t * elt) static inline void session_evt_ctrl_data_free (session_worker_t * wrk, session_evt_elt_t * elt) { - ASSERT (elt->evt.event_type > SESSION_IO_EVT_BUILTIN_TX); + ASSERT (elt->evt.event_type >= SESSION_CTRL_EVT_RPC); pool_put_index (wrk->ctrl_evts_data, elt->evt.ctrl_data_index); } @@ -362,7 +366,8 @@ int session_wrk_handle_mq (session_worker_t *wrk, svm_msg_q_t *mq); session_t *session_alloc (u32 thread_index); void session_free (session_t * s); -void session_free_w_fifos (session_t * s); +void session_cleanup (session_t *s); +void session_program_cleanup (session_t *s); void session_cleanup_half_open (session_handle_t ho_handle); u8 session_is_valid (u32 si, u8 thread_index); @@ -387,44 +392,37 @@ session_get_if_valid (u64 si, u32 thread_index) } always_inline session_t * -session_get_from_handle (session_handle_t handle) +session_get_from_handle (session_handle_tu_t handle) { session_main_t *smm = &session_main; - u32 session_index, thread_index; - session_parse_handle (handle, &session_index, &thread_index); - return pool_elt_at_index (smm->wrk[thread_index].sessions, session_index); + return pool_elt_at_index (smm->wrk[handle.thread_index].sessions, + handle.session_index); } always_inline session_t * -session_get_from_handle_if_valid (session_handle_t handle) +session_get_from_handle_if_valid (session_handle_tu_t handle) { - u32 session_index, thread_index; - session_parse_handle (handle, &session_index, &thread_index); - return session_get_if_valid (session_index, thread_index); + return session_get_if_valid (handle.session_index, 
handle.thread_index); } -u64 session_segment_handle (session_t * s); - /** * Get session from handle and avoid pool validation if no same thread * * Peekers are fine because pool grows with barrier (see @ref session_alloc) */ always_inline session_t * -session_get_from_handle_safe (u64 handle) +session_get_from_handle_safe (session_handle_tu_t handle) { - u32 thread_index = session_thread_from_handle (handle); - session_worker_t *wrk = &session_main.wrk[thread_index]; + session_worker_t *wrk = &session_main.wrk[handle.thread_index]; - if (thread_index == vlib_get_thread_index ()) + if (handle.thread_index == vlib_get_thread_index ()) { - return pool_elt_at_index (wrk->sessions, - session_index_from_handle (handle)); + return pool_elt_at_index (wrk->sessions, handle.session_index); } else { /* Don't use pool_elt_at index to avoid pool bitmap reallocs */ - return wrk->sessions + session_index_from_handle (handle); + return wrk->sessions + handle.session_index; } } @@ -450,16 +448,19 @@ int session_stop_listen (session_t * s); void session_half_close (session_t *s); void session_close (session_t * s); void session_reset (session_t * s); +void session_detach_app (session_t *s); void session_transport_half_close (session_t *s); void session_transport_close (session_t * s); void session_transport_reset (session_t * s); void session_transport_cleanup (session_t * s); -int session_send_io_evt_to_thread (svm_fifo_t * f, - session_evt_type_t evt_type); -int session_enqueue_notify (session_t * s); +int session_enqueue_notify (session_t *s); int session_dequeue_notify (session_t * s); +int session_enqueue_notify_cl (session_t *s); +int session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type); int session_send_io_evt_to_thread_custom (void *data, u32 thread_index, session_evt_type_t evt_type); +int session_program_tx_io_evt (session_handle_tu_t sh, + session_evt_type_t evt_type); void session_send_rpc_evt_to_thread (u32 thread_index, void *fp, void *rpc_args); 
void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp, @@ -472,6 +473,7 @@ void session_get_endpoint (session_t * s, transport_endpoint_t * tep, u8 is_lcl); int session_transport_attribute (session_t *s, u8 is_get, transport_endpt_attr_t *attr); +u64 session_segment_handle (session_t *s); u8 *format_session (u8 * s, va_list * args); uword unformat_session (unformat_input_t * input, va_list * args); @@ -489,6 +491,13 @@ int session_enqueue_dgram_connection (session_t * s, session_dgram_hdr_t * hdr, vlib_buffer_t * b, u8 proto, u8 queue_event); +int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event); +int session_enqueue_dgram_connection_cl (session_t *s, + session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event); int session_stream_connect_notify (transport_connection_t * tc, session_error_t err); int session_dgram_connect_notify (transport_connection_t * tc, @@ -506,6 +515,7 @@ int session_stream_accept (transport_connection_t * tc, u32 listener_index, u32 thread_index, u8 notify); int session_dgram_accept (transport_connection_t * tc, u32 listener_index, u32 thread_index); + /** * Initialize session layer for given transport proto and ip version * @@ -527,6 +537,13 @@ int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes); u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes); +always_inline void +session_set_state (session_t *s, session_state_t session_state) +{ + s->session_state = session_state; + SESSION_EVT (SESSION_EVT_STATE_CHANGE, s); +} + always_inline u32 transport_max_rx_enqueue (transport_connection_t * tc) { @@ -569,6 +586,19 @@ transport_rx_fifo_has_ooo_data (transport_connection_t * tc) return svm_fifo_has_ooo_data (s->rx_fifo); } +always_inline u32 +transport_tx_fifo_has_dgram (transport_connection_t *tc) +{ + session_t *s = session_get (tc->s_index, tc->thread_index); + u32 
max_deq = svm_fifo_max_dequeue_cons (s->tx_fifo); + session_dgram_pre_hdr_t phdr; + + if (max_deq <= sizeof (session_dgram_hdr_t)) + return 0; + svm_fifo_peek (s->tx_fifo, 0, sizeof (phdr), (u8 *) &phdr); + return max_deq >= phdr.data_length + sizeof (session_dgram_hdr_t); +} + always_inline void transport_rx_fifo_req_deq_ntf (transport_connection_t *tc) { @@ -609,12 +639,19 @@ transport_cl_thread (void) return session_main.transport_cl_thread; } +always_inline u32 +session_vlib_thread_is_cl_thread (void) +{ + return (vlib_get_thread_index () == transport_cl_thread () || + vlib_thread_is_main_w_barrier ()); +} + /* * Listen sessions */ -always_inline u64 -listen_session_get_handle (session_t * s) +always_inline session_handle_t +listen_session_get_handle (session_t *s) { ASSERT (s->session_state == SESSION_STATE_LISTENING || session_get_transport_proto (s) == TRANSPORT_PROTO_QUIC); @@ -661,28 +698,17 @@ always_inline session_t * ho_session_alloc (void) { session_t *s; - ASSERT (vlib_get_thread_index () == 0); - s = session_alloc (0); + ASSERT (session_vlib_thread_is_cl_thread ()); + s = session_alloc (transport_cl_thread ()); s->session_state = SESSION_STATE_CONNECTING; s->flags |= SESSION_F_HALF_OPEN; - /* Not ideal. Half-opens are only allocated from main with worker barrier - * but can be cleaned up, i.e., session_half_open_free, from main without - * a barrier. In debug images, the free_bitmap can grow while workers peek - * the sessions pool, e.g., session_half_open_migrate_notify, and as a - * result crash while validating the session. To avoid this, grow the bitmap - * now. 
*/ - if (CLIB_DEBUG) - { - session_t *sp = session_main.wrk[0].sessions; - clib_bitmap_validate (pool_header (sp)->free_bitmap, s->session_index); - } return s; } always_inline session_t * ho_session_get (u32 ho_index) { - return session_get (ho_index, 0 /* half-open thread */); + return session_get (ho_index, transport_cl_thread ()); } always_inline void @@ -707,7 +733,7 @@ vnet_get_session_main () always_inline session_worker_t * session_main_get_worker (u32 thread_index) { - return &session_main.wrk[thread_index]; + return vec_elt_at_index (session_main.wrk, thread_index); } static inline session_worker_t * @@ -715,13 +741,13 @@ session_main_get_worker_if_valid (u32 thread_index) { if (thread_index > vec_len (session_main.wrk)) return 0; - return &session_main.wrk[thread_index]; + return session_main_get_worker (thread_index); } always_inline svm_msg_q_t * session_main_get_vpp_event_queue (u32 thread_index) { - return session_main.wrk[thread_index].vpp_event_queue; + return session_main_get_worker (thread_index)->vpp_event_queue; } always_inline u8 @@ -730,14 +756,31 @@ session_main_is_enabled () return session_main.is_enabled == 1; } +always_inline void +session_worker_stat_error_inc (session_worker_t *wrk, int error, int value) +{ + if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS)) + wrk->stats.errors[-error] += value; + else + SESSION_DBG ("unknown session counter"); +} + +always_inline void +session_stat_error_inc (int error, int value) +{ + session_worker_t *wrk; + wrk = session_main_get_worker (vlib_get_thread_index ()); + session_worker_stat_error_inc (wrk, error, value); +} + #define session_cli_return_if_not_enabled() \ do { \ if (!session_main.is_enabled) \ return clib_error_return (0, "session layer is not enabled"); \ } while (0) -int session_main_flush_enqueue_events (u8 proto, u32 thread_index); -int session_main_flush_all_enqueue_events (u8 transport_proto); +void session_main_flush_enqueue_events (transport_proto_t transport_proto, + u32 
thread_index); void session_queue_run_on_main_thread (vlib_main_t * vm); /** @@ -769,10 +812,16 @@ void session_wrk_enable_adaptive_mode (session_worker_t *wrk); fifo_segment_t *session_main_get_wrk_mqs_segment (void); void session_node_enable_disable (u8 is_en); clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en); -void session_wrk_handle_evts_main_rpc (); +void session_wrk_handle_evts_main_rpc (void *); +void session_wrk_program_app_wrk_evts (session_worker_t *wrk, + u32 app_wrk_index); session_t *session_alloc_for_connection (transport_connection_t * tc); session_t *session_alloc_for_half_open (transport_connection_t *tc); +void session_get_original_dst (transport_endpoint_t *i2o_src, + transport_endpoint_t *i2o_dst, + transport_proto_t transport_proto, + u32 *original_dst, u16 *original_dst_port); typedef void (pool_safe_realloc_rpc_fn) (void *rpc_args); @@ -813,8 +862,7 @@ pool_program_safe_realloc_rpc (void *args) { max_elts = _vec_max_len (*pra->pool, pra->elt_size); n_alloc = clib_max (2 * max_elts, POOL_REALLOC_SAFE_ELT_THRESH); - _pool_alloc (pra->pool, free_elts + n_alloc, pra->align, 0, - pra->elt_size); + _pool_alloc (pra->pool, n_alloc, pra->align, 0, pra->elt_size); } pool_realloc_flag (*pra->pool) = 0; clib_mem_free (args); diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 2502ef6a70a..48eb932a2c9 100644 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -136,6 +136,13 @@ mq_send_session_accepted_cb (session_t * s) m.mq_index = s->thread_index; } + if (application_original_dst_is_enabled (app)) + { + session_get_original_dst (&m.lcl, &m.rmt, + session_get_transport_proto (s), + &m.original_dst_ip4, &m.original_dst_port); + } + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_ACCEPTED, &m, sizeof (m)); return 0; @@ -205,7 +212,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, session_t * s, session_error_t err) { session_connected_msg_t m = { 0 }; - 
transport_connection_t *tc; fifo_segment_t *eq_seg; app_worker_t *app_wrk; application_t *app; @@ -223,14 +229,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, if (session_has_transport (s)) { - tc = session_get_transport (s); - if (!tc) - { - clib_warning ("failed to retrieve transport!"); - m.retval = SESSION_E_REFUSED; - goto snd_msg; - } - m.handle = session_handle (s); m.vpp_event_queue_address = fifo_segment_msg_q_offset (eq_seg, s->thread_index); @@ -245,7 +243,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, else { ct_connection_t *cct; - session_t *ss; cct = (ct_connection_t *) session_get_transport (s); m.handle = session_handle (s); @@ -256,11 +253,10 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, m.server_rx_fifo = fifo_segment_fifo_offset (s->rx_fifo); m.server_tx_fifo = fifo_segment_fifo_offset (s->tx_fifo); m.segment_handle = session_segment_handle (s); - ss = ct_session_get_peer (s); - m.ct_rx_fifo = fifo_segment_fifo_offset (ss->tx_fifo); - m.ct_tx_fifo = fifo_segment_fifo_offset (ss->rx_fifo); - m.ct_segment_handle = session_segment_handle (ss); m.mq_index = s->thread_index; + m.ct_rx_fifo = fifo_segment_fifo_offset (cct->client_rx_fifo); + m.ct_tx_fifo = fifo_segment_fifo_offset (cct->client_tx_fifo); + m.ct_segment_handle = cct->segment_handle; } /* Setup client session index in advance, in case data arrives @@ -275,12 +271,12 @@ snd_msg: return 0; } -int +static int mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, session_handle_t handle, int rv) { session_bound_msg_t m = { 0 }; - transport_endpoint_t tep; + transport_connection_t *ltc; fifo_segment_t *eq_seg; app_worker_t *app_wrk; application_t *app; @@ -302,21 +298,24 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, else ls = app_listener_get_local_session (al); - session_get_endpoint (ls, &tep, 1 /* is_lcl */); - m.lcl_port = tep.port; - m.lcl_is_ip4 = tep.is_ip4; - clib_memcpy_fast (m.lcl_ip, 
&tep.ip, sizeof (tep.ip)); + ltc = session_get_transport (ls); + m.lcl_port = ltc->lcl_port; + m.lcl_is_ip4 = ltc->is_ip4; + clib_memcpy_fast (m.lcl_ip, &ltc->lcl_ip, sizeof (m.lcl_ip)); app = application_get (app_wrk->app_index); eq_seg = application_get_rx_mqs_segment (app); m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, ls->thread_index); m.mq_index = ls->thread_index; - if (session_transport_service_type (ls) == TRANSPORT_SERVICE_CL && - ls->rx_fifo) + if (transport_connection_is_cless (ltc)) { - m.rx_fifo = fifo_segment_fifo_offset (ls->rx_fifo); - m.tx_fifo = fifo_segment_fifo_offset (ls->tx_fifo); - m.segment_handle = session_segment_handle (ls); + session_t *wrk_ls; + m.mq_index = transport_cl_thread (); + m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, m.mq_index); + wrk_ls = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index); + m.rx_fifo = fifo_segment_fifo_offset (wrk_ls->rx_fifo); + m.tx_fifo = fifo_segment_fifo_offset (wrk_ls->tx_fifo); + m.segment_handle = session_segment_handle (wrk_ls); } snd_msg: @@ -326,11 +325,14 @@ snd_msg: return 0; } -void -mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh, - u32 context, int rv) +static void +mq_send_unlisten_cb (u32 app_wrk_index, session_handle_t sh, u32 context, + int rv) { session_unlisten_reply_msg_t m = { 0 }; + app_worker_t *app_wrk; + + app_wrk = app_worker_get (app_wrk_index); m.context = context; m.handle = sh; @@ -451,6 +453,52 @@ mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf) app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CLEANUP, &m, sizeof (m)); } +static int +mq_send_io_rx_event (session_t *s) +{ + session_event_t *mq_evt; + svm_msg_q_msg_t mq_msg; + app_worker_t *app_wrk; + svm_msg_q_t *mq; + + if (svm_fifo_has_event (s->rx_fifo)) + return 0; + + app_wrk = app_worker_get (s->app_wrk_index); + mq = app_wrk->event_queue; + + mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); + mq_evt = svm_msg_q_msg_data (mq, &mq_msg); + + 
mq_evt->event_type = SESSION_IO_EVT_RX; + mq_evt->session_index = s->rx_fifo->shr->client_session_index; + + (void) svm_fifo_set_event (s->rx_fifo); + + svm_msg_q_add_raw (mq, &mq_msg); + + return 0; +} + +static int +mq_send_io_tx_event (session_t *s) +{ + app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); + svm_msg_q_t *mq = app_wrk->event_queue; + session_event_t *mq_evt; + svm_msg_q_msg_t mq_msg; + + mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); + mq_evt = svm_msg_q_msg_data (mq, &mq_msg); + + mq_evt->event_type = SESSION_IO_EVT_TX; + mq_evt->session_index = s->tx_fifo->shr->client_session_index; + + svm_msg_q_add_raw (mq, &mq_msg); + + return 0; +} + static session_cb_vft_t session_mq_cb_vft = { .session_accept_callback = mq_send_session_accepted_cb, .session_disconnect_callback = mq_send_session_disconnected_cb, @@ -458,8 +506,12 @@ static session_cb_vft_t session_mq_cb_vft = { .session_reset_callback = mq_send_session_reset_cb, .session_migrate_callback = mq_send_session_migrate_cb, .session_cleanup_callback = mq_send_session_cleanup_cb, + .session_listened_callback = mq_send_session_bound_cb, + .session_unlistened_callback = mq_send_unlisten_cb, .add_segment_callback = mq_send_add_segment_cb, .del_segment_callback = mq_send_del_segment_cb, + .builtin_app_rx_callback = mq_send_io_rx_event, + .builtin_app_tx_callback = mq_send_io_tx_event, }; static void @@ -525,7 +577,8 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp) if ((rv = vnet_application_attach (a))) { - clib_warning ("attach returned: %d", rv); + clib_warning ("attach returned: %U", format_session_error, rv); + rv = VNET_API_ERROR_UNSPECIFIED; vec_free (a->namespace_id); goto done; } @@ -567,7 +620,6 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp) } done: - /* *INDENT-OFF* */ REPLY_MACRO3 ( VL_API_APP_ATTACH_REPLY, ((!rv) ? 
vec_len (((fifo_segment_t *) a->segment)->ssvm.name) : 0), ({ @@ -590,7 +642,6 @@ done: rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle); } })); - /* *INDENT-ON* */ if (n_fds) session_send_fds (reg, fds, n_fds); @@ -632,7 +683,9 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp) rv = vnet_app_worker_add_del (&args); if (rv) { - clib_warning ("app worker add/del returned: %d", rv); + clib_warning ("app worker add/del returned: %U", format_session_error, + rv); + rv = VNET_API_ERROR_UNSPECIFIED; goto done; } @@ -653,16 +706,16 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp) n_fds += 1; } - /* *INDENT-OFF* */ done: REPLY_MACRO3 ( VL_API_APP_WORKER_ADD_DEL_REPLY, ((!rv && mp->is_add) ? vec_len (args.segment->name) : 0), ({ rmp->is_add = mp->is_add; - rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index); - rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle); + rmp->wrk_index = mp->wrk_index; if (!rv && mp->is_add) { + rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index); + rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle); rmp->app_event_queue_address = fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0); rmp->n_fds = n_fds; @@ -674,7 +727,6 @@ done: } } })); - /* *INDENT-ON* */ if (n_fds) session_send_fds (reg, fds, n_fds); @@ -700,6 +752,12 @@ vl_api_application_detach_t_handler (vl_api_application_detach_t * mp) a->app_index = app->app_index; a->api_client_index = mp->client_index; rv = vnet_application_detach (a); + if (rv) + { + clib_warning ("vnet_application_detach: %U", format_session_error, + rv); + rv = VNET_API_ERROR_UNSPECIFIED; + } } done: @@ -723,7 +781,6 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp) vnet_app_namespace_add_del_args_t args = { .ns_id = ns_id, - .netns = 0, .sock_name = 0, .secret = clib_net_to_host_u64 (mp->secret), .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index), @@ -743,13 +800,11 @@ 
vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp) } vec_free (ns_id); - /* *INDENT-OFF* */ done: REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_REPLY, ({ if (!rv) rmp->appns_index = clib_host_to_net_u32 (appns_index); })); - /* *INDENT-ON* */ } static void @@ -757,7 +812,7 @@ vl_api_app_namespace_add_del_v2_t_handler ( vl_api_app_namespace_add_del_v2_t *mp) { vl_api_app_namespace_add_del_v2_reply_t *rmp; - u8 *ns_id = 0, *netns = 0; + u8 *ns_id = 0; u32 appns_index = 0; int rv = 0; @@ -768,13 +823,10 @@ vl_api_app_namespace_add_del_v2_t_handler ( } mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0; - mp->netns[sizeof (mp->netns) - 1] = 0; ns_id = format (0, "%s", &mp->namespace_id); - netns = format (0, "%s", &mp->netns); vnet_app_namespace_add_del_args_t args = { .ns_id = ns_id, - .netns = netns, .sock_name = 0, .secret = clib_net_to_host_u64 (mp->secret), .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index), @@ -793,7 +845,6 @@ vl_api_app_namespace_add_del_v2_t_handler ( } } vec_free (ns_id); - vec_free (netns); done: REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V2_REPLY, ({ @@ -803,11 +854,55 @@ done: } static void +vl_api_app_namespace_add_del_v4_t_handler ( + vl_api_app_namespace_add_del_v4_t *mp) +{ + vl_api_app_namespace_add_del_v4_reply_t *rmp; + u8 *ns_id = 0, *sock_name = 0; + u32 appns_index = 0; + int rv = 0; + if (session_main_is_enabled () == 0) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto done; + } + mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0; + ns_id = format (0, "%s", &mp->namespace_id); + sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name); + vnet_app_namespace_add_del_args_t args = { + .ns_id = ns_id, + .sock_name = sock_name, + .secret = clib_net_to_host_u64 (mp->secret), + .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index), + .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id), + .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id), + .is_add = mp->is_add, + }; + rv = vnet_app_namespace_add_del 
(&args); + if (!rv && mp->is_add) + { + appns_index = app_namespace_index_from_id (ns_id); + if (appns_index == APP_NAMESPACE_INVALID_INDEX) + { + clib_warning ("app ns lookup failed id:%s", ns_id); + rv = VNET_API_ERROR_UNSPECIFIED; + } + } + vec_free (ns_id); + vec_free (sock_name); +done: + REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V4_REPLY, ({ + if (!rv) + rmp->appns_index = clib_host_to_net_u32 (appns_index); + })); +} + +static void vl_api_app_namespace_add_del_v3_t_handler ( vl_api_app_namespace_add_del_v3_t *mp) { vl_api_app_namespace_add_del_v3_reply_t *rmp; - u8 *ns_id = 0, *netns = 0, *sock_name = 0; + u8 *ns_id = 0, *sock_name = 0, *api_sock_name = 0; u32 appns_index = 0; int rv = 0; if (session_main_is_enabled () == 0) @@ -816,13 +911,22 @@ vl_api_app_namespace_add_del_v3_t_handler ( goto done; } mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0; - mp->netns[sizeof (mp->netns) - 1] = 0; ns_id = format (0, "%s", &mp->namespace_id); - netns = format (0, "%s", &mp->netns); - sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name); + api_sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name); + mp->netns[sizeof (mp->netns) - 1] = 0; + if (strlen ((char *) mp->netns) != 0) + { + sock_name = + format (0, "abstract:%v,netns_name=%s", api_sock_name, &mp->netns); + } + else + { + sock_name = api_sock_name; + api_sock_name = 0; // for vec_free + } + vnet_app_namespace_add_del_args_t args = { .ns_id = ns_id, - .netns = netns, .sock_name = sock_name, .secret = clib_net_to_host_u64 (mp->secret), .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index), @@ -841,8 +945,8 @@ vl_api_app_namespace_add_del_v3_t_handler ( } } vec_free (ns_id); - vec_free (netns); vec_free (sock_name); + vec_free (api_sock_name); done: REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V3_REPLY, ({ if (!rv) @@ -877,7 +981,10 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp) rv = vnet_session_rule_add_del (&args); if (rv) - clib_warning ("rule add del returned: 
%d", rv); + { + clib_warning ("rule add del returned: %U", format_session_error, rv); + rv = VNET_API_ERROR_UNSPECIFIED; + } vec_free (table_args->tag); REPLY_MACRO (VL_API_SESSION_RULE_ADD_DEL_REPLY); } @@ -980,7 +1087,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto, if (is_local || fib_proto == FIB_PROTOCOL_IP4) { u8 *tag = 0; - /* *INDENT-OFF* */ srt16 = &srt->session_rules_tables_16; pool_foreach (rule16, srt16->rules) { ri = mma_rules_table_rule_index_16 (srt16, rule16); @@ -988,12 +1094,10 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto, send_session_rule_details4 (rule16, is_local, tp, appns_index, tag, reg, context); } - /* *INDENT-ON* */ } if (is_local || fib_proto == FIB_PROTOCOL_IP6) { u8 *tag = 0; - /* *INDENT-OFF* */ srt40 = &srt->session_rules_tables_40; pool_foreach (rule40, srt40->rules) { ri = mma_rules_table_rule_index_40 (srt40, rule40); @@ -1001,7 +1105,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto, send_session_rule_details6 (rule40, is_local, tp, appns_index, tag, reg, context); } - /* *INDENT-ON* */ } } @@ -1016,7 +1119,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ session_table_foreach (st, ({ for (tp = 0; tp < TRANSPORT_N_PROTOS; tp++) { @@ -1026,7 +1128,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp) mp->context); } })); - /* *INDENT-ON* */ } static void @@ -1071,12 +1172,10 @@ vl_api_app_add_cert_key_pair_t_handler (vl_api_app_add_cert_key_pair_t * mp) rv = vnet_app_add_cert_key_pair (a); done: - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_APP_ADD_CERT_KEY_PAIR_REPLY, ({ if (!rv) rmp->index = clib_host_to_net_u32 (a->index); })); - /* *INDENT-ON* */ } static void @@ -1092,6 +1191,12 @@ vl_api_app_del_cert_key_pair_t_handler (vl_api_app_del_cert_key_pair_t * mp) } ckpair_index = clib_net_to_host_u32 (mp->index); rv = vnet_app_del_cert_key_pair 
(ckpair_index); + if (rv) + { + clib_warning ("vnet_app_del_cert_key_pair: %U", format_session_error, + rv); + rv = VNET_API_ERROR_UNSPECIFIED; + } done: REPLY_MACRO (VL_API_APP_DEL_CERT_KEY_PAIR_REPLY); @@ -1169,8 +1274,12 @@ static session_cb_vft_t session_mq_sapi_cb_vft = { .session_reset_callback = mq_send_session_reset_cb, .session_migrate_callback = mq_send_session_migrate_cb, .session_cleanup_callback = mq_send_session_cleanup_cb, + .session_listened_callback = mq_send_session_bound_cb, + .session_unlistened_callback = mq_send_unlisten_cb, .add_segment_callback = mq_send_add_segment_sapi_cb, .del_segment_callback = mq_send_del_segment_sapi_cb, + .builtin_app_rx_callback = mq_send_io_rx_event, + .builtin_app_tx_callback = mq_send_io_tx_event, }; static void @@ -1310,7 +1419,7 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns, app = application_get_if_valid (mp->app_index); if (!app) { - rv = VNET_API_ERROR_INVALID_VALUE; + rv = SESSION_E_INVALID; goto done; } @@ -1325,7 +1434,8 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns, rv = vnet_app_worker_add_del (&args); if (rv) { - clib_warning ("app worker add/del returned: %d", rv); + clib_warning ("app worker add/del returned: %U", format_session_error, + rv); goto done; } @@ -1348,15 +1458,20 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns, done: + /* With app sock api socket expected to be closed, no reply */ + if (!mp->is_add && appns_sapi_enabled ()) + return; + msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY; rmp = &msg.worker_add_del_reply; rmp->retval = rv; rmp->is_add = mp->is_add; + rmp->wrk_index = mp->wrk_index; rmp->api_client_handle = sapi_handle; - rmp->wrk_index = args.wrk_map_index; - rmp->segment_handle = args.segment_handle; if (!rv && mp->is_add) { + rmp->wrk_index = args.wrk_map_index; + rmp->segment_handle = args.segment_handle; /* No segment name and size. 
This supports only memfds */ rmp->app_event_queue_address = fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0); @@ -1653,27 +1768,10 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns) clib_socket_t *cs; char dir[4096]; - if (app_ns->netns) - { - if (!app_ns->sock_name) - app_ns->sock_name = format (0, "@vpp/session/%v%c", app_ns->ns_id, 0); - if (app_ns->sock_name[0] != '@') - return VNET_API_ERROR_INVALID_VALUE; - } - else - { - snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (), - subdir); - err = vlib_unix_recursive_mkdir ((char *) dir); - if (err) - { - clib_error_report (err); - return VNET_API_ERROR_SYSCALL_ERROR_1; - } + snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (), subdir); - if (!app_ns->sock_name) - app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0); - } + if (!app_ns->sock_name) + app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0); /* * Create and initialize socket to listen on @@ -1684,13 +1782,24 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns) CLIB_SOCKET_F_ALLOW_GROUP_WRITE | CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED; - if ((err = clib_socket_init_netns (cs, app_ns->netns))) + if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX) + { + err = vlib_unix_recursive_mkdir ((char *) dir); + if (err) + { + clib_error_report (err); + return SESSION_E_SYSCALL; + } + } + + if ((err = clib_socket_init (cs))) { clib_error_report (err); return -1; } - if (!app_ns->netns && stat ((char *) app_ns->sock_name, &file_stat) == -1) + if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX && + stat ((char *) app_ns->sock_name, &file_stat) == -1) return -1; /* @@ -1712,19 +1821,6 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns) return 0; } -static void -vl_api_application_tls_cert_add_t_handler ( - vl_api_application_tls_cert_add_t *mp) -{ - /* deprecated */ -} - -static void -vl_api_application_tls_key_add_t_handler 
(vl_api_application_tls_key_add_t *mp) -{ - /* deprecated */ -} - #include <vnet/session/session.api.c> static clib_error_t * session_api_hookup (vlib_main_t *vm) diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c index 344937c684a..569a77bccc1 100644 --- a/src/vnet/session/session_cli.c +++ b/src/vnet/session/session_cli.c @@ -145,8 +145,11 @@ format_session (u8 * s, va_list * args) else if (ss->session_state == SESSION_STATE_CONNECTING) { if (ss->flags & SESSION_F_HALF_OPEN) - s = format (s, "%U%v", format_transport_half_open_connection, tp, - ss->connection_index, ss->thread_index, verbose, str); + { + s = format (s, "%U", format_transport_half_open_connection, tp, + ss->connection_index, ss->thread_index, verbose); + s = format (s, "%v", str); + } else s = format (s, "%U", format_transport_connection, tp, ss->connection_index, ss->thread_index, verbose); @@ -339,7 +342,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose) n_closed = 0; - /* *INDENT-OFF* */ pool_foreach (s, pool) { if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED) { @@ -348,7 +350,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose) } vlib_cli_output (vm, "%U", format_session, s, verbose); } - /* *INDENT-ON* */ if (!n_closed) vlib_cli_output (vm, "Thread %d: active sessions %u", thread_index, @@ -615,7 +616,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%-" SESSION_CLI_ID_LEN "s%-24s", "Listener", "App"); - /* *INDENT-OFF* */ pool_foreach (s, smm->wrk[0].sessions) { if (s->session_state != SESSION_STATE_LISTENING || s->session_type != sst) @@ -625,7 +625,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%U%-25v%", format_session, s, 0, app_name); } - /* *INDENT-ON* */ goto done; } @@ -655,7 +654,6 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_show_session_command) = { .path = "show session", @@ -665,7 +663,6 @@ 
VLIB_CLI_COMMAND (vlib_cli_show_session_command) = "[protos] [states] ", .function = show_session_command_fn, }; -/* *INDENT-ON* */ static int clear_session (session_t * s) @@ -717,27 +714,23 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input, if (clear_all) { - /* *INDENT-OFF* */ vec_foreach (wrk, smm->wrk) { pool_foreach (session, wrk->sessions) { clear_session (session); } }; - /* *INDENT-ON* */ } return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_session_command, static) = { .path = "clear session", .short_help = "clear session thread <thread> session <index>", .function = clear_session_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_session_fifo_trace_command_fn (vlib_main_t * vm, @@ -780,14 +773,12 @@ show_session_fifo_trace_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_session_fifo_trace_command, static) = { .path = "show session fifo trace", .short_help = "show session fifo trace <session>", .function = show_session_fifo_trace_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -827,14 +818,12 @@ session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (session_replay_fifo_trace_command, static) = { .path = "session replay fifo", .short_help = "session replay fifo <session>", .function = session_replay_fifo_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input, @@ -859,14 +848,68 @@ session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input, return vnet_session_enable_disable (vm, is_en); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (session_enable_disable_command, static) = { .path = "session", .short_help = "session [enable|disable]", .function = session_enable_disable_fn, }; -/* *INDENT-ON* */ + +static clib_error_t * +show_session_stats_fn (vlib_main_t 
*vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + session_main_t *smm = &session_main; + session_worker_t *wrk; + unsigned int *e; + + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + return clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + + vec_foreach (wrk, smm->wrk) + { + vlib_cli_output (vm, "Thread %u:\n", wrk - smm->wrk); + e = wrk->stats.errors; +#define _(name, str) \ + if (e[SESSION_EP_##name]) \ + vlib_cli_output (vm, " %lu %s", e[SESSION_EP_##name], str); + foreach_session_error +#undef _ + } + return 0; +} + +VLIB_CLI_COMMAND (show_session_stats_command, static) = { + .path = "show session stats", + .short_help = "show session stats", + .function = show_session_stats_fn, +}; + +static clib_error_t * +clear_session_stats_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + session_main_t *smm = &session_main; + session_worker_t *wrk; + + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + return clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + + vec_foreach (wrk, smm->wrk) + { + clib_memset (&wrk->stats, 0, sizeof (wrk->stats)); + } + + return 0; +} + +VLIB_CLI_COMMAND (clear_session_stats_command, static) = { + .path = "clear session stats", + .short_help = "clear session stats", + .function = clear_session_stats_fn, +}; /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/session_debug.c b/src/vnet/session/session_debug.c index 8e4588ecd0b..2a50adac5dd 100644 --- a/src/vnet/session/session_debug.c +++ b/src/vnet/session/session_debug.c @@ -52,15 +52,20 @@ show_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input, } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_session_dbg_clock_cycles_command, static) = { .path = "show session dbg clock_cycles", .short_help = "show session dbg clock_cycles", .function = show_session_dbg_clock_cycles_fn, }; -/* *INDENT-ON* */ +static_always_inline f64 
+session_dbg_time_now (u32 thread) +{ + vlib_main_t *vm = vlib_get_main_by_index (thread); + + return clib_time_now (&vm->clib_time) + vm->time_offset; +} static clib_error_t * clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input, @@ -77,7 +82,7 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input, { sde = &session_dbg_main.wrk[thread]; clib_memset (sde, 0, sizeof (session_dbg_evts_t)); - sde->last_time = vlib_time_now (vlib_mains[thread]); + sde->last_time = session_dbg_time_now (thread); sde->start_time = sde->last_time; } @@ -85,14 +90,12 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input, } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_session_clock_cycles_command, static) = { .path = "clear session dbg clock_cycles", .short_help = "clear session dbg clock_cycles", .function = clear_session_dbg_clock_cycles_fn, }; -/* *INDENT-ON* */ void session_debug_init (void) @@ -107,15 +110,99 @@ session_debug_init (void) for (thread = 0; thread < num_threads; thread++) { clib_memset (&sdm->wrk[thread], 0, sizeof (session_dbg_evts_t)); - sdm->wrk[thread].start_time = vlib_time_now (vlib_mains[thread]); + sdm->wrk[thread].start_time = session_dbg_time_now (thread); } } + +static const char *session_evt_grp_str[] = { +#define _(sym, str) str, + foreach_session_evt_grp +#undef _ +}; + +static void +session_debug_show_groups (vlib_main_t *vm) +{ + session_dbg_main_t *sdm = &session_dbg_main; + int i = 0; + + vlib_cli_output (vm, "%-10s%-30s%-10s", "Index", "Group", "Level"); + + for (i = 0; i < SESSION_EVT_N_GRP; i++) + vlib_cli_output (vm, "%-10d%-30s%-10d", i, session_evt_grp_str[i], + sdm->grp_dbg_lvl[i]); +} + +static clib_error_t * +session_debug_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + session_dbg_main_t *sdm = &session_dbg_main; + u32 group, level = ~0; + clib_error_t *error = 0; + u8 is_show = 0; + uword *bitmap = 0; + + while (unformat_check_input 
(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "show")) + is_show = 1; + else if (unformat (input, "group %U", unformat_bitmap_list, &bitmap)) + ; + else if (unformat (input, "level %d", &level)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (is_show) + { + session_debug_show_groups (vm); + goto done; + } + if (level == ~0) + { + vlib_cli_output (vm, "level must be entered"); + goto done; + } + + group = clib_bitmap_last_set (bitmap); + if (group == ~0) + { + vlib_cli_output (vm, "group must be entered"); + goto done; + } + if (group >= SESSION_EVT_N_GRP) + { + vlib_cli_output (vm, "group out of bounds"); + goto done; + } + clib_bitmap_foreach (group, bitmap) + sdm->grp_dbg_lvl[group] = level; + +done: + + clib_bitmap_free (bitmap); + return error; +} + +VLIB_CLI_COMMAND (session_debug_command, static) = { + .path = "session debug", + .short_help = "session debug {show | debug group <list> level <n>}", + .function = session_debug_fn, + .is_mp_safe = 1, +}; + #else void session_debug_init (void) { } -#endif +#endif /* SESSION_DEBUG */ void dump_thread_0_event_queue (void) @@ -189,7 +276,7 @@ session_node_cmp_event (session_event_t * e, svm_fifo_t * f) case SESSION_IO_EVT_RX: case SESSION_IO_EVT_TX: case SESSION_IO_EVT_BUILTIN_RX: - case SESSION_IO_EVT_BUILTIN_TX: + case SESSION_IO_EVT_TX_MAIN: case SESSION_IO_EVT_TX_FLUSH: if (e->session_index == f->shr->master_session_index) return 1; @@ -211,7 +298,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) session_worker_t *wrk; int i, index, found = 0; svm_msg_q_msg_t *msg; - svm_msg_q_ring_t *ring; svm_msg_q_t *mq; u8 thread_index; @@ -228,8 +314,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) for (i = 0; i < sq->cursize; i++) { msg = (svm_msg_q_msg_t *) (&sq->data[0] + sq->elsize * index); - ring = svm_msg_q_ring (mq, msg->ring_index); - clib_memcpy_fast (e, 
svm_msg_q_msg_data (mq, msg), ring->elsize); + clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), sizeof (*e)); found = session_node_cmp_event (e, f); if (found) return 1; @@ -239,7 +324,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) * Search pending events vector */ - /* *INDENT-OFF* */ clib_llist_foreach (wrk->event_elts, evt_list, pool_elt_at_index (wrk->event_elts, wrk->new_head), elt, ({ @@ -250,9 +334,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) goto done; } })); - /* *INDENT-ON* */ - /* *INDENT-OFF* */ clib_llist_foreach (wrk->event_elts, evt_list, pool_elt_at_index (wrk->event_elts, wrk->old_head), elt, ({ @@ -263,7 +345,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) goto done; } })); - /* *INDENT-ON* */ done: return found; diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h index 9e49a35dbe6..d433ef47fb1 100644 --- a/src/vnet/session/session_debug.h +++ b/src/vnet/session/session_debug.h @@ -17,49 +17,81 @@ #include <vnet/session/transport.h> #include <vlib/vlib.h> - -#define foreach_session_dbg_evt \ - _(ENQ, "enqueue") \ - _(DEQ, "dequeue") \ - _(DEQ_NODE, "dequeue") \ - _(POLL_GAP_TRACK, "poll gap track") \ - _(POLL_DISPATCH_TIME, "dispatch time") \ - _(DISPATCH_START, "dispatch start") \ - _(DISPATCH_END, "dispatch end") \ - _(FREE, "session free") \ - _(DSP_CNTRS, "dispatch counters") \ - _(IO_EVT_COUNTS, "io evt counts") \ - _(EVT_COUNTS, "ctrl evt counts") \ +#include <vpp/vnet/config.h> + +#define foreach_session_dbg_evt \ + _ (ENQ, DEQ_EVTS, 1, "enqueue") \ + _ (DEQ, DEQ_EVTS, 1, "dequeue") \ + _ (DEQ_NODE, DISPATCH_DBG, 1, "dequeue") \ + _ (POLL_GAP_TRACK, EVT_POLL_DBG, 1, "poll gap track") \ + _ (POLL_DISPATCH_TIME, EVT_POLL_DBG, 1, "dispatch time") \ + _ (DISPATCH_START, CLOCKS_EVT_DBG, 1, "dispatch start") \ + _ (DISPATCH_END, CLOCKS_EVT_DBG, 1, "dispatch end") \ + _ (DSP_CNTRS, CLOCKS_EVT_DBG, 1, "dispatch counters") \ + _ 
(STATE_CHANGE, SM, 1, "session state change") \ + _ (FREE, SM, 1, "session free") \ + _ (IO_EVT_COUNTS, COUNTS_EVT_DBG, 1, "io evt counts") \ + _ (COUNTS, COUNTS_EVT_DBG, 1, "ctrl evt counts") typedef enum _session_evt_dbg { -#define _(sym, str) SESSION_EVT_##sym, +#define _(sym, grp, lvl, str) SESSION_EVT_##sym, foreach_session_dbg_evt #undef _ } session_evt_dbg_e; -#define foreach_session_events \ -_(CLK_UPDATE_TIME, 1, 1, "Time Update Time") \ -_(CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \ -_(CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \ -_(CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \ -_(CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \ -_(CLK_TOTAL, 1, 1, "Time Total in Node") \ -_(CLK_START, 1, 1, "Time Since Last Reset") \ - \ -_(CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed" ) \ -_(CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed" ) \ -_(CNT_NEW_EVTS, 1, 0, "# of New Events Processed" ) \ -_(CNT_OLD_EVTS, 1, 0, "# of Old Events Processed" ) \ -_(CNT_IO_EVTS, 1, 0, "# of Events Processed" ) \ -_(CNT_NODE_CALL, 1, 0, "# of Node Calls") \ - \ -_(BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \ -_(SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \ -_(SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \ -_(SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \ -_(SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \ -_(SESSION_IO_EVT_BUILTIN_TX, 1, 0, "# of IO Event BuiltIn TX") \ +typedef enum session_evt_lvl_ +{ +#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_LVL = lvl, + foreach_session_dbg_evt +#undef _ +} session_evt_lvl_e; + +#define foreach_session_evt_grp \ + _ (DEQ_EVTS, "dequeue/enqueue events") \ + _ (DISPATCH_DBG, "dispatch") \ + _ (EVT_POLL_DBG, "event poll") \ + _ (SM, "state machine") \ + _ (CLOCKS_EVT_DBG, "clocks events") \ + _ (COUNTS_EVT_DBG, "counts events") + +typedef enum session_evt_grp_ +{ +#define _(sym, str) SESSION_EVT_GRP_##sym, + foreach_session_evt_grp +#undef _ + SESSION_EVT_N_GRP +} session_evt_grp_e; + +typedef enum session_evt_to_grp_ +{ 
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_GRP = SESSION_EVT_GRP_##grp, + foreach_session_dbg_evt +#undef _ +} session_evt_to_grp_e; + +#define foreach_session_events \ + _ (CLK_UPDATE_TIME, 1, 1, "Time Update Time") \ + _ (CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \ + _ (CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \ + _ (CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \ + _ (CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \ + _ (CLK_TOTAL, 1, 1, "Time Total in Node") \ + _ (CLK_START, 1, 1, "Time Since Last Reset") \ + \ + _ (CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed") \ + _ (CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed") \ + _ (CNT_NEW_EVTS, 1, 0, "# of New Events Processed") \ + _ (CNT_OLD_EVTS, 1, 0, "# of Old Events Processed") \ + _ (CNT_IO_EVTS, 1, 0, "# of Events Processed") \ + _ (CNT_NODE_CALL, 1, 0, "# of Node Calls") \ + \ + _ (BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \ + _ (SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \ + _ (SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \ + _ (SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \ + _ (SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \ + _ (SESSION_IO_EVT_TX_MAIN, 1, 0, "# of IO Event TX Main") typedef enum { @@ -90,17 +122,28 @@ typedef struct session_dbg_evts_t typedef struct session_dbg_main_ { session_dbg_evts_t *wrk; + u8 grp_dbg_lvl[SESSION_EVT_N_GRP]; } session_dbg_main_t; extern session_dbg_main_t session_dbg_main; -#define SESSION_DEBUG 0 * (TRANSPORT_DEBUG > 0) -#define SESSION_DEQ_EVTS (0) -#define SESSION_DISPATCH_DBG (0) -#define SESSION_EVT_POLL_DBG (0) -#define SESSION_SM (0) +#if defined VPP_SESSION_DEBUG && (TRANSPORT_DEBUG > 0) +#define SESSION_DEBUG (1) +#define SESSION_DEQ_EVTS (1) +#define SESSION_DISPATCH_DBG (1) +#define SESSION_EVT_POLL_DBG (1) +#define SESSION_SM (1) +#define SESSION_CLOCKS_EVT_DBG (1) +#define SESSION_COUNTS_EVT_DBG (1) +#else +#define SESSION_DEBUG (0) +#define SESSION_DEQ_EVTS (0) +#define SESSION_DISPATCH_DBG (0) +#define SESSION_EVT_POLL_DBG (0) 
+#define SESSION_SM (0) #define SESSION_CLOCKS_EVT_DBG (0) #define SESSION_COUNTS_EVT_DBG (0) +#endif #if SESSION_DEBUG @@ -123,17 +166,43 @@ extern session_dbg_main_t session_dbg_main; ed = ELOG_DATA (&vlib_global_main.elog_main, _e) #if SESSION_SM -#define SESSION_EVT_FREE_HANDLER(_s) \ -{ \ - ELOG_TYPE_DECLARE (_e) = \ - { \ - .format = "free: idx %u", \ - .format_args = "i4", \ - }; \ - DEC_SESSION_ETD(_s, _e, 1); \ - ed->data[0] = _s->session_index; \ -} +#define SESSION_EVT_STATE_CHANGE_HANDLER(_s) \ + { \ + ELOG_TYPE_DECLARE (_e) = { \ + .format = "%s: idx %u", \ + .format_args = "t4i4", \ + .n_enum_strings = 12, \ + .enum_strings = { \ + "created", \ + "listening", \ + "connecting", \ + "accepting", \ + "ready", \ + "opened", \ + "transport closing", \ + "closing", \ + "app closed", \ + "transport closed", \ + "closed", \ + "transport deleted", \ + }, \ + }; \ + DEC_SESSION_ETD (_s, _e, 2); \ + ed->data[0] = _s->session_state; \ + ed->data[1] = _s->session_index; \ + } + +#define SESSION_EVT_FREE_HANDLER(_s) \ + { \ + ELOG_TYPE_DECLARE (_e) = { \ + .format = "free: idx %u", \ + .format_args = "i4", \ + }; \ + DEC_SESSION_ED (_e, 1); \ + ed->data[0] = _s->session_index; \ + } #else +#define SESSION_EVT_STATE_CHANGE_HANDLER(_s) #define SESSION_EVT_FREE_HANDLER(_s) #endif @@ -282,17 +351,17 @@ extern session_dbg_main_t session_dbg_main; counters[SESS_Q_##_node_evt].u64 += _cnt; \ } -#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \ -{ \ - u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \ - session_dbg_evts_t *sde; \ - sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \ - sde->counters[type].u64 += _cnt; \ - sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt ; \ -} +#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \ + { \ + u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \ + session_dbg_evts_t *sde; \ + sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \ + sde->counters[type].u64 += _cnt; \ + 
sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt; \ + } #else #define SESSION_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) -#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) +#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) #endif /*SESSION_COUNTS_EVT_DBG */ @@ -322,8 +391,18 @@ extern session_dbg_main_t session_dbg_main; #define CONCAT_HELPER(_a, _b) _a##_b #define CC(_a, _b) CONCAT_HELPER(_a, _b) -#define SESSION_EVT(_evt, _args...) CC(_evt, _HANDLER)(_args) - +#define session_evt_lvl(_evt) CC (_evt, _LVL) +#define session_evt_grp(_evt) CC (_evt, _GRP) +#define session_evt_grp_dbg_lvl(_evt) \ + session_dbg_main.grp_dbg_lvl[session_evt_grp (_evt)] +#define SESSION_EVT(_evt, _args...) \ + do \ + { \ + if (PREDICT_FALSE (session_evt_grp_dbg_lvl (_evt) >= \ + session_evt_lvl (_evt))) \ + CC (_evt, _HANDLER) (_args); \ + } \ + while (0) #else #define SESSION_EVT(_evt, _args...) #define SESSION_DBG(_fmt, _args...) diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c new file mode 100644 index 00000000000..73b777127fd --- /dev/null +++ b/src/vnet/session/session_input.c @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#include <vnet/session/session.h> +#include <vnet/session/application.h> + +static inline int +mq_try_lock (svm_msg_q_t *mq) +{ + int rv, n_try = 0; + + while (n_try < 100) + { + rv = svm_msg_q_try_lock (mq); + if (!rv) + return 0; + n_try += 1; + usleep (1); + } + + return -1; +} + +always_inline u8 +mq_event_ring_index (session_evt_type_t et) +{ + return (et >= SESSION_CTRL_EVT_RPC ? 
SESSION_MQ_CTRL_EVT_RING : + SESSION_MQ_IO_EVT_RING); +} + +void +app_worker_del_all_events (app_worker_t *app_wrk) +{ + session_worker_t *wrk; + session_event_t *evt; + u32 thread_index; + session_t *s; + + for (thread_index = 0; thread_index < vec_len (app_wrk->wrk_evts); + thread_index++) + { + while (clib_fifo_elts (app_wrk->wrk_evts[thread_index])) + { + clib_fifo_sub2 (app_wrk->wrk_evts[thread_index], evt); + switch (evt->event_type) + { + case SESSION_CTRL_EVT_MIGRATED: + s = session_get (evt->session_index, thread_index); + transport_cleanup (session_get_transport_proto (s), + s->connection_index, s->thread_index); + session_free (s); + break; + case SESSION_CTRL_EVT_CLEANUP: + s = session_get (evt->as_u64[0] & 0xffffffff, thread_index); + if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION) + break; + uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s); + break; + case SESSION_CTRL_EVT_HALF_CLEANUP: + s = ho_session_get (evt->session_index); + pool_put_index (app_wrk->half_open_table, s->ho_index); + session_free (s); + break; + default: + break; + } + } + wrk = session_main_get_worker (thread_index); + clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0); + } +} + +always_inline int +app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, + u8 is_builtin) +{ + application_t *app = application_get (app_wrk->app_index); + svm_msg_q_t *mq = app_wrk->event_queue; + u8 ring_index, mq_is_cong; + session_state_t old_state; + session_event_t *evt; + u32 n_evts = 128, i; + session_t *s; + int rv; + + n_evts = clib_min (n_evts, clib_fifo_elts (app_wrk->wrk_evts[thread_index])); + + if (!is_builtin) + { + mq_is_cong = app_worker_mq_is_congested (app_wrk); + if (mq_try_lock (mq)) + { + app_worker_set_mq_wrk_congested (app_wrk, thread_index); + return 0; + } + } + + for (i = 0; i < n_evts; i++) + { + evt = clib_fifo_head (app_wrk->wrk_evts[thread_index]); + if (!is_builtin) + { + ring_index = mq_event_ring_index 
(evt->event_type); + if (svm_msg_q_or_ring_is_full (mq, ring_index)) + { + app_worker_set_mq_wrk_congested (app_wrk, thread_index); + break; + } + } + + switch (evt->event_type) + { + case SESSION_IO_EVT_RX: + s = session_get (evt->session_index, thread_index); + s->flags &= ~SESSION_F_RX_EVT; + /* Application didn't confirm accept yet */ + if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING || + s->session_state == SESSION_STATE_CONNECTING)) + break; + app->cb_fns.builtin_app_rx_callback (s); + break; + /* Handle sessions that might not be on current thread */ + case SESSION_IO_EVT_BUILTIN_RX: + s = session_get_from_handle_if_valid (evt->session_handle); + if (!s) + break; + s->flags &= ~SESSION_F_RX_EVT; + if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING || + s->session_state == SESSION_STATE_CONNECTING)) + break; + app->cb_fns.builtin_app_rx_callback (s); + break; + case SESSION_IO_EVT_TX: + s = session_get (evt->session_index, thread_index); + app->cb_fns.builtin_app_tx_callback (s); + break; + case SESSION_IO_EVT_TX_MAIN: + s = session_get_from_handle_if_valid (evt->session_handle); + if (!s) + break; + app->cb_fns.builtin_app_tx_callback (s); + break; + case SESSION_CTRL_EVT_BOUND: + /* No app cb function currently */ + if (is_builtin) + break; + app->cb_fns.session_listened_callback ( + app_wrk->wrk_index, evt->as_u64[1] >> 32, evt->session_handle, + evt->as_u64[1] & 0xffffffff); + break; + case SESSION_CTRL_EVT_ACCEPTED: + s = session_get (evt->session_index, thread_index); + old_state = s->session_state; + if (app->cb_fns.session_accept_callback (s)) + { + session_detach_app (s); + break; + } + if (is_builtin) + { + if (old_state >= SESSION_STATE_TRANSPORT_CLOSING) + { + session_set_state (s, + clib_max (old_state, s->session_state)); + if (!(s->flags & SESSION_F_APP_CLOSED)) + app->cb_fns.session_disconnect_callback (s); + } + } + break; + case SESSION_CTRL_EVT_CONNECTED: + if (!(evt->as_u64[1] & 0xffffffff)) + { + s = session_get 
(evt->session_index, thread_index); + old_state = s->session_state; + } + else + s = 0; + rv = app->cb_fns.session_connected_callback ( + app_wrk->wrk_index, evt->as_u64[1] >> 32, s, + evt->as_u64[1] & 0xffffffff); + if (!s) + break; + if (rv) + { + session_detach_app (s); + break; + } + if (old_state >= SESSION_STATE_TRANSPORT_CLOSING) + { + session_set_state (s, clib_max (old_state, s->session_state)); + if (!(s->flags & SESSION_F_APP_CLOSED)) + app->cb_fns.session_disconnect_callback (s); + } + break; + case SESSION_CTRL_EVT_DISCONNECTED: + s = session_get (evt->session_index, thread_index); + if (!(s->flags & SESSION_F_APP_CLOSED)) + app->cb_fns.session_disconnect_callback (s); + break; + case SESSION_CTRL_EVT_RESET: + s = session_get (evt->session_index, thread_index); + if (!(s->flags & SESSION_F_APP_CLOSED)) + app->cb_fns.session_reset_callback (s); + break; + case SESSION_CTRL_EVT_UNLISTEN_REPLY: + if (is_builtin) + break; + app->cb_fns.session_unlistened_callback ( + app_wrk->wrk_index, evt->session_handle, evt->as_u64[1] >> 32, + evt->as_u64[1] & 0xffffffff); + break; + case SESSION_CTRL_EVT_MIGRATED: + s = session_get (evt->session_index, thread_index); + app->cb_fns.session_migrate_callback (s, evt->as_u64[1]); + transport_cleanup (session_get_transport_proto (s), + s->connection_index, s->thread_index); + session_free (s); + /* Notify app that it has data on the new session */ + s = session_get_from_handle (evt->as_u64[1]); + session_send_io_evt_to_thread (s->rx_fifo, + SESSION_IO_EVT_BUILTIN_RX); + break; + case SESSION_CTRL_EVT_TRANSPORT_CLOSED: + s = session_get (evt->session_index, thread_index); + /* Notification enqueued before session was refused by app */ + if (PREDICT_FALSE (s->app_wrk_index == APP_INVALID_INDEX)) + break; + if (app->cb_fns.session_transport_closed_callback) + app->cb_fns.session_transport_closed_callback (s); + break; + case SESSION_CTRL_EVT_CLEANUP: + s = session_get (evt->as_u64[0] & 0xffffffff, thread_index); + /* 
Notification enqueued before session was refused by app */ + if (PREDICT_TRUE (s->app_wrk_index != APP_INVALID_INDEX)) + { + if (app->cb_fns.session_cleanup_callback) + app->cb_fns.session_cleanup_callback (s, evt->as_u64[0] >> 32); + } + if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION) + break; + uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s); + break; + case SESSION_CTRL_EVT_HALF_CLEANUP: + s = ho_session_get (evt->session_index); + ASSERT (session_vlib_thread_is_cl_thread ()); + if (app->cb_fns.half_open_cleanup_callback) + app->cb_fns.half_open_cleanup_callback (s); + pool_put_index (app_wrk->half_open_table, s->ho_index); + session_free (s); + break; + case SESSION_CTRL_EVT_APP_ADD_SEGMENT: + app->cb_fns.add_segment_callback (app_wrk->wrk_index, + evt->as_u64[1]); + break; + case SESSION_CTRL_EVT_APP_DEL_SEGMENT: + app->cb_fns.del_segment_callback (app_wrk->wrk_index, + evt->as_u64[1]); + break; + default: + clib_warning ("unexpected event: %u", evt->event_type); + ASSERT (0); + break; + } + clib_fifo_advance_head (app_wrk->wrk_evts[thread_index], 1); + } + + if (!is_builtin) + { + svm_msg_q_unlock (mq); + if (mq_is_cong && i == n_evts) + app_worker_unset_wrk_mq_congested (app_wrk, thread_index); + } + + return 0; +} + +int +app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index) +{ + if (app_worker_application_is_builtin (app_wrk)) + return app_worker_flush_events_inline (app_wrk, thread_index, + 1 /* is_builtin */); + else + return app_worker_flush_events_inline (app_wrk, thread_index, + 0 /* is_builtin */); +} + +static inline int +session_wrk_flush_events (session_worker_t *wrk) +{ + app_worker_t *app_wrk; + uword app_wrk_index; + u32 thread_index; + + thread_index = wrk->vm->thread_index; + app_wrk_index = clib_bitmap_first_set (wrk->app_wrks_pending_ntf); + + while (app_wrk_index != ~0) + { + app_wrk = app_worker_get_if_valid (app_wrk_index); + /* app_wrk events are flushed on free, so should be valid here */ + ASSERT 
(app_wrk != 0); + app_wrk_flush_wrk_events (app_wrk, thread_index); + + if (!clib_fifo_elts (app_wrk->wrk_evts[thread_index])) + clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0); + + app_wrk_index = + clib_bitmap_next_set (wrk->app_wrks_pending_ntf, app_wrk_index + 1); + } + + if (!clib_bitmap_is_zero (wrk->app_wrks_pending_ntf)) + vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index); + + return 0; +} + +VLIB_NODE_FN (session_input_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 thread_index = vm->thread_index; + session_worker_t *wrk; + + wrk = session_main_get_worker (thread_index); + session_wrk_flush_events (wrk); + + return 0; +} + +VLIB_REGISTER_NODE (session_input_node) = { + .name = "session-input", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */
\ No newline at end of file diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 68f98d0f046..9d028dbb28c 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -29,13 +29,14 @@ #include <vnet/session/session.h> #include <vnet/session/application.h> +static session_lookup_main_t sl_main; + /** * Network namespace index (i.e., fib index) to session lookup table. We * should have one per network protocol type but for now we only support IP4/6 */ static u32 *fib_index_to_table_index[2]; -/* *INDENT-OFF* */ /* 16 octets */ typedef CLIB_PACKED (struct { union @@ -72,7 +73,6 @@ typedef CLIB_PACKED (struct { u64 as_u64[6]; }; }) v6_connection_key_t; -/* *INDENT-ON* */ typedef clib_bihash_kv_16_8_t session_kv4_t; typedef clib_bihash_kv_48_8_t session_kv6_t; @@ -155,29 +155,70 @@ make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * tc) tc->rmt_port, tc->proto); } +static inline u8 +session_table_alloc_needs_sync (void) +{ + return !vlib_thread_is_main_w_barrier () && (vlib_num_workers () > 1); +} + +static_always_inline u8 +session_table_is_alloced (u8 fib_proto, u32 fib_index) +{ + return (vec_len (fib_index_to_table_index[fib_proto]) > fib_index && + fib_index_to_table_index[fib_proto][fib_index] != ~0); +} + static session_table_t * session_table_get_or_alloc (u8 fib_proto, u32 fib_index) { session_table_t *st; u32 table_index; + ASSERT (fib_index != ~0); - if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index && - fib_index_to_table_index[fib_proto][fib_index] != ~0) + + if (session_table_is_alloced (fib_proto, fib_index)) { table_index = fib_index_to_table_index[fib_proto][fib_index]; return session_table_get (table_index); } + + u8 needs_sync = session_table_alloc_needs_sync (); + session_lookup_main_t *slm = &sl_main; + + /* Stop workers, otherwise consumers might be affected. 
This is + * acceptable because new tables should seldom be allocated */ + if (needs_sync) + { + vlib_workers_sync (); + + /* We might have a race, only one worker allowed at once */ + clib_spinlock_lock (&slm->st_alloc_lock); + } + + /* Another worker just allocated this table */ + if (session_table_is_alloced (fib_proto, fib_index)) + { + table_index = fib_index_to_table_index[fib_proto][fib_index]; + st = session_table_get (table_index); + } else { st = session_table_alloc (); - table_index = session_table_index (st); + st->active_fib_proto = fib_proto; + session_table_init (st, fib_proto); vec_validate_init_empty (fib_index_to_table_index[fib_proto], fib_index, ~0); + table_index = session_table_index (st); fib_index_to_table_index[fib_proto][fib_index] = table_index; - st->active_fib_proto = fib_proto; - session_table_init (st, fib_proto); - return st; } + + if (needs_sync) + { + clib_spinlock_unlock (&slm->st_alloc_lock); + vlib_workers_continue (); + } + + return st; } static session_table_t * @@ -1311,8 +1352,8 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl, lcl_port, rmt_port, proto); } -int -vnet_session_rule_add_del (session_rule_add_del_args_t * args) +session_error_t +vnet_session_rule_add_del (session_rule_add_del_args_t *args) { app_namespace_t *app_ns = app_namespace_get (args->appns_index); session_rules_table_t *srt; @@ -1322,14 +1363,14 @@ vnet_session_rule_add_del (session_rule_add_del_args_t * args) int rv = 0; if (!app_ns) - return VNET_API_ERROR_APP_INVALID_NS; + return SESSION_E_INVALID_NS; if (args->scope > 3) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if (args->transport_proto != TRANSPORT_PROTO_TCP && args->transport_proto != TRANSPORT_PROTO_UDP) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if ((args->scope & SESSION_RULE_SCOPE_GLOBAL) || args->scope == 0) { @@ -1569,7 +1610,6 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (session_rule_command, static) = { 
.path = "session rule", @@ -1577,7 +1617,6 @@ VLIB_CLI_COMMAND (session_rule_command, static) = "<lcl-ip/plen> <lcl-port> <rmt-ip/plen> <rmt-port> action <action>", .function = session_rule_command_fn, }; -/* *INDENT-ON* */ void session_lookup_dump_rules_table (u32 fib_index, u8 fib_proto, @@ -1700,7 +1739,6 @@ show_session_rules_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_session_rules_command, static) = { .path = "show session rules", @@ -1708,11 +1746,93 @@ VLIB_CLI_COMMAND (show_session_rules_command, static) = "<lcl-port> <rmt-ip/plen> <rmt-port> scope <scope>]", .function = show_session_rules_command_fn, }; -/* *INDENT-ON* */ + +u8 * +format_session_lookup_tables (u8 *s, va_list *args) +{ + u32 fib_proto = va_arg (*args, u32); + u32 *fibs, num_fibs = 0, fib_index, indent; + session_table_t *st; + u64 total_mem = 0; + + fibs = fib_index_to_table_index[fib_proto]; + + for (fib_index = 0; fib_index < vec_len (fibs); fib_index++) + { + if (fibs[fib_index] == ~0) + continue; + + num_fibs += 1; + st = session_table_get (fibs[fib_index]); + total_mem += session_table_memory_size (st); + } + + indent = format_get_indent (s); + s = format (s, "active fibs:\t%u\n", num_fibs); + s = format (s, "%Umax fib-index:\t%u\n", format_white_space, indent, + vec_len (fibs) - 1); + s = format (s, "%Utable memory:\t%U\n", format_white_space, indent, + format_memory_size, total_mem); + s = format (s, "%Uvec memory:\t%U\n", format_white_space, indent, + format_memory_size, vec_mem_size (fibs)); + + return s; +} + +static clib_error_t * +show_session_lookup_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + session_table_t *st; + u32 fib_index = ~0; + + session_cli_return_if_not_enabled (); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "table %u", &fib_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + 
format_unformat_error, input); + } + + if (fib_index != ~0) + { + st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index); + if (st) + vlib_cli_output (vm, "%U", format_session_table, st); + else + vlib_cli_output (vm, "no ip4 table for fib-index %u", fib_index); + st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index); + if (st) + vlib_cli_output (vm, "%U", format_session_table, st); + else + vlib_cli_output (vm, "no ip6 table for fib-index %u", fib_index); + goto done; + } + + vlib_cli_output (vm, "ip4 fib lookup tables:\n %U", + format_session_lookup_tables, FIB_PROTOCOL_IP4); + vlib_cli_output (vm, "ip6 fib lookup tables:\n %U", + format_session_lookup_tables, FIB_PROTOCOL_IP6); + +done: + return 0; +} + +VLIB_CLI_COMMAND (show_session_lookup_command, static) = { + .path = "show session lookup", + .short_help = "show session lookup [table <fib-index>]", + .function = show_session_lookup_command_fn, +}; void session_lookup_init (void) { + session_lookup_main_t *slm = &sl_main; + + clib_spinlock_init (&slm->st_alloc_lock); + /* * Allocate default table and map it to fib_index 0 */ diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h index c1037dff8c9..f9ffc15165a 100644 --- a/src/vnet/session/session_lookup.h +++ b/src/vnet/session/session_lookup.h @@ -29,6 +29,11 @@ typedef enum session_lookup_result_ SESSION_LOOKUP_RESULT_FILTERED } session_lookup_result_t; +typedef struct session_lookup_main_ +{ + clib_spinlock_t st_alloc_lock; +} session_lookup_main_t; + session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto); @@ -130,7 +135,7 @@ typedef struct _session_rule_add_del_args u8 transport_proto; } session_rule_add_del_args_t; -int vnet_session_rule_add_del (session_rule_add_del_args_t * args); +session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args); void session_lookup_set_tables_appns (app_namespace_t * app_ns); void 
session_lookup_init (void); diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 1908a58f08f..0ec158fb429 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -139,13 +139,17 @@ session_mq_listen_handler (session_worker_t *wrk, session_evt_elt_t *elt) a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config); if ((rv = vnet_listen (a))) - clib_warning ("listen returned: %U", format_session_error, rv); + session_worker_stat_error_inc (wrk, rv, 1); app_wrk = application_get_worker (app, mp->wrk_index); - mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv); + app_worker_listened_notify (app_wrk, a->handle, mp->context, rv); if (mp->ext_config) session_mq_free_ext_config (app, mp->ext_config); + + /* Make sure events are flushed before releasing barrier, to avoid + * potential race with accept. */ + app_wrk_flush_wrk_events (app_wrk, 0); } static void @@ -170,7 +174,8 @@ session_mq_listen_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt) rv = vnet_bind_uri (a); app_wrk = application_get_worker (app, 0); - mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv); + app_worker_listened_notify (app_wrk, a->handle, mp->context, rv); + app_wrk_flush_wrk_events (app_wrk, 0); } static void @@ -178,6 +183,7 @@ session_mq_connect_one (session_connect_msg_t *mp) { vnet_connect_args_t _a, *a = &_a; app_worker_t *app_wrk; + session_worker_t *wrk; application_t *app; int rv; @@ -211,9 +217,10 @@ session_mq_connect_one (session_connect_msg_t *mp) if ((rv = vnet_connect (a))) { - clib_warning ("connect returned: %U", format_session_error, rv); + wrk = session_main_get_worker (vlib_get_thread_index ()); + session_worker_stat_error_inc (wrk, rv, 1); app_wrk = application_get_worker (app, mp->wrk_index); - mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv); + app_worker_connect_notify (app_wrk, 0, rv, mp->context); } if (mp->ext_config) @@ 
-224,23 +231,20 @@ static void session_mq_handle_connects_rpc (void *arg) { u32 max_connects = 32, n_connects = 0; - vlib_main_t *vm = vlib_get_main (); session_evt_elt_t *he, *elt, *next; - session_worker_t *fwrk, *wrk; + session_worker_t *fwrk; - ASSERT (vlib_get_thread_index () == 0); + ASSERT (session_vlib_thread_is_cl_thread ()); /* Pending connects on linked list pertaining to first worker */ - fwrk = session_main_get_worker (1); + fwrk = session_main_get_worker (transport_cl_thread ()); if (!fwrk->n_pending_connects) - goto update_state; - - vlib_worker_thread_barrier_sync (vm); + return; he = clib_llist_elt (fwrk->event_elts, fwrk->pending_connects); elt = clib_llist_next (fwrk->event_elts, evt_list, he); - /* Avoid holding the barrier for too long */ + /* Avoid holding the worker for too long */ while (n_connects < max_connects && elt != he) { next = clib_llist_next (fwrk->event_elts, evt_list, elt); @@ -254,45 +258,10 @@ session_mq_handle_connects_rpc (void *arg) /* Decrement with worker barrier */ fwrk->n_pending_connects -= n_connects; - - vlib_worker_thread_barrier_release (vm); - -update_state: - - /* Switch worker to poll mode if it was in interrupt mode and had work or - * back to interrupt if threshold of loops without a connect is passed. 
- * While in poll mode, reprogram connects rpc */ - wrk = session_main_get_worker (0); - if (wrk->state != SESSION_WRK_POLLING) - { - if (n_connects) - { - session_wrk_set_state (wrk, SESSION_WRK_POLLING); - vlib_node_set_state (vm, session_queue_node.index, - VLIB_NODE_STATE_POLLING); - wrk->no_connect_loops = 0; - } - } - else + if (fwrk->n_pending_connects > 0) { - if (!n_connects) - { - if (++wrk->no_connect_loops > 1e5) - { - session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT); - vlib_node_set_state (vm, session_queue_node.index, - VLIB_NODE_STATE_INTERRUPT); - } - } - else - wrk->no_connect_loops = 0; - } - - if (wrk->state == SESSION_WRK_POLLING) - { - elt = session_evt_alloc_ctrl (wrk); - elt->evt.event_type = SESSION_CTRL_EVT_RPC; - elt->evt.rpc_args.fp = session_mq_handle_connects_rpc; + session_send_rpc_evt_to_thread_force (fwrk->vm->thread_index, + session_mq_handle_connects_rpc, 0); } } @@ -302,20 +271,28 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt) u32 thread_index = wrk - session_main.wrk; session_evt_elt_t *he; - /* No workers, so just deal with the connect now */ - if (PREDICT_FALSE (!thread_index)) + if (PREDICT_FALSE (thread_index > transport_cl_thread ())) { - session_mq_connect_one (session_evt_ctrl_data (wrk, elt)); + clib_warning ("Connect on wrong thread. Dropping"); return; } - if (PREDICT_FALSE (thread_index != 1)) + /* If on worker, check if main has any pending messages. Avoids reordering + * with other control messages that need to be handled by main + */ + if (thread_index) { - clib_warning ("Connect on wrong thread. 
Dropping"); - return; + he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main); + + /* Events pending on main, postpone to avoid reordering */ + if (!clib_llist_is_empty (wrk->event_elts, evt_list, he)) + { + clib_llist_add_tail (wrk->event_elts, evt_list, elt, he); + return; + } } - /* Add to pending list to be handled by main thread */ + /* Add to pending list to be handled by first worker */ he = clib_llist_elt (wrk->event_elts, wrk->pending_connects); clib_llist_add_tail (wrk->event_elts, evt_list, elt, he); @@ -323,9 +300,8 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt) wrk->n_pending_connects += 1; if (wrk->n_pending_connects == 1) { - vlib_node_set_interrupt_pending (vlib_get_main_by_index (0), - session_queue_node.index); - session_send_rpc_evt_to_thread (0, session_mq_handle_connects_rpc, 0); + session_send_rpc_evt_to_thread_force (thread_index, + session_mq_handle_connects_rpc, 0); } } @@ -351,9 +327,9 @@ session_mq_connect_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt) a->app_index = app->app_index; if ((rv = vnet_connect_uri (a))) { - clib_warning ("connect_uri returned: %d", rv); + session_worker_stat_error_inc (wrk, rv, 1); app_wrk = application_get_worker (app, 0 /* default wrk only */ ); - mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv); + app_worker_connect_notify (app_wrk, 0, rv, mp->context); } } @@ -433,13 +409,13 @@ session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt) a->wrk_map_index = mp->wrk_index; if ((rv = vnet_unlisten (a))) - clib_warning ("unlisten returned: %d", rv); + session_worker_stat_error_inc (wrk, rv, 1); app_wrk = application_get_worker (app, a->wrk_map_index); if (!app_wrk) return; - mq_send_unlisten_reply (app_wrk, sh, mp->context, rv); + app_worker_unlisten_reply (app_wrk, sh, mp->context, rv); } static void @@ -480,28 +456,29 @@ session_mq_accepted_reply_handler (session_worker_t *wrk, a->app_index = mp->context; a->handle = 
mp->handle; vnet_disconnect_session (a); + s->app_wrk_index = SESSION_INVALID_INDEX; return; } /* Special handling for cut-through sessions */ if (!session_has_transport (s)) { - s->session_state = SESSION_STATE_READY; + session_set_state (s, SESSION_STATE_READY); ct_session_connect_notify (s, SESSION_E_NONE); return; } old_state = s->session_state; - s->session_state = SESSION_STATE_READY; + session_set_state (s, SESSION_STATE_READY); if (!svm_fifo_is_empty_prod (s->rx_fifo)) - app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX); + app_worker_rx_notify (app_wrk, s); /* Closed while waiting for app to reply. Resend disconnect */ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING) { app_worker_close_notify (app_wrk, s); - s->session_state = old_state; + session_set_state (s, old_state); return; } } @@ -514,15 +491,13 @@ session_mq_reset_reply_handler (void *data) app_worker_t *app_wrk; session_t *s; application_t *app; - u32 index, thread_index; mp = (session_reset_reply_msg_t *) data; app = application_lookup (mp->context); if (!app) return; - session_parse_handle (mp->handle, &index, &thread_index); - s = session_get_if_valid (index, thread_index); + s = session_get_from_handle_if_valid (mp->handle); /* No session or not the right session */ if (!s || s->session_state < SESSION_STATE_TRANSPORT_CLOSING) @@ -632,6 +607,7 @@ session_mq_worker_update_handler (void *data) session_event_t *evt; session_t *s; application_t *app; + int rv; app = application_lookup (mp->client_index); if (!app) @@ -668,7 +644,9 @@ session_mq_worker_update_handler (void *data) return; } - app_worker_own_session (app_wrk, s); + rv = app_worker_own_session (app_wrk, s); + if (rv) + session_stat_error_inc (rv, 1); /* * Send reply @@ -695,7 +673,7 @@ session_mq_worker_update_handler (void *data) session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX); if (s->rx_fifo && !svm_fifo_is_empty (s->rx_fifo)) - app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX); + 
app_worker_rx_notify (app_wrk, s); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) app_worker_close_notify (app_wrk, s); @@ -812,6 +790,9 @@ session_wrk_handle_evts_main_rpc (void *args) case SESSION_CTRL_EVT_ACCEPTED_REPLY: session_mq_accepted_reply_handler (fwrk, elt); break; + case SESSION_CTRL_EVT_CONNECT: + session_mq_connect_handler (fwrk, elt); + break; default: clib_warning ("unhandled %u", elt->evt.event_type); ALWAYS_ASSERT (0); @@ -820,8 +801,11 @@ session_wrk_handle_evts_main_rpc (void *args) /* Regrab element in case pool moved */ elt = clib_llist_elt (fwrk->event_elts, ei); - session_evt_ctrl_data_free (fwrk, elt); - clib_llist_put (fwrk->event_elts, elt); + if (!clib_llist_elt_is_linked (elt, evt_list)) + { + session_evt_ctrl_data_free (fwrk, elt); + clib_llist_put (fwrk->event_elts, elt); + } ei = next_ei; } @@ -1125,8 +1109,8 @@ session_tx_fill_buffer (session_worker_t *wrk, session_tx_context_t *ctx, if (transport_connection_is_cless (ctx->tc)) { - ip_copy (&ctx->tc->rmt_ip, &hdr->rmt_ip, ctx->tc->is_ip4); - ctx->tc->rmt_port = hdr->rmt_port; + clib_memcpy_fast (data0 - sizeof (session_dgram_hdr_t), hdr, + sizeof (*hdr)); } hdr->data_offset += n_bytes_read; if (hdr->data_offset == hdr->data_length) @@ -1188,6 +1172,11 @@ session_tx_not_ready (session_t * s, u8 peek_data) return 2; } } + else + { + if (s->session_state == SESSION_STATE_TRANSPORT_DELETED) + return 2; + } return 0; } @@ -1244,9 +1233,28 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, svm_fifo_peek (ctx->s->tx_fifo, 0, sizeof (ctx->hdr), (u8 *) & ctx->hdr); + /* Zero length dgrams not supported */ + if (PREDICT_FALSE (ctx->hdr.data_length == 0)) + { + svm_fifo_dequeue_drop (ctx->s->tx_fifo, sizeof (ctx->hdr)); + ctx->max_len_to_snd = 0; + return; + } + /* We cannot be sure apps have not enqueued incomplete dgrams */ + if (PREDICT_FALSE (ctx->max_dequeue < + ctx->hdr.data_length + sizeof (ctx->hdr))) + { + ctx->max_len_to_snd = 0; + return; + } 
ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset); len = ctx->hdr.data_length - ctx->hdr.data_offset; + if (ctx->hdr.gso_size) + { + ctx->sp.snd_mss = clib_min (ctx->sp.snd_mss, ctx->hdr.gso_size); + } + /* Process multiple dgrams if smaller than min (buf_space, mss). * This avoids handling multiple dgrams if they require buffer * chains */ @@ -1266,11 +1274,13 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, { svm_fifo_peek (ctx->s->tx_fifo, offset, sizeof (ctx->hdr), (u8 *) & hdr); - ASSERT (hdr.data_length > hdr.data_offset); dgram_len = hdr.data_length - hdr.data_offset; - if (len + dgram_len > ctx->max_dequeue - || first_dgram_len != dgram_len) + if (offset + sizeof (hdr) + hdr.data_length > + ctx->max_dequeue || + first_dgram_len != dgram_len) break; + /* Assert here to allow test above with zero length dgrams */ + ASSERT (hdr.data_length > hdr.data_offset); len += dgram_len; offset += sizeof (hdr) + hdr.data_length; } @@ -1408,9 +1418,12 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, ctx->sp.max_burst_size = max_burst; n_custom_tx = ctx->transport_vft->custom_tx (ctx->tc, &ctx->sp); *n_tx_packets += n_custom_tx; - if (PREDICT_FALSE - (ctx->s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)) - return SESSION_TX_OK; + if (PREDICT_FALSE (ctx->s->session_state >= + SESSION_STATE_TRANSPORT_CLOSED)) + { + svm_fifo_unset_event (ctx->s->tx_fifo); + return SESSION_TX_OK; + } max_burst -= n_custom_tx; if (!max_burst || (ctx->s->flags & SESSION_F_CUSTOM_TX)) { @@ -1552,7 +1565,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, *n_tx_packets += ctx->n_segs_per_evt; SESSION_EVT (SESSION_EVT_DEQ, ctx->s, ctx->max_len_to_snd, ctx->max_dequeue, - ctx->s->tx_fifo->has_event, wrk->last_vlib_time); + ctx->s->tx_fifo->shr->has_event, wrk->last_vlib_time); ASSERT (ctx->left_to_snd == 0); @@ -1597,9 +1610,12 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk, { transport_send_params_t *sp = &wrk->ctx.sp; session_t 
*s = wrk->ctx.s; + clib_llist_index_t ei; u32 n_packets; - if (PREDICT_FALSE (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)) + if (PREDICT_FALSE ((s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) || + (s->session_state == SESSION_STATE_CONNECTING && + (s->flags & SESSION_F_HALF_OPEN)))) return 0; /* Clear custom-tx flag used to request reschedule for tx */ @@ -1610,9 +1626,14 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk, sp->max_burst_size = clib_min (SESSION_NODE_FRAME_SIZE - *n_tx_packets, TRANSPORT_PACER_MAX_BURST_PKTS); + /* Grab elt index since app transports can enqueue events on tx */ + ei = clib_llist_entry_index (wrk->event_elts, elt); + n_packets = transport_custom_tx (session_get_transport_proto (s), s, sp); *n_tx_packets += n_packets; + elt = clib_llist_elt (wrk->event_elts, ei); + if (s->flags & SESSION_F_CUSTOM_TX) { session_evt_add_old (wrk, elt); @@ -1767,7 +1788,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node, break; case SESSION_IO_EVT_RX: s = session_event_get_session (wrk, e); - if (!s) + if (!s || s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) break; transport_app_rx_evt (session_get_transport_proto (s), s->connection_index, s->thread_index); @@ -1778,19 +1799,21 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node, break; svm_fifo_unset_event (s->rx_fifo); app_wrk = app_worker_get (s->app_wrk_index); - app_worker_builtin_rx (app_wrk, s); + app_worker_rx_notify (app_wrk, s); break; - case SESSION_IO_EVT_BUILTIN_TX: - s = session_get_from_handle_if_valid (e->session_handle); + case SESSION_IO_EVT_TX_MAIN: + s = session_get_if_valid (e->session_index, 0 /* main thread */); + if (PREDICT_FALSE (!s)) + break; wrk->ctx.s = s; if (PREDICT_TRUE (s != 0)) - session_tx_fifo_dequeue_internal (wrk, node, elt, n_tx_packets); + (smm->session_tx_fns[s->session_type]) (wrk, node, elt, n_tx_packets); break; default: clib_warning ("unhandled event type %d", 
e->event_type); } - SESSION_EVT (SESSION_IO_EVT_COUNTS, e->event_type, 1, wrk); + SESSION_EVT (SESSION_EVT_IO_EVT_COUNTS, e->event_type, 1, wrk); /* Regrab elements in case pool moved */ elt = clib_llist_elt (wrk->event_elts, ei); @@ -1798,14 +1821,12 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node, clib_llist_put (wrk->event_elts, elt); } -/* *INDENT-OFF* */ static const u32 session_evt_msg_sizes[] = { #define _(symc, sym) \ [SESSION_CTRL_EVT_ ## symc] = sizeof (session_ ## sym ##_msg_t), foreach_session_ctrl_evt #undef _ }; -/* *INDENT-ON* */ always_inline void session_update_time_subscribers (session_main_t *smm, clib_time_type_t now, @@ -1882,7 +1903,7 @@ session_wrk_update_state (session_worker_t *wrk) if (wrk->state == SESSION_WRK_POLLING) { - if (clib_llist_elts (wrk->event_elts) == 4 && + if (clib_llist_elts (wrk->event_elts) == 5 && vlib_last_vectors_per_main_loop (vm) < 1) { session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT); @@ -1892,7 +1913,7 @@ session_wrk_update_state (session_worker_t *wrk) } else if (wrk->state == SESSION_WRK_INTERRUPT) { - if (clib_llist_elts (wrk->event_elts) > 4 || + if (clib_llist_elts (wrk->event_elts) > 5 || vlib_last_vectors_per_main_loop (vm) > 1) { session_wrk_set_state (wrk, SESSION_WRK_POLLING); @@ -1940,6 +1961,8 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (wrk->trans_head == ((wrk->trans_tail + 1) & (wrk->trans_size - 1))) return 0; wrk->batch = vlib_dma_batch_new (vm, wrk->config_index); + if (!wrk->batch) + return 0; } /* @@ -2041,7 +2064,6 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return n_tx_packets; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (session_queue_node) = { .function = session_queue_node_fn, .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, @@ -2052,7 +2074,6 @@ VLIB_REGISTER_NODE (session_queue_node) = { .error_counters = session_error_counters, .state = VLIB_NODE_STATE_DISABLED, }; -/* *INDENT-ON* */ static clib_error_t * 
session_wrk_tfd_read_ready (clib_file_t *cf) @@ -2156,7 +2177,6 @@ session_queue_process (vlib_main_t * vm, vlib_node_runtime_t * rt, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (session_queue_process_node) = { .function = session_queue_process, @@ -2164,7 +2184,6 @@ VLIB_REGISTER_NODE (session_queue_process_node) = .name = "session-queue-process", .state = VLIB_NODE_STATE_DISABLED, }; -/* *INDENT-ON* */ static_always_inline uword session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2177,7 +2196,6 @@ session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, return session_queue_node_fn (vm, node, frame); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (session_queue_pre_input_node) = { .function = session_queue_pre_input_inline, @@ -2185,7 +2203,6 @@ VLIB_REGISTER_NODE (session_queue_pre_input_node) = .name = "session-queue-main", .state = VLIB_NODE_STATE_DISABLED, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/session_rules_table.c b/src/vnet/session/session_rules_table.c index 5108c00d728..70a702cf55c 100644 --- a/src/vnet/session/session_rules_table.c +++ b/src/vnet/session/session_rules_table.c @@ -386,11 +386,11 @@ session_rules_table_lookup6 (session_rules_table_t * srt, * @param srt table where rule should be added * @param args rule arguments * - * @return 0 if success, clib_error_t error otherwise + * @return 0 if success, session_error_t error otherwise */ -int -session_rules_table_add_del (session_rules_table_t * srt, - session_rule_table_add_del_args_t * args) +session_error_t +session_rules_table_add_del (session_rules_table_t *srt, + session_rule_table_add_del_args_t *args) { u8 fib_proto = args->rmt.fp_proto, *rt; u32 ri_from_tag, ri; @@ -398,7 +398,7 @@ session_rules_table_add_del (session_rules_table_t * srt, ri_from_tag = session_rules_table_rule_for_tag (srt, args->tag); if (args->is_add && ri_from_tag != SESSION_RULES_TABLE_INVALID_INDEX) - 
return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if (fib_proto == FIB_PROTOCOL_IP4) { @@ -509,7 +509,7 @@ session_rules_table_add_del (session_rules_table_t * srt, } } else - return VNET_API_ERROR_INVALID_VALUE_2; + return SESSION_E_INVALID; return 0; } @@ -605,11 +605,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt, srt4 = &srt->session_rules_tables_16; vlib_cli_output (vm, "IP4 rules"); - /* *INDENT-OFF* */ pool_foreach (sr4, srt4->rules) { vlib_cli_output (vm, "%U", format_session_rule4, srt, sr4); } - /* *INDENT-ON* */ } else if (fib_proto == FIB_PROTOCOL_IP6) @@ -619,11 +617,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt, srt6 = &srt->session_rules_tables_40; vlib_cli_output (vm, "IP6 rules"); - /* *INDENT-OFF* */ pool_foreach (sr6, srt6->rules) { vlib_cli_output (vm, "%U", format_session_rule6, srt, sr6); } - /* *INDENT-ON* */ } } diff --git a/src/vnet/session/session_rules_table.h b/src/vnet/session/session_rules_table.h index 206ef2f380f..010d50a6398 100644 --- a/src/vnet/session/session_rules_table.h +++ b/src/vnet/session/session_rules_table.h @@ -18,11 +18,11 @@ #include <vnet/vnet.h> #include <vnet/fib/fib.h> +#include <vnet/session/session_types.h> #include <vnet/session/transport.h> #include <vnet/session/mma_16.h> #include <vnet/session/mma_40.h> -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { union @@ -52,7 +52,6 @@ typedef CLIB_PACKED (struct u64 as_u64[5]; }; }) session_mask_or_match_6_t; -/* *INDENT-ON* */ #define SESSION_RULE_TAG_MAX_LEN 64 #define SESSION_RULES_TABLE_INVALID_INDEX MMA_TABLE_INVALID_INDEX @@ -111,8 +110,9 @@ void session_rules_table_show_rule (vlib_main_t * vm, ip46_address_t * lcl_ip, u16 lcl_port, ip46_address_t * rmt_ip, u16 rmt_port, u8 is_ip4); -int session_rules_table_add_del (session_rules_table_t * srt, - session_rule_table_add_del_args_t * args); +session_error_t +session_rules_table_add_del (session_rules_table_t *srt, + 
session_rule_table_add_del_args_t *args); u8 *session_rules_table_rule_tag (session_rules_table_t * srt, u32 ri, u8 is_ip4); void session_rules_table_init (session_rules_table_t * srt); diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c index 9af8ae6a584..dbbe771979c 100644 --- a/src/vnet/session/session_table.c +++ b/src/vnet/session/session_table.c @@ -185,7 +185,66 @@ ip4_session_table_walk (clib_bihash_16_8_t * hash, &ctx); } -/* *INDENT-ON* */ +u32 +session_table_memory_size (session_table_t *st) +{ + u64 total_size = 0; + + if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash)) + { + clib_bihash_alloc_chunk_16_8_t *c = st->v4_session_hash.chunks; + while (c) + { + total_size += c->size; + c = c->next; + } + c = st->v4_half_open_hash.chunks; + while (c) + { + total_size += c->size; + c = c->next; + } + } + + if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash)) + { + clib_bihash_alloc_chunk_48_8_t *c = st->v6_session_hash.chunks; + while (c) + { + total_size += c->size; + c = c->next; + } + c = st->v6_half_open_hash.chunks; + while (c) + { + total_size += c->size; + c = c->next; + } + } + + return total_size; +} + +u8 * +format_session_table (u8 *s, va_list *args) +{ + session_table_t *st = va_arg (*args, session_table_t *); + + if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash)) + { + s = format (s, "%U", format_bihash_16_8, &st->v4_session_hash, 0); + s = format (s, "%U", format_bihash_16_8, &st->v4_half_open_hash, 0); + } + + if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash)) + { + s = format (s, "%U", format_bihash_48_8, &st->v6_session_hash, 0); + s = format (s, "%U", format_bihash_48_8, &st->v6_half_open_hash, 0); + } + + return s; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h index 2127ea45d01..636b8d77bee 100644 --- a/src/vnet/session/session_table.h +++ b/src/vnet/session/session_table.h @@ -69,6 +69,9 @@ 
u32 session_table_index (session_table_t * slt); void session_table_init (session_table_t * slt, u8 fib_proto); void session_table_free (session_table_t *slt, u8 fib_proto); +u32 session_table_memory_size (session_table_t *st); +u8 *format_session_table (u8 *s, va_list *args); + /* Internal, try not to use it! */ session_table_t *_get_session_tables (); @@ -76,7 +79,6 @@ session_table_t *_get_session_tables (); pool_foreach (VAR, _get_session_tables ()) BODY #endif /* SRC_VNET_SESSION_SESSION_TABLE_H_ */ -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c index a8a9327b892..770e7263024 100644 --- a/src/vnet/session/session_test.c +++ b/src/vnet/session/session_test.c @@ -277,12 +277,6 @@ api_app_worker_add_del (vat_main_t *vat) } static int -api_application_tls_key_add (vat_main_t *vat) -{ - return -1; -} - -static int api_app_namespace_add_del (vat_main_t *vam) { vl_api_app_namespace_add_del_t *mp; @@ -330,8 +324,14 @@ api_app_namespace_add_del (vat_main_t *vam) return ret; } +static void +vl_api_app_namespace_add_del_v4_reply_t_handler ( + vl_api_app_namespace_add_del_v4_reply_t *mp) +{ +} + static int -api_application_tls_cert_add (vat_main_t *vat) +api_app_namespace_add_del_v4 (vat_main_t *vat) { return -1; } diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h index 95a88c5ab6e..5e650727d61 100644 --- a/src/vnet/session/session_types.h +++ b/src/vnet/session/session_types.h @@ -25,6 +25,19 @@ #define SESSION_CTRL_MSG_TX_MAX_SIZE 160 #define SESSION_NODE_FRAME_SIZE 128 +typedef u8 session_type_t; +typedef u64 session_handle_t; + +typedef union session_handle_tu_ +{ + session_handle_t handle; + struct + { + u32 session_index; + u32 thread_index; + }; +} __attribute__ ((__transparent_union__)) session_handle_tu_t; + #define foreach_session_endpoint_fields \ foreach_transport_endpoint_cfg_fields \ _(u8, transport_proto) \ @@ -125,9 +138,6 @@ 
session_endpoint_is_zero (session_endpoint_t * sep) return ip_is_zero (&sep->ip, sep->is_ip4); } -typedef u8 session_type_t; -typedef u64 session_handle_t; - typedef enum { SESSION_CLEANUP_TRANSPORT, @@ -144,19 +154,19 @@ typedef enum session_ft_action_ /* * Session states */ -#define foreach_session_state \ - _(CREATED, "created") \ - _(LISTENING, "listening") \ - _(CONNECTING, "connecting") \ - _(ACCEPTING, "accepting") \ - _(READY, "ready") \ - _(OPENED, "opened") \ - _(TRANSPORT_CLOSING, "transport-closing") \ - _(CLOSING, "closing") \ - _(APP_CLOSED, "app-closed") \ - _(TRANSPORT_CLOSED, "transport-closed") \ - _(CLOSED, "closed") \ - _(TRANSPORT_DELETED, "transport-deleted") \ +#define foreach_session_state \ + _ (CREATED, "created") \ + _ (LISTENING, "listening") \ + _ (CONNECTING, "connecting") \ + _ (ACCEPTING, "accepting") \ + _ (READY, "ready") \ + _ (OPENED, "opened") \ + _ (TRANSPORT_CLOSING, "transport-closing") \ + _ (CLOSING, "closing") \ + _ (APP_CLOSED, "app-closed") \ + _ (TRANSPORT_CLOSED, "transport-closed") \ + _ (CLOSED, "closed") \ + _ (TRANSPORT_DELETED, "transport-deleted") typedef enum { @@ -164,7 +174,7 @@ typedef enum foreach_session_state #undef _ SESSION_N_STATES, -} session_state_t; +} __clib_packed session_state_t; #define foreach_session_flag \ _ (RX_EVT, "rx-event") \ @@ -173,7 +183,9 @@ typedef enum _ (IS_MIGRATING, "migrating") \ _ (UNIDIRECTIONAL, "unidirectional") \ _ (CUSTOM_FIFO_TUNING, "custom-fifo-tuning") \ - _ (HALF_OPEN, "half-open") + _ (HALF_OPEN, "half-open") \ + _ (APP_CLOSED, "app-closed") \ + _ (IS_CLESS, "connectionless") typedef enum session_flags_bits_ { @@ -196,38 +208,42 @@ typedef struct session_ svm_fifo_t *rx_fifo; svm_fifo_t *tx_fifo; + union + { + session_handle_t handle; + struct + { + /** Index in thread pool where session was allocated */ + u32 session_index; + + /** Index of the thread that allocated the session */ + u32 thread_index; + }; + }; + /** Type built from transport and network protocol 
types */ session_type_t session_type; /** State in session layer state machine. See @ref session_state_t */ - volatile u8 session_state; - - /** Index in thread pool where session was allocated */ - u32 session_index; + volatile session_state_t session_state; /** Index of the app worker that owns the session */ u32 app_wrk_index; - /** Index of the thread that allocated the session */ - u8 thread_index; - /** Session flags. See @ref session_flags_t */ - u32 flags; + session_flags_t flags; /** Index of the transport connection associated to the session */ u32 connection_index; - /** Index of application that owns the listener. Set only if a listener */ - u32 app_index; + /** App listener index in app's listener pool if a listener */ + u32 al_index; union { /** Parent listener session index if the result of an accept */ session_handle_t listener_handle; - /** App listener index in app's listener pool if a listener */ - u32 al_index; - /** Index in app worker's half-open table if a half-open */ u32 ho_index; }; @@ -300,45 +316,35 @@ session_tx_is_dgram (session_t * s) always_inline session_handle_t session_handle (session_t * s) { - return ((u64) s->thread_index << 32) | (u64) s->session_index; + return s->handle; } always_inline u32 -session_index_from_handle (session_handle_t handle) +session_index_from_handle (session_handle_tu_t handle) { - return handle & 0xFFFFFFFF; + return handle.session_index; } always_inline u32 -session_thread_from_handle (session_handle_t handle) +session_thread_from_handle (session_handle_tu_t handle) { - return handle >> 32; + return handle.thread_index; } always_inline void -session_parse_handle (session_handle_t handle, u32 * index, - u32 * thread_index) +session_parse_handle (session_handle_tu_t handle, u32 *index, + u32 *thread_index) { - *index = session_index_from_handle (handle); - *thread_index = session_thread_from_handle (handle); + *index = handle.session_index; + *thread_index = handle.thread_index; } static inline 
session_handle_t session_make_handle (u32 session_index, u32 data) { - return (((u64) data << 32) | (u64) session_index); -} - -always_inline u32 -session_handle_index (session_handle_t ho_handle) -{ - return (ho_handle & 0xffffffff); -} - -always_inline u32 -session_handle_data (session_handle_t ho_handle) -{ - return (ho_handle >> 32); + return ((session_handle_tu_t){ .session_index = session_index, + .thread_index = data }) + .handle; } typedef enum @@ -347,7 +353,7 @@ typedef enum SESSION_IO_EVT_TX, SESSION_IO_EVT_TX_FLUSH, SESSION_IO_EVT_BUILTIN_RX, - SESSION_IO_EVT_BUILTIN_TX, + SESSION_IO_EVT_TX_MAIN, SESSION_CTRL_EVT_RPC, SESSION_CTRL_EVT_HALF_CLOSE, SESSION_CTRL_EVT_CLOSE, @@ -378,6 +384,8 @@ typedef enum SESSION_CTRL_EVT_APP_WRK_RPC, SESSION_CTRL_EVT_TRANSPORT_ATTR, SESSION_CTRL_EVT_TRANSPORT_ATTR_REPLY, + SESSION_CTRL_EVT_TRANSPORT_CLOSED, + SESSION_CTRL_EVT_HALF_CLEANUP, } session_evt_type_t; #define foreach_session_ctrl_evt \ @@ -412,7 +420,6 @@ typedef enum #define FIFO_EVENT_APP_TX SESSION_IO_EVT_TX #define FIFO_EVENT_DISCONNECT SESSION_CTRL_EVT_CLOSE #define FIFO_EVENT_BUILTIN_RX SESSION_IO_EVT_BUILTIN_RX -#define FIFO_EVENT_BUILTIN_TX SESSION_IO_EVT_BUILTIN_TX typedef enum { @@ -437,6 +444,7 @@ typedef struct session_handle_t session_handle; session_rpc_args_t rpc_args; u32 ctrl_data_index; + u64 as_u64[2]; struct { u8 data[0]; @@ -461,12 +469,12 @@ typedef struct session_dgram_header_ u16 rmt_port; u16 lcl_port; u8 is_ip4; + u16 gso_size; } __clib_packed session_dgram_hdr_t; #define SESSION_CONN_ID_LEN 37 -#define SESSION_CONN_HDR_LEN 45 - -STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), +#define SESSION_CONN_HDR_LEN 47 +STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 10), "session conn id wrong length"); #define foreach_session_error \ @@ -484,9 +492,11 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), _ (NOLISTEN, "not listening") \ _ (NOSESSION, "session does not exist") 
\ _ (NOAPP, "app not attached") \ + _ (APP_ATTACHED, "app already attached") \ _ (PORTINUSE, "lcl port in use") \ _ (IPINUSE, "ip in use") \ _ (ALREADY_LISTENING, "ip port pair already listened on") \ + _ (ADDR_NOT_IN_USE, "address not in use") \ _ (INVALID, "invalid value") \ _ (INVALID_RMT_IP, "invalid remote ip") \ _ (INVALID_APPWRK, "invalid app worker") \ @@ -506,6 +516,8 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), _ (NOCRYPTOENG, "no crypto engine") \ _ (NOCRYPTOCKP, "cert key pair not found ") \ _ (LOCAL_CONNECT, "could not connect with local scope") \ + _ (WRONG_NS_SECRET, "wrong ns secret") \ + _ (SYSCALL, "system call error") \ _ (TRANSPORT_NO_REG, "transport was not registered") typedef enum session_error_p_ diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index 192a201612a..1c2a9261d3c 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -17,36 +17,31 @@ #include <vnet/session/session.h> #include <vnet/fib/fib.h> -typedef struct local_endpoint_ -{ - transport_endpoint_t ep; - int refcnt; -} local_endpoint_t; - /** * Per-type vector of transport protocol virtual function tables */ transport_proto_vft_t *tp_vfts; -/* - * Port allocator seed - */ -static u32 port_allocator_seed; +typedef struct local_endpoint_ +{ + transport_endpoint_t ep; + transport_proto_t proto; + int refcnt; +} local_endpoint_t; -/* - * Local endpoints table - */ -static transport_endpoint_table_t local_endpoints_table; +typedef struct transport_main_ +{ + transport_endpoint_table_t local_endpoints_table; + local_endpoint_t *local_endpoints; + u32 *lcl_endpts_freelist; + u32 port_allocator_seed; + u16 port_allocator_min_src_port; + u16 port_allocator_max_src_port; + u8 lcl_endpts_cleanup_pending; + clib_spinlock_t local_endpoints_lock; +} transport_main_t; -/* - * Pool of local endpoints - */ -static local_endpoint_t *local_endpoints; - -/* - * Local endpoints pool lock - */ -static clib_spinlock_t 
local_endpoints_lock; +static transport_main_t tp_main; u8 * format_transport_proto (u8 * s, va_list * args) @@ -76,6 +71,35 @@ format_transport_proto_short (u8 * s, va_list * args) return s; } +const char *transport_flags_str[] = { +#define _(sym, str) str, + foreach_transport_connection_flag +#undef _ +}; + +u8 * +format_transport_flags (u8 *s, va_list *args) +{ + transport_connection_flags_t flags; + int i, last = -1; + + flags = va_arg (*args, transport_connection_flags_t); + + for (i = 0; i < TRANSPORT_CONNECTION_N_FLAGS; i++) + if (flags & (1 << i)) + last = i; + + for (i = 0; i < last; i++) + { + if (flags & (1 << i)) + s = format (s, "%s, ", transport_flags_str[i]); + } + if (last >= 0) + s = format (s, "%s", transport_flags_str[last]); + + return s; +} + u8 * format_transport_connection (u8 * s, va_list * args) { @@ -100,8 +124,8 @@ format_transport_connection (u8 * s, va_list * args) if (transport_connection_is_tx_paced (tc)) s = format (s, "%Upacer: %U\n", format_white_space, indent, format_transport_pacer, &tc->pacer, tc->thread_index); - s = format (s, "%Utransport: flags 0x%x\n", format_white_space, indent, - tc->flags); + s = format (s, "%Utransport: flags: %U\n", format_white_space, indent, + format_transport_flags, tc->flags); } return s; } @@ -124,14 +148,13 @@ u8 * format_transport_half_open_connection (u8 * s, va_list * args) { u32 transport_proto = va_arg (*args, u32); - u32 ho_index = va_arg (*args, u32); transport_proto_vft_t *tp_vft; tp_vft = transport_protocol_get_vft (transport_proto); if (!tp_vft) return s; - s = format (s, "%U", tp_vft->format_half_open, ho_index); + s = (tp_vft->format_half_open) (s, args); return s; } @@ -426,52 +449,115 @@ transport_connection_attribute (transport_proto_t tp, u32 conn_index, void transport_endpoint_free (u32 tepi) { - pool_put_index (local_endpoints, tepi); + transport_main_t *tm = &tp_main; + pool_put_index (tm->local_endpoints, tepi); } always_inline local_endpoint_t * transport_endpoint_alloc 
(void) { + transport_main_t *tm = &tp_main; local_endpoint_t *lep; ASSERT (vlib_get_thread_index () <= transport_cl_thread ()); - pool_get_aligned_safe (local_endpoints, lep, 0); + + pool_get_aligned_safe (tm->local_endpoints, lep, 0); return lep; } +static void +transport_cleanup_freelist (void) +{ + transport_main_t *tm = &tp_main; + local_endpoint_t *lep; + u32 *lep_indexp; + + clib_spinlock_lock (&tm->local_endpoints_lock); + + vec_foreach (lep_indexp, tm->lcl_endpts_freelist) + { + lep = pool_elt_at_index (tm->local_endpoints, *lep_indexp); + + /* Port re-shared after attempt to cleanup */ + if (lep->refcnt > 0) + continue; + + transport_endpoint_table_del (&tm->local_endpoints_table, lep->proto, + &lep->ep); + transport_endpoint_free (*lep_indexp); + } + + vec_reset_length (tm->lcl_endpts_freelist); + + tm->lcl_endpts_cleanup_pending = 0; + + clib_spinlock_unlock (&tm->local_endpoints_lock); +} + void -transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port) +transport_program_endpoint_cleanup (u32 lepi) +{ + transport_main_t *tm = &tp_main; + u8 flush_fl = 0; + + /* All workers can free connections. 
Synchronize access to freelist */ + clib_spinlock_lock (&tm->local_endpoints_lock); + + vec_add1 (tm->lcl_endpts_freelist, lepi); + + /* Avoid accumulating lots of endpoints for cleanup */ + if (!tm->lcl_endpts_cleanup_pending && + vec_len (tm->lcl_endpts_freelist) > 32) + { + tm->lcl_endpts_cleanup_pending = 1; + flush_fl = 1; + } + + clib_spinlock_unlock (&tm->local_endpoints_lock); + + if (flush_fl) + session_send_rpc_evt_to_thread_force (transport_cl_thread (), + transport_cleanup_freelist, 0); +} + +int +transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port) { + transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 lepi; - /* Cleanup local endpoint if this was an active connect */ - lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip, - clib_net_to_host_u16 (port)); + lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip, + port); if (lepi == ENDPOINT_INVALID_INDEX) - return; + return -1; + + /* First worker may be cleaning up ports so avoid touching free bitmap */ + lep = &tm->local_endpoints[lepi]; + ASSERT (lep->refcnt >= 1); - lep = pool_elt_at_index (local_endpoints, lepi); + /* Local endpoint no longer in use, program cleanup */ if (!clib_atomic_sub_fetch (&lep->refcnt, 1)) { - transport_endpoint_table_del (&local_endpoints_table, proto, &lep->ep); - - /* All workers can free connections. 
Synchronize access to pool */ - clib_spinlock_lock (&local_endpoints_lock); - transport_endpoint_free (lepi); - clib_spinlock_unlock (&local_endpoints_lock); + transport_program_endpoint_cleanup (lepi); + return 0; } + + /* Not an error, just in idication that endpoint was not cleaned up */ + return -1; } static int transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) { + transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 tei; ASSERT (vlib_get_thread_index () <= transport_cl_thread ()); - tei = transport_endpoint_lookup (&local_endpoints_table, proto, ip, port); + tei = + transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port); if (tei != ENDPOINT_INVALID_INDEX) return SESSION_E_PORTINUSE; @@ -479,10 +565,11 @@ transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) lep = transport_endpoint_alloc (); clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip)); lep->ep.port = port; + lep->proto = proto; lep->refcnt = 1; - transport_endpoint_table_add (&local_endpoints_table, proto, &lep->ep, - lep - local_endpoints); + transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep, + lep - tm->local_endpoints); return 0; } @@ -490,14 +577,18 @@ transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port) { + transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 lepi; - lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip, - clib_net_to_host_u16 (port)); + /* Active opens should call this only from a control thread, which are also + * used to allocate and free ports. So, pool has only one writer and + * potentially many readers. 
Listeners are allocated with barrier */ + lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip, + port); if (lepi != ENDPOINT_INVALID_INDEX) { - lep = pool_elt_at_index (local_endpoints, lepi); + lep = pool_elt_at_index (tm->local_endpoints, lepi); clib_atomic_add_fetch (&lep->refcnt, 1); } } @@ -505,11 +596,16 @@ transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port) /** * Allocate local port and add if successful add entry to local endpoint * table to mark the pair as used. + * + * @return port in net order or -1 if port cannot be allocated */ int -transport_alloc_local_port (u8 proto, ip46_address_t * ip) +transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr, + transport_endpoint_cfg_t *rmt) { - u16 min = 1024, max = 65535; /* XXX configurable ? */ + transport_main_t *tm = &tp_main; + u16 min = tm->port_allocator_min_src_port; + u16 max = tm->port_allocator_max_src_port; int tries, limit; limit = max - min; @@ -525,13 +621,26 @@ transport_alloc_local_port (u8 proto, ip46_address_t * ip) /* Find a port in the specified range */ while (1) { - port = random_u32 (&port_allocator_seed) & PORT_MASK; + port = random_u32 (&tm->port_allocator_seed) & PORT_MASK; if (PREDICT_TRUE (port >= min && port < max)) - break; + { + port = clib_host_to_net_u16 (port); + break; + } } - if (!transport_endpoint_mark_used (proto, ip, port)) + if (!transport_endpoint_mark_used (proto, lcl_addr, port)) return port; + + /* IP:port pair already in use, check if 6-tuple available */ + if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port, + rmt->port, proto, rmt->is_ip4)) + continue; + + /* 6-tuple is available so increment lcl endpoint refcount */ + transport_share_local_endpoint (proto, lcl_addr, port); + + return port; } return -1; } @@ -594,6 +703,7 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, ip46_address_t * lcl_addr, u16 * lcl_port) { transport_endpoint_t *rmt = 
(transport_endpoint_t *) rmt_cfg; + transport_main_t *tm = &tp_main; session_error_t error; int port; @@ -614,22 +724,37 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, sizeof (rmt_cfg->peer.ip)); } + /* Cleanup freelist if need be */ + if (vec_len (tm->lcl_endpts_freelist)) + transport_cleanup_freelist (); + /* * Allocate source port */ if (rmt_cfg->peer.port == 0) { - port = transport_alloc_local_port (proto, lcl_addr); + port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; } else { - port = clib_net_to_host_u16 (rmt_cfg->peer.port); - *lcl_port = port; + *lcl_port = rmt_cfg->peer.port; + + if (!transport_endpoint_mark_used (proto, lcl_addr, rmt_cfg->peer.port)) + return 0; + + /* IP:port pair already in use, check if 6-tuple available */ + if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, + rmt_cfg->peer.port, rmt->port, proto, + rmt->is_ip4)) + return SESSION_E_PORTINUSE; + + /* 6-tuple is available so increment lcl endpoint refcount */ + transport_share_local_endpoint (proto, lcl_addr, rmt_cfg->peer.port); - return transport_endpoint_mark_used (proto, lcl_addr, port); + return 0; } return 0; @@ -846,6 +971,7 @@ transport_init (void) { vlib_thread_main_t *vtm = vlib_get_thread_main (); session_main_t *smm = vnet_get_session_main (); + transport_main_t *tm = &tp_main; u32 num_threads; if (smm->local_endpoints_table_buckets == 0) @@ -854,12 +980,14 @@ transport_init (void) smm->local_endpoints_table_memory = 512 << 20; /* Initialize [port-allocator] random number seed */ - port_allocator_seed = (u32) clib_cpu_time_now (); + tm->port_allocator_seed = (u32) clib_cpu_time_now (); + tm->port_allocator_min_src_port = smm->port_allocator_min_src_port; + tm->port_allocator_max_src_port = smm->port_allocator_max_src_port; - clib_bihash_init_24_8 (&local_endpoints_table, "local endpoints table", + clib_bihash_init_24_8 (&tm->local_endpoints_table, "local 
endpoints table", smm->local_endpoints_table_buckets, smm->local_endpoints_table_memory); - clib_spinlock_init (&local_endpoints_lock); + clib_spinlock_init (&tm->local_endpoints_lock); num_threads = 1 /* main thread */ + vtm->n_threads; if (num_threads > 1) diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index 633bb1ecfd0..e6ba1ecbc5f 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -66,7 +66,6 @@ typedef struct transport_send_params_ /* * Transport protocol virtual function table */ -/* *INDENT-OFF* */ typedef struct _transport_proto_vft { /* @@ -125,7 +124,6 @@ typedef struct _transport_proto_vft */ transport_options_t transport_options; } transport_proto_vft_t; -/* *INDENT-ON* */ extern transport_proto_vft_t *tp_vfts; @@ -246,13 +244,14 @@ transport_register_new_protocol (const transport_proto_vft_t * vft, transport_proto_vft_t *transport_protocol_get_vft (transport_proto_t tp); void transport_update_time (clib_time_type_t time_now, u8 thread_index); -int transport_alloc_local_port (u8 proto, ip46_address_t * ip); -int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt, - ip46_address_t * lcl_addr, - u16 * lcl_port); +int transport_alloc_local_port (u8 proto, ip46_address_t *ip, + transport_endpoint_cfg_t *rmt); +int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t *rmt, + ip46_address_t *lcl_addr, u16 *lcl_port); void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port); -void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port); +int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, + u16 port); void transport_enable_disable (vlib_main_t * vm, u8 is_en); void transport_init (void); diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h index adf5e59e6c0..b3469fa9fdb 100644 --- a/src/vnet/session/transport_types.h +++ b/src/vnet/session/transport_types.h @@ -40,24 +40,35 @@ 
typedef enum transport_service_type_ TRANSPORT_N_SERVICES } transport_service_type_t; +/* + * IS_TX_PACED : Connection sending is paced + * NO_LOOKUP: Don't register connection in lookup. Does not apply to local + * apps and transports using the network layer (udp/tcp) + * DESCHED: Connection descheduled by the session layer + * CLESS: Connection is "connection less". Some important implications of that + * are that connections are not pinned to workers and listeners will + * have fifos associated to them + */ +#define foreach_transport_connection_flag \ + _ (IS_TX_PACED, "tx_paced") \ + _ (NO_LOOKUP, "no_lookup") \ + _ (DESCHED, "descheduled") \ + _ (CLESS, "connectionless") + +typedef enum transport_connection_flags_bits_ +{ +#define _(sym, str) TRANSPORT_CONNECTION_F_BIT_##sym, + foreach_transport_connection_flag +#undef _ + TRANSPORT_CONNECTION_N_FLAGS +} transport_connection_flags_bits_t; + typedef enum transport_connection_flags_ { - TRANSPORT_CONNECTION_F_IS_TX_PACED = 1 << 0, - /** - * Don't register connection in lookup. Does not apply to local apps - * and transports using the network layer (udp/tcp) - */ - TRANSPORT_CONNECTION_F_NO_LOOKUP = 1 << 1, - /** - * Connection descheduled by the session layer. - */ - TRANSPORT_CONNECTION_F_DESCHED = 1 << 2, - /** - * Connection is "connection less". 
Some important implications of that - * are that connections are not pinned to workers and listeners will - * have fifos associated to them - */ - TRANSPORT_CONNECTION_F_CLESS = 1 << 3, +#define _(sym, str) \ + TRANSPORT_CONNECTION_F_##sym = 1 << TRANSPORT_CONNECTION_F_BIT_##sym, + foreach_transport_connection_flag +#undef _ } transport_connection_flags_t; typedef struct _spacer @@ -113,7 +124,7 @@ typedef struct _transport_connection #if TRANSPORT_DEBUG elog_track_t elog_track; /**< Event logging */ - u32 cc_stat_tstamp; /**< CC stats timestamp */ + f64 cc_stat_tstamp; /**< CC stats timestamp */ #endif /** @@ -176,6 +187,7 @@ typedef enum _transport_proto u8 *format_transport_proto (u8 * s, va_list * args); u8 *format_transport_proto_short (u8 * s, va_list * args); +u8 *format_transport_flags (u8 *s, va_list *args); u8 *format_transport_connection (u8 * s, va_list * args); u8 *format_transport_listen_connection (u8 * s, va_list * args); u8 *format_transport_half_open_connection (u8 * s, va_list * args); diff --git a/src/vnet/snap/node.c b/src/vnet/snap/node.c index 2a42907321c..ad88b2b3a90 100644 --- a/src/vnet/snap/node.c +++ b/src/vnet/snap/node.c @@ -261,7 +261,6 @@ static char *snap_error_strings[] = { #undef _ }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (snap_input_node) = { .function = snap_input, .name = "snap-input", @@ -282,7 +281,6 @@ VLIB_REGISTER_NODE (snap_input_node) = { .format_trace = format_snap_input_trace, .unformat_buffer = unformat_snap_header, }; -/* *INDENT-ON* */ static void snap_setup_node (vlib_main_t *vm, u32 node_index) diff --git a/src/vnet/snap/snap.h b/src/vnet/snap/snap.h index f6b3be1847f..028df4ede66 100644 --- a/src/vnet/snap/snap.h +++ b/src/vnet/snap/snap.h @@ -75,7 +75,6 @@ typedef enum typedef union { - /* *INDENT-OFF* */ CLIB_PACKED (struct { /* OUI: organization unique identifier. */ u8 oui[3]; @@ -83,7 +82,6 @@ typedef union /* Per-OUI protocol. 
*/ u16 protocol; }); - /* *INDENT-ON* */ u8 as_u8[5]; } snap_header_t; diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c index ca5ea68ae90..56977b58dc2 100644 --- a/src/vnet/span/node.c +++ b/src/vnet/span/node.c @@ -84,7 +84,6 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_SPAN_CLONE)) return; - /* *INDENT-OFF* */ clib_bitmap_foreach (i, sm0->mirror_ports) { if (mirror_frames[i] == 0) @@ -122,7 +121,6 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, } } } - /* *INDENT-ON* */ } static_always_inline uword @@ -304,7 +302,6 @@ VLIB_NODE_FN (span_l2_output_node) (vlib_main_t * vm, [0] = "error-drop" \ } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (span_input_node) = { span_node_defs, .name = "span-input", @@ -349,7 +346,6 @@ clib_error_t *span_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (span_init); -/* *INDENT-ON* */ #endif /* CLIB_MARCH_VARIANT */ #undef span_node_defs diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c index ec47920504a..bf5e20f4d14 100644 --- a/src/vnet/span/span.c +++ b/src/vnet/span/span.c @@ -87,6 +87,9 @@ span_add_delete_entry (vlib_main_t * vm, if (enable_rx || disable_rx) vnet_feature_enable_disable ("device-input", "span-input", src_sw_if_index, rx, 0, 0); + if (enable_rx || disable_rx) + vnet_feature_enable_disable ("port-rx-eth", "span-input", + src_sw_if_index, rx, 0, 0); if (enable_tx || disable_tx) vnet_feature_enable_disable ("interface-output", "span-output", src_sw_if_index, tx, 0, 0); @@ -163,13 +166,11 @@ set_interface_span_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_span_command, static) = { .path = "set interface span", .short_help = "set interface span <if-name> [l2] {disable | destination <if-name> [both|rx|tx]}", .function = set_interface_span_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_interfaces_span_command_fn (vlib_main_t * vm, @@ -188,7 
+189,6 @@ show_interfaces_span_command_fn (vlib_main_t * vm, }; u8 *s = 0; - /* *INDENT-OFF* */ vec_foreach (si, sm->interfaces) { span_mirror_t * drxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_RX]; @@ -229,18 +229,15 @@ show_interfaces_span_command_fn (vlib_main_t * vm, clib_bitmap_free (d); } } - /* *INDENT-ON* */ vec_free (s); return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_interfaces_span_command, static) = { .path = "show interface span", .short_help = "Shows SPAN mirror table", .function = show_interfaces_span_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/span/span_api.c b/src/vnet/span/span_api.c index 300f619934e..f5b24bdf214 100644 --- a/src/vnet/span/span_api.c +++ b/src/vnet/span/span_api.c @@ -61,7 +61,6 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp) return; span_feat_t sf = mp->is_l2 ? SPAN_FEAT_L2 : SPAN_FEAT_DEVICE; - /* *INDENT-OFF* */ vec_foreach (si, sm->interfaces) { span_mirror_t * rxm = &si->mirror_rxtx[sf][VLIB_RX]; @@ -90,7 +89,6 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp) clib_bitmap_free (b); } } - /* *INDENT-ON* */ } #include <vnet/span/span.api.c> diff --git a/src/vnet/srmpls/sr_mpls_api.c b/src/vnet/srmpls/sr_mpls_api.c index 45107f08ab1..920856acff6 100644 --- a/src/vnet/srmpls/sr_mpls_api.c +++ b/src/vnet/srmpls/sr_mpls_api.c @@ -29,7 +29,6 @@ #include <vnet/srmpls/sr_mpls.api_enum.h> #include <vnet/srmpls/sr_mpls.api_types.h> -#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) #define vl_api_version(n, v) static u32 api_version = v; #include <vnet/srmpls/sr_mpls.api.h> @@ -194,12 +193,18 @@ sr_mpls_api_hookup (vlib_main_t * vm) vec_free (name); #define _(N, n) \ - vl_msg_api_set_handlers ( \ - REPLY_MSG_ID_BASE + VL_API_##N, #n, vl_api_##n##_t_handler, \ - vl_noop_handler, vl_api_##n##_t_endian, vl_api_##n##_t_print, \ - sizeof (vl_api_##n##_t), 1, vl_api_##n##_t_print_json, \ - vl_api_##n##_t_tojson, vl_api_##n##_t_fromjson, \ - vl_api_##n##_t_calc_size); + vl_msg_api_config (&(vl_msg_api_msg_config_t){ \ + .id = REPLY_MSG_ID_BASE + VL_API_##N, \ + .name = #n, \ + .handler = vl_api_##n##_t_handler, \ + .endian = vl_api_##n##_t_endian, \ + .format_fn = vl_api_##n##_t_format, \ + .size = sizeof (vl_api_##n##_t), \ + .traced = 1, \ + .tojson = vl_api_##n##_t_tojson, \ + .fromjson = vl_api_##n##_t_fromjson, \ + .calc_size = vl_api_##n##_t_calc_size, \ + }); foreach_vpe_api_msg; #undef _ @@ -207,25 +212,34 @@ sr_mpls_api_hookup (vlib_main_t * vm) * Manually register the sr policy add msg, so we trace enough bytes * to capture a typical segment list */ - vl_msg_api_set_handlers ( - REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD, "sr_mpls_policy_add", - vl_api_sr_mpls_policy_add_t_handler, vl_noop_handler, - vl_api_sr_mpls_policy_add_t_endian, vl_api_sr_mpls_policy_add_t_print, 256, - 1, vl_api_sr_mpls_policy_add_t_print_json, - vl_api_sr_mpls_policy_add_t_tojson, vl_api_sr_mpls_policy_add_t_fromjson, - vl_api_sr_mpls_policy_add_t_calc_size); - + vl_msg_api_config (&(vl_msg_api_msg_config_t){ + .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD, + .name = "sr_mpls_policy_add", + .handler = vl_api_sr_mpls_policy_add_t_handler, + .endian = vl_api_sr_mpls_policy_add_t_endian, + .format_fn = vl_api_sr_mpls_policy_add_t_format, + .size = 256, + .traced = 1, + .tojson = vl_api_sr_mpls_policy_add_t_tojson, + .fromjson = vl_api_sr_mpls_policy_add_t_fromjson, + .calc_size = vl_api_sr_mpls_policy_add_t_calc_size, + 
}); /* * Manually register the sr policy mod msg, so we trace enough bytes * to capture a typical segment list */ - vl_msg_api_set_handlers ( - REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD, "sr_mpls_policy_mod", - vl_api_sr_mpls_policy_mod_t_handler, vl_noop_handler, - vl_api_sr_mpls_policy_mod_t_endian, vl_api_sr_mpls_policy_mod_t_print, 256, - 1, vl_api_sr_mpls_policy_mod_t_print_json, - vl_api_sr_mpls_policy_mod_t_tojson, vl_api_sr_mpls_policy_mod_t_fromjson, - vl_api_sr_mpls_policy_mod_t_calc_size); + vl_msg_api_config (&(vl_msg_api_msg_config_t){ + .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD, + .name = "sr_mpls_policy_mod", + .handler = vl_api_sr_mpls_policy_mod_t_handler, + .endian = vl_api_sr_mpls_policy_mod_t_endian, + .format_fn = vl_api_sr_mpls_policy_mod_t_format, + .size = 256, + .traced = 1, + .tojson = vl_api_sr_mpls_policy_mod_t_tojson, + .fromjson = vl_api_sr_mpls_policy_mod_t_fromjson, + .calc_size = vl_api_sr_mpls_policy_mod_t_calc_size, + }); /* * Set up the (msg_name, crc, message-id) table diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c index 8f0804850f1..41cb71601e9 100644 --- a/src/vnet/srmpls/sr_mpls_policy.c +++ b/src/vnet/srmpls/sr_mpls_policy.c @@ -108,7 +108,6 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight) fib_route_path_t *paths = NULL; vec_add1 (paths, path); - /* *INDENT-OFF* */ fib_prefix_t pfx = { .fp_len = 21, .fp_proto = FIB_PROTOCOL_MPLS, @@ -116,7 +115,6 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight) .fp_eos = eos, .fp_payload_proto = DPO_PROTO_MPLS, }; - /* *INDENT-ON* */ fib_table_entry_path_add2 (0, &pfx, @@ -245,7 +243,6 @@ sr_mpls_policy_del (mpls_label_t bsid) /* remove each of the MPLS routes */ FOR_EACH_MPLS_EOS_BIT (eos) { - /* *INDENT-OFF* */ fib_prefix_t pfx = { .fp_len = 21, .fp_proto = FIB_PROTOCOL_MPLS, @@ -253,7 +250,6 @@ sr_mpls_policy_del (mpls_label_t bsid) .fp_eos = eos, .fp_payload_proto = DPO_PROTO_MPLS, }; - /* 
*INDENT-ON* */ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths); } @@ -359,7 +355,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation, FOR_EACH_MPLS_EOS_BIT (eos) { - /* *INDENT-OFF* */ fib_prefix_t pfx = { .fp_len = 21, .fp_proto = FIB_PROTOCOL_MPLS, @@ -367,7 +362,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation, .fp_eos = eos, .fp_payload_proto = DPO_PROTO_MPLS, }; - /* *INDENT-ON* */ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths); } @@ -411,7 +405,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation, FOR_EACH_MPLS_EOS_BIT (eos) { - /* *INDENT-OFF* */ fib_prefix_t pfx = { .fp_len = 21, .fp_proto = FIB_PROTOCOL_MPLS, @@ -419,7 +412,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation, .fp_eos = eos, .fp_payload_proto = DPO_PROTO_MPLS, }; - /* *INDENT-ON* */ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths); } @@ -434,7 +426,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation, FOR_EACH_MPLS_EOS_BIT (eos) { - /* *INDENT-OFF* */ fib_prefix_t pfx = { .fp_len = 21, .fp_proto = FIB_PROTOCOL_MPLS, @@ -442,7 +433,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation, .fp_eos = eos, .fp_payload_proto = DPO_PROTO_MPLS, }; - /* *INDENT-ON* */ fib_table_entry_path_add2 (0, &pfx, @@ -568,7 +558,6 @@ sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(sr_mpls_policy_command, static)= { .path = "sr mpls policy", @@ -577,7 +566,6 @@ VLIB_CLI_COMMAND(sr_mpls_policy_command, static)= .long_help = "TBD.\n", .function = sr_mpls_policy_command_fn, }; -/* *INDENT-ON* */ /** * @brief CLI to display onscreen all the SR MPLS policies @@ -597,11 +585,9 @@ show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "SR MPLS policies:"); - /* *INDENT-OFF* */ pool_foreach (sr_policy, sm->sr_policies) { vec_add1(vec_policies, sr_policy); } - /* *INDENT-ON* */ vec_foreach_index (i, vec_policies) { @@ -647,14 +633,12 @@ 
show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(show_sr_mpls_policies_command, static)= { .path = "show sr mpls policies", .short_help = "show sr mpls policies", .function = show_sr_mpls_policies_command_fn, }; -/* *INDENT-ON* */ /** * @brief Update the Endpoint,Color tuple of an SR policy @@ -888,14 +872,12 @@ cli_sr_mpls_policy_ec_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(cli_sr_mpls_policy_ec_command, static)= { .path = "sr mpls policy te", .short_help = "sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234", .function = cli_sr_mpls_policy_ec_command_fn, }; -/* *INDENT-ON* */ /********************* SR MPLS Policy initialization ***********************/ /** diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c index b12e78d2755..e8920df542b 100644 --- a/src/vnet/srmpls/sr_mpls_steering.c +++ b/src/vnet/srmpls/sr_mpls_steering.c @@ -770,7 +770,6 @@ sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)= { .path = "sr mpls steer", @@ -785,7 +784,6 @@ VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)= "\t\tsr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500\n", .function = sr_mpls_steer_policy_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm, @@ -799,11 +797,9 @@ show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm, int i; vlib_cli_output (vm, "SR MPLS steering policies:"); - /* *INDENT-OFF* */ pool_foreach (steer_pl, sm->steer_policies) { vec_add1(steer_policies, steer_pl); } - /* *INDENT-ON* */ for (i = 0; i < vec_len (steer_policies); i++) { vlib_cli_output (vm, "=========================="); @@ -871,14 +867,12 @@ show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm, return 
0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND(show_sr_mpls_steering_policies_command, static)= { .path = "show sr mpls steering policies", .short_help = "show sr mpls steering policies", .function = show_sr_mpls_steering_policies_command_fn, }; -/* *INDENT-ON* */ clib_error_t * sr_mpls_steering_init (vlib_main_t * vm) @@ -894,9 +888,7 @@ sr_mpls_steering_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION(sr_mpls_steering_init); -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/srv6/sr.api b/src/vnet/srv6/sr.api index 6190a8c7ff5..4766ce3ba11 100644 --- a/src/vnet/srv6/sr.api +++ b/src/vnet/srv6/sr.api @@ -14,7 +14,7 @@ * limitations under the License. */ -option version = "2.0.0"; +option version = "2.1.0"; import "vnet/interface_types.api"; import "vnet/ip/ip_types.api"; @@ -109,6 +109,65 @@ autoreply define sr_policy_mod vl_api_srv6_sid_list_t sids; }; +enum sr_policy_type : u8 +{ + SR_API_POLICY_TYPE_DEFAULT = 0, + SR_API_POLICY_TYPE_SPRAY = 1, + SR_API_POLICY_TYPE_TEF = 2, +}; + +/** \brief IPv6 SR policy add + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy + @param weight is the weight of the sid list. optional. + @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation) + @param type is the SR policy param. (0.Default // 1.Spray // 2.Tef) + @param fib_table is the VRF where to install the FIB entry for the BSID + @param sids is a srv6_sid_list object + @param encap_src is a encaps IPv6 source addr. optional. 
+*/ +autoreply define sr_policy_add_v2 +{ + u32 client_index; + u32 context; + vl_api_ip6_address_t bsid_addr; + u32 weight; + bool is_encap; + vl_api_sr_policy_type_t type [default=0x0]; + u32 fib_table; + vl_api_srv6_sid_list_t sids; + vl_api_ip6_address_t encap_src; + option status="in_progress"; +}; + +/** \brief IPv6 SR policy modification + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy + @param sr_policy_index is the index of the SR policy + @param fib_table is the VRF where to install the FIB entry for the BSID + @param operation is the operation to perform (among the top ones) + @param sl_index is the index of the Segment List to modify/delete + @param weight is the weight of the sid list. optional. + @param sids is a srv6_sid_list object + @param encap_src is a encaps IPv6 source addr. optional. +*/ +autoreply define sr_policy_mod_v2 +{ + u32 client_index; + u32 context; + vl_api_ip6_address_t bsid_addr; + u32 sr_policy_index; + u32 fib_table; + vl_api_sr_policy_op_t operation; + u32 sl_index; + u32 weight; + vl_api_srv6_sid_list_t sids; + vl_api_ip6_address_t encap_src; + option status="in_progress"; +}; + /** \brief IPv6 SR policy deletion @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -195,12 +254,45 @@ define sr_localsids_details u32 xconnect_iface_or_vrf_table; }; + +/** \brief Dump the list of SR LocalSIDs along with packet statistics + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define sr_localsids_with_packet_stats_dump +{ + u32 client_index; + u32 context; + option status="in_progress"; +}; + +define sr_localsids_with_packet_stats_details +{ + u32 context; + vl_api_ip6_address_t addr; + bool end_psp; + vl_api_sr_behavior_t behavior; + u32 fib_table; + u32 vlan_index; + 
vl_api_address_t xconnect_nh_addr; + u32 xconnect_iface_or_vrf_table; + u64 good_traffic_bytes; + u64 good_traffic_pkt_count; + u64 bad_traffic_bytes; + u64 bad_traffic_pkt_count; + option status="in_progress"; +}; + + + /** \brief Dump the list of SR policies @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ define sr_policies_dump { + option deprecated; + u32 client_index; u32 context; }; @@ -217,6 +309,28 @@ define sr_policies_details vl_api_srv6_sid_list_t sid_lists[num_sid_lists]; }; +/** \brief Dump the list of SR policies v2 + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define sr_policies_v2_dump +{ + u32 client_index; + u32 context; +}; + +define sr_policies_v2_details +{ + u32 context; + vl_api_ip6_address_t bsid; + vl_api_ip6_address_t encap_src; + vl_api_sr_policy_type_t type; + bool is_encap; + u32 fib_table; + u8 num_sid_lists; + vl_api_srv6_sid_list_t sid_lists[num_sid_lists]; +}; + /** \brief Dump the list of SR policies along with actual segment list index on VPP @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h index 02cceade682..c2867eb7508 100644 --- a/src/vnet/srv6/sr.h +++ b/src/vnet/srv6/sr.h @@ -56,13 +56,11 @@ #define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1 -/* *INDENT-OFF* */ typedef struct { ip6_header_t ip; ip6_sr_header_t sr; } __attribute__ ((packed)) ip6srv_combo_header_t; -/* *INDENT-ON* */ /** * @brief SR Segment List (SID list) @@ -112,6 +110,8 @@ typedef struct u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */ + ip6_address_t encap_src; + u16 plugin; void *plugin_mem; } ip6_sr_policy_t; @@ -345,11 +345,12 @@ sr_policy_register_function (vlib_main_t * vm, u8 * fn_name, sr_p_plugin_callback_t * removal_fn); extern int sr_policy_add (ip6_address_t *bsid, ip6_address_t 
*segments, - u32 weight, u8 type, u32 fib_table, u8 is_encap, - u16 plugin, void *plugin_mem); -extern int sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, - u8 operation, ip6_address_t * segments, - u32 sl_index, u32 weight); + ip6_address_t *encap_src, u32 weight, u8 type, + u32 fib_table, u8 is_encap, u16 plugin, + void *plugin_mem); +extern int sr_policy_mod (ip6_address_t *bsid, u32 index, u32 fib_table, + u8 operation, ip6_address_t *segments, + ip6_address_t *encap_src, u32 sl_index, u32 weight); extern int sr_policy_del (ip6_address_t * bsid, u32 index); extern int diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c index 71924981841..a44c3098112 100644 --- a/src/vnet/srv6/sr_api.c +++ b/src/vnet/srv6/sr_api.c @@ -82,17 +82,16 @@ vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp) ip6_address_decode (mp->bsid_addr, &bsid_addr); -/* - * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, - * u32 weight, u8 behavior, u32 fib_table, u8 is_encap, - * u16 behavior, void *plugin_mem) - */ + /* + * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, + * ip6_address_t *encap_src, + * u32 weight, u8 behavior, u32 fib_table, u8 is_encap, + * u16 behavior, void *plugin_mem) + */ int rv = 0; - rv = sr_policy_add (&bsid_addr, - segments, - ntohl (mp->sids.weight), - mp->is_spray, ntohl (mp->fib_table), mp->is_encap, 0, - NULL); + rv = + sr_policy_add (&bsid_addr, segments, NULL, ntohl (mp->sids.weight), + mp->is_spray, ntohl (mp->fib_table), mp->is_encap, 0, NULL); vec_free (segments); REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY); @@ -115,18 +114,93 @@ vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp) ip6_address_decode (mp->bsid_addr, &bsid_addr); int rv = 0; -/* - * int - * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table, - * u8 operation, ip6_address_t *segments, u32 sl_index, - * u32 weight, u8 is_encap) - */ - rv = sr_policy_mod (&bsid_addr, - ntohl (mp->sr_policy_index), - ntohl (mp->fib_table), 
- mp->operation, - segments, ntohl (mp->sl_index), - ntohl (mp->sids.weight)); + /* + * int + * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table, + * u8 operation, ip6_address_t *segments, + * ip6_address_t *encap_src, u32 sl_index, + * u32 weight, u8 is_encap) + */ + rv = sr_policy_mod (&bsid_addr, ntohl (mp->sr_policy_index), + ntohl (mp->fib_table), mp->operation, segments, NULL, + ntohl (mp->sl_index), ntohl (mp->sids.weight)); + vec_free (segments); + + REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY); +} + +static void +vl_api_sr_policy_add_v2_t_handler (vl_api_sr_policy_add_v2_t *mp) +{ + vl_api_sr_policy_add_v2_reply_t *rmp; + ip6_address_t *segments = 0, *seg; + ip6_address_t bsid_addr; + ip6_address_t encap_src; + + int i; + for (i = 0; i < mp->sids.num_sids; i++) + { + vec_add2 (segments, seg, 1); + ip6_address_decode (mp->sids.sids[i], seg); + } + + ip6_address_decode (mp->bsid_addr, &bsid_addr); + ip6_address_decode (mp->encap_src, &encap_src); + + if (ip6_address_is_zero (&encap_src)) + { + encap_src = *sr_get_encaps_source (); + } + /* + * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, + * ip6_address_t *encap_src, + * u32 weight, u8 behavior, u32 fib_table, u8 is_encap, + * u16 behavior, void *plugin_mem) + */ + int rv = 0; + rv = + sr_policy_add (&bsid_addr, segments, &encap_src, ntohl (mp->sids.weight), + mp->type, ntohl (mp->fib_table), mp->is_encap, 0, NULL); + vec_free (segments); + + REPLY_MACRO (VL_API_SR_POLICY_ADD_V2_REPLY); +} + +static void +vl_api_sr_policy_mod_v2_t_handler (vl_api_sr_policy_mod_v2_t *mp) +{ + vl_api_sr_policy_mod_v2_reply_t *rmp; + ip6_address_t *segments = 0, *seg; + ip6_address_t bsid_addr; + ip6_address_t encap_src; + + int i; + for (i = 0; i < mp->sids.num_sids; i++) + { + vec_add2 (segments, seg, 1); + ip6_address_decode (mp->sids.sids[i], seg); + } + + ip6_address_decode (mp->bsid_addr, &bsid_addr); + ip6_address_decode (mp->encap_src, &encap_src); + + if (ip6_address_is_zero (&encap_src)) + { + 
encap_src = *sr_get_encaps_source (); + } + + int rv = 0; + /* + * int + * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table, + * u8 operation, ip6_address_t *segments, + * ip6_address_t *encap_src, u32 sl_index, + * u32 weight, u8 is_encap) + */ + rv = + sr_policy_mod (&bsid_addr, ntohl (mp->sr_policy_index), + ntohl (mp->fib_table), mp->operation, segments, &encap_src, + ntohl (mp->sl_index), ntohl (mp->sids.weight)); vec_free (segments); REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY); @@ -247,12 +321,77 @@ static void vl_api_sr_localsids_dump_t_handler if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (t, sm->localsids) { send_sr_localsid_details(t, reg, mp->context); } - /* *INDENT-ON* */ +} + +static void +send_sr_localsid_with_packet_stats_details (int local_sid_index, + ip6_sr_localsid_t *t, + vl_api_registration_t *reg, + u32 context) +{ + vl_api_sr_localsids_with_packet_stats_details_t *rmp; + vlib_counter_t good_traffic, bad_traffic; + ip6_sr_main_t *sm = &sr_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (REPLY_MSG_ID_BASE + VL_API_SR_LOCALSIDS_WITH_PACKET_STATS_DETAILS); + ip6_address_encode (&t->localsid, rmp->addr); + rmp->end_psp = t->end_psp; + rmp->behavior = t->behavior; + rmp->fib_table = htonl (t->fib_table); + rmp->vlan_index = htonl (t->vlan_index); + ip_address_encode (&t->next_hop, IP46_TYPE_ANY, &rmp->xconnect_nh_addr); + + if (t->behavior == SR_BEHAVIOR_T || t->behavior == SR_BEHAVIOR_DT6) + rmp->xconnect_iface_or_vrf_table = + htonl (fib_table_get_table_id (t->sw_if_index, FIB_PROTOCOL_IP6)); + else if (t->behavior == SR_BEHAVIOR_DT4) + rmp->xconnect_iface_or_vrf_table = + htonl (fib_table_get_table_id (t->sw_if_index, FIB_PROTOCOL_IP4)); + else + rmp->xconnect_iface_or_vrf_table = htonl (t->sw_if_index); + + rmp->context = context; + vlib_get_combined_counter (&(sm->sr_ls_valid_counters), local_sid_index, + &good_traffic); + vlib_get_combined_counter 
(&(sm->sr_ls_invalid_counters), local_sid_index, + &bad_traffic); + rmp->good_traffic_bytes = clib_host_to_net_u64 (good_traffic.bytes); + rmp->good_traffic_pkt_count = clib_host_to_net_u64 (good_traffic.packets); + rmp->bad_traffic_bytes = clib_host_to_net_u64 (bad_traffic.bytes); + rmp->bad_traffic_pkt_count = clib_host_to_net_u64 (bad_traffic.packets); + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_sr_localsids_with_packet_stats_dump_t_handler ( + vl_api_sr_localsids_with_packet_stats_dump_t *mp) +{ + vl_api_registration_t *reg; + ip6_sr_main_t *sm = &sr_main; + ip6_sr_localsid_t **localsid_list = 0; + ip6_sr_localsid_t *t; + int i; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (t, sm->localsids) + { + vec_add1 (localsid_list, t); + } + for (i = 0; i < vec_len (localsid_list); i++) + { + t = localsid_list[i]; + send_sr_localsid_with_packet_stats_details (i, t, reg, mp->context); + } } static void send_sr_policies_details @@ -312,15 +451,74 @@ vl_api_sr_policies_dump_t_handler (vl_api_sr_policies_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (t, sm->sr_policies) { send_sr_policies_details(t, reg, mp->context); } - /* *INDENT-ON* */ } +static void +send_sr_policies_v2_details (ip6_sr_policy_t *t, vl_api_registration_t *reg, + u32 context) +{ + vl_api_sr_policies_v2_details_t *rmp; + ip6_sr_main_t *sm = &sr_main; + u32 *sl_index, slidx = 0; + ip6_sr_sl_t *segment_list = 0; + ip6_address_t *segment; + vl_api_srv6_sid_list_t *api_sid_list; + + rmp = vl_msg_api_alloc (sizeof (*rmp) + vec_len (t->segments_lists) * + sizeof (vl_api_srv6_sid_list_t)); + clib_memset (rmp, 0, + (sizeof (*rmp) + vec_len (t->segments_lists) * + sizeof (vl_api_srv6_sid_list_t))); + + rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_POLICIES_V2_DETAILS); + ip6_address_encode (&t->bsid, rmp->bsid); + ip6_address_encode (&t->encap_src, rmp->encap_src); + rmp->is_encap = t->is_encap; + rmp->type = 
t->type; + rmp->fib_table = htonl (t->fib_table); + rmp->num_sid_lists = vec_len (t->segments_lists); + + /* Fill in all the segments lists */ + vec_foreach (sl_index, t->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + + api_sid_list = &rmp->sid_lists[sl_index - t->segments_lists]; + + api_sid_list->num_sids = vec_len (segment_list->segments); + api_sid_list->weight = htonl (segment_list->weight); + slidx = 0; + vec_foreach (segment, segment_list->segments) + { + ip6_address_encode (segment, api_sid_list->sids[slidx++]); + } + } + + rmp->context = context; + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_sr_policies_v2_dump_t_handler (vl_api_sr_policies_v2_dump_t *mp) +{ + vl_api_registration_t *reg; + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *t; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (t, sm->sr_policies) + { + send_sr_policies_v2_details (t, reg, mp->context); + } +} static void send_sr_policies_details_with_sl_index (ip6_sr_policy_t * t, vl_api_registration_t * reg, u32 context) @@ -381,12 +579,10 @@ static void if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (t, sm->sr_policies) { send_sr_policies_details_with_sl_index(t, reg, mp->context); } - /* *INDENT-ON* */ } static void send_sr_steering_pol_details @@ -428,12 +624,10 @@ static void vl_api_sr_steering_pol_dump_t_handler if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (t, sm->steer_policies) { send_sr_steering_pol_details(t, reg, mp->context); } - /* *INDENT-ON* */ } #include <vnet/srv6/sr.api.c> diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c index a055c923be9..12349bb95e8 100644 --- a/src/vnet/srv6/sr_localsid.c +++ b/src/vnet/srv6/sr_localsid.c @@ -396,12 +396,10 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, sr_localsid_fn_registration_t **plugin_it = 0; /* Create a vector out of the plugin pool as recommended */ - /* 
*INDENT-OFF* */ pool_foreach (plugin, sm->plugin_functions) { vec_add1 (vec_plugins, plugin); } - /* *INDENT-ON* */ vec_foreach (plugin_it, vec_plugins) { @@ -506,7 +504,6 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (sr_localsid_command, static) = { .path = "sr localsid", .short_help = "sr localsid (del) address XX:XX::YY:YY" @@ -534,7 +531,6 @@ VLIB_CLI_COMMAND (sr_localsid_command, static) = { "\t\tParameters: '<ip4_fib_table>'\n", .function = sr_cli_localsid_command_fn, }; -/* *INDENT-ON* */ /** * @brief CLI function to 'show' all SR LocalSIDs on console. @@ -551,9 +547,7 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "SRv6 - My LocalSID Table:"); vlib_cli_output (vm, "========================="); - /* *INDENT-OFF* */ pool_foreach (ls, sm->localsids) { vec_add1 (localsid_list, ls); } - /* *INDENT-ON* */ for (i = 0; i < vec_len (localsid_list); i++) { ls = localsid_list[i]; @@ -676,13 +670,11 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_sr_localsid_command, static) = { .path = "show sr localsids", .short_help = "show sr localsids", .function = show_sr_localsid_command_fn, }; -/* *INDENT-ON* */ /** * @brief Function to 'clear' ALL SR localsid counters @@ -700,13 +692,11 @@ clear_sr_localsid_counters_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = { .path = "clear sr localsid-counters", .short_help = "clear sr localsid-counters", .function = clear_sr_localsid_counters_command_fn, }; -/* *INDENT-ON* */ /************************ SR LocalSID graphs node ****************************/ /** @@ -1438,7 +1428,6 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_localsid_d_node) = { .function = 
sr_localsid_d_fn, .name = "sr-localsid-d", @@ -1454,7 +1443,6 @@ VLIB_REGISTER_NODE (sr_localsid_d_node) = { #undef _ }, }; -/* *INDENT-ON* */ /** * @brief SR LocalSID graph node. Supports all default SR Endpoint without decaps @@ -1748,7 +1736,6 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_localsid_node) = { .function = sr_localsid_fn, .name = "sr-localsid", @@ -1764,7 +1751,6 @@ VLIB_REGISTER_NODE (sr_localsid_node) = { #undef _ }, }; -/* *INDENT-ON* */ /** * @brief SR LocalSID uN graph node. Supports all default SR Endpoint without decaps @@ -2058,7 +2044,6 @@ sr_localsid_un_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_localsid_un_node) = { .function = sr_localsid_un_fn, .name = "sr-localsid-un", @@ -2074,7 +2059,6 @@ VLIB_REGISTER_NODE (sr_localsid_un_node) = { #undef _ }, }; -/* *INDENT-ON* */ static uword sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2270,7 +2254,6 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_localsid_un_perf_node) = { .function = sr_localsid_un_perf_fn, .name = "sr-localsid-un-perf", @@ -2286,7 +2269,6 @@ VLIB_REGISTER_NODE (sr_localsid_un_perf_node) = { #undef _ }, }; -/* *INDENT-ON* */ static u8 * format_sr_dpo (u8 * s, va_list * args) @@ -2406,10 +2388,8 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "SR LocalSIDs behaviors:\n-----------------------\n\n"); - /* *INDENT-OFF* */ pool_foreach (plugin, sm->plugin_functions) { vec_add1 (plugins_vec, plugin); } - /* *INDENT-ON* */ /* Print static behaviors */ vlib_cli_output (vm, "Default behaviors:\n" @@ -2439,13 +2419,11 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND 
(show_sr_localsid_behaviors_command, static) = { .path = "show sr localsids behaviors", .short_help = "show sr localsids behaviors", .function = show_sr_localsid_behaviors_command_fn, }; -/* *INDENT-ON* */ /** * @brief SR LocalSID initialization diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c index 12e7f084c8c..0aa88cc273e 100644 --- a/src/vnet/srv6/sr_policy_rewrite.c +++ b/src/vnet/srv6/sr_policy_rewrite.c @@ -49,6 +49,7 @@ #include <vnet/dpo/replicate_dpo.h> #include <vnet/srv6/sr_pt.h> +#include <vppinfra/byte_order.h> #include <vppinfra/error.h> #include <vppinfra/elog.h> @@ -141,13 +142,11 @@ set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input, return clib_error_return (0, "No address specified"); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_sr_src_command, static) = { .path = "set sr encaps source", .short_help = "set sr encaps source addr <ip6_addr>", .function = set_sr_src_command_fn, }; -/* *INDENT-ON* */ /******************** SR rewrite set encaps IPv6 hop-limit ********************/ @@ -179,24 +178,23 @@ set_sr_hop_limit_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_sr_hop_limit_command, static) = { .path = "set sr encaps hop-limit", .short_help = "set sr encaps hop-limit <value>", .function = set_sr_hop_limit_command_fn, }; -/* *INDENT-ON* */ /*********************** SR rewrite string computation ************************/ /** * @brief SR rewrite string computation for IPv6 encapsulation (inline) * * @param sl is a vector of IPv6 addresses composing the Segment List + * @param src_v6addr is a encaps IPv6 source addr * * @return precomputed rewrite string for encapsulation */ static inline u8 * -compute_rewrite_encaps (ip6_address_t *sl, u8 type) +compute_rewrite_encaps (ip6_address_t *sl, ip6_address_t *src_v6addr, u8 type) { ip6_header_t *iph; ip6_sr_header_t *srh; @@ -224,8 +222,8 @@ compute_rewrite_encaps (ip6_address_t *sl, u8 type) iph = 
(ip6_header_t *) rs; iph->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0 | ((6 & 0xF) << 28)); - iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0]; - iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1]; + iph->src_address.as_u64[0] = src_v6addr->as_u64[0]; + iph->src_address.as_u64[1] = src_v6addr->as_u64[1]; iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH; iph->protocol = IP_PROTOCOL_IPV6; iph->hop_limit = sr_pr_encaps_hop_limit; @@ -369,18 +367,20 @@ compute_rewrite_bsid (ip6_address_t * sl) * * @param sr_policy is the SR policy where the SL will be added * @param sl is a vector of IPv6 addresses composing the Segment List + * @param encap_src is a encaps IPv6 source addr. optional. * @param weight is the weight of the SegmentList (for load-balancing purposes) * @param is_encap represents the mode (SRH insertion vs Encapsulation) * * @return pointer to the just created segment list */ static inline ip6_sr_sl_t * -create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight, - u8 is_encap) +create_sl (ip6_sr_policy_t *sr_policy, ip6_address_t *sl, + ip6_address_t *encap_src, u32 weight, u8 is_encap) { ip6_sr_main_t *sm = &sr_main; ip6_sr_sl_t *segment_list; sr_policy_fn_registration_t *plugin = 0; + ip6_address_t encap_srcv6 = sr_pr_encaps_src; pool_get (sm->sid_lists, segment_list); clib_memset (segment_list, 0, sizeof (*segment_list)); @@ -399,8 +399,14 @@ create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight, if (is_encap) { - segment_list->rewrite = compute_rewrite_encaps (sl, sr_policy->type); + if (encap_src) + { + clib_memcpy_fast (&encap_srcv6, encap_src, sizeof (ip6_address_t)); + } + segment_list->rewrite = + compute_rewrite_encaps (sl, &encap_srcv6, sr_policy->type); segment_list->rewrite_bsid = segment_list->rewrite; + sr_policy->encap_src = encap_srcv6; } else { @@ -659,17 +665,19 @@ update_replicate (ip6_sr_policy_t * sr_policy) * * @param bsid is the bindingSID of the SR Policy 
* @param segments is a vector of IPv6 address composing the segment list + * @param encap_src is a encaps IPv6 source addr. optional. * @param weight is the weight of the sid list. optional. * @param behavior is the behavior of the SR policy. (default//spray) * @param fib_table is the VRF where to install the FIB entry for the BSID - * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion + * @param is_encap (bool) whether SR policy should behave as Encap/SRH + * Insertion * * @return 0 if correct, else error */ int -sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, u32 weight, - u8 type, u32 fib_table, u8 is_encap, u16 plugin, - void *ls_plugin_mem) +sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, + ip6_address_t *encap_src, u32 weight, u8 type, u32 fib_table, + u8 is_encap, u16 plugin, void *ls_plugin_mem) { ip6_sr_main_t *sm = &sr_main; ip6_sr_policy_t *sr_policy = 0; @@ -725,7 +733,7 @@ sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, u32 weight, NULL); /* Create a segment list and add the index to the SR policy */ - create_sl (sr_policy, segments, weight, is_encap); + create_sl (sr_policy, segments, encap_src, weight, is_encap); /* If FIB doesnt exist, create them */ if (sm->fib_table_ip6 == (u32) ~ 0) @@ -855,6 +863,7 @@ sr_policy_del (ip6_address_t * bsid, u32 index) * @param fib_table is the VRF where to install the FIB entry for the BSID * @param operation is the operation to perform (among the top ones) * @param segments is a vector of IPv6 address composing the segment list + * @param encap_src is a encaps IPv6 source addr. optional. * @param sl_index is the index of the Segment List to modify/delete * @param weight is the weight of the sid list. optional. * @param is_encap Mode. Encapsulation or SRH insertion. 
@@ -862,8 +871,8 @@ sr_policy_del (ip6_address_t * bsid, u32 index) * @return 0 if correct, else error */ int -sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, - u8 operation, ip6_address_t * segments, u32 sl_index, +sr_policy_mod (ip6_address_t *bsid, u32 index, u32 fib_table, u8 operation, + ip6_address_t *segments, ip6_address_t *encap_src, u32 sl_index, u32 weight) { ip6_sr_main_t *sm = &sr_main; @@ -888,8 +897,8 @@ sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, if (operation == 1) /* Add SR List to an existing SR policy */ { /* Create the new SL */ - segment_list = - create_sl (sr_policy, segments, weight, sr_policy->is_encap); + segment_list = create_sl (sr_policy, segments, encap_src, weight, + sr_policy->is_encap); /* Create a new LB DPO */ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) @@ -962,7 +971,7 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, int rv = -1; char is_del = 0, is_add = 0, is_mod = 0; char policy_set = 0; - ip6_address_t bsid, next_address; + ip6_address_t bsid, next_address, src_v6addr; u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0; u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0; ip6_address_t *segments = 0, *this_seg; @@ -971,6 +980,7 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, u8 type = SR_POLICY_TYPE_DEFAULT; u16 behavior = 0; void *ls_plugin_mem = 0; + ip6_address_t *encap_src = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -994,6 +1004,10 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, clib_memcpy_fast (this_seg->as_u8, next_address.as_u8, sizeof (*this_seg)); } + else if (unformat (input, "v6src %U", unformat_ip6_address, &src_v6addr)) + { + encap_src = &src_v6addr; + } else if (unformat (input, "add sl")) operation = 1; else if (unformat (input, "del sl index %d", &sl_index)) @@ -1015,12 +1029,10 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, sr_policy_fn_registration_t *plugin = 0, 
**vec_plugins = 0; sr_policy_fn_registration_t **plugin_it = 0; - /* *INDENT-OFF* */ pool_foreach (plugin, sm->policy_plugin_functions) { vec_add1 (vec_plugins, plugin); } - /* *INDENT-ON* */ vec_foreach (plugin_it, vec_plugins) { @@ -1058,8 +1070,8 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, if (vec_len (segments) == 0) return clib_error_return (0, "No Segment List specified"); - rv = sr_policy_add (&bsid, segments, weight, type, fib_table, is_encap, - behavior, ls_plugin_mem); + rv = sr_policy_add (&bsid, segments, encap_src, weight, type, fib_table, + is_encap, behavior, ls_plugin_mem); vec_free (segments); } @@ -1077,9 +1089,9 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, if (operation == 3 && weight == (u32) ~ 0) return clib_error_return (0, "No new weight for the SL specified"); - rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid), + rv = sr_policy_mod ((sr_policy_index != (u32) ~0 ? NULL : &bsid), sr_policy_index, fib_table, operation, segments, - sl_index, weight); + encap_src, sl_index, weight); if (segments) vec_free (segments); @@ -1115,7 +1127,6 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (sr_policy_command, static) = { .path = "sr policy", .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] " @@ -1135,7 +1146,6 @@ VLIB_CLI_COMMAND (sr_policy_command, static) = { "SID lists.\n", .function = sr_policy_command_fn, }; -/* *INDENT-ON* */ /** * @brief CLI to display onscreen all the SR policies @@ -1155,10 +1165,8 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "SR policies:"); - /* *INDENT-OFF* */ pool_foreach (sr_policy, sm->sr_policies) {vec_add1 (vec_policies, sr_policy); } - /* *INDENT-ON* */ vec_foreach_index (i, vec_policies) { @@ -1169,6 +1177,11 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "\tBehavior: 
%s", (sr_policy->is_encap ? "Encapsulation" : "SRH insertion")); + if (sr_policy->is_encap) + { + vlib_cli_output (vm, "\tEncapSrcIP: %U", format_ip6_address, + &sr_policy->encap_src); + } switch (sr_policy->type) { case SR_POLICY_TYPE_SPRAY: @@ -1205,13 +1218,11 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_sr_policies_command, static) = { .path = "show sr policies", .short_help = "show sr policies", .function = show_sr_policies_command_fn, }; -/* *INDENT-ON* */ /** * @brief CLI to display onscreen the SR encaps source addr @@ -1226,13 +1237,11 @@ show_sr_encaps_source_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_sr_encaps_source_command, static) = { .path = "show sr encaps source addr", .short_help = "show sr encaps source addr", .function = show_sr_encaps_source_command_fn, }; -/* *INDENT-ON* */ /** * @brief CLI to display onscreen the hop-limit value used for SRv6 encapsulation @@ -1247,13 +1256,11 @@ show_sr_encaps_hop_limit_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_sr_encaps_hop_limit_command, static) = { .path = "show sr encaps hop-limit", .short_help = "show sr encaps hop-limit", .function = show_sr_encaps_hop_limit_command_fn, }; -/* *INDENT-ON* */ /*************************** SR rewrite graph node ****************************/ /** @@ -1293,14 +1300,14 @@ srv6_tef_behavior (vlib_node_runtime_t *node, vlib_buffer_t *b0, sizeof (ip6_address_t) * (srh->last_entry + 1)); unix_time_now_nsec_fraction (&ts.sec, &ts.nsec); - srh_pt_tlv->t64.sec = htobe32 (ts.sec); - srh_pt_tlv->t64.nsec = htobe32 (ts.nsec); + srh_pt_tlv->t64.sec = clib_host_to_net_u32 (ts.sec); + srh_pt_tlv->t64.nsec = clib_host_to_net_u32 (ts.nsec); ls = sr_pt_find_iface (vnet_buffer (b0)->sw_if_index[VLIB_RX]); if (ls) { id_ld = ls->id << 4; id_ld |= ls->ingress_load; - srh_pt_tlv->id_ld = htobe16 
(id_ld); + srh_pt_tlv->id_ld = clib_host_to_net_u16 (id_ld); } } @@ -1571,7 +1578,6 @@ sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = { .function = sr_policy_rewrite_encaps, .name = "sr-pl-rewrite-encaps", @@ -1587,7 +1593,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = { #undef _ }, }; -/* *INDENT-ON* */ /** * @brief IPv4 encapsulation processing as per RFC2473 @@ -1864,7 +1869,6 @@ sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = { .function = sr_policy_rewrite_encaps_v4, .name = "sr-pl-rewrite-encaps-v4", @@ -1880,7 +1884,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = { #undef _ }, }; -/* *INDENT-ON* */ always_inline u32 ip_flow_hash (void *data) @@ -2306,7 +2309,6 @@ sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = { .function = sr_policy_rewrite_encaps_l2, .name = "sr-pl-rewrite-encaps-l2", @@ -2322,7 +2324,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = { #undef _ }, }; -/* *INDENT-ON* */ /** * @brief Graph node for applying a SR policy into a packet. SRH insertion. @@ -2728,7 +2729,6 @@ sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = { .function = sr_policy_rewrite_insert, .name = "sr-pl-rewrite-insert", @@ -2744,7 +2744,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = { #undef _ }, }; -/* *INDENT-ON* */ /** * @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion. 
@@ -3139,7 +3138,6 @@ sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = { .function = sr_policy_rewrite_b_insert, .name = "sr-pl-rewrite-b-insert", @@ -3155,7 +3153,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = { #undef _ }, }; -/* *INDENT-ON* */ /** * @brief Function BSID encapsulation @@ -3450,7 +3447,6 @@ sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = { .function = sr_policy_rewrite_b_encaps, .name = "sr-pl-rewrite-b-encaps", @@ -3466,7 +3462,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = { #undef _ }, }; -/* *INDENT-ON* */ /*************************** SR Policy plugins ******************************/ /** @@ -3534,10 +3529,8 @@ show_sr_policy_behaviors_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "SR Policy behaviors:\n-----------------------\n\n"); - /* *INDENT-OFF* */ pool_foreach (plugin, sm->policy_plugin_functions) { vec_add1 (plugins_vec, plugin); } - /* *INDENT-ON* */ vlib_cli_output (vm, "Plugin behaviors:\n"); for (i = 0; i < vec_len (plugins_vec); i++) @@ -3550,13 +3543,11 @@ show_sr_policy_behaviors_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_sr_policy_behaviors_command, static) = { .path = "show sr policy behaviors", .short_help = "show sr policy behaviors", .function = show_sr_policy_behaviors_command_fn, }; -/* *INDENT-ON* */ /*************************** SR Segment Lists DPOs ****************************/ static u8 * diff --git a/src/vnet/srv6/sr_pt.api b/src/vnet/srv6/sr_pt.api new file mode 100644 index 00000000000..e86359b421f --- /dev/null +++ b/src/vnet/srv6/sr_pt.api @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. 
+ */ + +option version = "1.0.0"; + +import "vnet/interface_types.api"; + +/** \brief SR PT iface dump request + @param client_index - opaque cookie to identifty the sender + @param context - sender context, to match reply w/ request +*/ +define sr_pt_iface_dump +{ + u32 client_index; + u32 context; +}; + +define sr_pt_iface_details +{ + u32 context; + vl_api_interface_index_t sw_if_index; + u16 id; + u8 ingress_load; + u8 egress_load; + u8 tts_template; +}; + +/** \brief SR PT iface add request + @param client_index - opaque cookie to identifty the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface to add to SR PT + @param id - SR PT interface id + @param ingress_load - incoming interface load + @param egress_load - outgoing interface load + @param tts_template - truncated timestamp template to use +*/ +autoreply define sr_pt_iface_add +{ + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; + u16 id; + u8 ingress_load; + u8 egress_load; + u8 tts_template; +}; + +/** \brief SR PT iface del request + @param client_index - opaque cookie to identifty the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface to delete from SR PT +*/ +autoreply define sr_pt_iface_del +{ + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; +};
\ No newline at end of file diff --git a/src/vnet/srv6/sr_pt.c b/src/vnet/srv6/sr_pt.c index 5d907eaf639..6299faa84ab 100644 --- a/src/vnet/srv6/sr_pt.c +++ b/src/vnet/srv6/sr_pt.c @@ -69,6 +69,8 @@ sr_pt_add_iface (u32 iface, u16 id, u8 ingress_load, u8 egress_load, if (tts_template > SR_PT_TTS_TEMPLATE_MAX) return SR_PT_ERR_TTS_TEMPLATE_INVALID; + vnet_feature_enable_disable ("ip6-output", "pt", iface, 1, 0, 0); + /* Create a new sr_pt_iface */ pool_get_zero (sr_pt->sr_pt_iface, ls); ls->iface = iface; @@ -101,6 +103,7 @@ sr_pt_del_iface (u32 iface) { /* Retrieve sr_pt_iface */ ls = pool_elt_at_index (sr_pt->sr_pt_iface, p[0]); + vnet_feature_enable_disable ("ip6-output", "pt", iface, 0, 0, 0); /* Delete sr_pt_iface */ pool_put (sr_pt->sr_pt_iface, ls); mhash_unset (&sr_pt->sr_pt_iface_index_hash, &iface, NULL); diff --git a/src/vnet/srv6/sr_pt.h b/src/vnet/srv6/sr_pt.h index 87fdb68a36e..53001e10ac7 100644 --- a/src/vnet/srv6/sr_pt.h +++ b/src/vnet/srv6/sr_pt.h @@ -11,6 +11,8 @@ #ifndef included_vnet_sr_pt_h #define included_vnet_sr_pt_h +#define IP6_HBH_PT_TYPE 50 + /*SR PT error codes*/ #define SR_PT_ERR_NOENT -1 /* No such entry*/ #define SR_PT_ERR_EXIST -2 /* Entry exists */ @@ -37,6 +39,11 @@ #define SR_PT_TTS_SHIFT_TEMPLATE_2 16 #define SR_PT_TTS_SHIFT_TEMPLATE_3 20 +/*PT node behaviors*/ +#define PT_BEHAVIOR_SRC 0 +#define PT_BEHAVIOR_MID 1 +#define PT_BEHAVIOR_SNK 2 + typedef struct { u32 iface; /**< Interface */ @@ -46,6 +53,17 @@ typedef struct u8 tts_template; /**< Interface TTS Template */ } sr_pt_iface_t; +typedef struct +{ + u16 oif_oil; + u8 tts; +} __clib_packed sr_pt_cmd_t; + +typedef struct +{ + sr_pt_cmd_t cmd_stack[12]; +} __clib_packed ip6_hop_by_hop_option_pt_t; + /** * @brief SR Path Tracing main datastructure */ @@ -57,9 +75,12 @@ typedef struct /* Hash table for sr_pt_iface parameters */ mhash_t sr_pt_iface_index_hash; + /* convenience */ + u16 msg_id_base; } sr_pt_main_t; extern sr_pt_main_t sr_pt_main; +extern vlib_node_registration_t 
sr_pt_node; extern int sr_pt_add_iface (u32 iface, u16 id, u8 ingress_load, u8 egress_load, u8 tts_template); extern int sr_pt_del_iface (u32 iface); diff --git a/src/vnet/srv6/sr_pt_api.c b/src/vnet/srv6/sr_pt_api.c new file mode 100644 index 00000000000..b0b67a210fb --- /dev/null +++ b/src/vnet/srv6/sr_pt_api.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> +#include <vnet/srv6/sr_pt.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> + +#include <vnet/srv6/sr_pt.api_enum.h> +#include <vnet/srv6/sr_pt.api_types.h> + +#define REPLY_MSG_ID_BASE sr_pt_main.msg_id_base +#include <vlibapi/api_helper_macros.h> + +static void +send_sr_pt_iface_details (sr_pt_iface_t *t, vl_api_registration_t *reg, + u32 context) +{ + vl_api_sr_pt_iface_details_t *rmp; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_PT_IFACE_DETAILS); + + rmp->sw_if_index = ntohl (t->iface); + rmp->id = ntohs (t->id); + rmp->ingress_load = t->ingress_load; + rmp->egress_load = t->egress_load; + rmp->tts_template = t->tts_template; + + rmp->context = context; + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_sr_pt_iface_dump_t_handler (vl_api_sr_pt_iface_dump_t *mp) +{ + vl_api_registration_t *reg; + sr_pt_main_t *pt = &sr_pt_main; + sr_pt_iface_t *t; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (t, pt->sr_pt_iface) + { + send_sr_pt_iface_details (t, reg, mp->context); + } +} + +static void +vl_api_sr_pt_iface_add_t_handler (vl_api_sr_pt_iface_add_t *mp) +{ + vl_api_sr_pt_iface_add_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = sr_pt_add_iface (ntohl (mp->sw_if_index), ntohs (mp->id), + mp->ingress_load, mp->egress_load, mp->tts_template); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO 
(VL_API_SR_PT_IFACE_ADD_REPLY); +} + +static void +vl_api_sr_pt_iface_del_t_handler (vl_api_sr_pt_iface_del_t *mp) +{ + vl_api_sr_pt_iface_del_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = sr_pt_del_iface (ntohl (mp->sw_if_index)); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_SR_PT_IFACE_DEL_REPLY); +} + +#include <vnet/srv6/sr_pt.api.c> +static clib_error_t * +sr_pt_api_hookup (vlib_main_t *vm) +{ + /* + * Set up the (msg_name, crc, message-id) table + */ + REPLY_MSG_ID_BASE = setup_message_id_table (); + + return 0; +} + +VLIB_API_INIT_FUNCTION (sr_pt_api_hookup);
\ No newline at end of file diff --git a/src/vnet/srv6/sr_pt_node.c b/src/vnet/srv6/sr_pt_node.c new file mode 100644 index 00000000000..fa8b1f69b57 --- /dev/null +++ b/src/vnet/srv6/sr_pt_node.c @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + */ + +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/fib/fib_table.h> +#include <vnet/srv6/sr.h> +#include <vnet/srv6/sr_pt.h> + +/** + * @brief PT node trace + */ +typedef struct +{ + u32 iface; + u16 id; + u8 load; + timestamp_64_t t64; + u8 tts_template; + u8 tts; + u8 behavior; +} pt_trace_t; + +static u8 * +format_pt_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + pt_trace_t *t = va_arg (*args, pt_trace_t *); + switch (t->behavior) + { + case PT_BEHAVIOR_MID: + s = format ( + s, + "Behavior Midpoint, outgoing interface %U, outgoing interface id %u, " + "outgoing interface load %u, t64_sec %u, t64_nsec %u, tts_template " + "%u, tts %u", + format_vnet_sw_if_index_name, vnet_get_main (), t->iface, t->id, + t->load, clib_host_to_net_u32 (t->t64.sec), + clib_host_to_net_u32 (t->t64.nsec), t->tts_template, t->tts); + break; + default: + break; + } + return s; +} + +static_always_inline void +pt_midpoint_processing (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_buffer_t *b0, ip6_header_t *ip0, + sr_pt_iface_t *ls, timestamp_64_t t64) +{ + ip6_hop_by_hop_header_t *hbh; + ip6_hop_by_hop_option_t *hbh_opt; + ip6_hop_by_hop_option_pt_t *hbh_opt_pt; + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + { + hbh = (void *) (ip0 + 1); + hbh_opt = (void *) (hbh + 1); + if (hbh_opt->type == IP6_HBH_PT_TYPE) + { + hbh_opt_pt = (void *) (hbh_opt + 1); + clib_memcpy_fast (&hbh_opt_pt->cmd_stack[1], + &hbh_opt_pt->cmd_stack[0], 33); + hbh_opt_pt->cmd_stack[0].oif_oil = + 
clib_host_to_net_u16 (ls->id << 4); + hbh_opt_pt->cmd_stack[0].oif_oil |= ls->egress_load; + switch (ls->tts_template) + { + case SR_PT_TTS_TEMPLATE_0: + hbh_opt_pt->cmd_stack[0].tts = + t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_0; + break; + case SR_PT_TTS_TEMPLATE_1: + hbh_opt_pt->cmd_stack[0].tts = + t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_1; + break; + case SR_PT_TTS_TEMPLATE_2: + hbh_opt_pt->cmd_stack[0].tts = + t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_2; + break; + case SR_PT_TTS_TEMPLATE_3: + hbh_opt_pt->cmd_stack[0].tts = + t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_0; + break; + default: + break; + } + } + } + return; +} + +VLIB_NODE_FN (sr_pt_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + u8 pt_behavior = ~(u8) 0; + sr_pt_iface_t *ls = 0; + while (n_left_from > 0) + { + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + // Getting the timestamp (one for each batch of packets) + timestamp_64_t t64 = {}; + unix_time_now_nsec_fraction (&t64.sec, &t64.nsec); + + // Single loop for potentially the last three packets + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + u32 iface; + vlib_buffer_t *b0; + u32 next0 = 0; + ethernet_header_t *en0; + ip6_header_t *ip0 = 0; + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + iface = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + ls = sr_pt_find_iface (iface); + if (ls) + { + en0 = vlib_buffer_get_current (b0); + ip0 = (void *) (en0 + 1); + pt_midpoint_processing (vm, node, b0, ip0, ls, t64); + pt_behavior = PT_BEHAVIOR_MID; + } + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + pt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->iface = iface; + tr->id = 
ls->id; + tr->load = ls->egress_load; + tr->tts_template = ls->tts_template; + tr->t64.sec = t64.sec; + tr->t64.nsec = t64.nsec; + tr->tts = t64.nsec >> 20; + tr->behavior = pt_behavior; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (sr_pt_node) = { + .name = "pt", + .vector_size = sizeof (u32), + .format_trace = format_pt_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = 1, + .next_nodes = { [0] = "interface-output" }, +}; + +VNET_FEATURE_INIT (sr_pt_node, static) = { + .arc_name = "ip6-output", + .node_name = "pt", +};
\ No newline at end of file diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c index 6e5c5e0e9f0..94c3d67a27a 100644 --- a/src/vnet/srv6/sr_steering.c +++ b/src/vnet/srv6/sr_steering.c @@ -456,7 +456,6 @@ sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (sr_steer_policy_command, static) = { .path = "sr steer", .short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sf_if>] " @@ -471,7 +470,6 @@ VLIB_CLI_COMMAND (sr_steer_policy_command, static) = { "\t\tsr steer del l3 2001::/64 via sr_policy index 5\n", .function = sr_steer_policy_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_sr_steering_policies_command_fn (vlib_main_t * vm, @@ -488,9 +486,7 @@ show_sr_steering_policies_command_fn (vlib_main_t * vm, int i; vlib_cli_output (vm, "SR steering policies:"); - /* *INDENT-OFF* */ pool_foreach (steer_pl, sm->steer_policies) {vec_add1(steer_policies, steer_pl);} - /* *INDENT-ON* */ vlib_cli_output (vm, "Traffic\t\tSR policy BSID"); for (i = 0; i < vec_len (steer_policies); i++) { @@ -523,13 +519,11 @@ show_sr_steering_policies_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = { .path = "show sr steering-policies", .short_help = "show sr steering-policies", .function = show_sr_steering_policies_command_fn, }; -/* *INDENT-ON* */ clib_error_t * sr_steering_init (vlib_main_t * vm) @@ -547,18 +541,14 @@ sr_steering_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (sr_steering_init); -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) = { .arc_name = "device-input", .node_name = "sr-pl-rewrite-encaps-l2", .runs_before = VNET_FEATURES ("ethernet-input"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/srv6/sr_test.c b/src/vnet/srv6/sr_test.c index 85f64e1e230..be898599e96 100644 --- 
a/src/vnet/srv6/sr_test.c +++ b/src/vnet/srv6/sr_test.c @@ -80,6 +80,18 @@ api_sr_policy_add (vat_main_t *vam) } static int +api_sr_policy_mod_v2 (vat_main_t *vam) +{ + return -1; +} + +static int +api_sr_policy_add_v2 (vat_main_t *vam) +{ + return -1; +} + +static int api_sr_localsids_dump (vat_main_t *vam) { return -1; @@ -92,6 +104,12 @@ api_sr_policies_dump (vat_main_t *vam) } static int +api_sr_policies_v2_dump (vat_main_t *vam) +{ + return -1; +} + +static int api_sr_policies_with_sl_index_dump (vat_main_t *vam) { return -1; @@ -109,6 +127,11 @@ vl_api_sr_policies_details_t_handler (vl_api_sr_policies_details_t *mp) } static void +vl_api_sr_policies_v2_details_t_handler (vl_api_sr_policies_v2_details_t *mp) +{ +} + +static void vl_api_sr_localsids_details_t_handler (vl_api_sr_localsids_details_t *mp) { } diff --git a/src/vnet/syslog/syslog.c b/src/vnet/syslog/syslog.c index 8f3313950e8..caa55830eb3 100644 --- a/src/vnet/syslog/syslog.c +++ b/src/vnet/syslog/syslog.c @@ -506,7 +506,6 @@ show_syslog_filter_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ /*? * Set syslog sender configuration. 
* @@ -599,7 +598,6 @@ VLIB_CLI_COMMAND (show_syslog_filter_command, static) = { .short_help = "show syslog filter", .function = show_syslog_filter_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * syslog_init (vlib_main_t * vm) diff --git a/src/vnet/syslog/syslog_api.c b/src/vnet/syslog/syslog_api.c index 21e79c6e2bd..195a6e52eef 100644 --- a/src/vnet/syslog/syslog_api.c +++ b/src/vnet/syslog/syslog_api.c @@ -128,7 +128,6 @@ vl_api_syslog_get_sender_t_handler (vl_api_syslog_get_sender_t * mp) syslog_main_t *sm = &syslog_main; u32 vrf_id; - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_SYSLOG_GET_SENDER_REPLY, ({ clib_memcpy (&rmp->collector_address, &(sm->collector), @@ -143,7 +142,6 @@ vl_api_syslog_get_sender_t_handler (vl_api_syslog_get_sender_t * mp) rmp->vrf_id = vrf_id; rmp->max_msg_size = htonl (sm->max_msg_size); })) - /* *INDENT-ON* */ } static void @@ -171,12 +169,10 @@ vl_api_syslog_get_filter_t_handler (vl_api_syslog_get_filter_t * mp) vl_api_syslog_get_filter_reply_t *rmp; syslog_main_t *sm = &syslog_main; - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_SYSLOG_GET_FILTER_REPLY, ({ rv = syslog_severity_encode (sm->severity_filter, &rmp->severity); })) - /* *INDENT-ON* */ } #include <vnet/syslog/syslog.api.c> diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 09913fa1242..efc72a227e8 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -25,6 +25,8 @@ #include <vnet/dpo/load_balance.h> #include <math.h> +#include <vlib/stats/stats.h> + tcp_main_t tcp_main; typedef struct @@ -188,8 +190,7 @@ tcp_session_get_listener (u32 listener_index) static tcp_connection_t * tcp_half_open_connection_alloc (void) { - ASSERT (vlib_get_thread_index () == 0); - return tcp_connection_alloc (0); + return tcp_connection_alloc (transport_cl_thread ()); } /** @@ -199,7 +200,8 @@ tcp_half_open_connection_alloc (void) static void tcp_half_open_connection_free (tcp_connection_t * tc) { - ASSERT (vlib_get_thread_index () == 0); + ASSERT (vlib_get_thread_index () == 
tc->c_thread_index || + vlib_thread_is_main_w_barrier ()); return tcp_connection_free (tc); } @@ -240,8 +242,8 @@ tcp_connection_cleanup (tcp_connection_t * tc) /* Cleanup local endpoint if this was an active connect */ if (!(tc->cfg_flags & TCP_CFG_F_NO_ENDPOINT)) - transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip, - tc->c_lcl_port); + transport_release_local_endpoint (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip, + tc->c_lcl_port); /* Check if connection is not yet fully established */ if (tc->state == TCP_STATE_SYN_SENT) @@ -408,8 +410,8 @@ tcp_connection_close (tcp_connection_t * tc) case TCP_STATE_CLOSE_WAIT: if (!transport_max_tx_dequeue (&tc->connection)) { - tcp_send_fin (tc); tcp_connection_timers_reset (tc); + tcp_send_fin (tc); tcp_connection_set_state (tc, TCP_STATE_LAST_ACK); tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time); @@ -489,6 +491,14 @@ tcp_session_reset (u32 conn_index, u32 thread_index) { tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); + + /* For half-opens just cleanup */ + if (tc->state == TCP_STATE_SYN_SENT) + { + tcp_connection_cleanup (tc); + return; + } + tcp_send_reset (tc); tcp_connection_timers_reset (tc); tcp_cong_recovery_off (tc); @@ -764,11 +774,13 @@ tcp_connection_init_vars (tcp_connection_t * tc) } static int -tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr, - u16 * lcl_port, u8 is_ip4) +tcp_alloc_custom_local_endpoint (ip46_address_t *lcl_addr, u16 *lcl_port, + transport_endpoint_cfg_t *rmt) { + tcp_main_t *tm = vnet_get_tcp_main (); int index, port; - if (is_ip4) + + if (rmt->is_ip4) { index = tm->last_v4_addr_rotor++; if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs)) @@ -784,7 +796,7 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr, clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index], sizeof (ip6_address_t)); } - port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, 
lcl_addr); + port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr, rmt); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; @@ -794,7 +806,6 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr, static int tcp_session_open (transport_endpoint_cfg_t * rmt) { - tcp_main_t *tm = vnet_get_tcp_main (); tcp_connection_t *tc; ip46_address_t lcl_addr; u16 lcl_port; @@ -805,27 +816,13 @@ tcp_session_open (transport_endpoint_cfg_t * rmt) */ if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs)) || (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs))) - rv = tcp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port, - rmt->is_ip4); + rv = tcp_alloc_custom_local_endpoint (&lcl_addr, &lcl_port, rmt); else - rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP, - rmt, &lcl_addr, &lcl_port); + rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP, rmt, &lcl_addr, + &lcl_port); if (rv) - { - if (rv != SESSION_E_PORTINUSE) - return rv; - - if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip, - lcl_port, rmt->port, TRANSPORT_PROTO_TCP, - rmt->is_ip4)) - return SESSION_E_PORTINUSE; - - /* 5-tuple is available so increase lcl endpoint refcount and proceed - * with connection allocation */ - transport_share_local_endpoint (TRANSPORT_PROTO_TCP, &lcl_addr, - lcl_port); - } + return rv; /* * Create connection and send SYN @@ -834,7 +831,7 @@ tcp_session_open (transport_endpoint_cfg_t * rmt) ip_copy (&tc->c_rmt_ip, &rmt->ip, rmt->is_ip4); ip_copy (&tc->c_lcl_ip, &lcl_addr, rmt->is_ip4); tc->c_rmt_port = rmt->port; - tc->c_lcl_port = clib_host_to_net_u16 (lcl_port); + tc->c_lcl_port = lcl_port; tc->c_is_ip4 = rmt->is_ip4; tc->c_proto = TRANSPORT_PROTO_TCP; tc->c_fib_index = rmt->fib_index; @@ -1226,7 +1223,6 @@ tcp_timer_waitclose_handler (tcp_connection_t * tc) } } -/* *INDENT-OFF* */ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = { tcp_timer_retransmit_handler, @@ -1234,7 +1230,6 @@ static 
timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = tcp_timer_waitclose_handler, tcp_timer_retransmit_syn_handler, }; -/* *INDENT-ON* */ static void tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk) @@ -1342,7 +1337,6 @@ tcp_session_app_rx_evt (transport_connection_t *conn) return 0; } -/* *INDENT-OFF* */ const static transport_proto_vft_t tcp_proto = { .enable = vnet_tcp_enable_disable, .start_listen = tcp_session_bind, @@ -1373,7 +1367,6 @@ const static transport_proto_vft_t tcp_proto = { .service_type = TRANSPORT_SERVICE_VC, }, }; -/* *INDENT-ON* */ void tcp_connection_tx_pacer_update (tcp_connection_t * tc) @@ -1463,6 +1456,51 @@ tcp_initialize_iss_seed (tcp_main_t * tm) tm->iss_seed.second = random_u64 (&time_now); } +static void +tcp_stats_collector_fn (vlib_stats_collector_data_t *d) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + counter_t **counters = d->entry->data; + counter_t *cb = counters[0]; + tcp_wrk_stats_t acc = {}; + tcp_worker_ctx_t *wrk; + + vec_foreach (wrk, tm->wrk_ctx) + { +#define _(name, type, str) acc.name += wrk->stats.name; + foreach_tcp_wrk_stat +#undef _ + } + +#define _(name, type, str) cb[TCP_STAT_##name] = acc.name; + foreach_tcp_wrk_stat +#undef _ +} + +static void +tcp_counters_init (tcp_main_t *tm) +{ + vlib_stats_collector_reg_t r = {}; + u32 idx; + + if (tm->counters_init) + return; + + r.entry_index = idx = vlib_stats_add_counter_vector ("/sys/tcp"); + r.collect_fn = tcp_stats_collector_fn; + vlib_stats_validate (idx, 0, TCP_STAT_no_buffer); + +#define _(name, type, str) \ + vlib_stats_add_symlink (idx, TCP_STAT_##name, "/sys/tcp/%s", \ + CLIB_STRING_MACRO (name)); + foreach_tcp_wrk_stat +#undef _ + + vlib_stats_register_collector_fn (&r); + + tm->counters_init = 1; +} + static clib_error_t * tcp_main_enable (vlib_main_t * vm) { @@ -1539,10 +1577,8 @@ tcp_main_enable (vlib_main_t * vm) tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm); tm->cc_last_type = TCP_CC_LAST; - tm->ipl_next_node[0] = 
vlib_node_get_next (vm, session_queue_node.index, - ip4_lookup_node.index); - tm->ipl_next_node[1] = vlib_node_get_next (vm, session_queue_node.index, - ip6_lookup_node.index); + tcp_counters_init (tm); + return error; } diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index c5dd3172302..2362a8bb857 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -66,6 +66,13 @@ typedef struct tcp_wrk_stats_ #undef _ } tcp_wrk_stats_t; +typedef enum +{ +#define _(name, type, str) TCP_STAT_##name, + foreach_tcp_wrk_stat +#undef _ +} tcp_wrk_stats_e; + typedef struct tcp_free_req_ { clib_time_type_t free_time; @@ -215,9 +222,6 @@ typedef struct _tcp_main /** vlib buffer size */ u32 bytes_per_buffer; - /** Session layer edge indices to ip lookup (syns, rst) */ - u32 ipl_next_node[2]; - /** Dispatch table by state and flags */ tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64]; @@ -236,6 +240,9 @@ typedef struct _tcp_main /** Flag that indicates if stack is on or off */ u8 is_enabled; + /** Set if counters on stats segment initialized */ + u8 counters_init; + /** Flag that indicates if v4 punting is enabled */ u8 punt_unknown4; @@ -268,6 +275,10 @@ extern vlib_node_registration_t tcp4_rcv_process_node; extern vlib_node_registration_t tcp6_rcv_process_node; extern vlib_node_registration_t tcp4_listen_node; extern vlib_node_registration_t tcp6_listen_node; +extern vlib_node_registration_t tcp4_input_nolookup_node; +extern vlib_node_registration_t tcp6_input_nolookup_node; +extern vlib_node_registration_t tcp4_drop_node; +extern vlib_node_registration_t tcp6_drop_node; #define tcp_cfg tcp_main.cfg #define tcp_node_index(node_id, is_ip4) \ diff --git a/src/vnet/tcp/tcp_bt.c b/src/vnet/tcp/tcp_bt.c index 67e9a14ceda..3cb57a550de 100644 --- a/src/vnet/tcp/tcp_bt.c +++ b/src/vnet/tcp/tcp_bt.c @@ -638,11 +638,9 @@ tcp_bt_flush_samples (tcp_connection_t * tc) vec_validate (samples, pool_elts (bt->samples) - 1); vec_reset_length (samples); - /* *INDENT-OFF* */ pool_foreach 
(bts, bt->samples) { vec_add1 (samples, bts - bt->samples); } - /* *INDENT-ON* */ vec_foreach (si, samples) { diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c index f7d26ff79d0..b04c0bdc0cf 100644 --- a/src/vnet/tcp/tcp_cli.c +++ b/src/vnet/tcp/tcp_cli.c @@ -613,14 +613,12 @@ tcp_src_address_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tcp_src_address_command, static) = { .path = "tcp src-address", .short_help = "tcp src-address <ip-addr> [- <ip-addr>] add src address range", .function = tcp_src_address_fn, }; -/* *INDENT-ON* */ static u8 * tcp_scoreboard_dump_trace (u8 * s, sack_scoreboard_t * sb) @@ -676,14 +674,12 @@ tcp_show_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tcp_show_scoreboard_trace_command, static) = { .path = "show tcp scoreboard trace", .short_help = "show tcp scoreboard trace <connection>", .function = tcp_show_scoreboard_trace_fn, }; -/* *INDENT-ON* */ u8 * tcp_scoreboard_replay (u8 * s, tcp_connection_t * tc, u8 verbose) @@ -801,14 +797,12 @@ tcp_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tcp_replay_scoreboard_command, static) = { .path = "tcp replay scoreboard", .short_help = "tcp replay scoreboard <connection>", .function = tcp_scoreboard_trace_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input, @@ -824,14 +818,12 @@ show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input, tm->punt_unknown6 ? 
"enabled" : "disabled"); return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_tcp_punt_command, static) = { .path = "show tcp punt", .short_help = "show tcp punt", .function = show_tcp_punt_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, @@ -863,14 +855,12 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_tcp_stats_command, static) = { .path = "show tcp stats", .short_help = "show tcp stats", .function = show_tcp_stats_fn, }; -/* *INDENT-ON* */ static clib_error_t * clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, @@ -893,14 +883,12 @@ clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_tcp_stats_command, static) = { .path = "clear tcp stats", .short_help = "clear tcp stats", .function = clear_tcp_stats_fn, }; -/* *INDENT-ON* */ uword unformat_tcp_cc_algo (unformat_input_t * input, va_list * va) diff --git a/src/vnet/tcp/tcp_debug.c b/src/vnet/tcp/tcp_debug.c index 309b6951559..ab466f30efb 100644 --- a/src/vnet/tcp/tcp_debug.c +++ b/src/vnet/tcp/tcp_debug.c @@ -134,14 +134,12 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (tcp_debug_command, static) = { .path = "tcp debug", .short_help = "tcp [show] [debug group <N> level <N>]", .function = tcp_debug_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index 1202f7f44d3..04e921cd601 100644 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -17,13 +17,18 @@ #define SRC_VNET_TCP_TCP_DEBUG_H_ #include <vlib/vlib.h> +#include <vpp/vnet/config.h> /** * Build debugging infra unconditionally. Debug components controlled via * debug configuration. Comes with some overhead so it's not recommended for * production/performance scenarios. Takes priority over TCP_DEBUG_ENABLE. 
*/ +#ifdef VPP_TCP_DEBUG_ALWAYS +#define TCP_DEBUG_ALWAYS (1) +#else #define TCP_DEBUG_ALWAYS (0) +#endif /** * Build debugging infra only if enabled. Debug components controlled via * macros that follow. @@ -867,11 +872,12 @@ if (TCP_DEBUG_CC > 1) \ */ #if TCP_DEBUG_CS || TCP_DEBUG_ALWAYS -#define STATS_INTERVAL 1 +#define STATS_INTERVAL 0.001 -#define tcp_cc_time_to_print_stats(_tc) \ - _tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now() \ - || tcp_in_fastrecovery (_tc) \ +#define tcp_cc_time_to_print_stats(_tc) \ + _tc->c_cc_stat_tstamp + STATS_INTERVAL < \ + tcp_time_now_us (_tc->c_thread_index) || \ + tcp_in_fastrecovery (_tc) #define TCP_EVT_CC_RTO_STAT_PRINT(_tc) \ { \ @@ -887,14 +893,14 @@ if (TCP_DEBUG_CC > 1) \ ed->data[3] = _tc->rttvar; \ } -#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \ -{ \ -if (tcp_cc_time_to_print_stats (_tc)) \ -{ \ - TCP_EVT_CC_RTO_STAT_PRINT (_tc); \ - _tc->c_cc_stat_tstamp = tcp_time_now (); \ -} \ -} +#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \ + { \ + if (tcp_cc_time_to_print_stats (_tc)) \ + { \ + TCP_EVT_CC_RTO_STAT_PRINT (_tc); \ + _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \ + } \ + } #define TCP_EVT_CC_SND_STAT_PRINT(_tc) \ { \ @@ -911,14 +917,14 @@ if (tcp_cc_time_to_print_stats (_tc)) \ ed->data[3] = _tc->snd_rxt_bytes; \ } -#define TCP_EVT_CC_SND_STAT_HANDLER(_tc, ...) \ -{ \ -if (tcp_cc_time_to_print_stats (_tc)) \ -{ \ - TCP_EVT_CC_SND_STAT_PRINT(_tc); \ - _tc->c_cc_stat_tstamp = tcp_time_now (); \ -} \ -} +#define TCP_EVT_CC_SND_STAT_HANDLER(_tc, ...) \ + { \ + if (tcp_cc_time_to_print_stats (_tc)) \ + { \ + TCP_EVT_CC_SND_STAT_PRINT (_tc); \ + _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \ + } \ + } #define TCP_EVT_CC_STAT_PRINT(_tc) \ { \ @@ -937,14 +943,14 @@ if (tcp_cc_time_to_print_stats (_tc)) \ TCP_EVT_CC_SND_STAT_PRINT (_tc); \ } -#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) 
\ -{ \ -if (tcp_cc_time_to_print_stats (_tc)) \ -{ \ - TCP_EVT_CC_STAT_PRINT (_tc); \ - _tc->c_cc_stat_tstamp = tcp_time_now(); \ -} \ -} +#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \ + { \ + if (tcp_cc_time_to_print_stats (_tc)) \ + { \ + TCP_EVT_CC_STAT_PRINT (_tc); \ + _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \ + } \ + } #else #define TCP_EVT_CC_STAT_HANDLER(_tc, ...) #define TCP_EVT_CC_STAT_PRINT(_tc) diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def index a6f0ce4b35f..87fdcc02615 100644 --- a/src/vnet/tcp/tcp_error.def +++ b/src/vnet/tcp/tcp_error.def @@ -49,3 +49,4 @@ tcp_error (RCV_WND, rcv_wnd, WARN, "Segment not in receive window") tcp_error (FIN_RCVD, fin_rcvd, INFO, "FINs received") tcp_error (LINK_LOCAL_RW, link_local_rw, ERROR, "No rewrite for link local connection") tcp_error (ZERO_RWND, zero_rwnd, WARN, "Zero receive window") +tcp_error (CONN_ACCEPTED, conn_accepted, INFO, "Connections accepted")
\ No newline at end of file diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c index a3245f2046a..4674f2cbaed 100644 --- a/src/vnet/tcp/tcp_format.c +++ b/src/vnet/tcp/tcp_format.c @@ -52,12 +52,68 @@ format_tcp_flags (u8 * s, va_list * args) return s; } +u8 * +format_tcp_options (u8 *s, va_list *args) +{ + tcp_options_t *opts = va_arg (*args, tcp_options_t *); + u32 indent, n_opts = 0; + int i; + + if (!opts->flags) + return s; + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "options:\n%U", format_white_space, indent); + + if (tcp_opts_mss (opts)) + { + s = format (s, "mss %d", opts->mss); + n_opts++; + } + if (tcp_opts_wscale (opts)) + { + s = format (s, "%swindow scale %d", n_opts > 0 ? ", " : "", + format_white_space, indent, opts->wscale); + n_opts++; + } + if (tcp_opts_tstamp (opts)) + { + s = format (s, "%stimestamp %d, echo/reflected timestamp", + n_opts > 0 ? ", " : "", format_white_space, indent, + opts->tsval, opts->tsecr); + n_opts++; + } + if (tcp_opts_sack_permitted (opts)) + { + s = format (s, "%ssack permitted", n_opts > 0 ? ", " : "", + format_white_space, indent); + n_opts++; + } + if (tcp_opts_sack (opts)) + { + s = format (s, "%ssacks:", n_opts > 0 ? ", " : "", format_white_space, + indent); + for (i = 0; i < opts->n_sack_blocks; ++i) + { + s = format (s, "\n%Ublock %d: start %d, end %d", format_white_space, + indent + 2, i + 1, opts->sacks[i].start, + opts->sacks[i].end); + } + n_opts++; + } + + return s; +} + /* Format TCP header. */ u8 * format_tcp_header (u8 * s, va_list * args) { tcp_header_t *tcp = va_arg (*args, tcp_header_t *); u32 max_header_bytes = va_arg (*args, u32); + tcp_options_t opts = { .flags = 0 }; u32 header_bytes; u32 indent; @@ -83,32 +139,13 @@ format_tcp_header (u8 * s, va_list * args) clib_net_to_host_u16 (tcp->window), clib_net_to_host_u16 (tcp->checksum)); - -#if 0 - /* Format TCP options. 
*/ - { - u8 *o; - u8 *option_start = (void *) (tcp + 1); - u8 *option_end = (void *) tcp + header_bytes; - - for (o = option_start; o < option_end;) - { - u32 length = o[1]; - switch (o[0]) - { - case TCP_OPTION_END: - length = 1; - o = option_end; - break; - - case TCP_OPTION_NOOP: - length = 1; - break; - - } - } - } -#endif + if (header_bytes > max_header_bytes) + s = format (s, "\n%Uoptions: truncated", format_white_space, indent); + else if (tcp_options_parse (tcp, &opts, tcp_is_syn (tcp)) < 0) + s = format (s, "\n%Uoptions: parsing failed", format_white_space, indent); + else + s = format (s, "\n%U%U", format_white_space, indent, format_tcp_options, + &opts); /* Recurse into next protocol layer. */ if (max_header_bytes != 0 && header_bytes < max_header_bytes) diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h index 69f8ce7ff27..ccd0e3fe3ee 100644 --- a/src/vnet/tcp/tcp_inlines.h +++ b/src/vnet/tcp/tcp_inlines.h @@ -18,6 +18,35 @@ #include <vnet/tcp/tcp.h> +always_inline void +tcp_node_inc_counter_i (vlib_main_t *vm, u32 tcp4_node, u32 tcp6_node, + u8 is_ip4, u32 evt, u32 val) +{ + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_node, evt, val); + else + vlib_node_increment_counter (vm, tcp6_node, evt, val); +} + +#define tcp_inc_counter(node_id, err, count) \ + tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \ + tcp6_##node_id##_node.index, is_ip4, err, count) +#define tcp_maybe_inc_err_counter(cnts, err) \ + { \ + cnts[err] += (next0 != tcp_next_drop (is_ip4)); \ + } +#define tcp_inc_err_counter(cnts, err, val) \ + { \ + cnts[err] += val; \ + } +#define tcp_store_err_counters(node_id, cnts) \ + { \ + int i; \ + for (i = 0; i < TCP_N_ERROR; i++) \ + if (cnts[i]) \ + tcp_inc_counter (node_id, i, cnts[i]); \ + } + always_inline tcp_header_t * tcp_buffer_hdr (vlib_buffer_t * b) { @@ -66,7 +95,7 @@ tcp_listener_get (u32 tli) always_inline tcp_connection_t * tcp_half_open_connection_get (u32 conn_index) { - return tcp_connection_get 
(conn_index, 0); + return tcp_connection_get (conn_index, transport_cl_thread ()); } /** diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index a6d135812e1..70b5d28e0cc 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -27,59 +27,17 @@ static vlib_error_desc_t tcp_input_error_counters[] = { #undef tcp_error }; -/* All TCP nodes have the same outgoing arcs */ -#define foreach_tcp_state_next \ - _ (DROP4, "ip4-drop") \ - _ (DROP6, "ip6-drop") \ - _ (TCP4_OUTPUT, "tcp4-output") \ - _ (TCP6_OUTPUT, "tcp6-output") - -typedef enum _tcp_established_next -{ -#define _(s,n) TCP_ESTABLISHED_NEXT_##s, - foreach_tcp_state_next -#undef _ - TCP_ESTABLISHED_N_NEXT, -} tcp_established_next_t; - -typedef enum _tcp_rcv_process_next -{ -#define _(s,n) TCP_RCV_PROCESS_NEXT_##s, - foreach_tcp_state_next -#undef _ - TCP_RCV_PROCESS_N_NEXT, -} tcp_rcv_process_next_t; - -typedef enum _tcp_syn_sent_next -{ -#define _(s,n) TCP_SYN_SENT_NEXT_##s, - foreach_tcp_state_next -#undef _ - TCP_SYN_SENT_N_NEXT, -} tcp_syn_sent_next_t; - -typedef enum _tcp_listen_next -{ -#define _(s,n) TCP_LISTEN_NEXT_##s, - foreach_tcp_state_next -#undef _ - TCP_LISTEN_N_NEXT, -} tcp_listen_next_t; - -/* Generic, state independent indices */ -typedef enum _tcp_state_next +typedef enum _tcp_input_next { -#define _(s,n) TCP_NEXT_##s, - foreach_tcp_state_next -#undef _ - TCP_STATE_N_NEXT, -} tcp_state_next_t; - -#define tcp_next_output(is_ip4) (is_ip4 ? TCP_NEXT_TCP4_OUTPUT \ - : TCP_NEXT_TCP6_OUTPUT) - -#define tcp_next_drop(is_ip4) (is_ip4 ? TCP_NEXT_DROP4 \ - : TCP_NEXT_DROP6) + TCP_INPUT_NEXT_DROP, + TCP_INPUT_NEXT_LISTEN, + TCP_INPUT_NEXT_RCV_PROCESS, + TCP_INPUT_NEXT_SYN_SENT, + TCP_INPUT_NEXT_ESTABLISHED, + TCP_INPUT_NEXT_RESET, + TCP_INPUT_NEXT_PUNT, + TCP_INPUT_N_NEXT +} tcp_input_next_t; /** * Validate segment sequence number. 
As per RFC793: @@ -404,17 +362,10 @@ tcp_rcv_ack_no_cc (tcp_connection_t * tc, vlib_buffer_t * b, u32 * error) if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number) && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt))) { - if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt) - && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) - { - tc->snd_nxt = vnet_buffer (b)->tcp.ack_number; - goto acceptable; - } *error = TCP_ERROR_ACK_INVALID; return -1; } -acceptable: tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; tc->snd_una = vnet_buffer (b)->tcp.ack_number; *error = TCP_ERROR_ACK_OK; @@ -981,15 +932,6 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b, /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */ if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt))) { - /* We've probably entered recovery and the peer still has some - * of the data we've sent. Update snd_nxt and accept the ack */ - if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt) - && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) - { - tc->snd_nxt = vnet_buffer (b)->tcp.ack_number; - goto process_ack; - } - tc->errors.above_ack_wnd += 1; *error = TCP_ERROR_ACK_FUTURE; TCP_EVT (TCP_EVT_ACK_RCV_ERR, tc, 0, vnet_buffer (b)->tcp.ack_number); @@ -1012,8 +954,6 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b, return 0; } -process_ack: - /* * Looks okay, process feedback */ @@ -1356,9 +1296,13 @@ format_tcp_rx_trace (u8 * s, va_list * args) tcp_connection_t *tc = &t->tcp_connection; u32 indent = format_get_indent (s); - s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc, - format_tcp_state, tc->state, format_white_space, indent, - format_tcp_header, &t->tcp_header, 128); + if (!tc->c_lcl_port) + s = format (s, "no tcp connection\n%U%U", format_white_space, indent, + format_tcp_header, &t->tcp_header, 128); + else + s = format (s, "%U state %U\n%U%U", 
format_tcp_connection_id, tc, + format_tcp_state, tc->state, format_white_space, indent, + format_tcp_header, &t->tcp_header, 128); return s; } @@ -1428,53 +1372,14 @@ tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node, } } -always_inline void -tcp_node_inc_counter_i (vlib_main_t * vm, u32 tcp4_node, u32 tcp6_node, - u8 is_ip4, u32 evt, u32 val) -{ - if (is_ip4) - vlib_node_increment_counter (vm, tcp4_node, evt, val); - else - vlib_node_increment_counter (vm, tcp6_node, evt, val); -} - -#define tcp_maybe_inc_counter(node_id, err, count) \ -{ \ - if (next0 != tcp_next_drop (is_ip4)) \ - tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \ - tcp6_##node_id##_node.index, is_ip4, err, \ - 1); \ -} -#define tcp_inc_counter(node_id, err, count) \ - tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \ - tcp6_##node_id##_node.index, is_ip4, \ - err, count) -#define tcp_maybe_inc_err_counter(cnts, err) \ -{ \ - cnts[err] += (next0 != tcp_next_drop (is_ip4)); \ -} -#define tcp_inc_err_counter(cnts, err, val) \ -{ \ - cnts[err] += val; \ -} -#define tcp_store_err_counters(node_id, cnts) \ -{ \ - int i; \ - for (i = 0; i < TCP_N_ERROR; i++) \ - if (cnts[i]) \ - tcp_inc_counter(node_id, i, cnts[i]); \ -} - - always_inline uword tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip4) { - u32 thread_index = vm->thread_index, errors = 0; + u32 thread_index = vm->thread_index, n_left_from, *from; tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 err_counters[TCP_N_ERROR] = { 0 }; - u32 n_left_from, *from; if (node->flags & VLIB_NODE_FLAG_TRACE) tcp_established_trace_frame (vm, node, frame, is_ip4); @@ -1538,9 +1443,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b += 1; } - errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, - thread_index); - err_counters[TCP_ERROR_MSG_QUEUE_FULL] = errors; + 
session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index); tcp_store_err_counters (established, err_counters); tcp_handle_postponed_dequeues (wrk); tcp_handle_disconnects (wrk); @@ -1563,43 +1466,23 @@ VLIB_NODE_FN (tcp6_established_node) (vlib_main_t * vm, return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (tcp4_established_node) = -{ +VLIB_REGISTER_NODE (tcp4_established_node) = { .name = "tcp4-established", /* Takes a vector of packets. */ .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, .error_counters = tcp_input_error_counters, - .n_next_nodes = TCP_ESTABLISHED_N_NEXT, - .next_nodes = - { -#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, - foreach_tcp_state_next -#undef _ - }, .format_trace = format_tcp_rx_trace_short, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (tcp6_established_node) = -{ +VLIB_REGISTER_NODE (tcp6_established_node) = { .name = "tcp6-established", /* Takes a vector of packets. 
*/ .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, .error_counters = tcp_input_error_counters, - .n_next_nodes = TCP_ESTABLISHED_N_NEXT, - .next_nodes = - { -#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, - foreach_tcp_state_next -#undef _ - }, .format_trace = format_tcp_rx_trace_short, }; -/* *INDENT-ON* */ static u8 @@ -1795,11 +1678,50 @@ tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4) tc->cfg_flags |= TCP_CFG_F_TSO; } +static void +tcp_input_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_buffer_t **bs, u16 *nexts, u32 n_bufs, u8 is_ip4) +{ + tcp_connection_t *tc; + tcp_header_t *tcp; + tcp_rx_trace_t *t; + u8 flags; + int i; + + for (i = 0; i < n_bufs; i++) + { + if (!(bs[i]->flags & VLIB_BUFFER_IS_TRACED)) + continue; + + t = vlib_add_trace (vm, node, bs[i], sizeof (*t)); + if (nexts[i] == TCP_INPUT_NEXT_DROP || nexts[i] == TCP_INPUT_NEXT_PUNT || + nexts[i] == TCP_INPUT_NEXT_RESET) + { + tc = 0; + } + else + { + flags = vnet_buffer (bs[i])->tcp.flags; + + if (flags == TCP_STATE_LISTEN) + tc = tcp_listener_get (vnet_buffer (bs[i])->tcp.connection_index); + else if (flags == TCP_STATE_SYN_SENT) + tc = tcp_half_open_connection_get ( + vnet_buffer (bs[i])->tcp.connection_index); + else + tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index, + vm->thread_index); + } + tcp = tcp_buffer_hdr (bs[i]); + tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4); + } +} + always_inline uword tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4) { - u32 n_left_from, *from, thread_index = vm->thread_index, errors = 0; + u32 n_left_from, *from, thread_index = vm->thread_index; tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; @@ -1965,7 +1887,9 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node, SESSION_E_NONE)) { tcp_send_reset_w_pkt (new_tc, b[0], thread_index, is_ip4); - tcp_connection_cleanup (new_tc); + 
tcp_program_cleanup (wrk, new_tc); + new_tc->state = TCP_STATE_CLOSED; + new_tc->c_s_index = ~0; error = TCP_ERROR_CREATE_SESSION_FAIL; goto cleanup_ho; } @@ -1986,8 +1910,10 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (session_stream_connect_notify (&new_tc->connection, SESSION_E_NONE)) { - tcp_connection_cleanup (new_tc); tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4); + tcp_program_cleanup (wrk, new_tc); + new_tc->state = TCP_STATE_CLOSED; + new_tc->c_s_index = ~0; TCP_EVT (TCP_EVT_RST_SENT, tc); error = TCP_ERROR_CREATE_SESSION_FAIL; goto cleanup_ho; @@ -2034,9 +1960,7 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node, tcp_inc_counter (syn_sent, error, 1); } - errors = - session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index); - tcp_inc_counter (syn_sent, TCP_ERROR_MSG_QUEUE_FULL, errors); + session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index); vlib_buffer_free (vm, from, frame->n_vectors); tcp_handle_disconnects (wrk); @@ -2057,7 +1981,6 @@ VLIB_NODE_FN (tcp6_syn_sent_node) (vlib_main_t * vm, return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp4_syn_sent_node) = { .name = "tcp4-syn-sent", @@ -2065,18 +1988,9 @@ VLIB_REGISTER_NODE (tcp4_syn_sent_node) = .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, .error_counters = tcp_input_error_counters, - .n_next_nodes = TCP_SYN_SENT_N_NEXT, - .next_nodes = - { -#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, - foreach_tcp_state_next -#undef _ - }, .format_trace = format_tcp_rx_trace_short, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp6_syn_sent_node) = { .name = "tcp6-syn-sent", @@ -2084,16 +1998,8 @@ VLIB_REGISTER_NODE (tcp6_syn_sent_node) = .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, .error_counters = tcp_input_error_counters, - .n_next_nodes = TCP_SYN_SENT_N_NEXT, - .next_nodes = - { -#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, - 
foreach_tcp_state_next -#undef _ - }, .format_trace = format_tcp_rx_trace_short, }; -/* *INDENT-ON* */ static void tcp46_rcv_process_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, @@ -2125,7 +2031,7 @@ always_inline uword tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4) { - u32 thread_index = vm->thread_index, errors, n_left_from, *from, max_deq; + u32 thread_index = vm->thread_index, n_left_from, *from, max_deq; tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; @@ -2193,15 +2099,6 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, switch (tc->state) { case TCP_STATE_SYN_RCVD: - - /* Make sure the segment is exactly right */ - if (tc->rcv_nxt != vnet_buffer (b[0])->tcp.seq_number || is_fin) - { - tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4); - error = TCP_ERROR_SEGMENT_INVALID; - goto drop; - } - /* * If the segment acknowledgment is not acceptable, form a * reset segment, @@ -2215,6 +2112,10 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, goto drop; } + /* Avoid notifying app if connection is about to be closed */ + if (PREDICT_FALSE (is_fin)) + break; + /* Update rtt and rto */ tcp_estimate_initial_rtt (tc); tcp_connection_tx_pacer_update (tc); @@ -2243,7 +2144,7 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, tcp_connection_cleanup (tc); goto drop; } - error = TCP_ERROR_ACK_OK; + error = TCP_ERROR_CONN_ACCEPTED; break; case TCP_STATE_ESTABLISHED: /* We can get packets in established state here because they @@ -2322,8 +2223,8 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (max_deq > tc->burst_acked) break; - tcp_send_fin (tc); tcp_connection_timers_reset (tc); + tcp_send_fin (tc); tcp_connection_set_state (tc, TCP_STATE_LAST_ACK); tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time); @@ -2435,15 +2336,15 @@ 
tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, tcp_cfg.closewait_time); break; case TCP_STATE_SYN_RCVD: - /* Send FIN-ACK, enter LAST-ACK and because the app was not - * notified yet, set a cleanup timer instead of relying on - * disconnect notify and the implicit close call. */ + /* Send FIN-ACK and enter TIME-WAIT, as opposed to LAST-ACK, + * because the app was not notified yet and we want to avoid + * session state transitions to ensure cleanup does not + * propagate to app. */ tcp_connection_timers_reset (tc); tc->rcv_nxt += 1; tcp_send_fin (tc); - tcp_connection_set_state (tc, TCP_STATE_LAST_ACK); - tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE, - tcp_cfg.lastack_time); + tcp_connection_set_state (tc, TCP_STATE_TIME_WAIT); + tcp_program_cleanup (wrk, tc); break; case TCP_STATE_CLOSE_WAIT: case TCP_STATE_CLOSING: @@ -2498,9 +2399,7 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, tcp_inc_counter (rcv_process, error, 1); } - errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, - thread_index); - tcp_inc_counter (rcv_process, TCP_ERROR_MSG_QUEUE_FULL, errors); + session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index); tcp_handle_postponed_dequeues (wrk); tcp_handle_disconnects (wrk); vlib_buffer_free (vm, from, frame->n_vectors); @@ -2522,43 +2421,23 @@ VLIB_NODE_FN (tcp6_rcv_process_node) (vlib_main_t * vm, return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (tcp4_rcv_process_node) = -{ +VLIB_REGISTER_NODE (tcp4_rcv_process_node) = { .name = "tcp4-rcv-process", /* Takes a vector of packets. 
*/ .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, .error_counters = tcp_input_error_counters, - .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, - .next_nodes = - { -#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, - foreach_tcp_state_next -#undef _ - }, .format_trace = format_tcp_rx_trace_short, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (tcp6_rcv_process_node) = -{ +VLIB_REGISTER_NODE (tcp6_rcv_process_node) = { .name = "tcp6-rcv-process", /* Takes a vector of packets. */ .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, .error_counters = tcp_input_error_counters, - .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, - .next_nodes = - { -#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, - foreach_tcp_state_next -#undef _ - }, .format_trace = format_tcp_rx_trace_short, }; -/* *INDENT-ON* */ static void tcp46_listen_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, @@ -2666,7 +2545,8 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { lc = tcp_listener_get (vnet_buffer (b[0])->tcp.connection_index); } - else /* We are in TimeWait state*/ + /* Probably we are in time-wait or closed state */ + else { tcp_connection_t *tc; tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index, @@ -2780,98 +2660,82 @@ VLIB_NODE_FN (tcp6_listen_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (tcp4_listen_node) = -{ +VLIB_REGISTER_NODE (tcp4_listen_node) = { .name = "tcp4-listen", /* Takes a vector of packets. 
*/ .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, .error_counters = tcp_input_error_counters, - .n_next_nodes = TCP_LISTEN_N_NEXT, - .next_nodes = - { -#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, - foreach_tcp_state_next -#undef _ - }, .format_trace = format_tcp_rx_trace_short, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (tcp6_listen_node) = -{ +VLIB_REGISTER_NODE (tcp6_listen_node) = { .name = "tcp6-listen", /* Takes a vector of packets. */ .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, .error_counters = tcp_input_error_counters, - .n_next_nodes = TCP_LISTEN_N_NEXT, - .next_nodes = - { -#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, - foreach_tcp_state_next -#undef _ - }, .format_trace = format_tcp_rx_trace_short, }; -/* *INDENT-ON* */ -typedef enum _tcp_input_next +always_inline uword +tcp46_drop_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, int is_ip4) { - TCP_INPUT_NEXT_DROP, - TCP_INPUT_NEXT_LISTEN, - TCP_INPUT_NEXT_RCV_PROCESS, - TCP_INPUT_NEXT_SYN_SENT, - TCP_INPUT_NEXT_ESTABLISHED, - TCP_INPUT_NEXT_RESET, - TCP_INPUT_NEXT_PUNT, - TCP_INPUT_N_NEXT -} tcp_input_next_t; + u32 *from = vlib_frame_vector_args (frame); -#define foreach_tcp4_input_next \ - _ (DROP, "ip4-drop") \ - _ (LISTEN, "tcp4-listen") \ - _ (RCV_PROCESS, "tcp4-rcv-process") \ - _ (SYN_SENT, "tcp4-syn-sent") \ - _ (ESTABLISHED, "tcp4-established") \ - _ (RESET, "tcp4-reset") \ - _ (PUNT, "ip4-punt") - -#define foreach_tcp6_input_next \ - _ (DROP, "ip6-drop") \ - _ (LISTEN, "tcp6-listen") \ - _ (RCV_PROCESS, "tcp6-rcv-process") \ - _ (SYN_SENT, "tcp6-syn-sent") \ - _ (ESTABLISHED, "tcp6-established") \ - _ (RESET, "tcp6-reset") \ - _ (PUNT, "ip6-punt") + /* Error counters must be incremented by previous nodes */ + vlib_buffer_free (vm, from, frame->n_vectors); -#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN) + return frame->n_vectors; +} -static void -tcp_input_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * 
node, - vlib_buffer_t ** bs, u32 n_bufs, u8 is_ip4) +VLIB_NODE_FN (tcp4_drop_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) { - tcp_connection_t *tc; - tcp_header_t *tcp; - tcp_rx_trace_t *t; - int i; + return tcp46_drop_inline (vm, node, from_frame, 1 /* is_ip4 */); +} - for (i = 0; i < n_bufs; i++) - { - if (bs[i]->flags & VLIB_BUFFER_IS_TRACED) - { - t = vlib_add_trace (vm, node, bs[i], sizeof (*t)); - tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index, - vm->thread_index); - tcp = vlib_buffer_get_current (bs[i]); - tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4); - } - } +VLIB_NODE_FN (tcp6_drop_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + return tcp46_drop_inline (vm, node, from_frame, 0 /* is_ip4 */); } +VLIB_REGISTER_NODE (tcp4_drop_node) = { + .name = "tcp4-drop", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_counters = tcp_input_error_counters, +}; + +VLIB_REGISTER_NODE (tcp6_drop_node) = { + .name = "tcp6-drop", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_counters = tcp_input_error_counters, +}; + +#define foreach_tcp4_input_next \ + _ (DROP, "tcp4-drop") \ + _ (LISTEN, "tcp4-listen") \ + _ (RCV_PROCESS, "tcp4-rcv-process") \ + _ (SYN_SENT, "tcp4-syn-sent") \ + _ (ESTABLISHED, "tcp4-established") \ + _ (RESET, "tcp4-reset") \ + _ (PUNT, "ip4-punt") + +#define foreach_tcp6_input_next \ + _ (DROP, "tcp6-drop") \ + _ (LISTEN, "tcp6-listen") \ + _ (RCV_PROCESS, "tcp6-rcv-process") \ + _ (SYN_SENT, "tcp6-syn-sent") \ + _ (ESTABLISHED, "tcp6-established") \ + _ (RESET, "tcp6-reset") \ + _ (PUNT, "ip6-punt") + +#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN) + static void tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4) { @@ -2892,9 +2756,8 @@ tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4) } static inline void -tcp_input_dispatch_buffer 
(tcp_main_t * tm, tcp_connection_t * tc, - vlib_buffer_t * b, u16 * next, - vlib_node_runtime_t * error_node) +tcp_input_dispatch_buffer (tcp_main_t *tm, tcp_connection_t *tc, + vlib_buffer_t *b, u16 *next, u16 *err_counters) { tcp_header_t *tcp; u32 error; @@ -2916,7 +2779,7 @@ tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc, if (PREDICT_FALSE (error != TCP_ERROR_NONE)) { - b->error = error_node->errors[error]; + tcp_inc_err_counter (err_counters, error, 1); if (error == TCP_ERROR_DISPATCH) clib_warning ("tcp conn %u disp error state %U flags %U", tc->c_c_index, format_tcp_state, tc->state, @@ -2932,6 +2795,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tcp_main_t *tm = vnet_get_tcp_main (); vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 nexts[VLIB_FRAME_SIZE], *next; + u16 err_counters[TCP_N_ERROR] = { 0 }; tcp_update_time_now (tcp_get_worker (thread_index)); @@ -2970,8 +2834,8 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index; vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index; - tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node); - tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], node); + tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters); + tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], err_counters); } else { @@ -2979,24 +2843,26 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0]))); vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index; - tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node); + tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], + err_counters); } else { tcp_input_set_error_next (tm, &next[0], &error0, is_ip4); - b[0]->error = node->errors[error0]; + tcp_inc_err_counter (err_counters, error0, 1); } if (PREDICT_TRUE (tc1 != 0)) { ASSERT (tcp_lookup_is_valid (tc1, b[1], tcp_buffer_hdr (b[1]))); 
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index; - tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], node); + tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], + err_counters); } else { tcp_input_set_error_next (tm, &next[1], &error1, is_ip4); - b[1]->error = node->errors[error1]; + tcp_inc_err_counter (err_counters, error1, 1); } } @@ -3022,12 +2888,12 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0]))); vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index; - tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node); + tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters); } else { tcp_input_set_error_next (tm, &next[0], &error0, is_ip4); - b[0]->error = node->errors[error0]; + tcp_inc_err_counter (err_counters, error0, 1); } b += 1; @@ -3036,8 +2902,9 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) - tcp_input_trace_frame (vm, node, bufs, frame->n_vectors, is_ip4); + tcp_input_trace_frame (vm, node, bufs, nexts, frame->n_vectors, is_ip4); + tcp_store_err_counters (input, err_counters); vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); return frame->n_vectors; } @@ -3058,7 +2925,6 @@ VLIB_NODE_FN (tcp6_input_nolookup_node) (vlib_main_t * vm, 1 /* is_nolookup */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp4_input_nolookup_node) = { .name = "tcp4-input-nolookup", @@ -3076,9 +2942,7 @@ VLIB_REGISTER_NODE (tcp4_input_nolookup_node) = .format_buffer = format_tcp_header, .format_trace = format_tcp_rx_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp6_input_nolookup_node) = { .name = "tcp6-input-nolookup", @@ -3096,7 +2960,6 @@ VLIB_REGISTER_NODE (tcp6_input_nolookup_node) = .format_buffer = format_tcp_header, .format_trace = format_tcp_rx_trace, }; -/* *INDENT-ON* */ VLIB_NODE_FN (tcp4_input_node) (vlib_main_t * vm, 
vlib_node_runtime_t * node, vlib_frame_t * from_frame) @@ -3112,7 +2975,6 @@ VLIB_NODE_FN (tcp6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, 0 /* is_nolookup */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp4_input_node) = { .name = "tcp4-input", @@ -3130,9 +2992,7 @@ VLIB_REGISTER_NODE (tcp4_input_node) = .format_buffer = format_tcp_header, .format_trace = format_tcp_rx_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp6_input_node) = { .name = "tcp6-input", @@ -3150,7 +3010,6 @@ VLIB_REGISTER_NODE (tcp6_input_node) = .format_buffer = format_tcp_header, .format_trace = format_tcp_rx_trace, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT void @@ -3336,6 +3195,8 @@ do { \ _(FIN_WAIT_2, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(FIN_WAIT_2, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _ (FIN_WAIT_2, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); _(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); @@ -3385,7 +3246,7 @@ do { \ _(CLOSED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED); _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED); - _(CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED); + _ (CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE); _(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED); #undef _ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index f5035006822..78148cd5695 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -420,7 +420,7 @@ static inline void tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, u8 flags) { - tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + tcp_options_t _snd_opts = {}, *snd_opts = &_snd_opts; u8 tcp_opts_len, 
tcp_hdr_opts_len; tcp_header_t *th; u16 wnd; @@ -656,8 +656,8 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 tcp_hdr_len, flags = 0; tcp_header_t *th, *pkt_th; u32 seq, ack, bi; - ip4_header_t *ih4, *pkt_ih4; - ip6_header_t *ih6, *pkt_ih6; + ip4_header_t *pkt_ih4; + ip6_header_t *pkt_ih6; if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) { @@ -667,6 +667,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; /* Make and write options */ tcp_hdr_len = sizeof (tcp_header_t); @@ -698,28 +699,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port, seq, ack, tcp_hdr_len, flags, 0); - - /* Swap src and dst ip */ - if (is_ip4) - { - ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40); - ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address, - &pkt_ih4->src_address, IP_PROTOCOL_TCP, - tcp_csum_offload (tc)); - th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); - } - else - { - int bogus = ~0; - ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) == - 0x60); - ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address, - &pkt_ih6->src_address, - IP_PROTOCOL_TCP, - tc->ipv6_flow_label); - th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus); - ASSERT (!bogus); - } + th->checksum = tcp_compute_checksum (tc, b); tcp_enqueue_half_open (wrk, tc, b, bi); TCP_EVT (TCP_EVT_RST_SENT, tc); @@ -858,10 +838,9 @@ tcp_send_fin (tcp_connection_t * tc) /* Out of buffers so program fin retransmit ASAP */ tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, tcp_cfg.alloc_err_timeout); - if (fin_snt) - tc->snd_nxt += 1; - else - /* Make sure retransmit retries a fin not data */ + tc->snd_nxt += 1; + /* Make sure retransmit retries a fin not data with right snd_nxt */ + if (!fin_snt) 
tc->flags |= TCP_CONN_FINSNT; tcp_worker_stats_inc (wrk, no_buffer, 1); return; @@ -1137,7 +1116,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, data = tcp_init_buffer (vm, *b); n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset, max_deq_bytes); - ASSERT (n_bytes == max_deq_bytes); + ASSERT (n_bytes > 0); b[0]->current_length = n_bytes; tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0, /* burst */ 0, /* update_snd_nxt */ 0); @@ -1299,6 +1278,7 @@ tcp_cc_init_rxt_timeout (tcp_connection_t * tc) tc->cwnd_acc_bytes = 0; tc->tr_occurences += 1; tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD; + tc->sack_sb.rescue_rxt = tc->snd_una - 1; tcp_recovery_on (tc); } @@ -1749,7 +1729,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, && tc->rxt_head != tc->snd_una && tcp_retransmit_should_retry_head (tc, sb)) { - max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una); + max_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una); n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b); if (!n_written) { @@ -1781,7 +1761,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, if (!hole) { /* We are out of lost holes to retransmit so send some new data. 
*/ - if (max_deq > tc->snd_mss) + if (max_deq) { u32 n_segs_new; int av_wnd; @@ -1791,7 +1771,10 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una); av_wnd = clib_max (av_wnd - tc->snd_mss, 0); snd_space = clib_min (snd_space, av_wnd); - snd_space = clib_min (max_deq, snd_space); + /* Low bound max_deq to mss to be able to send a segment even + * when it is less than mss */ + snd_space = + clib_min (clib_max (max_deq, tc->snd_mss), snd_space); burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss); burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST); @@ -1803,8 +1786,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, goto done; } - if (tcp_in_recovery (tc) || !can_rescue - || scoreboard_rescue_rxt_valid (sb, tc)) + if (!can_rescue || scoreboard_rescue_rxt_valid (sb, tc)) break; /* If rescue rxt undefined or less than snd_una then one segment of @@ -1828,7 +1810,11 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, break; } - max_bytes = clib_min (hole->end - sb->high_rxt, snd_space); + max_bytes = hole->end - sb->high_rxt; + /* Avoid retransmitting segment less than mss if possible */ + if (snd_space < tc->snd_mss && max_bytes > snd_space) + break; + max_bytes = clib_min (max_bytes, snd_space); max_bytes = snd_limited ? 
clib_min (max_bytes, tc->snd_mss) : max_bytes; if (max_bytes == 0) break; @@ -2191,6 +2177,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_left_from, *from, thread_index = vm->thread_index; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 nexts[VLIB_FRAME_SIZE], *next; + u16 err_counters[TCP_N_ERROR] = { 0 }; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -2241,7 +2228,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION]; + tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, + 1); next[0] = TCP_OUTPUT_NEXT_DROP; } if (tc1 != 0) @@ -2252,7 +2240,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION]; + tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, + 1); next[1] = TCP_OUTPUT_NEXT_DROP; } } @@ -2282,7 +2271,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION]; + tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, 1); next[0] = TCP_OUTPUT_NEXT_DROP; } @@ -2291,6 +2280,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_left_from -= 1; } + tcp_store_err_counters (output, err_counters); vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4), TCP_ERROR_PKTS_SENT, frame->n_vectors); @@ -2309,7 +2299,6 @@ VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp4_output_node) = { .name = "tcp4-output", @@ -2327,9 +2316,7 @@ VLIB_REGISTER_NODE (tcp4_output_node) = .format_buffer = format_tcp_header, .format_trace = format_tcp_tx_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ 
VLIB_REGISTER_NODE (tcp6_output_node) = { .name = "tcp6-output", @@ -2347,7 +2334,6 @@ VLIB_REGISTER_NODE (tcp6_output_node) = .format_buffer = format_tcp_header, .format_trace = format_tcp_tx_trace, }; -/* *INDENT-ON* */ typedef enum _tcp_reset_next { @@ -2458,7 +2444,6 @@ VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return tcp46_reset_inline (vm, node, from_frame, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp4_reset_node) = { .name = "tcp4-reset", .vector_size = sizeof (u32), @@ -2472,9 +2457,7 @@ VLIB_REGISTER_NODE (tcp4_reset_node) = { }, .format_trace = format_tcp_tx_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp6_reset_node) = { .name = "tcp6-reset", .vector_size = sizeof (u32), @@ -2488,7 +2471,6 @@ VLIB_REGISTER_NODE (tcp6_reset_node) = { }, .format_trace = format_tcp_tx_trace, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c index 07bdb113fd0..9b98e3d8ee4 100644 --- a/src/vnet/tcp/tcp_pg.c +++ b/src/vnet/tcp/tcp_pg.c @@ -51,6 +51,13 @@ _ (ECE) \ _ (CWR) +#define foreach_tcp_options \ + _ (mss, TCP_OPTION_MSS, TCP_OPTION_LEN_MSS, 1) \ + _ (timestamp, TCP_OPTION_TIMESTAMP, TCP_OPTION_LEN_TIMESTAMP, 2) \ + _ (winscale, TCP_OPTION_WINDOW_SCALE, TCP_OPTION_LEN_WINDOW_SCALE, 1) \ + _ (sackperm, TCP_OPTION_SACK_PERMITTED, TCP_OPTION_LEN_SACK_PERMITTED, 0) \ + _ (sack, TCP_OPTION_SACK_BLOCK, TCP_OPTION_LEN_SACK_BLOCK, 0) + static void tcp_pg_edit_function (pg_main_t * pg, pg_stream_t * s, @@ -150,82 +157,192 @@ uword unformat_pg_tcp_header (unformat_input_t * input, va_list * args) { pg_stream_t *s = va_arg (*args, pg_stream_t *); - pg_tcp_header_t *p; - u32 group_index; + pg_tcp_header_t *pth; + u32 header_group_index, opt_group_index = ~0, noop_len, opts_len = 0; - p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t), - &group_index); - pg_tcp_header_init (p); + pth = pg_create_edit_group (s, sizeof (pth[0]), 
sizeof (tcp_header_t), + &header_group_index); + pg_tcp_header_init (pth); /* Defaults. */ - pg_edit_set_fixed (&p->seq_number, 0); - pg_edit_set_fixed (&p->ack_number, 0); - - pg_edit_set_fixed (&p->data_offset_and_reserved, - sizeof (tcp_header_t) / sizeof (u32)); + pg_edit_set_fixed (&pth->seq_number, 0); + pg_edit_set_fixed (&pth->ack_number, 0); - pg_edit_set_fixed (&p->window, 4096); - pg_edit_set_fixed (&p->urgent_pointer, 0); + pg_edit_set_fixed (&pth->window, 4096); + pg_edit_set_fixed (&pth->urgent_pointer, 0); -#define _(f) pg_edit_set_fixed (&p->f##_flag, 0); +#define _(f) pg_edit_set_fixed (&pth->f##_flag, 0); foreach_tcp_flag #undef _ - p->checksum.type = PG_EDIT_UNSPECIFIED; + pth->checksum.type = PG_EDIT_UNSPECIFIED; - if (!unformat (input, "TCP: %U -> %U", - unformat_pg_edit, - unformat_tcp_udp_port, &p->src, - unformat_pg_edit, unformat_tcp_udp_port, &p->dst)) + if (!unformat (input, "TCP: %U -> %U", unformat_pg_edit, + unformat_tcp_udp_port, &pth->src, unformat_pg_edit, + unformat_tcp_udp_port, &pth->dst)) goto error; /* Parse options. */ while (1) { - if (unformat (input, "window %U", - unformat_pg_edit, unformat_pg_number, &p->window)) + if (unformat (input, "window %U", unformat_pg_edit, unformat_pg_number, + &pth->window)) ; - else if (unformat (input, "checksum %U", - unformat_pg_edit, unformat_pg_number, &p->checksum)) + else if (unformat (input, "checksum %U", unformat_pg_edit, + unformat_pg_number, &pth->checksum)) ; else if (unformat (input, "seqnum %U", unformat_pg_edit, - unformat_pg_number, &p->seq_number)) + unformat_pg_number, &pth->seq_number)) ; else if (unformat (input, "acknum %U", unformat_pg_edit, - unformat_pg_number, &p->ack_number)) + unformat_pg_number, &pth->ack_number)) ; /* Flags. 
*/ -#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1); +#define _(f) \ + else if (unformat (input, #f)) pg_edit_set_fixed (&pth->f##_flag, 1); foreach_tcp_flag #undef _ - /* Can't parse input: try next protocol level. */ + /* Can't parse input: try TCP options and next protocol level. */ + else break; + } + + while (unformat (input, "opt")) + { + int i; + pg_edit_t *opt_header, *opt_values; + u8 type, opt_len, n_values; + + /* first allocate a new edit group for options */ + if (opt_group_index == ~0) + (void) pg_create_edit_group (s, 0, 0, &opt_group_index); + + if (false) + { + } +#define _(n, t, l, k) \ + else if (unformat (input, #n)) \ + { \ + type = (t); \ + opt_len = (l); \ + n_values = (k); \ + } + foreach_tcp_options +#undef _ else + { + /* unknown TCP option */ break; + } + +#define pg_tcp_option_init(e, o, b) \ + do \ + { \ + *(o) += (b); \ + (e)->lsb_bit_offset = *(o) > 0 ? (*(o) -1) * BITS (u8) : 0; \ + (e)->n_bits = (b) *BITS (u8); \ + } \ + while (0); + + /* if we don't know how many values to read, just ask */ + if (n_values == 0 && + unformat (input, "nvalues %D", sizeof (n_values), &n_values)) + { + switch (type) + { + case TCP_OPTION_SACK_BLOCK: + /* each sack block is composed of 2 32-bits values */ + n_values *= 2; + /* + opt_len contains the length of a single sack block, + it needs to be updated to contains the final number of bytes + for the sack options + */ + opt_len = 2 + 2 * opt_len; + break; + default: + /* unknown variable options */ + continue; + } + } + + opt_header = pg_add_edits (s, sizeof (pg_edit_t) * (2 + n_values), + opt_len, opt_group_index); + pg_tcp_option_init (opt_header, &opts_len, 1); + pg_tcp_option_init (opt_header + 1, &opts_len, 1); + pg_edit_set_fixed (opt_header, type); + pg_edit_set_fixed (opt_header + 1, opt_len); + opt_values = opt_header + 2; + + switch (type) + { + case TCP_OPTION_MSS: + pg_tcp_option_init (opt_values, &opts_len, 2); + break; + case TCP_OPTION_WINDOW_SCALE: + 
pg_tcp_option_init (opt_values, &opts_len, 1); + break; + case TCP_OPTION_TIMESTAMP: + case TCP_OPTION_SACK_BLOCK: + for (i = 0; i < n_values; ++i) + pg_tcp_option_init (opt_values + i, &opts_len, 4); + break; + default: + break; + } + + for (i = 0; i < n_values; ++i) + { + if (!unformat (input, "%U", unformat_pg_edit, unformat_pg_number, + opt_values + i)) + goto error; + } } + /* add TCP NO-OP options to fill options up to a 4-bytes boundary */ + noop_len = (TCP_OPTS_ALIGN - opts_len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + if (noop_len > 0) + { + pg_edit_t *noop_edit; + u8 *noops = 0; + + vec_validate (noops, noop_len - 1); + clib_memset (noops, 1, noop_len); + + noop_edit = + pg_add_edits (s, sizeof (noop_edit[0]), noop_len, opt_group_index); + pg_tcp_option_init (noop_edit, &opts_len, noop_len); + noop_edit->type = PG_EDIT_FIXED; + noop_edit->values[PG_EDIT_LO] = noops; + } +#undef pg_tcp_option_init + + /* set the data offset according to options */ + pg_edit_set_fixed (&pth->data_offset_and_reserved, + (sizeof (tcp_header_t) + opts_len) / sizeof (u32)); + { ip_main_t *im = &ip_main; u16 dst_port; tcp_udp_port_info_t *pi; pi = 0; - if (p->dst.type == PG_EDIT_FIXED) + if (pth->dst.type == PG_EDIT_FIXED) { - dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO); + dst_port = pg_edit_get_value (&pth->dst, PG_EDIT_LO); pi = ip_get_tcp_udp_port_info (im, dst_port); } - if (pi && pi->unformat_pg_edit - && unformat_user (input, pi->unformat_pg_edit, s)) + if (pi && pi->unformat_pg_edit && + unformat_user (input, pi->unformat_pg_edit, s)) ; else if (!unformat_user (input, unformat_pg_payload, s)) goto error; - if (p->checksum.type == PG_EDIT_UNSPECIFIED) + if (pth->checksum.type == PG_EDIT_UNSPECIFIED) { - pg_edit_group_t *g = pg_stream_get_group (s, group_index); + pg_edit_group_t *g = pg_stream_get_group (s, header_group_index); g->edit_function = tcp_pg_edit_function; g->edit_function_opaque = 0; } diff --git a/src/vnet/tcp/tcp_syn_filter4.c 
b/src/vnet/tcp/tcp_syn_filter4.c index 1b003e04e51..6e867240ad6 100644 --- a/src/vnet/tcp/tcp_syn_filter4.c +++ b/src/vnet/tcp/tcp_syn_filter4.c @@ -399,7 +399,6 @@ VLIB_NODE_FN (syn_filter4_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (syn_filter4_node) = { .name = "syn-filter-4", @@ -418,16 +417,13 @@ VLIB_REGISTER_NODE (syn_filter4_node) = [SYN_FILTER_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (syn_filter_4, static) = { .arc_name = "ip4-local", .node_name = "syn-filter-4", .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int @@ -525,14 +521,12 @@ syn_filter_enable_disable_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (sr_content_command, static) = { .path = "ip syn filter", .short_help = "ip syn filter <interface-name> [disable]", .function = syn_filter_enable_disable_command_fn, }; -/* *INDENT-ON* */ #endif /* CLIB_MARCH_VARIANT */ /* diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h index 7f7dbf193eb..c0907cae1cc 100644 --- a/src/vnet/tcp/tcp_timer.h +++ b/src/vnet/tcp/tcp_timer.h @@ -17,11 +17,18 @@ #include <vnet/tcp/tcp_types.h> +static inline u8 +tcp_timer_thread_is_valid (tcp_connection_t *tc) +{ + return ((tc->c_thread_index == vlib_get_thread_index ()) || + vlib_thread_is_main_w_barrier ()); +} + always_inline void -tcp_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id, +tcp_timer_set (tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id, u32 interval) { - ASSERT (tc->c_thread_index == vlib_get_thread_index ()); + ASSERT (tcp_timer_thread_is_valid (tc)); ASSERT (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID); tc->timers[timer_id] = tw_timer_start_tcp_twsl (tw, tc->c_c_index, timer_id, interval); @@ -30,7 +37,7 @@ tcp_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id, always_inline void tcp_timer_reset 
(tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id) { - ASSERT (tc->c_thread_index == vlib_get_thread_index ()); + ASSERT (tcp_timer_thread_is_valid (tc)); tc->pending_timers &= ~(1 << timer_id); if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID) return; @@ -43,7 +50,7 @@ always_inline void tcp_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id, u32 interval) { - ASSERT (tc->c_thread_index == vlib_get_thread_index ()); + ASSERT (tcp_timer_thread_is_valid (tc)); if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID) tw_timer_update_tcp_twsl (tw, tc->timers[timer_id], interval); else diff --git a/src/vnet/tcp/tcp_types.h b/src/vnet/tcp/tcp_types.h index aacfd8f2fd4..f9a9ff9a4da 100644 --- a/src/vnet/tcp/tcp_types.h +++ b/src/vnet/tcp/tcp_types.h @@ -389,7 +389,6 @@ typedef struct _tcp_connection #define rst_state snd_wl1 } tcp_connection_t; -/* *INDENT-OFF* */ struct _tcp_cc_algorithm { const char *name; @@ -406,7 +405,6 @@ struct _tcp_cc_algorithm void (*event) (tcp_connection_t *tc, tcp_cc_event_t evt); u64 (*get_pacing_rate) (tcp_connection_t *tc); }; -/* *INDENT-ON* */ #define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY #define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY diff --git a/src/vnet/teib/teib.c b/src/vnet/teib/teib.c index 44bbc7cfd89..a9234bbeb5e 100644 --- a/src/vnet/teib/teib.c +++ b/src/vnet/teib/teib.c @@ -34,7 +34,7 @@ struct teib_entry_t_ { teib_key_t *te_key; fib_prefix_t te_nh; - u32 te_fib_index; + u32 te_nh_fib_index; }; typedef struct teib_db_t_ @@ -83,7 +83,7 @@ teib_entry_get_af (const teib_entry_t * te) u32 teib_entry_get_fib_index (const teib_entry_t * te) { - return (te->te_fib_index); + return (te->te_nh_fib_index); } const ip_address_t * @@ -101,7 +101,7 @@ teib_entry_get_nh (const teib_entry_t * te) void teib_entry_adj_stack (const teib_entry_t * te, adj_index_t ai) { - adj_midchain_delegate_stack (ai, te->te_fib_index, &te->te_nh); + adj_midchain_delegate_stack 
(ai, te->te_nh_fib_index, &te->te_nh); } teib_entry_t * @@ -139,7 +139,7 @@ teib_entry_find_46 (u32 sw_if_index, } static void -teib_adj_fib_add (const ip_address_t * ip, u32 sw_if_index, u32 fib_index) +teib_adj_fib_add (const ip_address_t *ip, u32 sw_if_index, u32 peer_fib_index) { if (AF_IP6 == ip_addr_version (ip) && ip6_address_is_link_local_unicast (&ip_addr_v6 (ip))) @@ -155,21 +155,18 @@ teib_adj_fib_add (const ip_address_t * ip, u32 sw_if_index, u32 fib_index) fib_prefix_t pfx; ip_address_to_fib_prefix (ip, &pfx); - fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - fib_proto_to_dpo (pfx.fp_proto), - &pfx.fp_addr, - sw_if_index, - ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); - + fib_table_entry_path_add ( + peer_fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, + fib_proto_to_dpo (pfx.fp_proto), &pfx.fp_addr, sw_if_index, ~0, 1, + NULL, FIB_ROUTE_PATH_FLAG_NONE); if (0 == teib_db.td_n_entries[ip_addr_version (ip)]++) - fib_table_lock (fib_index, pfx.fp_proto, FIB_SOURCE_ADJ); + fib_table_lock (peer_fib_index, pfx.fp_proto, FIB_SOURCE_ADJ); } } static void -teib_adj_fib_remove (ip_address_t * ip, u32 sw_if_index, u32 fib_index) +teib_adj_fib_remove (ip_address_t *ip, u32 sw_if_index, u32 peer_fib_index) { if (AF_IP6 == ip_addr_version (ip) && ip6_address_is_link_local_unicast (&ip_addr_v6 (ip))) @@ -185,14 +182,12 @@ teib_adj_fib_remove (ip_address_t * ip, u32 sw_if_index, u32 fib_index) fib_prefix_t pfx; ip_address_to_fib_prefix (ip, &pfx); - fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_ADJ, - fib_proto_to_dpo (pfx.fp_proto), - &pfx.fp_addr, - sw_if_index, - ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove ( + peer_fib_index, &pfx, FIB_SOURCE_ADJ, fib_proto_to_dpo (pfx.fp_proto), + &pfx.fp_addr, sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); if (0 == --teib_db.td_n_entries[ip_addr_version (ip)]) - fib_table_unlock (fib_index, pfx.fp_proto, FIB_SOURCE_ADJ); + fib_table_unlock 
(peer_fib_index, pfx.fp_proto, FIB_SOURCE_ADJ); } } @@ -203,15 +198,17 @@ teib_entry_add (u32 sw_if_index, { fib_protocol_t nh_proto; teib_entry_t *te; - u32 fib_index; + u32 nh_fib_index, peer_fib_index; index_t tei; nh_proto = (AF_IP4 == ip_addr_version (nh) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - fib_index = fib_table_find (nh_proto, nh_table_id); + peer_fib_index = fib_table_get_index_for_sw_if_index ( + ip_address_family_to_fib_proto (peer->version), sw_if_index); + nh_fib_index = fib_table_find (nh_proto, nh_table_id); - if (~0 == fib_index) + if (~0 == nh_fib_index) { return (VNET_API_ERROR_NO_SUCH_FIB); } @@ -233,12 +230,12 @@ teib_entry_add (u32 sw_if_index, clib_memcpy (te->te_key, &nk, sizeof (*te->te_key)); ip_address_to_fib_prefix (nh, &te->te_nh); - te->te_fib_index = fib_index; + te->te_nh_fib_index = nh_fib_index; hash_set_mem (teib_db.td_db, te->te_key, tei); /* we how have a /32 in the overlay, add an adj-fib */ - teib_adj_fib_add (&te->te_key->tk_peer, sw_if_index, fib_index); + teib_adj_fib_add (&te->te_key->tk_peer, sw_if_index, peer_fib_index); TEIB_NOTIFY (te, nv_added); TEIB_TE_INFO (te, "created"); @@ -262,13 +259,12 @@ teib_entry_del (u32 sw_if_index, const ip_address_t * peer) { TEIB_TE_INFO (te, "removed"); - u32 fib_index; + u32 peer_fib_index; - fib_index = fib_table_get_index_for_sw_if_index - (ip_address_family_to_fib_proto (ip_addr_version (peer)), - sw_if_index); + peer_fib_index = fib_table_get_index_for_sw_if_index ( + ip_address_family_to_fib_proto (peer->version), sw_if_index); - teib_adj_fib_remove (&te->te_key->tk_peer, sw_if_index, fib_index); + teib_adj_fib_remove (&te->te_key->tk_peer, sw_if_index, peer_fib_index); hash_unset_mem (teib_db.td_db, te->te_key); @@ -301,7 +297,7 @@ format_teib_entry (u8 * s, va_list * args) s = format (s, "%U", format_ip_address, &te->te_key->tk_peer, IP46_TYPE_ANY); s = format (s, " via [%d]:%U", - fib_table_get_table_id (te->te_fib_index, te->te_nh.fp_proto), + fib_table_get_table_id 
(te->te_nh_fib_index, te->te_nh.fp_proto), format_fib_prefix, &te->te_nh); return (s); @@ -312,12 +308,10 @@ teib_walk (teib_walk_cb_t fn, void *ctx) { index_t tei; - /* *INDENT-OFF* */ pool_foreach_index (tei, teib_pool) { fn(tei, ctx); } - /* *INDENT-ON* */ } void @@ -325,13 +319,11 @@ teib_walk_itf (u32 sw_if_index, teib_walk_cb_t fn, void *ctx) { index_t tei; - /* *INDENT-OFF* */ pool_foreach_index (tei, teib_pool) { if (sw_if_index == teib_entry_get_sw_if_index(teib_entry_get(tei))) fn(tei, ctx); } - /* *INDENT-ON* */ } static void @@ -340,20 +332,18 @@ teib_walk_itf_proto (u32 sw_if_index, { index_t tei; - /* *INDENT-OFF* */ pool_foreach_index (tei, teib_pool) { if (sw_if_index == teib_entry_get_sw_if_index(teib_entry_get(tei)) && af == teib_entry_get_af(teib_entry_get(tei))) fn(tei, ctx); } - /* *INDENT-ON* */ } typedef struct teib_table_bind_ctx_t_ { - u32 new_fib_index; - u32 old_fib_index; + u32 new_peer_fib_index; + u32 old_peer_fib_index; } teib_table_bind_ctx_t; static walk_rc_t @@ -364,12 +354,13 @@ teib_walk_table_bind (index_t tei, void *arg) te = teib_entry_get (tei); - TEIB_TE_INFO (te, "bind: %d -> %d", ctx->old_fib_index, ctx->new_fib_index); + TEIB_TE_INFO (te, "bind: %d -> %d", ctx->old_peer_fib_index, + ctx->new_peer_fib_index); - teib_adj_fib_remove (&te->te_key->tk_peer, - te->te_key->tk_sw_if_index, ctx->old_fib_index); - teib_adj_fib_add (&te->te_key->tk_peer, - te->te_key->tk_sw_if_index, ctx->new_fib_index); + teib_adj_fib_remove (&te->te_key->tk_peer, te->te_key->tk_sw_if_index, + ctx->old_peer_fib_index); + teib_adj_fib_add (&te->te_key->tk_peer, te->te_key->tk_sw_if_index, + ctx->new_peer_fib_index); return (WALK_CONTINUE); } @@ -380,8 +371,8 @@ teib_table_bind_v4 (ip4_main_t * im, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index) { teib_table_bind_ctx_t ctx = { - .old_fib_index = old_fib_index, - .new_fib_index = new_fib_index, + .old_peer_fib_index = old_fib_index, + .new_peer_fib_index = new_fib_index, }; teib_walk_itf_proto 
(sw_if_index, AF_IP4, teib_walk_table_bind, &ctx); @@ -393,8 +384,8 @@ teib_table_bind_v6 (ip6_main_t * im, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index) { teib_table_bind_ctx_t ctx = { - .old_fib_index = old_fib_index, - .new_fib_index = new_fib_index, + .old_peer_fib_index = old_fib_index, + .new_peer_fib_index = new_fib_index, }; teib_walk_itf_proto (sw_if_index, AF_IP6, teib_walk_table_bind, &ctx); diff --git a/src/vnet/teib/teib_cli.c b/src/vnet/teib/teib_cli.c index a23902e0f60..03cec15c7a1 100644 --- a/src/vnet/teib/teib_cli.c +++ b/src/vnet/teib/teib_cli.c @@ -85,13 +85,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (teib_create_command, static) = { .path = "create teib", .short_help = "create teib <interface> peer <addr> nh <addr> [nh-table-id <ID>]", .function = teib_add, }; -/* *INDENT-ON* */ static clib_error_t * teib_del (vlib_main_t * vm, @@ -150,13 +148,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (teib_delete_command, static) = { .path = "delete teib", .short_help = "delete teib <interface> peer <addr>", .function = teib_del, }; -/* *INDENT-ON* */ static walk_rc_t teib_show_one (index_t nei, void *ctx) @@ -175,13 +171,11 @@ teib_show (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (teib_show_command, static) = { .path = "show teib", .short_help = "show teib", .function = teib_show, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c index c1689954975..5f00e6e302d 100644 --- a/src/vnet/tls/tls.c +++ b/src/vnet/tls/tls.c @@ -61,8 +61,7 @@ tls_add_vpp_q_rx_evt (session_t * s) int tls_add_vpp_q_builtin_rx_evt (session_t * s) { - if (svm_fifo_set_event (s->rx_fifo)) - session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_BUILTIN_RX); + session_enqueue_notify (s); return 0; } @@ -75,9 +74,10 @@ tls_add_vpp_q_tx_evt (session_t * s) } static inline int -tls_add_app_q_evt (app_worker_t * app, session_t * 
app_session) +tls_add_app_q_evt (app_worker_t *app_wrk, session_t *app_session) { - return app_worker_lock_and_send_event (app, app_session, SESSION_IO_EVT_RX); + app_worker_add_event (app_wrk, app_session, SESSION_IO_EVT_RX); + return 0; } u32 @@ -115,57 +115,74 @@ u32 tls_ctx_half_open_alloc (void) { tls_main_t *tm = &tls_main; - u8 will_expand = pool_get_will_expand (tm->half_open_ctx_pool); tls_ctx_t *ctx; - u32 ctx_index; - if (PREDICT_FALSE (will_expand && vlib_num_workers ())) - { - clib_rwlock_writer_lock (&tm->half_open_rwlock); - pool_get_zero (tm->half_open_ctx_pool, ctx); - ctx->c_c_index = ctx - tm->half_open_ctx_pool; - ctx_index = ctx->c_c_index; - clib_rwlock_writer_unlock (&tm->half_open_rwlock); - } - else - { - /* reader lock assumption: only main thread will call pool_get */ - clib_rwlock_reader_lock (&tm->half_open_rwlock); - pool_get_zero (tm->half_open_ctx_pool, ctx); - ctx->c_c_index = ctx - tm->half_open_ctx_pool; - ctx_index = ctx->c_c_index; - clib_rwlock_reader_unlock (&tm->half_open_rwlock); - } - return ctx_index; + if (vec_len (tm->postponed_ho_free)) + tls_flush_postponed_ho_cleanups (); + + pool_get_aligned_safe (tm->half_open_ctx_pool, ctx, CLIB_CACHE_LINE_BYTES); + + clib_memset (ctx, 0, sizeof (*ctx)); + ctx->c_c_index = ctx - tm->half_open_ctx_pool; + ctx->c_thread_index = transport_cl_thread (); + + return ctx->c_c_index; } void tls_ctx_half_open_free (u32 ho_index) { - tls_main_t *tm = &tls_main; - clib_rwlock_writer_lock (&tm->half_open_rwlock); pool_put_index (tls_main.half_open_ctx_pool, ho_index); - clib_rwlock_writer_unlock (&tm->half_open_rwlock); } tls_ctx_t * tls_ctx_half_open_get (u32 ctx_index) { tls_main_t *tm = &tls_main; - clib_rwlock_reader_lock (&tm->half_open_rwlock); return pool_elt_at_index (tm->half_open_ctx_pool, ctx_index); } void -tls_ctx_half_open_reader_unlock () +tls_add_postponed_ho_cleanups (u32 ho_index) { - clib_rwlock_reader_unlock (&tls_main.half_open_rwlock); + tls_main_t *tm = &tls_main; + 
vec_add1 (tm->postponed_ho_free, ho_index); } -u32 -tls_ctx_half_open_index (tls_ctx_t * ctx) +static void +tls_ctx_ho_try_free (u32 ho_index) { - return (ctx - tls_main.half_open_ctx_pool); + tls_ctx_t *ctx; + + ctx = tls_ctx_half_open_get (ho_index); + /* Probably tcp connected just before tcp establish timeout and + * worker that owns established session has not yet received + * @ref tls_session_connected_cb */ + if (!(ctx->flags & TLS_CONN_F_HO_DONE)) + { + ctx->tls_session_handle = SESSION_INVALID_HANDLE; + tls_add_postponed_ho_cleanups (ho_index); + return; + } + if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION)) + session_half_open_delete_notify (&ctx->connection); + tls_ctx_half_open_free (ho_index); +} + +void +tls_flush_postponed_ho_cleanups () +{ + tls_main_t *tm = &tls_main; + u32 *ho_indexp, *tmp; + + tmp = tm->postponed_ho_free; + tm->postponed_ho_free = tm->ho_free_list; + tm->ho_free_list = tmp; + + vec_foreach (ho_indexp, tm->ho_free_list) + tls_ctx_ho_try_free (*ho_indexp); + + vec_reset_length (tm->ho_free_list); } void @@ -188,17 +205,19 @@ tls_notify_app_accept (tls_ctx_t * ctx) lctx = tls_listener_ctx_get (ctx->listener_ctx_index); app_listener = listen_session_get_from_handle (lctx->app_session_handle); - app_session = session_get (ctx->c_s_index, ctx->c_thread_index); - app_session->app_wrk_index = ctx->parent_app_wrk_index; - app_session->connection_index = ctx->tls_ctx_handle; + app_session = session_alloc (ctx->c_thread_index); + app_session->session_state = SESSION_STATE_ACCEPTING; app_session->session_type = app_listener->session_type; app_session->listener_handle = listen_session_get_handle (app_listener); - app_session->session_state = SESSION_STATE_ACCEPTING; + app_session->app_wrk_index = ctx->parent_app_wrk_index; + app_session->connection_index = ctx->tls_ctx_handle; + ctx->c_s_index = app_session->session_index; if ((rv = app_worker_init_accepted (app_session))) { TLS_DBG (1, "failed to allocate fifos"); session_free (app_session); + 
ctx->flags |= TLS_CONN_F_NO_APP_SESSION; return rv; } ctx->app_session_handle = session_handle (app_session); @@ -217,45 +236,44 @@ tls_notify_app_connected (tls_ctx_t * ctx, session_error_t err) app_wrk = app_worker_get_if_valid (ctx->parent_app_wrk_index); if (!app_wrk) { - if (ctx->tls_type == TRANSPORT_PROTO_TLS) - session_free (session_get (ctx->c_s_index, ctx->c_thread_index)); - ctx->no_app_session = 1; + ctx->flags |= TLS_CONN_F_NO_APP_SESSION; return -1; } if (err) { - /* Free app session pre-allocated when transport was established */ - if (ctx->tls_type == TRANSPORT_PROTO_TLS) - session_free (session_get (ctx->c_s_index, ctx->c_thread_index)); - ctx->no_app_session = 1; + ctx->flags |= TLS_CONN_F_NO_APP_SESSION; goto send_reply; } - /* For DTLS the app session is not preallocated because the underlying udp - * session might migrate to a different worker during the handshake */ + app_session = session_alloc (ctx->c_thread_index); + app_session->session_state = SESSION_STATE_CREATED; + app_session->connection_index = ctx->tls_ctx_handle; + if (ctx->tls_type == TRANSPORT_PROTO_DTLS) { - session_type_t st; /* Cleanup half-open session as we don't get notification from udp */ session_half_open_delete_notify (&ctx->connection); - app_session = session_alloc (ctx->c_thread_index); - app_session->session_state = SESSION_STATE_CREATED; - ctx->c_s_index = app_session->session_index; - st = + app_session->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_DTLS, ctx->tcp_is_ip4); - app_session->session_type = st; - app_session->connection_index = ctx->tls_ctx_handle; } else { - app_session = session_get (ctx->c_s_index, ctx->c_thread_index); + app_session->session_type = + session_type_from_proto_and_ip (TRANSPORT_PROTO_TLS, ctx->tcp_is_ip4); } app_session->app_wrk_index = ctx->parent_app_wrk_index; + app_session->opaque = ctx->parent_app_api_context; + ctx->c_s_index = app_session->session_index; if ((err = app_worker_init_connected (app_wrk, 
app_session))) - goto failed; + { + app_worker_connect_notify (app_wrk, 0, err, ctx->parent_app_api_context); + ctx->flags |= TLS_CONN_F_NO_APP_SESSION; + session_free (app_session); + return -1; + } app_session->session_state = SESSION_STATE_READY; parent_app_api_ctx = ctx->parent_app_api_context; @@ -266,15 +284,12 @@ tls_notify_app_connected (tls_ctx_t * ctx, session_error_t err) { TLS_DBG (1, "failed to notify app"); session_free (session_get (ctx->c_s_index, ctx->c_thread_index)); - ctx->no_app_session = 1; + ctx->flags |= TLS_CONN_F_NO_APP_SESSION; return -1; } return 0; -failed: - ctx->no_app_session = 1; - tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ()); send_reply: return app_worker_connect_notify (app_wrk, 0, err, ctx->parent_app_api_context); @@ -385,6 +400,12 @@ tls_ctx_transport_close (tls_ctx_t * ctx) } static inline int +tls_ctx_transport_reset (tls_ctx_t *ctx) +{ + return tls_vfts[ctx->tls_ctx_engine].ctx_transport_reset (ctx); +} + +static inline int tls_ctx_app_close (tls_ctx_t * ctx) { return tls_vfts[ctx->tls_ctx_engine].ctx_app_close (ctx); @@ -419,43 +440,20 @@ tls_notify_app_io_error (tls_ctx_t *ctx) } void -tls_session_reset_callback (session_t * s) +tls_session_reset_callback (session_t *ts) { tls_ctx_t *ctx; - transport_connection_t *tc; - session_t *app_session; - ctx = tls_ctx_get (s->opaque); - ctx->is_passive_close = 1; - tc = &ctx->connection; - if (tls_ctx_handshake_is_over (ctx)) - { - session_transport_reset_notify (tc); - session_transport_closed_notify (tc); - tls_disconnect_transport (ctx); - } - else - if ((app_session = - session_get_if_valid (ctx->c_s_index, ctx->c_thread_index))) - { - session_free (app_session); - ctx->c_s_index = SESSION_INVALID_INDEX; - tls_disconnect_transport (ctx); - } + ctx = tls_ctx_get_w_thread (ts->opaque, ts->thread_index); + ctx->flags |= TLS_CONN_F_PASSIVE_CLOSE; + tls_ctx_transport_reset (ctx); } static void tls_session_cleanup_ho (session_t *s) { - tls_ctx_t *ctx; - u32 
ho_index; - /* session opaque stores the opaque passed on connect */ - ho_index = s->opaque; - ctx = tls_ctx_half_open_get (ho_index); - session_half_open_delete_notify (&ctx->connection); - tls_ctx_half_open_reader_unlock (); - tls_ctx_half_open_free (ho_index); + tls_ctx_ho_try_free (s->opaque); } int @@ -483,61 +481,69 @@ tls_session_disconnect_callback (session_t * tls_session) || vlib_thread_is_main_w_barrier ()); ctx = tls_ctx_get_w_thread (tls_session->opaque, tls_session->thread_index); - ctx->is_passive_close = 1; + ctx->flags |= TLS_CONN_F_PASSIVE_CLOSE; tls_ctx_transport_close (ctx); } int -tls_session_accept_callback (session_t * tls_session) +tls_session_accept_callback (session_t *ts) { - session_t *tls_listener, *app_session; + session_t *tls_listener; tls_ctx_t *lctx, *ctx; u32 ctx_handle; - tls_listener = - listen_session_get_from_handle (tls_session->listener_handle); + tls_listener = listen_session_get_from_handle (ts->listener_handle); lctx = tls_listener_ctx_get (tls_listener->opaque); ctx_handle = tls_ctx_alloc (lctx->tls_ctx_engine); ctx = tls_ctx_get (ctx_handle); - memcpy (ctx, lctx, sizeof (*lctx)); - ctx->c_thread_index = vlib_get_thread_index (); + clib_memcpy (ctx, lctx, sizeof (*lctx)); + ctx->c_s_index = SESSION_INVALID_INDEX; + ctx->c_thread_index = ts->thread_index; ctx->tls_ctx_handle = ctx_handle; - tls_session->session_state = SESSION_STATE_READY; - tls_session->opaque = ctx_handle; - ctx->tls_session_handle = session_handle (tls_session); + ts->opaque = ctx_handle; + ctx->tls_session_handle = session_handle (ts); ctx->listener_ctx_index = tls_listener->opaque; ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; ctx->ckpair_index = lctx->ckpair_index; - /* Preallocate app session. 
Avoids allocating a session post handshake - * on tls_session rx and potentially invalidating the session pool */ - app_session = session_alloc (ctx->c_thread_index); - app_session->session_state = SESSION_STATE_CREATED; - ctx->c_s_index = app_session->session_index; - TLS_DBG (1, "Accept on listener %u new connection [%u]%x", tls_listener->opaque, vlib_get_thread_index (), ctx_handle); - return tls_ctx_init_server (ctx); + if (tls_ctx_init_server (ctx)) + { + /* Do not free ctx yet, in case we have pending rx events */ + ctx->flags |= TLS_CONN_F_NO_APP_SESSION; + tls_disconnect_transport (ctx); + } + + if (ts->session_state < SESSION_STATE_READY) + ts->session_state = SESSION_STATE_READY; + + return 0; } int -tls_app_rx_callback (session_t * tls_session) +tls_app_rx_callback (session_t *ts) { tls_ctx_t *ctx; /* DTLS session migrating, wait for next notification */ - if (PREDICT_FALSE (tls_session->flags & SESSION_F_IS_MIGRATING)) + if (PREDICT_FALSE (ts->flags & SESSION_F_IS_MIGRATING)) return 0; - ctx = tls_ctx_get (tls_session->opaque); - if (PREDICT_FALSE (ctx->no_app_session)) + /* Read rescheduled but underlying transport deleted now */ + if (PREDICT_FALSE ((ts->session_state == SESSION_STATE_TRANSPORT_DELETED))) + return 0; + + ctx = tls_ctx_get (ts->opaque); + if (PREDICT_FALSE ((ctx->flags & TLS_CONN_F_NO_APP_SESSION) || + (ctx->flags & TLS_CONN_F_APP_CLOSED))) { TLS_DBG (1, "Local App closed"); return 0; } - tls_ctx_read (ctx, tls_session); + tls_ctx_read (ctx, ts); return 0; } @@ -556,9 +562,7 @@ int tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index, session_t *tls_session, session_error_t err) { - session_t *app_session; tls_ctx_t *ho_ctx, *ctx; - session_type_t st; u32 ctx_handle; ho_ctx = tls_ctx_half_open_get (ho_ctx_index); @@ -566,8 +570,9 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index, ctx_handle = tls_ctx_alloc (ho_ctx->tls_ctx_engine); ctx = tls_ctx_get (ctx_handle); clib_memcpy_fast (ctx, ho_ctx, sizeof (*ctx)); + 
/* Half-open freed on tcp half-open cleanup notification */ - tls_ctx_half_open_reader_unlock (); + __atomic_fetch_or (&ho_ctx->flags, TLS_CONN_F_HO_DONE, __ATOMIC_RELEASE); ctx->c_thread_index = vlib_get_thread_index (); ctx->tls_ctx_handle = ctx_handle; @@ -579,18 +584,17 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index, ctx->tls_session_handle = session_handle (tls_session); tls_session->opaque = ctx_handle; - tls_session->session_state = SESSION_STATE_READY; - /* Preallocate app session. Avoids allocating a session post handshake - * on tls_session rx and potentially invalidating the session pool */ - app_session = session_alloc (ctx->c_thread_index); - app_session->session_state = SESSION_STATE_CREATED; - ctx->c_s_index = app_session->session_index; - st = session_type_from_proto_and_ip (TRANSPORT_PROTO_TLS, ctx->tcp_is_ip4); - app_session->session_type = st; - app_session->connection_index = ctx->tls_ctx_handle; + if (tls_ctx_init_client (ctx)) + { + tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE); + tls_disconnect_transport (ctx); + } - return tls_ctx_init_client (ctx); + if (tls_session->session_state < SESSION_STATE_READY) + tls_session->session_state = SESSION_STATE_READY; + + return 0; } int @@ -622,13 +626,13 @@ tls_session_connected_callback (u32 tls_app_index, u32 ho_ctx_index, u32 api_context; ho_ctx = tls_ctx_half_open_get (ho_ctx_index); + ho_ctx->flags |= TLS_CONN_F_HO_DONE; app_wrk = app_worker_get_if_valid (ho_ctx->parent_app_wrk_index); if (app_wrk) { api_context = ho_ctx->parent_app_api_context; app_worker_connect_notify (app_wrk, 0, err, api_context); } - tls_ctx_half_open_reader_unlock (); return 0; } @@ -655,7 +659,7 @@ tls_app_session_cleanup (session_t * s, session_cleanup_ntf_t ntf) } ctx = tls_ctx_get (s->opaque); - if (!ctx->no_app_session) + if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION)) session_transport_delete_notify (&ctx->connection); tls_ctx_free (ctx); } @@ -681,7 +685,7 @@ dtls_migrate_ctx (void *arg) /* 
Probably the app detached while the session was migrating. Cleanup */ if (session_half_open_migrated_notify (&ctx->connection)) { - ctx->no_app_session = 1; + ctx->flags |= TLS_CONN_F_NO_APP_SESSION; tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ()); return; } @@ -700,7 +704,7 @@ dtls_session_migrate_callback (session_t *us, session_handle_t new_sh) ctx = tls_ctx_get_w_thread (us->opaque, us->thread_index); ctx->tls_session_handle = new_sh; cloned_ctx = tls_ctx_detach (ctx); - ctx->is_migrated = 1; + ctx->flags |= TLS_CONN_F_MIGRATED; session_half_open_migrate_notify (&ctx->connection); session_send_rpc_evt_to_thread (new_thread, dtls_migrate_ctx, @@ -709,11 +713,22 @@ dtls_session_migrate_callback (session_t *us, session_handle_t new_sh) tls_ctx_free (ctx); } +static void +tls_session_transport_closed_callback (session_t *ts) +{ + tls_ctx_t *ctx; + + ctx = tls_ctx_get_w_thread (ts->opaque, ts->thread_index); + if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION)) + session_transport_closed_notify (&ctx->connection); +} + static session_cb_vft_t tls_app_cb_vft = { .session_accept_callback = tls_session_accept_callback, .session_disconnect_callback = tls_session_disconnect_callback, .session_connected_callback = tls_session_connected_callback, .session_reset_callback = tls_session_reset_callback, + .session_transport_closed_callback = tls_session_transport_closed_callback, .half_open_cleanup_callback = tls_session_cleanup_ho, .add_segment_callback = tls_add_segment_callback, .del_segment_callback = tls_del_segment_callback, @@ -766,7 +781,6 @@ tls_connect (transport_endpoint_cfg_t * tep) ctx->srv_hostname = format (0, "%s", ccfg->hostname); vec_terminate_c_string (ctx->srv_hostname); } - tls_ctx_half_open_reader_unlock (); ctx->tls_ctx_engine = engine_type; @@ -776,7 +790,10 @@ tls_connect (transport_endpoint_cfg_t * tep) cargs->api_context = ctx_index; cargs->sep_ext.ns_index = app->ns_index; if ((rv = vnet_connect (cargs))) - return rv; + { + 
tls_ctx_half_open_free (ctx_index); + return rv; + } /* Track half-open tcp session in case we need to clean it up */ ctx->tls_session_handle = cargs->sh; @@ -793,6 +810,7 @@ tls_disconnect (u32 ctx_handle, u32 thread_index) TLS_DBG (1, "Disconnecting %x", ctx_handle); ctx = tls_ctx_get (ctx_handle); + ctx->flags |= TLS_CONN_F_APP_CLOSED; tls_ctx_app_close (ctx); } @@ -936,39 +954,53 @@ tls_listener_get (u32 listener_index) static transport_connection_t * tls_half_open_get (u32 ho_index) { - tls_main_t *tm = &tls_main; tls_ctx_t *ctx; ctx = tls_ctx_half_open_get (ho_index); - clib_rwlock_reader_unlock (&tm->half_open_rwlock); return &ctx->connection; } static void tls_cleanup_ho (u32 ho_index) { - tls_main_t *tm = &tls_main; - session_handle_t tcp_sh; tls_ctx_t *ctx; + session_t *s; ctx = tls_ctx_half_open_get (ho_index); - tcp_sh = ctx->tls_session_handle; - clib_rwlock_reader_unlock (&tm->half_open_rwlock); - session_cleanup_half_open (tcp_sh); - tls_ctx_half_open_free (ho_index); + /* Already pending cleanup */ + if (ctx->tls_session_handle == SESSION_INVALID_HANDLE) + { + ASSERT (ctx->flags & TLS_CONN_F_HO_DONE); + ctx->flags |= TLS_CONN_F_NO_APP_SESSION; + return; + } + + s = session_get_from_handle (ctx->tls_session_handle); + /* If no pending cleanup notification, force cleanup now. 
Otherwise, + * wait for cleanup notification and set no app session on ctx */ + if (s->session_state != SESSION_STATE_TRANSPORT_DELETED) + { + session_cleanup_half_open (ctx->tls_session_handle); + tls_ctx_half_open_free (ho_index); + } + else + ctx->flags |= TLS_CONN_F_NO_APP_SESSION; } int tls_custom_tx_callback (void *session, transport_send_params_t * sp) { - session_t *app_session = (session_t *) session; + session_t *as = (session_t *) session; tls_ctx_t *ctx; - if (PREDICT_FALSE (app_session->session_state - >= SESSION_STATE_TRANSPORT_CLOSED)) - return 0; + if (PREDICT_FALSE (as->session_state >= SESSION_STATE_TRANSPORT_CLOSED || + as->session_state <= SESSION_STATE_ACCEPTING)) + { + sp->flags |= TRANSPORT_SND_F_DESCHED; + return 0; + } - ctx = tls_ctx_get (app_session->connection_index); - return tls_ctx_write (ctx, app_session, sp); + ctx = tls_ctx_get (as->connection_index); + return tls_ctx_write (ctx, as, sp); } u8 * @@ -1079,6 +1111,7 @@ format_tls_half_open (u8 * s, va_list * args) { u32 ho_index = va_arg (*args, u32); u32 __clib_unused thread_index = va_arg (*args, u32); + u32 __clib_unused verbose = va_arg (*args, u32); session_t *tcp_ho; tls_ctx_t *ho_ctx; @@ -1090,7 +1123,6 @@ format_tls_half_open (u8 * s, va_list * args) ho_ctx->parent_app_wrk_index, ho_ctx->tls_ctx_engine, tcp_ho->thread_index, tcp_ho->session_index); - tls_ctx_half_open_reader_unlock (); return s; } @@ -1099,10 +1131,11 @@ tls_transport_endpoint_get (u32 ctx_handle, u32 thread_index, transport_endpoint_t * tep, u8 is_lcl) { tls_ctx_t *ctx = tls_ctx_get_w_thread (ctx_handle, thread_index); - session_t *tcp_session; + session_t *ts; - tcp_session = session_get_from_handle (ctx->tls_session_handle); - session_get_endpoint (tcp_session, tep, is_lcl); + ts = session_get_from_handle (ctx->tls_session_handle); + if (ts && ts->session_state < SESSION_STATE_TRANSPORT_DELETED) + session_get_endpoint (ts, tep, is_lcl); } static void @@ -1125,7 +1158,7 @@ tls_enable (vlib_main_t * vm, u8 
is_en) vnet_app_attach_args_t _a, *a = &_a; u64 options[APP_OPTIONS_N_OPTIONS]; tls_main_t *tm = &tls_main; - u32 fifo_size = 128 << 12; + u32 fifo_size = 512 << 10; if (!is_en) { @@ -1334,8 +1367,6 @@ tls_init (vlib_main_t * vm) if (!tm->ca_cert_path) tm->ca_cert_path = TLS_CA_CERT_PATH; - clib_rwlock_init (&tm->half_open_rwlock); - vec_validate (tm->rx_bufs, num_threads - 1); vec_validate (tm->tx_bufs, num_threads - 1); diff --git a/src/vnet/tls/tls.h b/src/vnet/tls/tls.h index 4a5da15a88f..6bd1371b984 100644 --- a/src/vnet/tls/tls.h +++ b/src/vnet/tls/tls.h @@ -36,26 +36,48 @@ #define TLS_DBG(_lvl, _fmt, _args...) #endif -/* *INDENT-OFF* */ typedef struct tls_cxt_id_ { - union { - session_handle_t app_session_handle; - u32 parent_app_api_ctx; - }; + session_handle_t app_session_handle; session_handle_t tls_session_handle; void *migrate_ctx; u32 parent_app_wrk_index; u32 ssl_ctx; - u32 listener_ctx_index; + union + { + u32 listener_ctx_index; + u32 parent_app_api_ctx; + }; u8 tcp_is_ip4; u8 tls_engine_id; } tls_ctx_id_t; -/* *INDENT-ON* */ STATIC_ASSERT (sizeof (tls_ctx_id_t) <= TRANSPORT_CONN_ID_LEN, "ctx id must be less than TRANSPORT_CONN_ID_LEN"); +#define foreach_tls_conn_flags \ + _ (HO_DONE, "ho-done") \ + _ (PASSIVE_CLOSE, "passive-close") \ + _ (APP_CLOSED, "app-closed") \ + _ (MIGRATED, "migrated") \ + _ (NO_APP_SESSION, "no-app-session") \ + _ (RESUME, "resume") \ + _ (HS_DONE, "handshake-done") + +typedef enum tls_conn_flags_bit_ +{ +#define _(sym, str) TLS_CONN_F_BIT_##sym, + foreach_tls_conn_flags +#undef _ +} tls_conn_flags_bit_t; + +typedef enum tls_conn_flags_ +{ +#define _(sym, str) TLS_CONN_F_##sym = 1 << TLS_CONN_F_BIT_##sym, + foreach_tls_conn_flags +#undef _ +} __clib_packed tls_conn_flags_t; + typedef struct tls_ctx_ { union @@ -76,11 +98,7 @@ typedef struct tls_ctx_ #define parent_app_api_context c_tls_ctx_id.parent_app_api_ctx #define migration_ctx c_tls_ctx_id.migrate_ctx - u8 is_passive_close; - u8 resume; - u8 app_closed; - u8 
no_app_session; - u8 is_migrated; + tls_conn_flags_t flags; u8 *srv_hostname; u32 evt_index; u32 ckpair_index; @@ -92,7 +110,8 @@ typedef struct tls_main_ u32 app_index; tls_ctx_t *listener_ctx_pool; tls_ctx_t *half_open_ctx_pool; - clib_rwlock_t half_open_rwlock; + u32 *postponed_ho_free; + u32 *ho_free_list; u8 **rx_bufs; u8 **tx_bufs; @@ -124,6 +143,7 @@ typedef struct tls_engine_vft_ int (*ctx_start_listen) (tls_ctx_t * ctx); int (*ctx_stop_listen) (tls_ctx_t * ctx); int (*ctx_transport_close) (tls_ctx_t * ctx); + int (*ctx_transport_reset) (tls_ctx_t *ctx); int (*ctx_app_close) (tls_ctx_t * ctx); int (*ctx_reinit_cachain) (void); } tls_engine_vft_t; @@ -141,6 +161,10 @@ void tls_notify_app_enqueue (tls_ctx_t * ctx, session_t * app_session); void tls_notify_app_io_error (tls_ctx_t *ctx); void tls_disconnect_transport (tls_ctx_t * ctx); int tls_reinit_ca_chain (crypto_engine_type_t tls_engine_id); + +void tls_add_postponed_ho_cleanups (u32 ho_index); +void tls_flush_postponed_ho_cleanups (); + #endif /* SRC_VNET_TLS_TLS_H_ */ /* diff --git a/src/vnet/udp/udp.api b/src/vnet/udp/udp.api index 02176be7c2b..6b468be461a 100644 --- a/src/vnet/udp/udp.api +++ b/src/vnet/udp/udp.api @@ -32,7 +32,7 @@ import "vnet/ip/ip_types.api"; * @param dst_ip - Encap destination address * @param src_ip - Encap source address * @param dst_port - Encap destination port - * @param src_port - Encap source port + * @param src_port - Encap source port, 0 for entopy per rfc7510 * @param id - VPP assigned id; ignored in add message, set in dump */ typedef udp_encap diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c index 9b2ed886d0f..b3c02510232 100644 --- a/src/vnet/udp/udp.c +++ b/src/vnet/udp/udp.c @@ -26,80 +26,60 @@ static void udp_connection_register_port (u16 lcl_port, u8 is_ip4) { udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; u16 *n; - pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - if (!pi) - { - udp_add_dst_port (um, lcl_port, 0, is_ip4); - pi = 
udp_get_dst_port_info (um, lcl_port, is_ip4); - pi->n_connections = 1; - } - else - { - pi->n_connections += 1; - /* Do not return. The fact that the pi is valid does not mean - * it's up to date */ - } - - pi->node_index = is_ip4 ? udp4_input_node.index : udp6_input_node.index; - pi->next_index = um->local_to_input_edge[is_ip4]; + /* Setup udp protocol -> next index sparse vector mapping. Do not setup + * udp_dst_port_info_t as that is used to distinguish between external + * and transport consumed ports */ - /* Setup udp protocol -> next index sparse vector mapping. */ if (is_ip4) - n = sparse_vec_validate (um->next_by_dst_port4, - clib_host_to_net_u16 (lcl_port)); + n = sparse_vec_validate (um->next_by_dst_port4, lcl_port); else - n = sparse_vec_validate (um->next_by_dst_port6, - clib_host_to_net_u16 (lcl_port)); + n = sparse_vec_validate (um->next_by_dst_port6, lcl_port); + + n[0] = um->local_to_input_edge[is_ip4]; + + __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1, + __ATOMIC_RELAXED); +} - n[0] = pi->next_index; +void +udp_connection_share_port (u16 lcl_port, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1, + __ATOMIC_RELAXED); } static void udp_connection_unregister_port (u16 lcl_port, u8 is_ip4) { udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; + u16 *n; - pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - if (!pi) + /* Needed because listeners are not tracked as local endpoints */ + if (__atomic_sub_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1, + __ATOMIC_RELAXED)) return; - if (!pi->n_connections) - { - clib_warning ("no connections using port %u", lcl_port); - return; - } - - if (!clib_atomic_sub_fetch (&pi->n_connections, 1)) - udp_unregister_dst_port (0, lcl_port, is_ip4); -} - -void -udp_connection_share_port (u16 lcl_port, u8 is_ip4) -{ - udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; + if (is_ip4) + n = sparse_vec_validate 
(um->next_by_dst_port4, lcl_port); + else + n = sparse_vec_validate (um->next_by_dst_port6, lcl_port); - /* Done without a lock but the operation is atomic. Writers to pi hash - * table and vector should be guarded by a barrier sync */ - pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - clib_atomic_fetch_add_rel (&pi->n_connections, 1); + n[0] = UDP_NO_NODE_SET; } udp_connection_t * udp_connection_alloc (u32 thread_index) { - udp_main_t *um = &udp_main; + udp_worker_t *wrk = udp_worker_get (thread_index); udp_connection_t *uc; - pool_get_aligned_safe (um->connections[thread_index], uc, - CLIB_CACHE_LINE_BYTES); + pool_get_aligned_safe (wrk->connections, uc, CLIB_CACHE_LINE_BYTES); clib_memset (uc, 0, sizeof (*uc)); - uc->c_c_index = uc - um->connections[thread_index]; + uc->c_c_index = uc - wrk->connections; uc->c_thread_index = thread_index; uc->c_proto = TRANSPORT_PROTO_UDP; return uc; @@ -108,20 +88,20 @@ udp_connection_alloc (u32 thread_index) void udp_connection_free (udp_connection_t * uc) { - u32 thread_index = uc->c_thread_index; + udp_worker_t *wrk = udp_worker_get (uc->c_thread_index); + clib_spinlock_free (&uc->rx_lock); if (CLIB_DEBUG) clib_memset (uc, 0xFA, sizeof (*uc)); - pool_put (udp_main.connections[thread_index], uc); + pool_put (wrk->connections, uc); } static void udp_connection_cleanup (udp_connection_t * uc) { - transport_endpoint_cleanup (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip, - uc->c_lcl_port); - udp_connection_unregister_port (clib_net_to_host_u16 (uc->c_lcl_port), - uc->c_is_ip4); + transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip, + uc->c_lcl_port); + udp_connection_unregister_port (uc->c_lcl_port, uc->c_is_ip4); udp_connection_free (uc); } @@ -132,6 +112,38 @@ udp_connection_delete (udp_connection_t * uc) udp_connection_cleanup (uc); } +static void +udp_handle_cleanups (void *args) +{ + u32 thread_index = (u32) pointer_to_uword (args); + udp_connection_t *uc; + udp_worker_t *wrk; + u32 *uc_index; + + wrk = 
udp_worker_get (thread_index); + vec_foreach (uc_index, wrk->pending_cleanups) + { + uc = udp_connection_get (*uc_index, thread_index); + udp_connection_delete (uc); + } + vec_reset_length (wrk->pending_cleanups); +} + +static void +udp_connection_program_cleanup (udp_connection_t *uc) +{ + uword thread_index = uc->c_thread_index; + udp_worker_t *wrk; + + wrk = udp_worker_get (uc->c_thread_index); + vec_add1 (wrk->pending_cleanups, uc->c_c_index); + + if (vec_len (wrk->pending_cleanups) == 1) + session_send_rpc_evt_to_thread_force ( + thread_index, udp_handle_cleanups, + uword_to_pointer (thread_index, void *)); +} + static u8 udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4) { @@ -139,8 +151,7 @@ udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4) udp_dst_port_info_t *pi; pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - return (pi && !pi->n_connections - && udp_is_valid_dst_port (lcl_port, is_ip4)); + return (pi && udp_is_valid_dst_port (lcl_port, is_ip4)); } static u16 @@ -156,12 +167,10 @@ udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl) udp_main_t *um = vnet_get_udp_main (); transport_endpoint_cfg_t *lcl_ext; udp_connection_t *listener; - u16 lcl_port_ho; void *iface_ip; - lcl_port_ho = clib_net_to_host_u16 (lcl->port); - - if (udp_connection_port_used_extern (lcl_port_ho, lcl->is_ip4)) + if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl->port), + lcl->is_ip4)) { clib_warning ("port already used"); return SESSION_E_PORTINUSE; @@ -194,8 +203,10 @@ udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl) else listener->c_flags |= TRANSPORT_CONNECTION_F_CLESS; clib_spinlock_init (&listener->rx_lock); + if (!um->csum_offload) + listener->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD; - udp_connection_register_port (lcl_port_ho, lcl->is_ip4); + udp_connection_register_port (listener->c_lcl_port, lcl->is_ip4); return listener->c_c_index; } @@ -206,8 +217,7 @@ udp_session_unbind (u32 listener_index) 
udp_connection_t *listener; listener = udp_listener_get (listener_index); - udp_connection_unregister_port (clib_net_to_host_u16 (listener->c_lcl_port), - listener->c_is_ip4); + udp_connection_unregister_port (listener->c_lcl_port, listener->c_is_ip4); clib_spinlock_free (&listener->rx_lock); pool_put (um->listener_pool, listener); return 0; @@ -223,38 +233,68 @@ udp_session_get_listener (u32 listener_index) } always_inline u32 -udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b) +udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, + u8 is_cless) { - vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, 1); - if (uc->c_is_ip4) - vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4, - IP_PROTOCOL_UDP, 1 /* csum offload */, - 0 /* is_df */, uc->c_dscp); - else - vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6, - IP_PROTOCOL_UDP); - vnet_buffer (b)->sw_if_index[VLIB_RX] = uc->sw_if_index; - vnet_buffer (b)->sw_if_index[VLIB_TX] = uc->c_fib_index; b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + /* reuse tcp medatada for now */ + vnet_buffer (b)->tcp.connection_index = uc->c_c_index; + + if (!is_cless) + { + vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, + udp_csum_offload (uc)); + + if (uc->c_is_ip4) + vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4, + IP_PROTOCOL_UDP, udp_csum_offload (uc), + 0 /* is_df */, uc->c_dscp); + else + vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6, + IP_PROTOCOL_UDP); + + vnet_buffer (b)->tcp.flags = 0; + } + else + { + u8 *data = vlib_buffer_get_current (b); + session_dgram_hdr_t hdr; + + hdr = *(session_dgram_hdr_t *) (data - sizeof (hdr)); + + /* Local port assumed to be bound, not overwriting it */ + vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port, + udp_csum_offload (uc)); + + if (uc->c_is_ip4) + vlib_buffer_push_ip4_custom (vm, b, &hdr.lcl_ip.ip4, &hdr.rmt_ip.ip4, + IP_PROTOCOL_UDP, udp_csum_offload (uc), + 
0 /* is_df */, uc->c_dscp); + else + vlib_buffer_push_ip6 (vm, b, &hdr.lcl_ip.ip6, &hdr.rmt_ip.ip6, + IP_PROTOCOL_UDP); + + /* Not connected udp session. Mark buffer for custom handling in + * udp_output */ + vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN; + } return 0; } -static u32 -udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs) +always_inline void +udp_push_header_batch (udp_connection_t *uc, vlib_buffer_t **bs, u32 n_bufs, + u8 is_cless) { vlib_main_t *vm = vlib_get_main (); - udp_connection_t *uc; - - uc = udp_connection_from_transport (tc); while (n_bufs >= 4) { vlib_prefetch_buffer_header (bs[2], STORE); vlib_prefetch_buffer_header (bs[3], STORE); - udp_push_one_header (vm, uc, bs[0]); - udp_push_one_header (vm, uc, bs[1]); + udp_push_one_header (vm, uc, bs[0], is_cless); + udp_push_one_header (vm, uc, bs[1], is_cless); n_bufs -= 2; bs += 2; @@ -264,16 +304,28 @@ udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs) if (n_bufs > 1) vlib_prefetch_buffer_header (bs[1], STORE); - udp_push_one_header (vm, uc, bs[0]); + udp_push_one_header (vm, uc, bs[0], is_cless); n_bufs -= 1; bs += 1; } +} + +static u32 +udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs) +{ + udp_connection_t *uc; + + uc = udp_connection_from_transport (tc); + if (uc->flags & UDP_CONN_F_CONNECTED) + udp_push_header_batch (uc, bs, n_bufs, 0 /* is_cless */); + else + udp_push_header_batch (uc, bs, n_bufs, 1 /* is_cless */); if (PREDICT_FALSE (uc->flags & UDP_CONN_F_CLOSING)) { - if (!transport_max_tx_dequeue (&uc->connection)) - udp_connection_delete (uc); + if (!transport_tx_fifo_has_dgram (&uc->connection)) + udp_connection_program_cleanup (uc); } return 0; @@ -298,8 +350,8 @@ udp_session_close (u32 connection_index, u32 thread_index) if (!uc || (uc->flags & UDP_CONN_F_MIGRATED)) return; - if (!transport_max_tx_dequeue (&uc->connection)) - udp_connection_delete (uc); + if (!transport_tx_fifo_has_dgram 
(&uc->connection)) + udp_connection_program_cleanup (uc); else uc->flags |= UDP_CONN_F_CLOSING; } @@ -347,46 +399,32 @@ udp_open_connection (transport_endpoint_cfg_t * rmt) rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_UDP, rmt, &lcl_addr, &lcl_port); if (rv) - { - if (rv != SESSION_E_PORTINUSE) - return rv; - - if (udp_connection_port_used_extern (lcl_port, rmt->is_ip4)) - return SESSION_E_PORTINUSE; - - /* If port in use, check if 5-tuple is also in use */ - if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip, - lcl_port, rmt->port, TRANSPORT_PROTO_UDP, - rmt->is_ip4)) - return SESSION_E_PORTINUSE; - - /* 5-tuple is available so increase lcl endpoint refcount and proceed - * with connection allocation */ - transport_share_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, - lcl_port); - goto conn_alloc; - } + return rv; - if (udp_is_valid_dst_port (lcl_port, rmt->is_ip4)) + if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port), + rmt->is_ip4)) { /* If specific source port was requested abort */ if (rmt->peer.port) - return SESSION_E_PORTINUSE; + { + transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, + lcl_port); + return SESSION_E_PORTINUSE; + } /* Try to find a port that's not used */ - while (udp_is_valid_dst_port (lcl_port, rmt->is_ip4)) + while (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port), + rmt->is_ip4)) { - lcl_port = transport_alloc_local_port (TRANSPORT_PROTO_UDP, - &lcl_addr); - if (lcl_port < 1) + transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, + lcl_port); + lcl_port = + transport_alloc_local_port (TRANSPORT_PROTO_UDP, &lcl_addr, rmt); + if ((int) lcl_port < 1) return SESSION_E_PORTINUSE; } } -conn_alloc: - - udp_connection_register_port (lcl_port, rmt->is_ip4); - /* We don't poll main thread if we have workers */ thread_index = transport_cl_thread (); @@ -394,7 +432,7 @@ conn_alloc: ip_copy (&uc->c_rmt_ip, &rmt->ip, rmt->is_ip4); ip_copy (&uc->c_lcl_ip, &lcl_addr, 
rmt->is_ip4); uc->c_rmt_port = rmt->port; - uc->c_lcl_port = clib_host_to_net_u16 (lcl_port); + uc->c_lcl_port = lcl_port; uc->c_is_ip4 = rmt->is_ip4; uc->c_proto = TRANSPORT_PROTO_UDP; uc->c_fib_index = rmt->fib_index; @@ -412,6 +450,12 @@ conn_alloc: clib_spinlock_init (&uc->rx_lock); uc->c_flags |= TRANSPORT_CONNECTION_F_CLESS; } + if (!um->csum_offload) + uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD; + uc->next_node_index = rmt->next_node_index; + uc->next_node_opaque = rmt->next_node_opaque; + + udp_connection_register_port (uc->c_lcl_port, rmt->is_ip4); return uc->c_c_index; } @@ -461,8 +505,90 @@ format_udp_listener_session (u8 * s, va_list * args) return format (s, "%U", format_udp_connection, uc, verbose); } -/* *INDENT-OFF* */ +static void +udp_realloc_ports_sv (u16 **ports_nh_svp) +{ + u16 port, port_no, *ports_nh_sv, *mc; + u32 *ports = 0, *nh = 0, msum, i; + sparse_vec_header_t *h; + uword sv_index, *mb; + + ports_nh_sv = *ports_nh_svp; + + for (port = 1; port < 65535; port++) + { + port_no = clib_host_to_net_u16 (port); + + sv_index = sparse_vec_index (ports_nh_sv, port_no); + if (sv_index != SPARSE_VEC_INVALID_INDEX) + { + vec_add1 (ports, port_no); + vec_add1 (nh, ports_nh_sv[sv_index]); + } + } + + sparse_vec_free (ports_nh_sv); + + ports_nh_sv = + sparse_vec_new (/* elt bytes */ sizeof (ports_nh_sv[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + vec_resize (ports_nh_sv, 65535); + + for (port = 1; port < 65535; port++) + ports_nh_sv[port] = UDP_NO_NODE_SET; + + for (i = 0; i < vec_len (ports); i++) + ports_nh_sv[ports[i]] = nh[i]; + + h = sparse_vec_header (ports_nh_sv); + vec_foreach (mb, h->is_member_bitmap) + *mb = (uword) ~0; + + msum = 0; + vec_foreach (mc, h->member_counts) + { + *mc = msum; + msum += msum == 0 ? 63 : 64; + } + + vec_free (ports); + vec_free (nh); + + *ports_nh_svp = ports_nh_sv; +} + +static clib_error_t * +udp_enable_disable (vlib_main_t *vm, u8 is_en) +{ + udp_main_t *um = &udp_main; + + /* Not ideal. 
The sparse vector used to map ports to next nodes assumes + * only a few ports are ever used. When udp transport is enabled this does + * not hold and, to make matters worse, ports are consumed in a random + * order. + * + * This can lead to a lot of slow updates to internal data structures + * which in turn can slow udp connection allocations until all ports are + * eventually consumed. + * + * Consequently, reallocate sparse vector, preallocate all ports and have + * them point to UDP_NO_NODE_SET. We could consider switching the sparse + * vector to a preallocated vector but that would increase memory + * consumption for vpp deployments that do not rely on host stack. + */ + + udp_realloc_ports_sv (&um->next_by_dst_port4); + udp_realloc_ports_sv (&um->next_by_dst_port6); + + vec_validate (um->transport_ports_refcnt[0], 65535); + vec_validate (um->transport_ports_refcnt[1], 65535); + + return 0; +} + static const transport_proto_vft_t udp_proto = { + .enable = udp_enable_disable, .start_listen = udp_session_bind, .connect = udp_open_connection, .stop_listen = udp_session_unbind, @@ -483,7 +609,6 @@ static const transport_proto_vft_t udp_proto = { .service_type = TRANSPORT_SERVICE_CL, }, }; -/* *INDENT-ON* */ static clib_error_t * udp_init (vlib_main_t * vm) @@ -505,18 +630,18 @@ udp_init (vlib_main_t * vm) pi->format_header = format_udp_header; pi->unformat_pg_edit = unformat_pg_udp_header; - /* Register as transport with URI */ + /* Register as transport with session layer */ transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto, - FIB_PROTOCOL_IP4, ip4_lookup_node.index); + FIB_PROTOCOL_IP4, udp4_output_node.index); transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto, - FIB_PROTOCOL_IP6, ip6_lookup_node.index); + FIB_PROTOCOL_IP6, udp6_output_node.index); /* * Initialize data structures */ num_threads = 1 /* main thread */ + tm->n_threads; - vec_validate (um->connections, num_threads - 1); + vec_validate (um->wrk, num_threads - 1); 
um->local_to_input_edge[UDP_IP4] = vlib_node_add_next (vm, udp4_local_node.index, udp4_input_node.index); @@ -524,16 +649,15 @@ udp_init (vlib_main_t * vm) vlib_node_add_next (vm, udp6_local_node.index, udp6_input_node.index); um->default_mtu = 1500; + um->csum_offload = 1; return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (udp_init) = { .runs_after = VLIB_INITS("ip_main_init", "ip4_lookup_init", "ip6_lookup_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h index d51805099ce..8e4e87f85a8 100644 --- a/src/vnet/udp/udp.h +++ b/src/vnet/udp/udp.h @@ -25,6 +25,8 @@ #include <vnet/ip/ip.h> #include <vnet/session/transport.h> +#define UDP_NO_NODE_SET ((u16) ~0) + typedef enum { #define udp_error(f, n, s, d) UDP_ERROR_##f, @@ -55,6 +57,24 @@ typedef enum udp_conn_flags_ #undef _ } udp_conn_flags_t; +#define foreach_udp_cfg_flag _ (NO_CSUM_OFFLOAD, "no-csum-offload") + +typedef enum udp_cfg_flag_bits_ +{ +#define _(sym, str) UDP_CFG_F_##sym##_BIT, + foreach_udp_cfg_flag +#undef _ + UDP_CFG_N_FLAG_BITS +} udp_cfg_flag_bits_e; + +typedef enum udp_cfg_flag_ +{ +#define _(sym, str) UDP_CFG_F_##sym = 1 << UDP_CFG_F_##sym##_BIT, + foreach_udp_cfg_flag +#undef _ + UDP_CFG_N_FLAGS +} __clib_packed udp_cfg_flags_t; + typedef struct { /** Required for pool_get_aligned */ @@ -62,10 +82,15 @@ typedef struct transport_connection_t connection; /**< must be first */ clib_spinlock_t rx_lock; /**< rx fifo lock */ u8 flags; /**< connection flags */ + udp_cfg_flags_t cfg_flags; /**< configuration flags */ u16 mss; /**< connection mss */ u32 sw_if_index; /**< connection sw_if_index */ + u32 next_node_index; /**< Can be used to control next node in output */ + u32 next_node_opaque; /**< Opaque to pass to next node */ } udp_connection_t; +#define udp_csum_offload(uc) (!((uc)->cfg_flags & UDP_CFG_F_NO_CSUM_OFFLOAD)) + typedef struct { /* Name (a c string). 
*/ @@ -80,9 +105,6 @@ typedef struct /* Next index for this type. */ u32 next_index; - /* UDP sessions refcount (not tunnels) */ - u32 n_connections; - /* Parser for packet generator edits for this protocol */ unformat_function_t *unformat_pg_edit; } udp_dst_port_info_t; @@ -94,6 +116,12 @@ typedef enum N_UDP_AF, } udp_af_t; +typedef struct udp_worker_ +{ + udp_connection_t *connections; + u32 *pending_cleanups; +} udp_worker_t; + typedef struct { udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; @@ -113,13 +141,19 @@ typedef struct u32 local_to_input_edge[N_UDP_AF]; /* - * Per-worker thread udp connection pools used with session layer + * UDP transport layer per-thread context */ - udp_connection_t **connections; + + udp_worker_t *wrk; udp_connection_t *listener_pool; + /* Refcounts for ports consumed by udp transports to handle + * both passive and active opens using the same port */ + u16 *transport_ports_refcnt[N_UDP_AF]; + u16 default_mtu; u16 msg_id_base; + u8 csum_offload; u8 icmp_send_unreachable_disabled; } udp_main_t; @@ -129,16 +163,26 @@ extern vlib_node_registration_t udp4_input_node; extern vlib_node_registration_t udp6_input_node; extern vlib_node_registration_t udp4_local_node; extern vlib_node_registration_t udp6_local_node; +extern vlib_node_registration_t udp4_output_node; +extern vlib_node_registration_t udp6_output_node; void udp_add_dst_port (udp_main_t * um, udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4); +always_inline udp_worker_t * +udp_worker_get (u32 thread_index) +{ + return vec_elt_at_index (udp_main.wrk, thread_index); +} + always_inline udp_connection_t * udp_connection_get (u32 conn_index, u32 thread_index) { - if (pool_is_free_index (udp_main.connections[thread_index], conn_index)) + udp_worker_t *wrk = udp_worker_get (thread_index); + + if (pool_is_free_index (wrk->connections, conn_index)) return 0; - return pool_elt_at_index (udp_main.connections[thread_index], conn_index); + return pool_elt_at_index (wrk->connections, 
conn_index); } always_inline udp_connection_t * @@ -161,6 +205,7 @@ udp_connection_from_transport (transport_connection_t * tc) void udp_connection_free (udp_connection_t * uc); udp_connection_t *udp_connection_alloc (u32 thread_index); +void udp_connection_share_port (u16 lcl_port, u8 is_ip4); always_inline udp_connection_t * udp_connection_clone_safe (u32 connection_index, u32 thread_index) @@ -171,7 +216,7 @@ udp_connection_clone_safe (u32 connection_index, u32 thread_index) new_c = udp_connection_alloc (current_thread_index); new_index = new_c->c_c_index; /* Connection pool always realloced with barrier */ - old_c = udp_main.connections[thread_index] + connection_index; + old_c = udp_main.wrk[thread_index].connections + connection_index; clib_memcpy_fast (new_c, old_c, sizeof (*new_c)); old_c->flags |= UDP_CONN_F_MIGRATED; new_c->c_thread_index = current_thread_index; @@ -195,8 +240,6 @@ format_function_t format_udp_connection; unformat_function_t unformat_udp_header; unformat_function_t unformat_udp_port; -void udp_connection_share_port (u16 lcl_port, u8 is_ip4); - void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); /* diff --git a/src/vnet/udp/udp_api.c b/src/vnet/udp/udp_api.c index 0f2d014946f..1f952aa36ea 100644 --- a/src/vnet/udp/udp_api.c +++ b/src/vnet/udp/udp_api.c @@ -86,12 +86,10 @@ vl_api_udp_encap_dump_t_handler (vl_api_udp_encap_dump_t *mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (ue, udp_encap_pool) { send_udp_encap_details(ue, reg, mp->context); } - /* *INDENT-ON* */ } static void @@ -99,6 +97,7 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp) { vl_api_udp_encap_add_reply_t *rmp; ip46_address_t src_ip, dst_ip; + udp_encap_fixup_flags_t flags; u32 fib_index, table_id; fib_protocol_t fproto; ip46_type_t itype; @@ -119,19 +118,19 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp) goto done; } - uei = udp_encap_add_and_lock (fproto, fib_index, - &src_ip, &dst_ip, + flags = UDP_ENCAP_FIXUP_NONE; + if 
(mp->udp_encap.src_port == 0) + flags |= UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY; + + uei = udp_encap_add_and_lock (fproto, fib_index, &src_ip, &dst_ip, ntohs (mp->udp_encap.src_port), - ntohs (mp->udp_encap.dst_port), - UDP_ENCAP_FIXUP_NONE); + ntohs (mp->udp_encap.dst_port), flags); done: - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_UDP_ENCAP_ADD_REPLY, ({ rmp->id = ntohl (uei); })); - /* *INDENT-ON* */ } @@ -189,11 +188,19 @@ vl_api_udp_decap_add_del_t_handler (vl_api_udp_decap_add_del_t *mp) static clib_error_t * udp_api_hookup (vlib_main_t * vm) { + api_main_t *am = vlibapi_get_main (); + /* * Set up the (msg_name, crc, message-id) table */ REPLY_MSG_ID_BASE = setup_message_id_table (); + /* Mark these APIs as mp safe */ + vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_ADD, 1); + vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DEL, 1); + vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DUMP, + 1); + return 0; } diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c index 9787eedf933..6c8992cd0de 100644 --- a/src/vnet/udp/udp_cli.c +++ b/src/vnet/udp/udp_cli.c @@ -38,6 +38,33 @@ format_udp_connection_id (u8 * s, va_list * args) return s; } +static const char *udp_cfg_flags_str[] = { +#define _(sym, str) str, + foreach_udp_cfg_flag +#undef _ +}; + +static u8 * +format_udp_cfg_flags (u8 *s, va_list *args) +{ + udp_connection_t *tc = va_arg (*args, udp_connection_t *); + int i, last = -1; + + for (i = 0; i < UDP_CFG_N_FLAG_BITS; i++) + if (tc->cfg_flags & (1 << i)) + last = i; + if (last >= 0) + s = format (s, " cfg: "); + for (i = 0; i < last; i++) + { + if (tc->cfg_flags & (1 << i)) + s = format (s, "%s, ", udp_cfg_flags_str[i]); + } + if (last >= 0) + s = format (s, "%s", udp_cfg_flags_str[last]); + return s; +} + static const char *udp_connection_flags_str[] = { #define _(sym, str) str, foreach_udp_connection_flag @@ -68,10 +95,13 @@ format_udp_vars (u8 * s, va_list * args) { udp_connection_t *uc = 
va_arg (*args, udp_connection_t *); - s = format (s, " index %u flags: %U", uc->c_c_index, - format_udp_connection_flags, uc); + s = format (s, " index %u%U flags: %U\n", uc->c_c_index, + format_udp_cfg_flags, uc, format_udp_connection_flags, uc); + s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index); if (!(uc->flags & UDP_CONN_F_LISTEN)) - s = format (s, " \n sw_if_index: %d, mss: %u\n", uc->sw_if_index, uc->mss); + s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss); + else + s = format (s, "\n"); return s; } @@ -106,6 +136,8 @@ udp_config_fn (vlib_main_t * vm, unformat_input_t * input) um->default_mtu = tmp; else if (unformat (input, "icmp-unreachable-disabled")) um->icmp_send_unreachable_disabled = 1; + else if (unformat (input, "no-csum-offload")) + um->csum_offload = 0; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -155,7 +187,7 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input, u8 *s = NULL; vec_foreach (port_info, um->dst_port_infos[UDP_IP6]) { - if (udp_is_valid_dst_port (port_info->dst_port, 01)) + if (udp_is_valid_dst_port (port_info->dst_port, 0)) { s = format (s, (!s) ? 
"%d" : ", %d", port_info->dst_port); } @@ -166,29 +198,32 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input, return (error); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_tcp_punt_command, static) = { .path = "show udp punt", .short_help = "show udp punt [ipv4|ipv6]", .function = show_udp_punt_fn, }; -/* *INDENT-ON* */ static void table_format_udp_port_ (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c, int port, int bind, int is_ip4) { - const udp_dst_port_info_t *pi = udp_get_dst_port_info (um, port, is_ip4); - if (!pi) + const udp_dst_port_info_t *pi; + + if (bind && !udp_is_valid_dst_port (port, is_ip4)) return; - if (bind && ~0 == pi->node_index) + + pi = udp_get_dst_port_info (um, port, is_ip4); + if (!pi) return; + table_format_cell (t, *c, 0, "%d", pi->dst_port); table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6"); table_format_cell (t, *c, 2, ~0 == pi->node_index ? "none" : "%U", format_vlib_node_name, vm, pi->node_index); table_format_cell (t, *c, 3, "%s", pi->name); + (*c)++; } @@ -265,6 +300,98 @@ VLIB_CLI_COMMAND (show_udp_ports_cmd, static) = { .is_mp_safe = 1, }; +static void +table_format_udp_transport_port_ (vlib_main_t *vm, table_t *t, int *c, + int port, int is_ip4) +{ + udp_main_t *um = &udp_main; + u32 refcnt; + u16 port_ne; + + port_ne = clib_host_to_net_u16 (port); + refcnt = um->transport_ports_refcnt[is_ip4][port_ne]; + if (!refcnt) + return; + + if (!udp_is_valid_dst_port (port, is_ip4)) + { + clib_warning ("Port %u is not registered refcnt %u!", port, refcnt); + return; + } + + table_format_cell (t, *c, 0, "%d", port); + table_format_cell (t, *c, 1, is_ip4 ? 
"ip4" : "ip6"); + table_format_cell (t, *c, 2, "%d", refcnt); + + (*c)++; +} + +static void +table_format_udp_transport_port (vlib_main_t *vm, table_t *t, int *c, int port, + int ipv) +{ + if (ipv == -1 || ipv == 0) + table_format_udp_transport_port_ (vm, t, c, port, 1 /* is_ip4 */); + if (ipv == -1 || ipv == 1) + table_format_udp_transport_port_ (vm, t, c, port, 0 /* is_ip4 */); +} + +static clib_error_t * +show_udp_transport_ports (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + table_t table = {}, *t = &table; + int ipv = -1, port = -1, c = 0; + clib_error_t *err = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "ip4")) + ipv = 0; + else if (unformat (input, "ip6")) + ipv = 1; + else if (unformat (input, "%d", &port)) + ; + else + { + err = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + table_add_header_col (t, 3, "port", "proto", "ref-cnt"); + + if (port > 65535) + { + err = clib_error_return (0, "wrong port %d", port); + goto out; + } + + if (port < 0) + { + for (port = 0; port < 65536; port++) + table_format_udp_transport_port (vm, t, &c, port, ipv); + } + else + { + table_format_udp_transport_port (vm, t, &c, port, ipv); + } + + vlib_cli_output (vm, "%U\n", format_table, t); + +out: + table_free (t); + return err; +} + +VLIB_CLI_COMMAND (show_udp_transport_ports_cmd, static) = { + .path = "show udp transport ports", + .function = show_udp_transport_ports, + .short_help = "show udp transport ports [ip4|ip6] [<port>]", + .is_mp_safe = 1, +}; + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/udp/udp_encap.c b/src/vnet/udp/udp_encap.c index a0f5a50c223..e4e5271da63 100644 --- a/src/vnet/udp/udp_encap.c +++ b/src/vnet/udp/udp_encap.c @@ -195,6 +195,20 @@ udp_encap_dpo_unlock (dpo_id_t * dpo) fib_node_unlock (&ue->ue_fib_node); } +u8 * +format_udp_encap_fixup_flags (u8 *s, va_list *args) +{ + udp_encap_fixup_flags_t 
flags = va_arg (*args, udp_encap_fixup_flags_t); + + if (flags == UDP_ENCAP_FIXUP_NONE) + return format (s, "none"); + + if (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) + s = format (s, "%s", "src-port-is-entropy"); + + return (s); +} + static u8 * format_udp_encap_i (u8 * s, va_list * args) { @@ -210,23 +224,21 @@ format_udp_encap_i (u8 * s, va_list * args) s = format (s, "udp-encap:[%d]: ip-fib-index:%d ", uei, ue->ue_fib_index); if (FIB_PROTOCOL_IP4 == ue->ue_ip_proto) { - s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d]", - format_ip4_address, - &ue->ue_hdrs.ip4.ue_ip4.src_address, - format_ip4_address, - &ue->ue_hdrs.ip4.ue_ip4.dst_address, + s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d] flags:%U", + format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.src_address, + format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.dst_address, clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.src_port), - clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port)); + clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port), + format_udp_encap_fixup_flags, ue->ue_flags); } else { - s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d]", - format_ip6_address, - &ue->ue_hdrs.ip6.ue_ip6.src_address, - format_ip6_address, - &ue->ue_hdrs.ip6.ue_ip6.dst_address, + s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d] flags:%U", + format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.src_address, + format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.dst_address, clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.src_port), - clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port)); + clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port), + format_udp_encap_fixup_flags, ue->ue_flags); } vlib_get_combined_counter (&(udp_encap_counters), uei, &to); s = format (s, " to:[%Ld:%Ld]]", to.packets, to.bytes); @@ -506,13 +518,11 @@ udp_encap_walk (udp_encap_walk_cb_t cb, void *ctx) { index_t uei; - /* *INDENT-OFF* */ pool_foreach_index (uei, udp_encap_pool) { if (WALK_STOP == cb(uei, ctx)) break; } - /* *INDENT-ON* 
*/ } clib_error_t * @@ -535,12 +545,10 @@ udp_encap_show (vlib_main_t * vm, if (INDEX_INVALID == uei) { - /* *INDENT-OFF* */ pool_foreach_index (uei, udp_encap_pool) { vlib_cli_output(vm, "%U", format_udp_encap, uei, 0); } - /* *INDENT-ON* */ } else { @@ -550,20 +558,20 @@ udp_encap_show (vlib_main_t * vm, return NULL; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (udp_encap_add_command, static) = { .path = "udp encap", - .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] <dst-port> [src-port-is-entropy] [table-id <table>]", + .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] " + "<dst-port> [src-port-is-entropy] [table-id <table>]", .function = udp_encap_cli, .is_mp_safe = 1, }; + VLIB_CLI_COMMAND (udp_encap_show_command, static) = { .path = "show udp encap", .short_help = "show udp encap [ID]", .function = udp_encap_show, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/udp/udp_encap.h b/src/vnet/udp/udp_encap.h index 648e3b59e6d..c8b42ffa92c 100644 --- a/src/vnet/udp/udp_encap.h +++ b/src/vnet/udp/udp_encap.h @@ -115,6 +115,7 @@ extern index_t udp_encap_add_and_lock (fib_protocol_t proto, extern void udp_encap_lock (index_t uei); extern void udp_encap_unlock (index_t uei); extern u8 *format_udp_encap (u8 * s, va_list * args); +extern u8 *format_udp_encap_fixup_flags (u8 *s, va_list *args); extern void udp_encap_contribute_forwarding (index_t uei, dpo_proto_t proto, dpo_id_t * dpo); diff --git a/src/vnet/udp/udp_encap_node.c b/src/vnet/udp/udp_encap_node.c index 1ebe79532f4..a86614f5475 100644 --- a/src/vnet/udp/udp_encap_node.c +++ b/src/vnet/udp/udp_encap_node.c @@ -20,12 +20,16 @@ typedef struct udp4_encap_trace_t_ { udp_header_t udp; ip4_header_t ip; + u32 flow_hash; + udp_encap_fixup_flags_t flags; } udp4_encap_trace_t; typedef struct udp6_encap_trace_t_ { udp_header_t udp; ip6_header_t ip; + u32 flow_hash; + udp_encap_fixup_flags_t flags; } udp6_encap_trace_t; 
extern vlib_combined_counter_main_t udp_encap_counters; @@ -35,13 +39,16 @@ format_udp4_encap_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + u32 indent = format_get_indent (s); udp4_encap_trace_t *t; t = va_arg (*args, udp4_encap_trace_t *); - s = format (s, "%U\n %U", - format_ip4_header, &t->ip, sizeof (t->ip), - format_udp_header, &t->udp, sizeof (t->udp)); + s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U", + format_udp_encap_fixup_flags, t->flags, t->flow_hash, + format_white_space, indent, format_ip4_header, &t->ip, + sizeof (t->ip), format_white_space, indent, format_udp_header, + &t->udp, sizeof (t->udp)); return (s); } @@ -50,13 +57,16 @@ format_udp6_encap_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + u32 indent = format_get_indent (s); udp6_encap_trace_t *t; t = va_arg (*args, udp6_encap_trace_t *); - s = format (s, "%U\n %U", - format_ip6_header, &t->ip, sizeof (t->ip), - format_udp_header, &t->udp, sizeof (t->udp)); + s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U", + format_udp_encap_fixup_flags, t->flags, t->flow_hash, + format_white_space, indent, format_ip6_header, &t->ip, + sizeof (t->ip), format_white_space, indent, format_udp_header, + &t->udp, sizeof (t->udp)); return (s); } @@ -127,13 +137,16 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, sizeof (udp_header_t) + sizeof (ip6_header_t); ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs, (u8 *) &ue1->ue_hdrs, n_bytes, encap_family, - payload_family); + payload_family, ue0->ue_flags, ue1->ue_flags); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { udp6_encap_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->udp = ue0->ue_hdrs.ip6.ue_udp; tr->ip = ue0->ue_hdrs.ip6.ue_ip6; + tr->flags = ue0->ue_flags; + 
tr->flow_hash = vnet_buffer (b0)->ip.flow_hash; } if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) { @@ -141,6 +154,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->udp = ue1->ue_hdrs.ip6.ue_udp; tr->ip = ue1->ue_hdrs.ip6.ue_ip6; + tr->flags = ue1->ue_flags; + tr->flow_hash = vnet_buffer (b1)->ip.flow_hash; } } else @@ -150,7 +165,7 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs, (u8 *) &ue1->ue_hdrs, n_bytes, encap_family, - payload_family); + payload_family, ue0->ue_flags, ue1->ue_flags); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -158,6 +173,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->udp = ue0->ue_hdrs.ip4.ue_udp; tr->ip = ue0->ue_hdrs.ip4.ue_ip4; + tr->flags = ue0->ue_flags; + tr->flow_hash = vnet_buffer (b0)->ip.flow_hash; } if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) { @@ -165,6 +182,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->udp = ue1->ue_hdrs.ip4.ue_udp; tr->ip = ue1->ue_hdrs.ip4.ue_ip4; + tr->flags = ue1->ue_flags; + tr->flow_hash = vnet_buffer (b1)->ip.flow_hash; } } @@ -208,7 +227,7 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, const u8 n_bytes = sizeof (udp_header_t) + sizeof (ip6_header_t); ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip6, n_bytes, - encap_family, payload_family); + encap_family, payload_family, ue0->ue_flags); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -216,6 +235,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->udp = ue0->ue_hdrs.ip6.ue_udp; tr->ip = ue0->ue_hdrs.ip6.ue_ip6; + tr->flags = ue0->ue_flags; + tr->flow_hash = vnet_buffer (b0)->ip.flow_hash; } } else @@ -224,7 +245,7 @@ udp_encap_inline (vlib_main_t *vm, 
vlib_node_runtime_t *node, sizeof (udp_header_t) + sizeof (ip4_header_t); ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip4, n_bytes, - encap_family, payload_family); + encap_family, payload_family, ue0->ue_flags); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -232,6 +253,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->udp = ue0->ue_hdrs.ip4.ue_udp; tr->ip = ue0->ue_hdrs.ip4.ue_ip4; + tr->flags = ue0->ue_flags; + tr->flow_hash = vnet_buffer (b0)->ip.flow_hash; } } @@ -285,7 +308,6 @@ VLIB_NODE_FN (udp6_encap_node) return udp_encap_inline (vm, node, frame, AF_IP6, N_AF); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4o4_encap_node) = { .name = "udp4o4-encap", .vector_size = sizeof (u32), @@ -296,7 +318,7 @@ VLIB_REGISTER_NODE (udp4o4_encap_node) = { VLIB_REGISTER_NODE (udp6o4_encap_node) = { .name = "udp6o4-encap", .vector_size = sizeof (u32), - .format_trace = format_udp6_encap_trace, + .format_trace = format_udp4_encap_trace, .n_next_nodes = 0, .sibling_of = "udp4o4-encap", }; @@ -319,7 +341,7 @@ VLIB_REGISTER_NODE (udp6o6_encap_node) = { VLIB_REGISTER_NODE (udp4o6_encap_node) = { .name = "udp4o6-encap", .vector_size = sizeof (u32), - .format_trace = format_udp4_encap_trace, + .format_trace = format_udp6_encap_trace, .n_next_nodes = 0, .sibling_of = "udp6o6-encap", }; @@ -331,7 +353,6 @@ VLIB_REGISTER_NODE (udp6_encap_node) = { .n_next_nodes = 0, .sibling_of = "udp6o6-encap", }; -/* *INDENT-ON* */ /* diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def index 178d5c96b2c..ef19970ce72 100644 --- a/src/vnet/udp/udp_error.def +++ b/src/vnet/udp/udp_error.def @@ -21,7 +21,10 @@ udp_error (LENGTH_ERROR, length_error, ERROR, "Packets with length errors") udp_error (PUNT, punt, ERROR, "No listener punt") udp_error (ENQUEUED, enqueued, INFO, "Packets enqueued") udp_error (FIFO_FULL, fifo_full, ERROR, "Fifo full") +udp_error (FIFO_NOMEM, fifo_nomem, ERROR, "Fifo no mem") 
udp_error (NOT_READY, not_ready, ERROR, "Connection not ready") udp_error (ACCEPT, accept, INFO, "Accepted session") udp_error (CREATE_SESSION, create_session, ERROR, "Failed to create session") udp_error (MQ_FULL, mq_full, ERROR, "Application msg queue full") +udp_error (INVALID_CONNECTION, invalid_connection, ERROR, "Invalid connection") +udp_error (PKTS_SENT, pkts_sent, INFO, "Packets sent") diff --git a/src/vnet/udp/udp_inlines.h b/src/vnet/udp/udp_inlines.h index 025809e1873..f0dd44f48b5 100644 --- a/src/vnet/udp/udp_inlines.h +++ b/src/vnet/udp/udp_inlines.h @@ -21,6 +21,9 @@ #include <vnet/ip/ip6.h> #include <vnet/udp/udp_packet.h> #include <vnet/interface_output.h> +#include <vnet/ip/ip4_inlines.h> +#include <vnet/ip/ip6_inlines.h> +#include <vnet/udp/udp_encap.h> always_inline void * vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum) @@ -42,8 +45,39 @@ vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum) return uh; } +/* + * Encode udp source port entropy value per + * https://datatracker.ietf.org/doc/html/rfc7510#section-3 + */ +always_inline u16 +ip_udp_sport_entropy (vlib_buffer_t *b0) +{ + u16 port = clib_host_to_net_u16 (0x03 << 14); + port |= vnet_buffer (b0)->ip.flow_hash & 0xffff; + return port; +} + +always_inline u32 +ip_udp_compute_flow_hash (vlib_buffer_t *b0, u8 is_ip4) +{ + ip4_header_t *ip4; + ip6_header_t *ip6; + + if (is_ip4) + { + ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset); + return ip4_compute_flow_hash (ip4, IP_FLOW_HASH_DEFAULT); + } + else + { + ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset); + return ip6_compute_flow_hash (ip6, IP_FLOW_HASH_DEFAULT); + } +} + always_inline void -ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) +ip_udp_fixup_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 is_ip4, + u8 sport_entropy) { u16 new_l0; udp_header_t *udp0; @@ -71,6 +105,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 
is_ip4) new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - sizeof (*ip0)); udp0->length = new_l0; + + if (sport_entropy) + udp0->src_port = ip_udp_sport_entropy (b0); } else { @@ -87,6 +124,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) udp0 = (udp_header_t *) (ip0 + 1); udp0->length = new_l0; + if (sport_entropy) + udp0->src_port = ip_udp_sport_entropy (b0); + udp0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); ASSERT (bogus0 == 0); @@ -99,13 +139,20 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) always_inline void ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len, ip_address_family_t encap_family, - ip_address_family_t payload_family) + ip_address_family_t payload_family, + udp_encap_fixup_flags_t flags) { + u8 sport_entropy = (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0; if (payload_family < N_AF) { vnet_calc_checksums_inline (vm, b0, payload_family == AF_IP4, payload_family == AF_IP6); + + /* Сalculate flow hash to be used for entropy */ + if (sport_entropy && 0 == vnet_buffer (b0)->ip.flow_hash) + vnet_buffer (b0)->ip.flow_hash = + ip_udp_compute_flow_hash (b0, payload_family == AF_IP4); } vlib_buffer_advance (b0, -ec_len); @@ -118,7 +165,7 @@ ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len, /* Apply the encap string. */ clib_memcpy_fast (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 1); + ip_udp_fixup_one (vm, b0, 1, sport_entropy); } else { @@ -128,7 +175,7 @@ ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len, /* Apply the encap string. 
*/ clib_memcpy_fast (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 0); + ip_udp_fixup_one (vm, b0, 0, sport_entropy); } } @@ -136,16 +183,28 @@ always_inline void ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1, u8 *ec0, u8 *ec1, word ec_len, ip_address_family_t encap_family, - ip_address_family_t payload_family) + ip_address_family_t payload_family, + udp_encap_fixup_flags_t flags0, + udp_encap_fixup_flags_t flags1) { u16 new_l0, new_l1; udp_header_t *udp0, *udp1; int payload_ip4 = (payload_family == AF_IP4); + int sport_entropy0 = (flags0 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0; + int sport_entropy1 = (flags1 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0; if (payload_family < N_AF) { vnet_calc_checksums_inline (vm, b0, payload_ip4, !payload_ip4); vnet_calc_checksums_inline (vm, b1, payload_ip4, !payload_ip4); + + /* Сalculate flow hash to be used for entropy */ + if (sport_entropy0 && 0 == vnet_buffer (b0)->ip.flow_hash) + vnet_buffer (b0)->ip.flow_hash = + ip_udp_compute_flow_hash (b0, payload_ip4); + if (sport_entropy1 && 0 == vnet_buffer (b1)->ip.flow_hash) + vnet_buffer (b1)->ip.flow_hash = + ip_udp_compute_flow_hash (b1, payload_ip4); } vlib_buffer_advance (b0, -ec_len); @@ -195,6 +254,11 @@ ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1, sizeof (*ip1)); udp0->length = new_l0; udp1->length = new_l1; + + if (sport_entropy0) + udp0->src_port = ip_udp_sport_entropy (b0); + if (sport_entropy1) + udp1->src_port = ip_udp_sport_entropy (b1); } else { @@ -222,6 +286,11 @@ ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1, udp0->length = new_l0; udp1->length = new_l1; + if (sport_entropy0) + udp0->src_port = ip_udp_sport_entropy (b0); + if (sport_entropy1) + udp1->src_port = ip_udp_sport_entropy (b1); + udp0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); udp1->checksum = diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c index c11c0d51214..a90461186c1 
100644 --- a/src/vnet/udp/udp_input.c +++ b/src/vnet/udp/udp_input.c @@ -115,6 +115,7 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr, uc->c_fib_index = listener->c_fib_index; uc->mss = listener->mss; uc->flags |= UDP_CONN_F_CONNECTED; + uc->cfg_flags = listener->cfg_flags; if (session_dgram_accept (&uc->connection, listener->c_s_index, listener->c_thread_index)) @@ -122,8 +123,8 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr, udp_connection_free (uc); return 0; } - udp_connection_share_port (clib_net_to_host_u16 - (uc->c_lcl_port), uc->c_is_ip4); + + udp_connection_share_port (uc->c_lcl_port, uc->c_is_ip4); return uc; } @@ -135,37 +136,46 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0, int wrote0; if (!(uc0->flags & UDP_CONN_F_CONNECTED)) - clib_spinlock_lock (&uc0->rx_lock); + { + clib_spinlock_lock (&uc0->rx_lock); + + wrote0 = session_enqueue_dgram_connection_cl ( + s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event); + + clib_spinlock_unlock (&uc0->rx_lock); + + /* Expect cl udp enqueue to fail because fifo enqueue */ + if (PREDICT_FALSE (wrote0 == 0)) + *error0 = UDP_ERROR_FIFO_FULL; + + return; + } if (svm_fifo_max_enqueue_prod (s0->rx_fifo) < hdr0->data_length + sizeof (session_dgram_hdr_t)) { *error0 = UDP_ERROR_FIFO_FULL; - goto unlock_rx_lock; + return; } /* If session is owned by another thread and rx event needed, * enqueue event now while we still have the peeker lock */ if (s0->thread_index != thread_index) { - wrote0 = session_enqueue_dgram_connection (s0, hdr0, b, - TRANSPORT_PROTO_UDP, - /* queue event */ 0); - if (queue_event && !svm_fifo_has_event (s0->rx_fifo)) - session_enqueue_notify (s0); + wrote0 = session_enqueue_dgram_connection2 ( + s0, hdr0, b, TRANSPORT_PROTO_UDP, + queue_event && !svm_fifo_has_event (s0->rx_fifo)); } else { - wrote0 = session_enqueue_dgram_connection (s0, hdr0, b, - TRANSPORT_PROTO_UDP, - queue_event); + wrote0 = 
session_enqueue_dgram_connection ( + s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event); } - ASSERT (wrote0 > 0); - -unlock_rx_lock: - if (!(uc0->flags & UDP_CONN_F_CONNECTED)) - clib_spinlock_unlock (&uc0->rx_lock); + /* In some rare cases, session_enqueue_dgram_connection can fail because a + * chunk cannot be allocated in the RX FIFO */ + if (PREDICT_FALSE (wrote0 == 0)) + *error0 = UDP_ERROR_FIFO_NOMEM; } always_inline session_t * @@ -184,6 +194,7 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr, hdr->lcl_port = udp->dst_port; hdr->rmt_port = udp->src_port; hdr->is_ip4 = is_ip4; + hdr->gso_size = 0; if (is_ip4) { @@ -213,6 +224,10 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr, udp->src_port, TRANSPORT_PROTO_UDP); } + /* Set the sw_if_index[VLIB_RX] to the interface we received + * the connection on (the local interface) */ + vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index; + if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) b->current_length = hdr->data_length; else @@ -226,10 +241,9 @@ always_inline uword udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, u8 is_ip4) { - u32 n_left_from, *from, errors, *first_buffer; + u32 thread_index = vm->thread_index, n_left_from, *from, *first_buffer; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 err_counters[UDP_N_ERROR] = { 0 }; - u32 thread_index = vm->thread_index; from = first_buffer = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -281,7 +295,8 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], queue_event, &error0); } - else if (s0->session_state == SESSION_STATE_READY) + else if (s0->session_state == SESSION_STATE_READY || + s0->session_state == SESSION_STATE_ACCEPTING) { uc0 = udp_connection_from_transport (session_get_transport (s0)); udp_connection_enqueue (uc0, s0, &hdr0, thread_index, 
b[0], 1, @@ -321,9 +336,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } vlib_buffer_free (vm, first_buffer, frame->n_vectors); - errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, - thread_index); - err_counters[UDP_ERROR_MQ_FULL] = errors; + session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, thread_index); udp_store_err_counters (vm, is_ip4, err_counters); return frame->n_vectors; } @@ -335,7 +348,6 @@ udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node, return udp46_input_inline (vm, node, frame, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4_input_node) = { .function = udp4_input, @@ -352,7 +364,6 @@ VLIB_REGISTER_NODE (udp4_input_node) = #undef _ }, }; -/* *INDENT-ON* */ static uword udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -361,7 +372,6 @@ udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node, return udp46_input_inline (vm, node, frame, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp6_input_node) = { .function = udp6_input, @@ -378,7 +388,6 @@ VLIB_REGISTER_NODE (udp6_input_node) = #undef _ }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c index 06bafbb4be8..6531b73cd11 100644 --- a/src/vnet/udp/udp_local.c +++ b/src/vnet/udp/udp_local.c @@ -42,8 +42,6 @@ static vlib_error_desc_t udp_error_counters[] = { #undef udp_error }; -#define UDP_NO_NODE_SET ((u16) ~0) - #ifndef CLIB_MARCH_VARIANT u8 * format_udp_rx_trace (u8 * s, va_list * args) @@ -127,9 +125,8 @@ udp46_local_inline (vlib_main_t * vm, u32 bi0, bi1; vlib_buffer_t *b0, *b1; udp_header_t *h0 = 0, *h1 = 0; - u32 i0, i1, dst_port0, dst_port1; + u32 i0, i1, next0, next1; u32 advance0, advance1; - u32 error0, next0, error1, next1; /* Prefetch next iteration. 
*/ { @@ -171,72 +168,106 @@ udp46_local_inline (vlib_main_t * vm, if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) { - error0 = UDP_ERROR_LENGTH_ERROR; + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; next0 = UDP_LOCAL_NEXT_DROP; } else { vlib_buffer_advance (b0, advance0); h0 = vlib_buffer_get_current (b0); - error0 = UDP_ERROR_NONE; next0 = UDP_LOCAL_NEXT_PUNT; if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > vlib_buffer_length_in_chain (vm, b0))) { - error0 = UDP_ERROR_LENGTH_ERROR; + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; next0 = UDP_LOCAL_NEXT_DROP; } } if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) { - error1 = UDP_ERROR_LENGTH_ERROR; + b1->error = node->errors[UDP_ERROR_LENGTH_ERROR]; next1 = UDP_LOCAL_NEXT_DROP; } else { vlib_buffer_advance (b1, advance1); h1 = vlib_buffer_get_current (b1); - error1 = UDP_ERROR_NONE; next1 = UDP_LOCAL_NEXT_PUNT; if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > vlib_buffer_length_in_chain (vm, b1))) { - error1 = UDP_ERROR_LENGTH_ERROR; + b1->error = node->errors[UDP_ERROR_LENGTH_ERROR]; next1 = UDP_LOCAL_NEXT_DROP; } } /* Index sparse array with network byte order. */ - dst_port0 = (error0 == 0) ? h0->dst_port : 0; - dst_port1 = (error1 == 0) ? h1->dst_port : 0; - sparse_vec_index2 (next_by_dst_port, dst_port0, dst_port1, &i0, - &i1); - next0 = (error0 == 0) ? vec_elt (next_by_dst_port, i0) : next0; - next1 = (error1 == 0) ? 
vec_elt (next_by_dst_port, i1) : next1; - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX || - next0 == UDP_NO_NODE_SET)) + if (PREDICT_TRUE (next0 == UDP_LOCAL_NEXT_PUNT && + next1 == UDP_LOCAL_NEXT_PUNT)) { - udp_dispatch_error (node, b0, advance0, is_ip4, &next0); + sparse_vec_index2 (next_by_dst_port, h0->dst_port, h1->dst_port, + &i0, &i1); + next0 = vec_elt (next_by_dst_port, i0); + next1 = vec_elt (next_by_dst_port, i1); + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX || + next0 == UDP_NO_NODE_SET)) + { + udp_dispatch_error (node, b0, advance0, is_ip4, &next0); + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + + if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX || + next1 == UDP_NO_NODE_SET)) + { + udp_dispatch_error (node, b1, advance1, is_ip4, &next1); + } + else + { + b1->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b1, sizeof (*h1)); + } } - else + else if (next0 == UDP_LOCAL_NEXT_PUNT) { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } + i0 = sparse_vec_index (next_by_dst_port, h0->dst_port); + next0 = vec_elt (next_by_dst_port, i0); - if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX || - next1 == UDP_NO_NODE_SET)) - { - udp_dispatch_error (node, b1, advance1, is_ip4, &next1); + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX || + next0 == UDP_NO_NODE_SET)) + { + udp_dispatch_error (node, b0, advance0, is_ip4, &next0); + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } } - else + else if (next1 == UDP_LOCAL_NEXT_PUNT) { - b1->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b1, sizeof (*h1)); + i1 = sparse_vec_index (next_by_dst_port, h1->dst_port); + next1 = vec_elt (next_by_dst_port, i1); + + if (PREDICT_FALSE (i1 == 
SPARSE_VEC_INVALID_INDEX || + next1 == UDP_NO_NODE_SET)) + { + udp_dispatch_error (node, b1, advance1, is_ip4, &next1); + } + else + { + b1->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b1, sizeof (*h1)); + } } if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -362,7 +393,6 @@ VLIB_NODE_FN (udp6_local_node) (vlib_main_t * vm, return udp46_local_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4_local_node) = { .name = "ip4-udp-lookup", /* Takes a vector of packets. */ @@ -382,9 +412,7 @@ VLIB_REGISTER_NODE (udp4_local_node) = { .format_trace = format_udp_rx_trace, .unformat_buffer = unformat_udp_header, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp6_local_node) = { .name = "ip6-udp-lookup", /* Takes a vector of packets. */ @@ -404,7 +432,6 @@ VLIB_REGISTER_NODE (udp6_local_node) = { .format_trace = format_udp_rx_trace, .unformat_buffer = unformat_udp_header, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT void @@ -492,16 +519,12 @@ u8 udp_is_valid_dst_port (udp_dst_port_t dst_port, u8 is_ip4) { udp_main_t *um = &udp_main; - u16 *n; - - if (is_ip4) - n = sparse_vec_validate (um->next_by_dst_port4, - clib_host_to_net_u16 (dst_port)); - else - n = sparse_vec_validate (um->next_by_dst_port6, - clib_host_to_net_u16 (dst_port)); - - return (n[0] != SPARSE_VEC_INVALID_INDEX && n[0] != UDP_NO_NODE_SET); + u16 *next_by_dst_port = + is_ip4 ? um->next_by_dst_port4 : um->next_by_dst_port6; + uword index = + sparse_vec_index (next_by_dst_port, clib_host_to_net_u16 (dst_port)); + return (index != SPARSE_VEC_INVALID_INDEX && + vec_elt (next_by_dst_port, index) != UDP_NO_NODE_SET); } void diff --git a/src/vnet/udp/udp_output.c b/src/vnet/udp/udp_output.c new file mode 100644 index 00000000000..22b94141365 --- /dev/null +++ b/src/vnet/udp/udp_output.c @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. 
+ */ + +#include <vnet/udp/udp.h> +#include <vnet/ip/ip4_inlines.h> +#include <vnet/ip/ip6_inlines.h> + +#define udp_node_index(node_id, is_ip4) \ + ((is_ip4) ? udp4_##node_id##_node.index : udp6_##node_id##_node.index) + +typedef enum udp_output_next_ +{ + UDP_OUTPUT_NEXT_DROP, + UDP_OUTPUT_NEXT_IP_LOOKUP, + UDP_OUTPUT_N_NEXT +} udp_output_next_t; + +#define foreach_udp4_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip4-lookup") + +#define foreach_udp6_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip6-lookup") + +static vlib_error_desc_t udp_output_error_counters[] = { +#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s }, +#include <vnet/udp/udp_error.def> +#undef udp_error +}; + +typedef struct udp_tx_trace_ +{ + udp_header_t udp_header; + udp_connection_t udp_connection; +} udp_tx_trace_t; + +static u8 * +format_udp_tx_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp_tx_trace_t *t = va_arg (*args, udp_tx_trace_t *); + udp_connection_t *uc = &t->udp_connection; + u32 indent = format_get_indent (s); + + s = format (s, "%U\n%U%U", format_udp_connection, uc, 1, format_white_space, + indent, format_udp_header, &t->udp_header, 128); + + return s; +} + +always_inline udp_connection_t * +udp_output_get_connection (vlib_buffer_t *b, u32 thread_index) +{ + if (PREDICT_FALSE (vnet_buffer (b)->tcp.flags & UDP_CONN_F_LISTEN)) + return udp_listener_get (vnet_buffer (b)->tcp.connection_index); + + return udp_connection_get (vnet_buffer (b)->tcp.connection_index, + thread_index); +} + +static void +udp46_output_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 *to_next, u32 n_bufs) +{ + udp_connection_t *uc; + udp_tx_trace_t *t; + vlib_buffer_t *b; + udp_header_t *uh; + int i; + + for (i = 0; i < n_bufs; i++) + { + b = vlib_get_buffer (vm, to_next[i]); + if (!(b->flags & VLIB_BUFFER_IS_TRACED)) + continue; + 
uh = vlib_buffer_get_current (b); + uc = udp_output_get_connection (b, vm->thread_index); + t = vlib_add_trace (vm, node, b, sizeof (*t)); + clib_memcpy_fast (&t->udp_header, uh, sizeof (t->udp_header)); + clib_memcpy_fast (&t->udp_connection, uc, sizeof (t->udp_connection)); + } +} + +always_inline void +udp_output_handle_packet (udp_connection_t *uc0, vlib_buffer_t *b0, + vlib_node_runtime_t *error_node, u16 *next0, + u8 is_ip4) +{ + /* If next_index is not drop use it */ + if (uc0->next_node_index) + { + *next0 = uc0->next_node_index; + vnet_buffer (b0)->tcp.next_node_opaque = uc0->next_node_opaque; + } + else + { + *next0 = UDP_OUTPUT_NEXT_IP_LOOKUP; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = uc0->c_fib_index; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = uc0->sw_if_index; +} + +always_inline uword +udp46_output_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, int is_ip4) +{ + u32 n_left_from, *from, thread_index = vm->thread_index; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u16 nexts[VLIB_FRAME_SIZE], *next; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + udp46_output_trace_frame (vm, node, from, n_left_from); + + vlib_get_buffers (vm, from, bufs, n_left_from); + b = bufs; + next = nexts; + + while (n_left_from >= 4) + { + udp_connection_t *uc0, *uc1; + + vlib_prefetch_buffer_header (b[2], STORE); + CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); + + vlib_prefetch_buffer_header (b[3], STORE); + CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); + + uc0 = udp_output_get_connection (b[0], thread_index); + uc1 = udp_output_get_connection (b[1], thread_index); + + if (PREDICT_TRUE (!uc0 + !uc1 == 0)) + { + udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4); + udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4); + } + else + { + if (uc0 != 0) + { + udp_output_handle_packet (uc0, b[0], node, &next[0], 
is_ip4); + } + else + { + b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION]; + next[0] = UDP_OUTPUT_NEXT_DROP; + } + if (uc1 != 0) + { + udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4); + } + else + { + b[1]->error = node->errors[UDP_ERROR_INVALID_CONNECTION]; + next[1] = UDP_OUTPUT_NEXT_DROP; + } + } + + b += 2; + next += 2; + n_left_from -= 2; + } + while (n_left_from > 0) + { + udp_connection_t *uc0; + + if (n_left_from > 1) + { + vlib_prefetch_buffer_header (b[1], STORE); + CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); + } + + uc0 = udp_output_get_connection (b[0], thread_index); + + if (PREDICT_TRUE (uc0 != 0)) + { + udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4); + } + else + { + b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION]; + next[0] = UDP_OUTPUT_NEXT_DROP; + } + + b += 1; + next += 1; + n_left_from -= 1; + } + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + vlib_node_increment_counter (vm, udp_node_index (output, is_ip4), + UDP_ERROR_PKTS_SENT, frame->n_vectors); + return frame->n_vectors; +} + +VLIB_NODE_FN (udp4_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + return udp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */); +} + +VLIB_NODE_FN (udp6_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + return udp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */); +} + +VLIB_REGISTER_NODE (udp4_output_node) = +{ + .name = "udp4-output", + .vector_size = sizeof (u32), + .n_errors = UDP_N_ERROR, + .protocol_hint = VLIB_NODE_PROTO_HINT_UDP, + .error_counters = udp_output_error_counters, + .n_next_nodes = UDP_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n, + foreach_udp4_output_next +#undef _ + }, + .format_buffer = format_udp_header, + .format_trace = format_udp_tx_trace, +}; + +VLIB_REGISTER_NODE (udp6_output_node) = +{ + .name = "udp6-output", + .vector_size = 
sizeof (u32), + .n_errors = UDP_N_ERROR, + .protocol_hint = VLIB_NODE_PROTO_HINT_UDP, + .error_counters = udp_output_error_counters, + .n_next_nodes = UDP_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n, + foreach_udp6_output_next +#undef _ + }, + .format_buffer = format_udp_header, + .format_trace = format_udp_tx_trace, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/unix/gdb_funcs.c b/src/vnet/unix/gdb_funcs.c index 91dabe394ba..d6fdc985bd9 100644 --- a/src/vnet/unix/gdb_funcs.c +++ b/src/vnet/unix/gdb_funcs.c @@ -318,13 +318,11 @@ show_gdb_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_gdb_funcs_command, static) = { .path = "show gdb", .short_help = "Describe functions which can be called from gdb", .function = show_gdb_command_fn, }; -/* *INDENT-ON* */ vlib_buffer_t * vgb (u32 bi) diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index b75b1f670b9..f1102dc321e 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -217,14 +217,12 @@ tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return n_packets; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tuntap_tx_node,static) = { .function = tuntap_tx, .name = "tuntap-tx", .type = VLIB_NODE_TYPE_INTERNAL, .vector_size = 4, }; -/* *INDENT-ON* */ /** * @brief TUNTAP receive node @@ -366,7 +364,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next_index = VNET_DEVICE_INPUT_NEXT_DROP; } - vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b); + vnet_feature_start_device_input (tm->sw_if_index, &next_index, b); vlib_set_next_frame_buffer (vm, node, next_index, bi); @@ -385,7 +383,6 @@ static char *tuntap_rx_error_strings[] = { "unknown packet type", }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tuntap_rx_node,static) = { .function = tuntap_rx, .flags = 
VLIB_NODE_FLAG_TRACE_SUPPORTED, @@ -397,7 +394,6 @@ VLIB_REGISTER_NODE (tuntap_rx_node,static) = { .n_errors = 1, .error_strings = tuntap_rx_error_strings, }; -/* *INDENT-ON* */ /** * @brief Gets called when file descriptor is ready from epoll. @@ -933,12 +929,10 @@ tuntap_nopunt_frame (vlib_main_t * vm, vlib_frame_free (vm, frame); } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = { .name = "tuntap", .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; -/* *INDENT-ON* */ /** * @brief Format tun/tap interface name @@ -984,13 +978,11 @@ tuntap_intfc_tx (vlib_main_t * vm, return n_buffers; } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (tuntap_dev_class,static) = { .name = "tuntap", .tx_function = tuntap_intfc_tx, .format_device_name = format_tuntap_interface_name, }; -/* *INDENT-ON* */ /** * @brief tun/tap node init @@ -1025,12 +1017,10 @@ tuntap_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (tuntap_init) = { .runs_after = VLIB_INITS("ip4_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/util/throttle.c b/src/vnet/util/throttle.c index 0985b4a81a3..8b8e030bf53 100644 --- a/src/vnet/util/throttle.c +++ b/src/vnet/util/throttle.c @@ -16,17 +16,18 @@ #include <vnet/util/throttle.h> void -throttle_init (throttle_t * t, u32 n_threads, f64 time) +throttle_init (throttle_t *t, u32 n_threads, u32 buckets, f64 time) { u32 i; t->time = time; + t->buckets = 1 << max_log2 (buckets); vec_validate (t->bitmaps, n_threads); vec_validate (t->seeds, n_threads); vec_validate (t->last_seed_change_time, n_threads); for (i = 0; i < n_threads; i++) - vec_validate (t->bitmaps[i], (THROTTLE_BITS / BITS (uword)) - 1); + clib_bitmap_alloc (t->bitmaps[i], t->buckets); } /* diff --git a/src/vnet/util/throttle.h b/src/vnet/util/throttle.h index 38ace280131..53435c4a359 100644 --- a/src/vnet/util/throttle.h +++ b/src/vnet/util/throttle.h @@ -31,11 +31,13 @@ typedef struct throttle_t_ uword **bitmaps; u64 
*seeds; f64 *last_seed_change_time; + u32 buckets; } throttle_t; #define THROTTLE_BITS (512) -extern void throttle_init (throttle_t * t, u32 n_threads, f64 time); +extern void throttle_init (throttle_t *t, u32 n_threads, u32 buckets, + f64 time); always_inline u64 throttle_seed (throttle_t * t, u32 thread_index, f64 time_now) @@ -43,7 +45,7 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now) if (time_now - t->last_seed_change_time[thread_index] > t->time) { (void) random_u64 (&t->seeds[thread_index]); - clib_memset (t->bitmaps[thread_index], 0, THROTTLE_BITS / BITS (u8)); + clib_bitmap_zero (t->bitmaps[thread_index]); t->last_seed_change_time[thread_index] = time_now; } @@ -53,21 +55,14 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now) always_inline int throttle_check (throttle_t * t, u32 thread_index, u64 hash, u64 seed) { - int drop; - uword m; - u32 w; + ASSERT (is_pow2 (t->buckets)); hash = clib_xxhash (hash ^ seed); /* Select bit number */ - hash &= THROTTLE_BITS - 1; - w = hash / BITS (uword); - m = (uword) 1 << (hash % BITS (uword)); + hash &= t->buckets - 1; - drop = (t->bitmaps[thread_index][w] & m) != 0; - t->bitmaps[thread_index][w] |= m; - - return (drop); + return clib_bitmap_set_no_check (t->bitmaps[thread_index], hash, 1); } #endif diff --git a/src/vnet/vnet.h b/src/vnet/vnet.h index 227fa5be30c..54988aec667 100644 --- a/src/vnet/vnet.h +++ b/src/vnet/vnet.h @@ -71,6 +71,7 @@ typedef struct u32 pcap_sw_if_index; pcap_main_t pcap_main; u32 filter_classify_table_index; + vlib_is_packet_traced_fn_t *current_filter_function; vlib_error_t pcap_error_index; } vnet_pcap_t; diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c index 62513614389..d4c7424630d 100644 --- a/src/vnet/vxlan-gpe/decap.c +++ b/src/vnet/vxlan-gpe/decap.c @@ -622,7 +622,6 @@ static char *vxlan_gpe_error_strings[] = { #undef _ }; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = { .name = "vxlan4-gpe-input", /* Takes a vector 
of packets. */ @@ -642,9 +641,7 @@ VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = { .format_trace = format_vxlan_gpe_rx_trace, // $$$$ .unformat_buffer = unformat_vxlan_gpe_header, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = { .name = "vxlan6-gpe-input", /* Takes a vector of packets. */ @@ -664,7 +661,6 @@ VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = { .format_trace = format_vxlan_gpe_rx_trace, // $$$$ .unformat_buffer = unformat_vxlan_gpe_header, }; -/* *INDENT-ON* */ typedef enum { @@ -1105,7 +1101,6 @@ VLIB_NODE_FN (ip4_vxlan_gpe_bypass_node) (vlib_main_t * vm, return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = { .name = "ip4-vxlan-gpe-bypass", .vector_size = sizeof (u32), @@ -1119,7 +1114,6 @@ VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_forward_next_trace, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT /* Dummy init function to get us linked in. */ @@ -1139,7 +1133,6 @@ VLIB_NODE_FN (ip6_vxlan_gpe_bypass_node) (vlib_main_t * vm, return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = { .name = "ip6-vxlan-gpe-bypass", .vector_size = sizeof (u32), @@ -1153,7 +1146,6 @@ VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = { .format_buffer = format_ip6_header, .format_trace = format_ip6_forward_next_trace, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT /* Dummy init function to get us linked in. 
*/ diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c index 35a5529e80b..a769861577d 100644 --- a/src/vnet/vxlan-gpe/encap.c +++ b/src/vnet/vxlan-gpe/encap.c @@ -96,7 +96,7 @@ vxlan_gpe_encap_one_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0, ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56); ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af, - N_AF); + N_AF, UDP_ENCAP_FIXUP_NONE); next0[0] = t0->encap_next_node; } @@ -123,9 +123,9 @@ vxlan_gpe_encap_two_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0, ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56); ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af, - N_AF); + N_AF, UDP_ENCAP_FIXUP_NONE); ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, af, - N_AF); + N_AF, UDP_ENCAP_FIXUP_NONE); next0[0] = next1[0] = t0->encap_next_node; } @@ -404,7 +404,6 @@ vxlan_gpe_encap (vlib_main_t * vm, return from_frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = { .function = vxlan_gpe_encap, .name = "vxlan-gpe-encap", @@ -423,7 +422,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = { [VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ /* diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c index a926847051f..5a5262ea9db 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe.c @@ -144,14 +144,12 @@ vxlan_gpe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, return 0; } -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = { .name = "VXLAN_GPE", .format_device_name = format_vxlan_gpe_name, .format_tx_trace = format_vxlan_gpe_encap_trace, .admin_up_down_function = vxlan_gpe_interface_admin_up_down, }; -/* *INDENT-ON* */ /** @@ -171,13 +169,11 @@ format_vxlan_gpe_header_with_length (u8 * s, va_list * args) return s; } -/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = { .name = "VXLAN_GPE", .format_header = 
format_vxlan_gpe_header_with_length, .build_rewrite = default_build_rewrite, }; -/* *INDENT-ON* */ static void vxlan_gpe_tunnel_restack_dpo (vxlan_gpe_tunnel_t * t) @@ -388,7 +384,6 @@ vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size, return (0); } -/* *INDENT-OFF* */ typedef CLIB_PACKED(union { struct { fib_node_index_t mfib_entry_index; @@ -396,7 +391,6 @@ typedef CLIB_PACKED(union { }; u64 as_u64; }) mcast_shared_t; -/* *INDENT-ON* */ static inline mcast_shared_t mcast_shared_get (ip46_address_t * ip) @@ -496,7 +490,6 @@ int vnet_vxlan_gpe_add_del_tunnel clib_memset (t, 0, sizeof (*t)); /* copy from arg structure */ -/* *INDENT-OFF* */ #define _(x) t->x = a->x; foreach_gpe_copy_field; if (!a->is_ip6) @@ -504,7 +497,6 @@ int vnet_vxlan_gpe_add_del_tunnel else foreach_copy_ipv6 #undef _ -/* *INDENT-ON* */ if (!a->is_ip6) t->flags |= VXLAN_GPE_TUNNEL_IS_IPV4; @@ -594,7 +586,8 @@ int vnet_vxlan_gpe_add_del_tunnel fib_prefix_t tun_remote_pfx; vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL; - fib_prefix_from_ip46_addr (&t->remote, &tun_remote_pfx); + fib_protocol_t fp = fib_ip_proto (is_ip6); + fib_prefix_from_ip46_addr (fp, &t->remote, &tun_remote_pfx); if (!ip46_address_is_multicast (&t->remote)) { /* Unicast tunnel - @@ -618,8 +611,6 @@ int vnet_vxlan_gpe_add_del_tunnel * with different VNIs, create the output fib adjacency only if * it does not already exist */ - fib_protocol_t fp = fib_ip_proto (is_ip6); - if (vtep_addr_ref (&ngm->vtep_table, t->encap_fib_index, &t->remote) == 1) { @@ -919,7 +910,6 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, a->is_add = is_add; a->is_ip6 = ipv6_set; -/* *INDENT-OFF* */ #define _(x) a->x = x; foreach_gpe_copy_field; if (ipv4_set) @@ -927,7 +917,6 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, else foreach_copy_ipv6 #undef _ -/* *INDENT-ON* */ rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index); @@ -980,7 +969,6 @@ done: * Example of how to delete a VXLAN-GPE Tunnel: * 
@cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 del} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = { .path = "create vxlan-gpe tunnel", .short_help = @@ -990,7 +978,6 @@ VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = { " [encap-vrf-id <nn>] [decap-vrf-id <nn>] [del]\n", .function = vxlan_gpe_add_del_tunnel_command_fn, }; -/* *INDENT-ON* */ /** * @brief CLI function for showing VXLAN GPE tunnels @@ -1013,12 +1000,10 @@ show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm, if (pool_elts (ngm->tunnels) == 0) vlib_cli_output (vm, "No vxlan-gpe tunnels configured."); - /* *INDENT-OFF* */ pool_foreach (t, ngm->tunnels) { vlib_cli_output (vm, "%U", format_vxlan_gpe_tunnel, t); } - /* *INDENT-ON* */ return 0; } @@ -1032,12 +1017,10 @@ show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm, * [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_vxlan_gpe_tunnel_command, static) = { .path = "show vxlan-gpe", .function = show_vxlan_gpe_tunnel_command_fn, }; -/* *INDENT-ON* */ void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable) @@ -1145,13 +1128,11 @@ set_ip4_vxlan_gpe_bypass (vlib_main_t * vm, * @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0 del} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip_vxlan_gpe_bypass_command, static) = { .path = "set interface ip vxlan-gpe-bypass", .function = set_ip4_vxlan_gpe_bypass, .short_help = "set interface ip vxlan-gpe-bypass <interface> [del]", }; -/* *INDENT-ON* */ static clib_error_t * set_ip6_vxlan_gpe_bypass (vlib_main_t * vm, @@ -1203,15 +1184,12 @@ set_ip6_vxlan_gpe_bypass (vlib_main_t * vm, * @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0 del} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gpe_bypass_command, static) = { .path = "set interface ip6 
vxlan-gpe-bypass", .function = set_ip6_vxlan_gpe_bypass, .short_help = "set interface ip6 vxlan-gpe-bypass <interface> [del]", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip4_vxlan_gpe_bypass, static) = { .arc_name = "ip4-unicast", @@ -1225,7 +1203,6 @@ VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) = .node_name = "ip6-vxlan-gpe-bypass", .runs_before = VNET_FEATURES ("ip6-lookup"), }; -/* *INDENT-ON* */ /** * @brief Feature init function for VXLAN GPE diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h index 2cbbb6c5f36..aabaafeee6f 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.h +++ b/src/vnet/vxlan-gpe/vxlan_gpe.h @@ -40,7 +40,6 @@ * @brief VXLAN GPE header struct * */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /** 20 bytes */ ip4_header_t ip4; @@ -49,9 +48,7 @@ typedef CLIB_PACKED (struct { /** 8 bytes */ vxlan_gpe_header_t vxlan; }) ip4_vxlan_gpe_header_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /** 40 bytes */ ip6_header_t ip6; @@ -60,7 +57,6 @@ typedef CLIB_PACKED (struct { /** 8 bytes */ vxlan_gpe_header_t vxlan; }) ip6_vxlan_gpe_header_t; -/* *INDENT-ON* */ /** * @brief Key struct for IPv4 VXLAN GPE tunnel. @@ -68,7 +64,6 @@ typedef CLIB_PACKED (struct { * all fields in NET byte order * VNI shifted 8 bits */ -/* *INDENT-OFF* */ typedef CLIB_PACKED(struct { union { struct { @@ -81,7 +76,6 @@ typedef CLIB_PACKED(struct { u64 as_u64[2]; }; }) vxlan4_gpe_tunnel_key_t; -/* *INDENT-ON* */ /** * @brief Key struct for IPv6 VXLAN GPE tunnel. 
@@ -89,14 +83,12 @@ typedef CLIB_PACKED(struct { * all fields in NET byte order * VNI shifted 8 bits */ -/* *INDENT-OFF* */ typedef CLIB_PACKED(struct { ip6_address_t local; ip6_address_t remote; u32 vni; u32 port; }) vxlan6_gpe_tunnel_key_t; -/* *INDENT-ON* */ typedef union { diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/vnet/vxlan-gpe/vxlan_gpe_api.c index e9cf17f270b..cc74e1f58d4 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe_api.c @@ -114,12 +114,10 @@ static void rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index); out: - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY, ({ rmp->sw_if_index = ntohl (sw_if_index); })); - /* *INDENT-ON* */ } static void @@ -242,12 +240,10 @@ static void vl_api_vxlan_gpe_tunnel_dump_t_handler if (~0 == sw_if_index) { - /* *INDENT-OFF* */ pool_foreach (t, vgm->tunnels) { send_vxlan_gpe_tunnel_details (t, reg, mp->context); } - /* *INDENT-ON* */ } else { diff --git a/src/vnet/vxlan/FEATURE.yaml b/src/vnet/vxlan/FEATURE.yaml deleted file mode 100644 index dc7d21b010e..00000000000 --- a/src/vnet/vxlan/FEATURE.yaml +++ /dev/null @@ -1,14 +0,0 @@ ---- -name: Virtual eXtensible LAN -maintainer: John Lo <loj@cisco.com> -features: - - VXLAN tunnel for support of L2 overlay/virtual networks (RFC-7348) - - Support either IPv4 or IPv6 underlay network VTEPs - - Flooding via headend replication if all VXLAN tunnels in BD are unicast ones - - Multicast VXLAN tunnel can be added to BD to flood via IP multicast - - VXLAN encap with flow-hashed source port for better underlay IP load balance - - VXLAN decap optimization via vxlan-bypass IP feature on underlay interfaces - - VXLAN decap HW offload using flow director with DPDK on Intel Fortville NICs -description: "Virtual eXtensible LAN (VXLAN) tunnels support L2 overlay networks that span L3 networks" -state: production -properties: [API, CLI, MULTITHREAD] diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c deleted file 
mode 100644 index 729293fb3e5..00000000000 --- a/src/vnet/vxlan/decap.c +++ /dev/null @@ -1,1330 +0,0 @@ -/* - * decap.c: vxlan tunnel decap packet processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <vlib/vlib.h> -#include <vnet/vxlan/vxlan.h> -#include <vnet/udp/udp_local.h> - -#ifndef CLIB_MARCH_VARIANT -vlib_node_registration_t vxlan4_input_node; -vlib_node_registration_t vxlan6_input_node; -#endif - -typedef struct -{ - u32 next_index; - u32 tunnel_index; - u32 error; - u32 vni; -} vxlan_rx_trace_t; - -static u8 * -format_vxlan_rx_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - vxlan_rx_trace_t *t = va_arg (*args, vxlan_rx_trace_t *); - - if (t->tunnel_index == ~0) - return format (s, "VXLAN decap error - tunnel for vni %d does not exist", - t->vni); - return format (s, "VXLAN decap from vxlan_tunnel%d vni %d next %d error %d", - t->tunnel_index, t->vni, t->next_index, t->error); -} - -typedef vxlan4_tunnel_key_t last_tunnel_cache4; - -static const vxlan_decap_info_t decap_not_found = { - .sw_if_index = ~0, - .next_index = VXLAN_INPUT_NEXT_DROP, - .error = VXLAN_ERROR_NO_SUCH_TUNNEL -}; - -static const vxlan_decap_info_t decap_bad_flags = { - .sw_if_index = ~0, - .next_index = VXLAN_INPUT_NEXT_DROP, - .error = VXLAN_ERROR_BAD_FLAGS -}; - -always_inline 
vxlan_decap_info_t -vxlan4_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache4 * cache, - u32 fib_index, ip4_header_t * ip4_0, - vxlan_header_t * vxlan0, u32 * stats_sw_if_index) -{ - if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I)) - return decap_bad_flags; - - /* Make sure VXLAN tunnel exist according to packet S/D IP, UDP port, VRF, - * and VNI */ - u32 dst = ip4_0->dst_address.as_u32; - u32 src = ip4_0->src_address.as_u32; - udp_header_t *udp = ip4_next_header (ip4_0); - vxlan4_tunnel_key_t key4 = { - .key[0] = ((u64) dst << 32) | src, - .key[1] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) | - vxlan0->vni_reserved, - }; - - if (PREDICT_TRUE - (key4.key[0] == cache->key[0] && key4.key[1] == cache->key[1])) - { - /* cache hit */ - vxlan_decap_info_t di = {.as_u64 = cache->value }; - *stats_sw_if_index = di.sw_if_index; - return di; - } - - int rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4); - if (PREDICT_TRUE (rv == 0)) - { - *cache = key4; - vxlan_decap_info_t di = {.as_u64 = key4.value }; - *stats_sw_if_index = di.sw_if_index; - return di; - } - - /* try multicast */ - if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address))) - return decap_not_found; - - /* search for mcast decap info by mcast address */ - key4.key[0] = dst; - rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4); - if (rv != 0) - return decap_not_found; - - /* search for unicast tunnel using the mcast tunnel local(src) ip */ - vxlan_decap_info_t mdi = {.as_u64 = key4.value }; - key4.key[0] = ((u64) mdi.local_ip.as_u32 << 32) | src; - rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4); - if (PREDICT_FALSE (rv != 0)) - return decap_not_found; - - /* mcast traffic does not update the cache */ - *stats_sw_if_index = mdi.sw_if_index; - vxlan_decap_info_t di = {.as_u64 = key4.value }; - return di; -} - -typedef vxlan6_tunnel_key_t last_tunnel_cache6; - -always_inline vxlan_decap_info_t -vxlan6_find_tunnel 
(vxlan_main_t * vxm, last_tunnel_cache6 * cache, - u32 fib_index, ip6_header_t * ip6_0, - vxlan_header_t * vxlan0, u32 * stats_sw_if_index) -{ - if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I)) - return decap_bad_flags; - - /* Make sure VXLAN tunnel exist according to packet SIP, UDP port, VRF, and - * VNI */ - udp_header_t *udp = ip6_next_header (ip6_0); - vxlan6_tunnel_key_t key6 = { - .key[0] = ip6_0->src_address.as_u64[0], - .key[1] = ip6_0->src_address.as_u64[1], - .key[2] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) | - vxlan0->vni_reserved, - }; - - if (PREDICT_FALSE - (clib_bihash_key_compare_24_8 (key6.key, cache->key) == 0)) - { - int rv = - clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6); - if (PREDICT_FALSE (rv != 0)) - return decap_not_found; - - *cache = key6; - } - vxlan_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value); - - /* Validate VXLAN tunnel SIP against packet DIP */ - if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6))) - *stats_sw_if_index = t0->sw_if_index; - else - { - /* try multicast */ - if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address))) - return decap_not_found; - - /* Make sure mcast VXLAN tunnel exist by packet DIP and VNI */ - key6.key[0] = ip6_0->dst_address.as_u64[0]; - key6.key[1] = ip6_0->dst_address.as_u64[1]; - int rv = - clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6); - if (PREDICT_FALSE (rv != 0)) - return decap_not_found; - - vxlan_tunnel_t *mcast_t0 = pool_elt_at_index (vxm->tunnels, key6.value); - *stats_sw_if_index = mcast_t0->sw_if_index; - } - - vxlan_decap_info_t di = { - .sw_if_index = t0->sw_if_index, - .next_index = t0->decap_next_index, - }; - return di; -} - -always_inline uword -vxlan_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame, u32 is_ip4) -{ - vxlan_main_t *vxm = &vxlan_main; - vnet_main_t *vnm = vxm->vnet_main; - vnet_interface_main_t *im = &vnm->interface_main; - 
vlib_combined_counter_main_t *rx_counter = - im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX; - last_tunnel_cache4 last4; - last_tunnel_cache6 last6; - u32 pkts_dropped = 0; - u32 thread_index = vlib_get_thread_index (); - - if (is_ip4) - clib_memset (&last4, 0xff, sizeof last4); - else - clib_memset (&last6, 0xff, sizeof last6); - - u32 *from = vlib_frame_vector_args (from_frame); - u32 n_left_from = from_frame->n_vectors; - - vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - vlib_get_buffers (vm, from, bufs, n_left_from); - - u32 stats_if0 = ~0, stats_if1 = ~0; - u16 nexts[VLIB_FRAME_SIZE], *next = nexts; - while (n_left_from >= 4) - { - /* Prefetch next iteration. */ - vlib_prefetch_buffer_header (b[2], LOAD); - vlib_prefetch_buffer_header (b[3], LOAD); - - /* udp leaves current_data pointing at the vxlan header */ - void *cur0 = vlib_buffer_get_current (b[0]); - void *cur1 = vlib_buffer_get_current (b[1]); - vxlan_header_t *vxlan0 = cur0; - vxlan_header_t *vxlan1 = cur1; - - - ip4_header_t *ip4_0, *ip4_1; - ip6_header_t *ip6_0, *ip6_1; - if (is_ip4) - { - ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t); - ip4_1 = cur1 - sizeof (udp_header_t) - sizeof (ip4_header_t); - } - else - { - ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t); - ip6_1 = cur1 - sizeof (udp_header_t) - sizeof (ip6_header_t); - } - - /* pop vxlan */ - vlib_buffer_advance (b[0], sizeof *vxlan0); - vlib_buffer_advance (b[1], sizeof *vxlan1); - - u32 fi0 = vlib_buffer_get_ip_fib_index (b[0], is_ip4); - u32 fi1 = vlib_buffer_get_ip_fib_index (b[1], is_ip4); - - vxlan_decap_info_t di0 = is_ip4 ? - vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) : - vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0); - vxlan_decap_info_t di1 = is_ip4 ? - vxlan4_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan1, &stats_if1) : - vxlan6_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan1, &stats_if1); - - /* Prefetch next iteration. 
*/ - clib_prefetch_load (b[2]->data); - clib_prefetch_load (b[3]->data); - - u32 len0 = vlib_buffer_length_in_chain (vm, b[0]); - u32 len1 = vlib_buffer_length_in_chain (vm, b[1]); - - next[0] = di0.next_index; - next[1] = di1.next_index; - - u8 any_error = di0.error | di1.error; - if (PREDICT_TRUE (any_error == 0)) - { - /* Required to make the l2 tag push / pop code work on l2 subifs */ - vnet_update_l2_len (b[0]); - vnet_update_l2_len (b[1]); - /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */ - vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index; - vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index; - vlib_increment_combined_counter (rx_counter, thread_index, - stats_if0, 1, len0); - vlib_increment_combined_counter (rx_counter, thread_index, - stats_if1, 1, len1); - } - else - { - if (di0.error == 0) - { - vnet_update_l2_len (b[0]); - vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index; - vlib_increment_combined_counter (rx_counter, thread_index, - stats_if0, 1, len0); - } - else - { - b[0]->error = node->errors[di0.error]; - pkts_dropped++; - } - - if (di1.error == 0) - { - vnet_update_l2_len (b[1]); - vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index; - vlib_increment_combined_counter (rx_counter, thread_index, - stats_if1, 1, len1); - } - else - { - b[1]->error = node->errors[di1.error]; - pkts_dropped++; - } - } - - if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) - { - vxlan_rx_trace_t *tr = - vlib_add_trace (vm, node, b[0], sizeof (*tr)); - tr->next_index = next[0]; - tr->error = di0.error; - tr->tunnel_index = di0.sw_if_index == ~0 ? - ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index]; - tr->vni = vnet_get_vni (vxlan0); - } - if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED)) - { - vxlan_rx_trace_t *tr = - vlib_add_trace (vm, node, b[1], sizeof (*tr)); - tr->next_index = next[1]; - tr->error = di1.error; - tr->tunnel_index = di1.sw_if_index == ~0 ? 
- ~0 : vxm->tunnel_index_by_sw_if_index[di1.sw_if_index]; - tr->vni = vnet_get_vni (vxlan1); - } - b += 2; - next += 2; - n_left_from -= 2; - } - - while (n_left_from > 0) - { - /* udp leaves current_data pointing at the vxlan header */ - void *cur0 = vlib_buffer_get_current (b[0]); - vxlan_header_t *vxlan0 = cur0; - ip4_header_t *ip4_0; - ip6_header_t *ip6_0; - if (is_ip4) - ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t); - else - ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t); - - /* pop (ip, udp, vxlan) */ - vlib_buffer_advance (b[0], sizeof (*vxlan0)); - - u32 fi0 = vlib_buffer_get_ip_fib_index (b[0], is_ip4); - - vxlan_decap_info_t di0 = is_ip4 ? - vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) : - vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0); - - uword len0 = vlib_buffer_length_in_chain (vm, b[0]); - - next[0] = di0.next_index; - - /* Validate VXLAN tunnel encap-fib index against packet */ - if (di0.error == 0) - { - /* Required to make the l2 tag push / pop code work on l2 subifs */ - vnet_update_l2_len (b[0]); - - /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */ - vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index; - - vlib_increment_combined_counter (rx_counter, thread_index, - stats_if0, 1, len0); - } - else - { - b[0]->error = node->errors[di0.error]; - pkts_dropped++; - } - - if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) - { - vxlan_rx_trace_t *tr - = vlib_add_trace (vm, node, b[0], sizeof (*tr)); - tr->next_index = next[0]; - tr->error = di0.error; - tr->tunnel_index = di0.sw_if_index == ~0 ? - ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index]; - tr->vni = vnet_get_vni (vxlan0); - } - b += 1; - next += 1; - n_left_from -= 1; - } - vlib_buffer_enqueue_to_next (vm, node, from, nexts, from_frame->n_vectors); - /* Do we still need this now that tunnel tx stats is kept? */ - u32 node_idx = is_ip4 ? 
vxlan4_input_node.index : vxlan6_input_node.index; - vlib_node_increment_counter (vm, node_idx, VXLAN_ERROR_DECAPSULATED, - from_frame->n_vectors - pkts_dropped); - - return from_frame->n_vectors; -} - -VLIB_NODE_FN (vxlan4_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - return vxlan_input (vm, node, from_frame, /* is_ip4 */ 1); -} - -VLIB_NODE_FN (vxlan6_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - return vxlan_input (vm, node, from_frame, /* is_ip4 */ 0); -} - -static char *vxlan_error_strings[] = { -#define vxlan_error(n,s) s, -#include <vnet/vxlan/vxlan_error.def> -#undef vxlan_error -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (vxlan4_input_node) = -{ - .name = "vxlan4-input", - .vector_size = sizeof (u32), - .n_errors = VXLAN_N_ERROR, - .error_strings = vxlan_error_strings, - .n_next_nodes = VXLAN_INPUT_N_NEXT, - .format_trace = format_vxlan_rx_trace, - .next_nodes = { -#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n, - foreach_vxlan_input_next -#undef _ - }, -}; - -VLIB_REGISTER_NODE (vxlan6_input_node) = -{ - .name = "vxlan6-input", - .vector_size = sizeof (u32), - .n_errors = VXLAN_N_ERROR, - .error_strings = vxlan_error_strings, - .n_next_nodes = VXLAN_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n, - foreach_vxlan_input_next -#undef _ - }, - .format_trace = format_vxlan_rx_trace, -}; -/* *INDENT-ON* */ - -typedef enum -{ - IP_VXLAN_BYPASS_NEXT_DROP, - IP_VXLAN_BYPASS_NEXT_VXLAN, - IP_VXLAN_BYPASS_N_NEXT, -} ip_vxlan_bypass_next_t; - -always_inline uword -ip_vxlan_bypass_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, u32 is_ip4) -{ - vxlan_main_t *vxm = &vxlan_main; - u32 *from, *to_next, n_left_from, n_left_to_next, next_index; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip4_input_node.index); - vtep4_key_t last_vtep4; /* last IPv4 address / fib index - matching a local VTEP address 
*/ - vtep6_key_t last_vtep6; /* last IPv6 address / fib index - matching a local VTEP address */ - vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - - last_tunnel_cache4 last4; - last_tunnel_cache6 last6; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - vlib_get_buffers (vm, from, bufs, n_left_from); - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip4_forward_next_trace (vm, node, frame, VLIB_TX); - - if (is_ip4) - { - vtep4_key_init (&last_vtep4); - clib_memset (&last4, 0xff, sizeof last4); - } - else - { - vtep6_key_init (&last_vtep6); - clib_memset (&last6, 0xff, sizeof last6); - } - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *b0, *b1; - ip4_header_t *ip40, *ip41; - ip6_header_t *ip60, *ip61; - udp_header_t *udp0, *udp1; - vxlan_header_t *vxlan0, *vxlan1; - u32 bi0, ip_len0, udp_len0, flags0, next0; - u32 bi1, ip_len1, udp_len1, flags1, next1; - i32 len_diff0, len_diff1; - u8 error0, good_udp0, proto0; - u8 error1, good_udp1, proto1; - u32 stats_if0 = ~0, stats_if1 = ~0; - - /* Prefetch next iteration. 
*/ - { - vlib_prefetch_buffer_header (b[2], LOAD); - vlib_prefetch_buffer_header (b[3], LOAD); - - CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = to_next[0] = from[0]; - bi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - b0 = b[0]; - b1 = b[1]; - b += 2; - if (is_ip4) - { - ip40 = vlib_buffer_get_current (b0); - ip41 = vlib_buffer_get_current (b1); - } - else - { - ip60 = vlib_buffer_get_current (b0); - ip61 = vlib_buffer_get_current (b1); - } - - /* Setup packet for next IP feature */ - vnet_feature_next (&next0, b0); - vnet_feature_next (&next1, b1); - - if (is_ip4) - { - /* Treat IP frag packets as "experimental" protocol for now - until support of IP frag reassembly is implemented */ - proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol; - proto1 = ip4_is_fragment (ip41) ? 0xfe : ip41->protocol; - } - else - { - proto0 = ip60->protocol; - proto1 = ip61->protocol; - } - - /* Process packet 0 */ - if (proto0 != IP_PROTOCOL_UDP) - goto exit0; /* not UDP packet */ - - if (is_ip4) - udp0 = ip4_next_header (ip40); - else - udp0 = ip6_next_header (ip60); - - u32 fi0 = vlib_buffer_get_ip_fib_index (b0, is_ip4); - vxlan0 = vlib_buffer_get_current (b0) + sizeof (udp_header_t) + - sizeof (ip4_header_t); - - vxlan_decap_info_t di0 = - is_ip4 ? 
- vxlan4_find_tunnel (vxm, &last4, fi0, ip40, vxlan0, &stats_if0) : - vxlan6_find_tunnel (vxm, &last6, fi0, ip60, vxlan0, &stats_if0); - - if (PREDICT_FALSE (di0.sw_if_index == ~0)) - goto exit0; /* unknown interface */ - - /* Validate DIP against VTEPs */ - if (is_ip4) - { -#ifdef CLIB_HAVE_VEC512 - if (!vtep4_check_vector (&vxm->vtep_table, b0, ip40, &last_vtep4, - &vxm->vtep4_u512)) -#else - if (!vtep4_check (&vxm->vtep_table, b0, ip40, &last_vtep4)) -#endif - goto exit0; /* no local VTEP for VXLAN packet */ - } - else - { - if (!vtep6_check (&vxm->vtep_table, b0, ip60, &last_vtep6)) - goto exit0; /* no local VTEP for VXLAN packet */ - } - - flags0 = b0->flags; - good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - - /* Don't verify UDP checksum for packets with explicit zero checksum. */ - good_udp0 |= udp0->checksum == 0; - - /* Verify UDP length */ - if (is_ip4) - ip_len0 = clib_net_to_host_u16 (ip40->length); - else - ip_len0 = clib_net_to_host_u16 (ip60->payload_length); - udp_len0 = clib_net_to_host_u16 (udp0->length); - len_diff0 = ip_len0 - udp_len0; - - /* Verify UDP checksum */ - if (PREDICT_FALSE (!good_udp0)) - { - if (is_ip4) - flags0 = ip4_tcp_udp_validate_checksum (vm, b0); - else - flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); - good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - } - - if (is_ip4) - { - error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM; - error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH; - } - else - { - error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM; - error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH; - } - - next0 = error0 ? - IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN; - b0->error = error0 ? 
error_node->errors[error0] : 0; - - /* vxlan-input node expect current at VXLAN header */ - if (is_ip4) - vlib_buffer_advance (b0, - sizeof (ip4_header_t) + - sizeof (udp_header_t)); - else - vlib_buffer_advance (b0, - sizeof (ip6_header_t) + - sizeof (udp_header_t)); - - exit0: - /* Process packet 1 */ - if (proto1 != IP_PROTOCOL_UDP) - goto exit1; /* not UDP packet */ - - if (is_ip4) - udp1 = ip4_next_header (ip41); - else - udp1 = ip6_next_header (ip61); - - u32 fi1 = vlib_buffer_get_ip_fib_index (b1, is_ip4); - vxlan1 = vlib_buffer_get_current (b1) + sizeof (udp_header_t) + - sizeof (ip4_header_t); - - vxlan_decap_info_t di1 = - is_ip4 ? - vxlan4_find_tunnel (vxm, &last4, fi1, ip41, vxlan1, &stats_if1) : - vxlan6_find_tunnel (vxm, &last6, fi1, ip61, vxlan1, &stats_if1); - - if (PREDICT_FALSE (di1.sw_if_index == ~0)) - goto exit1; /* unknown interface */ - - /* Validate DIP against VTEPs */ - if (is_ip4) - { -#ifdef CLIB_HAVE_VEC512 - if (!vtep4_check_vector (&vxm->vtep_table, b1, ip41, &last_vtep4, - &vxm->vtep4_u512)) -#else - if (!vtep4_check (&vxm->vtep_table, b1, ip41, &last_vtep4)) -#endif - goto exit1; /* no local VTEP for VXLAN packet */ - } - else - { - if (!vtep6_check (&vxm->vtep_table, b1, ip61, &last_vtep6)) - goto exit1; /* no local VTEP for VXLAN packet */ - } - - flags1 = b1->flags; - good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - - /* Don't verify UDP checksum for packets with explicit zero checksum. 
*/ - good_udp1 |= udp1->checksum == 0; - - /* Verify UDP length */ - if (is_ip4) - ip_len1 = clib_net_to_host_u16 (ip41->length); - else - ip_len1 = clib_net_to_host_u16 (ip61->payload_length); - udp_len1 = clib_net_to_host_u16 (udp1->length); - len_diff1 = ip_len1 - udp_len1; - - /* Verify UDP checksum */ - if (PREDICT_FALSE (!good_udp1)) - { - if (is_ip4) - flags1 = ip4_tcp_udp_validate_checksum (vm, b1); - else - flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1); - good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - } - - if (is_ip4) - { - error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM; - error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH; - } - else - { - error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM; - error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH; - } - - next1 = error1 ? - IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN; - b1->error = error1 ? error_node->errors[error1] : 0; - - /* vxlan-input node expect current at VXLAN header */ - if (is_ip4) - vlib_buffer_advance (b1, - sizeof (ip4_header_t) + - sizeof (udp_header_t)); - else - vlib_buffer_advance (b1, - sizeof (ip6_header_t) + - sizeof (udp_header_t)); - - exit1: - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *b0; - ip4_header_t *ip40; - ip6_header_t *ip60; - udp_header_t *udp0; - vxlan_header_t *vxlan0; - u32 bi0, ip_len0, udp_len0, flags0, next0; - i32 len_diff0; - u8 error0, good_udp0, proto0; - u32 stats_if0 = ~0; - - bi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - b0 = b[0]; - b++; - if (is_ip4) - ip40 = vlib_buffer_get_current (b0); - else - ip60 = vlib_buffer_get_current (b0); - - /* Setup packet for next IP feature */ - vnet_feature_next (&next0, b0); - - if (is_ip4) - /* Treat IP4 frag packets as "experimental" protocol for now - until support of IP frag 
reassembly is implemented */ - proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol; - else - proto0 = ip60->protocol; - - if (proto0 != IP_PROTOCOL_UDP) - goto exit; /* not UDP packet */ - - if (is_ip4) - udp0 = ip4_next_header (ip40); - else - udp0 = ip6_next_header (ip60); - - u32 fi0 = vlib_buffer_get_ip_fib_index (b0, is_ip4); - vxlan0 = vlib_buffer_get_current (b0) + sizeof (udp_header_t) + - sizeof (ip4_header_t); - - vxlan_decap_info_t di0 = - is_ip4 ? - vxlan4_find_tunnel (vxm, &last4, fi0, ip40, vxlan0, &stats_if0) : - vxlan6_find_tunnel (vxm, &last6, fi0, ip60, vxlan0, &stats_if0); - - if (PREDICT_FALSE (di0.sw_if_index == ~0)) - goto exit; /* unknown interface */ - - /* Validate DIP against VTEPs */ - if (is_ip4) - { -#ifdef CLIB_HAVE_VEC512 - if (!vtep4_check_vector (&vxm->vtep_table, b0, ip40, &last_vtep4, - &vxm->vtep4_u512)) -#else - if (!vtep4_check (&vxm->vtep_table, b0, ip40, &last_vtep4)) -#endif - goto exit; /* no local VTEP for VXLAN packet */ - } - else - { - if (!vtep6_check (&vxm->vtep_table, b0, ip60, &last_vtep6)) - goto exit; /* no local VTEP for VXLAN packet */ - } - - flags0 = b0->flags; - good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - - /* Don't verify UDP checksum for packets with explicit zero checksum. */ - good_udp0 |= udp0->checksum == 0; - - /* Verify UDP length */ - if (is_ip4) - ip_len0 = clib_net_to_host_u16 (ip40->length); - else - ip_len0 = clib_net_to_host_u16 (ip60->payload_length); - udp_len0 = clib_net_to_host_u16 (udp0->length); - len_diff0 = ip_len0 - udp_len0; - - /* Verify UDP checksum */ - if (PREDICT_FALSE (!good_udp0)) - { - if (is_ip4) - flags0 = ip4_tcp_udp_validate_checksum (vm, b0); - else - flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); - good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - } - - if (is_ip4) - { - error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM; - error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH; - } - else - { - error0 = good_udp0 ? 
0 : IP6_ERROR_UDP_CHECKSUM; - error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH; - } - - next0 = error0 ? - IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN; - b0->error = error0 ? error_node->errors[error0] : 0; - - /* vxlan-input node expect current at VXLAN header */ - if (is_ip4) - vlib_buffer_advance (b0, - sizeof (ip4_header_t) + - sizeof (udp_header_t)); - else - vlib_buffer_advance (b0, - sizeof (ip6_header_t) + - sizeof (udp_header_t)); - - exit: - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return frame->n_vectors; -} - -VLIB_NODE_FN (ip4_vxlan_bypass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 1); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip4_vxlan_bypass_node) = -{ - .name = "ip4-vxlan-bypass", - .vector_size = sizeof (u32), - .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT, - .next_nodes = { - [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop", - [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-input", - }, - .format_buffer = format_ip4_header, - .format_trace = format_ip4_forward_next_trace, -}; - -/* *INDENT-ON* */ - -/* Dummy init function to get us linked in. 
*/ -static clib_error_t * -ip4_vxlan_bypass_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (ip4_vxlan_bypass_init); - -VLIB_NODE_FN (ip6_vxlan_bypass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 0); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_vxlan_bypass_node) = -{ - .name = "ip6-vxlan-bypass", - .vector_size = sizeof (u32), - .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT, - .next_nodes = { - [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop", - [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-input", - }, - .format_buffer = format_ip6_header, - .format_trace = format_ip6_forward_next_trace, -}; - -/* *INDENT-ON* */ - -/* Dummy init function to get us linked in. */ -static clib_error_t * -ip6_vxlan_bypass_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (ip6_vxlan_bypass_init); - -#define foreach_vxlan_flow_input_next \ -_(DROP, "error-drop") \ -_(L2_INPUT, "l2-input") - -typedef enum -{ -#define _(s,n) VXLAN_FLOW_NEXT_##s, - foreach_vxlan_flow_input_next -#undef _ - VXLAN_FLOW_N_NEXT, -} vxlan_flow_input_next_t; - -#define foreach_vxlan_flow_error \ - _(NONE, "no error") \ - _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ - _(IP_HEADER_ERROR, "Rx ip header errors") \ - _(UDP_CHECKSUM_ERROR, "Rx udp checksum errors") \ - _(UDP_LENGTH_ERROR, "Rx udp length errors") - -typedef enum -{ -#define _(f,s) VXLAN_FLOW_ERROR_##f, - foreach_vxlan_flow_error -#undef _ - VXLAN_FLOW_N_ERROR, -} vxlan_flow_error_t; - -static char *vxlan_flow_error_strings[] = { -#define _(n,s) s, - foreach_vxlan_flow_error -#undef _ -}; - - -static_always_inline u8 -vxlan_validate_udp_csum (vlib_main_t * vm, vlib_buffer_t * b) -{ - u32 flags = b->flags; - enum - { offset = - sizeof (ip4_header_t) + sizeof (udp_header_t) + sizeof (vxlan_header_t), - }; - - /* Verify UDP checksum */ - if ((flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) - { - vlib_buffer_advance (b, -offset); - flags 
= ip4_tcp_udp_validate_checksum (vm, b); - vlib_buffer_advance (b, offset); - } - - return (flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; -} - -static_always_inline u8 -vxlan_check_udp_csum (vlib_main_t * vm, vlib_buffer_t * b) -{ - ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr; - udp_header_t *udp = &hdr->udp; - /* Don't verify UDP checksum for packets with explicit zero checksum. */ - u8 good_csum = (b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0 || - udp->checksum == 0; - - return !good_csum; -} - -static_always_inline u8 -vxlan_check_ip (vlib_buffer_t * b, u16 payload_len) -{ - ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr; - u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length); - u16 expected = payload_len + sizeof *hdr; - return ip_len > expected || hdr->ip4.ttl == 0 - || hdr->ip4.ip_version_and_header_length != 0x45; -} - -static_always_inline u8 -vxlan_check_ip_udp_len (vlib_buffer_t * b) -{ - ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr; - u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length); - u16 udp_len = clib_net_to_host_u16 (hdr->udp.length); - return udp_len > ip_len; -} - -static_always_inline u8 -vxlan_err_code (u8 ip_err0, u8 udp_err0, u8 csum_err0) -{ - u8 error0 = VXLAN_FLOW_ERROR_NONE; - if (ip_err0) - error0 = VXLAN_FLOW_ERROR_IP_HEADER_ERROR; - if (udp_err0) - error0 = VXLAN_FLOW_ERROR_UDP_LENGTH_ERROR; - if (csum_err0) - error0 = VXLAN_FLOW_ERROR_UDP_CHECKSUM_ERROR; - return error0; -} - -VLIB_NODE_FN (vxlan4_flow_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * f) -{ - enum - { payload_offset = sizeof (ip4_vxlan_header_t) }; - - vxlan_main_t *vxm = &vxlan_main; - vnet_interface_main_t *im = &vnet_main.interface_main; - vlib_combined_counter_main_t *rx_counter[VXLAN_FLOW_N_NEXT] = { - [VXLAN_FLOW_NEXT_DROP] = - im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP, - [VXLAN_FLOW_NEXT_L2_INPUT] = - im->combined_sw_if_counters + 
VNET_INTERFACE_COUNTER_RX, - }; - u32 thread_index = vlib_get_thread_index (); - - u32 *from = vlib_frame_vector_args (f); - u32 n_left_from = f->n_vectors; - u32 next_index = VXLAN_FLOW_NEXT_L2_INPUT; - - while (n_left_from > 0) - { - u32 n_left_to_next, *to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 3 && n_left_to_next > 3) - { - u32 bi0 = to_next[0] = from[0]; - u32 bi1 = to_next[1] = from[1]; - u32 bi2 = to_next[2] = from[2]; - u32 bi3 = to_next[3] = from[3]; - from += 4; - n_left_from -= 4; - to_next += 4; - n_left_to_next -= 4; - - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - vlib_buffer_t *b1 = vlib_get_buffer (vm, bi1); - vlib_buffer_t *b2 = vlib_get_buffer (vm, bi2); - vlib_buffer_t *b3 = vlib_get_buffer (vm, bi3); - - vlib_buffer_advance (b0, payload_offset); - vlib_buffer_advance (b1, payload_offset); - vlib_buffer_advance (b2, payload_offset); - vlib_buffer_advance (b3, payload_offset); - - u16 len0 = vlib_buffer_length_in_chain (vm, b0); - u16 len1 = vlib_buffer_length_in_chain (vm, b1); - u16 len2 = vlib_buffer_length_in_chain (vm, b2); - u16 len3 = vlib_buffer_length_in_chain (vm, b3); - - u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT, next1 = - VXLAN_FLOW_NEXT_L2_INPUT, next2 = - VXLAN_FLOW_NEXT_L2_INPUT, next3 = VXLAN_FLOW_NEXT_L2_INPUT; - - u8 ip_err0 = vxlan_check_ip (b0, len0); - u8 ip_err1 = vxlan_check_ip (b1, len1); - u8 ip_err2 = vxlan_check_ip (b2, len2); - u8 ip_err3 = vxlan_check_ip (b3, len3); - u8 ip_err = ip_err0 | ip_err1 | ip_err2 | ip_err3; - - u8 udp_err0 = vxlan_check_ip_udp_len (b0); - u8 udp_err1 = vxlan_check_ip_udp_len (b1); - u8 udp_err2 = vxlan_check_ip_udp_len (b2); - u8 udp_err3 = vxlan_check_ip_udp_len (b3); - u8 udp_err = udp_err0 | udp_err1 | udp_err2 | udp_err3; - - u8 csum_err0 = vxlan_check_udp_csum (vm, b0); - u8 csum_err1 = vxlan_check_udp_csum (vm, b1); - u8 csum_err2 = vxlan_check_udp_csum (vm, b2); - u8 csum_err3 = vxlan_check_udp_csum (vm, b3); - u8 
csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3; - - if (PREDICT_FALSE (csum_err)) - { - if (csum_err0) - csum_err0 = !vxlan_validate_udp_csum (vm, b0); - if (csum_err1) - csum_err1 = !vxlan_validate_udp_csum (vm, b1); - if (csum_err2) - csum_err2 = !vxlan_validate_udp_csum (vm, b2); - if (csum_err3) - csum_err3 = !vxlan_validate_udp_csum (vm, b3); - csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3; - } - - if (PREDICT_FALSE (ip_err || udp_err || csum_err)) - { - if (ip_err0 || udp_err0 || csum_err0) - { - next0 = VXLAN_FLOW_NEXT_DROP; - u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0); - b0->error = node->errors[error0]; - } - if (ip_err1 || udp_err1 || csum_err1) - { - next1 = VXLAN_FLOW_NEXT_DROP; - u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1); - b1->error = node->errors[error1]; - } - if (ip_err2 || udp_err2 || csum_err2) - { - next2 = VXLAN_FLOW_NEXT_DROP; - u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2); - b2->error = node->errors[error2]; - } - if (ip_err3 || udp_err3 || csum_err3) - { - next3 = VXLAN_FLOW_NEXT_DROP; - u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3); - b3->error = node->errors[error3]; - } - } - - vnet_update_l2_len (b0); - vnet_update_l2_len (b1); - vnet_update_l2_len (b2); - vnet_update_l2_len (b3); - - ASSERT (b0->flow_id != 0); - ASSERT (b1->flow_id != 0); - ASSERT (b2->flow_id != 0); - ASSERT (b3->flow_id != 0); - - u32 t_index0 = b0->flow_id - vxm->flow_id_start; - u32 t_index1 = b1->flow_id - vxm->flow_id_start; - u32 t_index2 = b2->flow_id - vxm->flow_id_start; - u32 t_index3 = b3->flow_id - vxm->flow_id_start; - - vxlan_tunnel_t *t0 = &vxm->tunnels[t_index0]; - vxlan_tunnel_t *t1 = &vxm->tunnels[t_index1]; - vxlan_tunnel_t *t2 = &vxm->tunnels[t_index2]; - vxlan_tunnel_t *t3 = &vxm->tunnels[t_index3]; - - /* flow id consumed */ - b0->flow_id = 0; - b1->flow_id = 0; - b2->flow_id = 0; - b3->flow_id = 0; - - u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] = - 
t0->sw_if_index; - u32 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX] = - t1->sw_if_index; - u32 sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX] = - t2->sw_if_index; - u32 sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX] = - t3->sw_if_index; - - vlib_increment_combined_counter (rx_counter[next0], thread_index, - sw_if_index0, 1, len0); - vlib_increment_combined_counter (rx_counter[next1], thread_index, - sw_if_index1, 1, len1); - vlib_increment_combined_counter (rx_counter[next2], thread_index, - sw_if_index2, 1, len2); - vlib_increment_combined_counter (rx_counter[next3], thread_index, - sw_if_index3, 1, len3); - - u32 flags = b0->flags | b1->flags | b2->flags | b3->flags; - - if (PREDICT_FALSE (flags & VLIB_BUFFER_IS_TRACED)) - { - if (b0->flags & VLIB_BUFFER_IS_TRACED) - { - vxlan_rx_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof *tr); - u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0); - tr->next_index = next0; - tr->error = error0; - tr->tunnel_index = t_index0; - tr->vni = t0->vni; - } - if (b1->flags & VLIB_BUFFER_IS_TRACED) - { - vxlan_rx_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof *tr); - u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1); - tr->next_index = next1; - tr->error = error1; - tr->tunnel_index = t_index1; - tr->vni = t1->vni; - } - if (b2->flags & VLIB_BUFFER_IS_TRACED) - { - vxlan_rx_trace_t *tr = - vlib_add_trace (vm, node, b2, sizeof *tr); - u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2); - tr->next_index = next2; - tr->error = error2; - tr->tunnel_index = t_index2; - tr->vni = t2->vni; - } - if (b3->flags & VLIB_BUFFER_IS_TRACED) - { - vxlan_rx_trace_t *tr = - vlib_add_trace (vm, node, b3, sizeof *tr); - u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3); - tr->next_index = next3; - tr->error = error3; - tr->tunnel_index = t_index3; - tr->vni = t3->vni; - } - } - vlib_validate_buffer_enqueue_x4 - (vm, node, next_index, to_next, n_left_to_next, - bi0, bi1, bi2, bi3, next0, 
next1, next2, next3); - } - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0 = to_next[0] = from[0]; - from++; - n_left_from--; - to_next++; - n_left_to_next--; - - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - vlib_buffer_advance (b0, payload_offset); - - u16 len0 = vlib_buffer_length_in_chain (vm, b0); - u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT; - - u8 ip_err0 = vxlan_check_ip (b0, len0); - u8 udp_err0 = vxlan_check_ip_udp_len (b0); - u8 csum_err0 = vxlan_check_udp_csum (vm, b0); - - if (csum_err0) - csum_err0 = !vxlan_validate_udp_csum (vm, b0); - if (ip_err0 || udp_err0 || csum_err0) - { - next0 = VXLAN_FLOW_NEXT_DROP; - u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0); - b0->error = node->errors[error0]; - } - - vnet_update_l2_len (b0); - - ASSERT (b0->flow_id != 0); - u32 t_index0 = b0->flow_id - vxm->flow_id_start; - vxlan_tunnel_t *t0 = &vxm->tunnels[t_index0]; - b0->flow_id = 0; - - u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] = - t0->sw_if_index; - vlib_increment_combined_counter (rx_counter[next0], thread_index, - sw_if_index0, 1, len0); - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - vxlan_rx_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof *tr); - u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0); - tr->next_index = next0; - tr->error = error0; - tr->tunnel_index = t_index0; - tr->vni = t0->vni; - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return f->n_vectors; -} - -/* *INDENT-OFF* */ -#ifndef CLIB_MULTIARCH_VARIANT -VLIB_REGISTER_NODE (vxlan4_flow_input_node) = { - .name = "vxlan-flow-input", - .type = VLIB_NODE_TYPE_INTERNAL, - .vector_size = sizeof (u32), - - .format_trace = format_vxlan_rx_trace, - - .n_errors = VXLAN_FLOW_N_ERROR, - .error_strings = vxlan_flow_error_strings, - - .n_next_nodes = VXLAN_FLOW_N_NEXT, - .next_nodes = { -#define _(s,n) 
[VXLAN_FLOW_NEXT_##s] = n, - foreach_vxlan_flow_input_next -#undef _ - }, -}; -#endif -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/vxlan/dir.dox b/src/vnet/vxlan/dir.dox deleted file mode 100644 index 31a9e2b6112..00000000000 --- a/src/vnet/vxlan/dir.dox +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** -@dir -@brief VXLAN Code. - -This directory contains source code to support VXLAN. - -*/ -/*? %%clicmd:group_label VXLAN CLI %% ?*/ diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c deleted file mode 100644 index 0961a27942d..00000000000 --- a/src/vnet/vxlan/encap.c +++ /dev/null @@ -1,540 +0,0 @@ - -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include <vppinfra/error.h> -#include <vppinfra/hash.h> -#include <vnet/vnet.h> -#include <vnet/ip/ip.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/interface_output.h> -#include <vnet/vxlan/vxlan.h> -#include <vnet/qos/qos_types.h> -#include <vnet/adj/rewrite.h> - -/* Statistics (not all errors) */ -#define foreach_vxlan_encap_error \ -_(ENCAPSULATED, "good packets encapsulated") - -static char *vxlan_encap_error_strings[] = { -#define _(sym,string) string, - foreach_vxlan_encap_error -#undef _ -}; - -typedef enum -{ -#define _(sym,str) VXLAN_ENCAP_ERROR_##sym, - foreach_vxlan_encap_error -#undef _ - VXLAN_ENCAP_N_ERROR, -} vxlan_encap_error_t; - -typedef enum -{ - VXLAN_ENCAP_NEXT_DROP, - VXLAN_ENCAP_N_NEXT, -} vxlan_encap_next_t; - -typedef struct -{ - u32 tunnel_index; - u32 vni; -} vxlan_encap_trace_t; - -#ifndef CLIB_MARCH_VARIANT -u8 * -format_vxlan_encap_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - vxlan_encap_trace_t *t = va_arg (*args, vxlan_encap_trace_t *); - - s = format (s, "VXLAN encap to vxlan_tunnel%d vni %d", - t->tunnel_index, t->vni); - return s; -} -#endif - -always_inline uword -vxlan_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *from_frame, u8 is_ip4) -{ - u32 n_left_from, next_index, *from, *to_next; - vxlan_main_t *vxm = &vxlan_main; - vnet_main_t *vnm = vxm->vnet_main; - vnet_interface_main_t *im = &vnm->interface_main; - vlib_combined_counter_main_t *tx_counter = - im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX; - u32 pkts_encapsulated = 0; - u32 thread_index = vlib_get_thread_index (); - u32 sw_if_index0 = 0, sw_if_index1 = 0; - u32 next0 = 0, next1 = 0; - vxlan_tunnel_t *t0 = NULL, *t1 = NULL; - index_t dpoi_idx0 = INDEX_INVALID, dpoi_idx1 = INDEX_INVALID; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; - vlib_buffer_t **b = bufs; - - from = vlib_frame_vector_args 
(from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - STATIC_ASSERT_SIZEOF (ip6_vxlan_header_t, 56); - STATIC_ASSERT_SIZEOF (ip4_vxlan_header_t, 36); - - u8 const underlay_hdr_len = is_ip4 ? - sizeof (ip4_vxlan_header_t) : sizeof (ip6_vxlan_header_t); - u16 const l3_len = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t); - u32 const outer_packet_csum_offload_flags = - is_ip4 ? (VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM | - VNET_BUFFER_OFFLOAD_F_TNL_VXLAN) : - (VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM | - VNET_BUFFER_OFFLOAD_F_TNL_VXLAN); - - vlib_get_buffers (vm, from, bufs, n_left_from); - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - /* Prefetch next iteration. */ - { - vlib_prefetch_buffer_header (b[2], LOAD); - vlib_prefetch_buffer_header (b[3], LOAD); - - CLIB_PREFETCH (b[2]->data - CLIB_CACHE_LINE_BYTES, - 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b[3]->data - CLIB_CACHE_LINE_BYTES, - 2 * CLIB_CACHE_LINE_BYTES, LOAD); - } - - u32 bi0 = to_next[0] = from[0]; - u32 bi1 = to_next[1] = from[1]; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - vlib_buffer_t *b0 = b[0]; - vlib_buffer_t *b1 = b[1]; - b += 2; - - u32 flow_hash0 = vnet_l2_compute_flow_hash (b0); - u32 flow_hash1 = vnet_l2_compute_flow_hash (b1); - - /* Get next node index and adj index from tunnel next_dpo */ - if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX]) - { - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; - vnet_hw_interface_t *hi0 = - vnet_get_sup_hw_interface (vnm, sw_if_index0); - t0 = &vxm->tunnels[hi0->dev_instance]; - /* Note: change to always set next0 if it may set to drop */ - next0 = t0->next_dpo.dpoi_next_node; - dpoi_idx0 = t0->next_dpo.dpoi_index; - } - - /* Get next node index and adj index from tunnel next_dpo */ - if (sw_if_index1 != vnet_buffer 
(b1)->sw_if_index[VLIB_TX]) - { - if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX]) - { - sw_if_index1 = sw_if_index0; - t1 = t0; - next1 = next0; - dpoi_idx1 = dpoi_idx0; - } - else - { - sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX]; - vnet_hw_interface_t *hi1 = - vnet_get_sup_hw_interface (vnm, sw_if_index1); - t1 = &vxm->tunnels[hi1->dev_instance]; - /* Note: change to always set next1 if it may set to drop */ - next1 = t1->next_dpo.dpoi_next_node; - dpoi_idx1 = t1->next_dpo.dpoi_index; - } - } - - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0; - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpoi_idx1; - - ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len); - ASSERT (t1->rewrite_header.data_bytes == underlay_hdr_len); - vnet_rewrite_two_headers (*t0, *t1, vlib_buffer_get_current (b0), - vlib_buffer_get_current (b1), - underlay_hdr_len); - - vlib_buffer_advance (b0, -underlay_hdr_len); - vlib_buffer_advance (b1, -underlay_hdr_len); - - u32 len0 = vlib_buffer_length_in_chain (vm, b0); - u32 len1 = vlib_buffer_length_in_chain (vm, b1); - u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len); - u16 payload_l1 = clib_host_to_net_u16 (len1 - l3_len); - - void *underlay0 = vlib_buffer_get_current (b0); - void *underlay1 = vlib_buffer_get_current (b1); - - ip4_header_t *ip4_0, *ip4_1; - qos_bits_t ip4_0_tos = 0, ip4_1_tos = 0; - ip6_header_t *ip6_0, *ip6_1; - udp_header_t *udp0, *udp1; - u8 *l3_0, *l3_1; - if (is_ip4) - { - ip4_vxlan_header_t *hdr0 = underlay0; - ip4_vxlan_header_t *hdr1 = underlay1; - - /* Fix the IP4 checksum and length */ - ip4_0 = &hdr0->ip4; - ip4_1 = &hdr1->ip4; - ip4_0->length = clib_host_to_net_u16 (len0); - ip4_1->length = clib_host_to_net_u16 (len1); - - if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID)) - { - ip4_0_tos = vnet_buffer2 (b0)->qos.bits; - ip4_0->tos = ip4_0_tos; - } - if (PREDICT_FALSE (b1->flags & VNET_BUFFER_F_QOS_DATA_VALID)) - { - ip4_1_tos = vnet_buffer2 (b1)->qos.bits; - ip4_1->tos = 
ip4_1_tos; - } - - l3_0 = (u8 *) ip4_0; - l3_1 = (u8 *) ip4_1; - udp0 = &hdr0->udp; - udp1 = &hdr1->udp; - } - else /* ipv6 */ - { - ip6_vxlan_header_t *hdr0 = underlay0; - ip6_vxlan_header_t *hdr1 = underlay1; - - /* Fix IP6 payload length */ - ip6_0 = &hdr0->ip6; - ip6_1 = &hdr1->ip6; - ip6_0->payload_length = payload_l0; - ip6_1->payload_length = payload_l1; - - l3_0 = (u8 *) ip6_0; - l3_1 = (u8 *) ip6_1; - udp0 = &hdr0->udp; - udp1 = &hdr1->udp; - } - - /* Fix UDP length and set source port */ - udp0->length = payload_l0; - udp0->src_port = flow_hash0; - udp1->length = payload_l1; - udp1->src_port = flow_hash1; - - if (b0->flags & VNET_BUFFER_F_OFFLOAD) - { - vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data; - vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data; - vnet_buffer_offload_flags_set (b0, - outer_packet_csum_offload_flags); - } - /* IPv4 checksum only */ - else if (is_ip4) - { - ip_csum_t sum0 = ip4_0->checksum; - sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t, - length /* changed member */); - if (PREDICT_FALSE (ip4_0_tos)) - { - sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t, - tos /* changed member */); - } - ip4_0->checksum = ip_csum_fold (sum0); - } - /* IPv6 UDP checksum is mandatory */ - else - { - int bogus = 0; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus); - ASSERT (bogus == 0); - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - } - - if (b1->flags & VNET_BUFFER_F_OFFLOAD) - { - vnet_buffer2 (b1)->outer_l3_hdr_offset = l3_1 - b1->data; - vnet_buffer2 (b1)->outer_l4_hdr_offset = (u8 *) udp1 - b1->data; - vnet_buffer_offload_flags_set (b1, - outer_packet_csum_offload_flags); - } - /* IPv4 checksum only */ - else if (is_ip4) - { - ip_csum_t sum1 = ip4_1->checksum; - sum1 = ip_csum_update (sum1, 0, ip4_1->length, ip4_header_t, - length /* changed member */); - if (PREDICT_FALSE (ip4_1_tos)) - { - sum1 = ip_csum_update (sum1, 0, ip4_1_tos, ip4_header_t, - tos /* 
changed member */); - } - ip4_1->checksum = ip_csum_fold (sum1); - } - /* IPv6 UDP checksum is mandatory */ - else - { - int bogus = 0; - - udp1->checksum = ip6_tcp_udp_icmp_compute_checksum - (vm, b1, ip6_1, &bogus); - ASSERT (bogus == 0); - if (udp1->checksum == 0) - udp1->checksum = 0xffff; - } - - /* save inner packet flow_hash for load-balance node */ - vnet_buffer (b0)->ip.flow_hash = flow_hash0; - vnet_buffer (b1)->ip.flow_hash = flow_hash1; - - if (sw_if_index0 == sw_if_index1) - { - vlib_increment_combined_counter (tx_counter, thread_index, - sw_if_index0, 2, len0 + len1); - } - else - { - vlib_increment_combined_counter (tx_counter, thread_index, - sw_if_index0, 1, len0); - vlib_increment_combined_counter (tx_counter, thread_index, - sw_if_index1, 1, len1); - } - pkts_encapsulated += 2; - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - vxlan_encap_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->tunnel_index = t0 - vxm->tunnels; - tr->vni = t0->vni; - } - - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - vxlan_encap_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof (*tr)); - tr->tunnel_index = t1 - vxm->tunnels; - tr->vni = t1->vni; - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0 = to_next[0] = from[0]; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - vlib_buffer_t *b0 = b[0]; - b += 1; - - u32 flow_hash0 = vnet_l2_compute_flow_hash (b0); - - /* Get next node index and adj index from tunnel next_dpo */ - if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX]) - { - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; - vnet_hw_interface_t *hi0 = - vnet_get_sup_hw_interface (vnm, sw_if_index0); - t0 = &vxm->tunnels[hi0->dev_instance]; - /* Note: change to always set next0 if it may be set to drop */ - next0 = t0->next_dpo.dpoi_next_node; - 
dpoi_idx0 = t0->next_dpo.dpoi_index; - } - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0; - - ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len); - vnet_rewrite_one_header (*t0, vlib_buffer_get_current (b0), - underlay_hdr_len); - - vlib_buffer_advance (b0, -underlay_hdr_len); - void *underlay0 = vlib_buffer_get_current (b0); - - u32 len0 = vlib_buffer_length_in_chain (vm, b0); - u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len); - - udp_header_t *udp0; - ip4_header_t *ip4_0; - qos_bits_t ip4_0_tos = 0; - ip6_header_t *ip6_0; - u8 *l3_0; - if (is_ip4) - { - ip4_vxlan_header_t *hdr = underlay0; - - /* Fix the IP4 checksum and length */ - ip4_0 = &hdr->ip4; - ip4_0->length = clib_host_to_net_u16 (len0); - - if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID)) - { - ip4_0_tos = vnet_buffer2 (b0)->qos.bits; - ip4_0->tos = ip4_0_tos; - } - - l3_0 = (u8 *) ip4_0; - udp0 = &hdr->udp; - } - else /* ip6 path */ - { - ip6_vxlan_header_t *hdr = underlay0; - - /* Fix IP6 payload length */ - ip6_0 = &hdr->ip6; - ip6_0->payload_length = payload_l0; - - l3_0 = (u8 *) ip6_0; - udp0 = &hdr->udp; - } - - /* Fix UDP length and set source port */ - udp0->length = payload_l0; - udp0->src_port = flow_hash0; - - if (b0->flags & VNET_BUFFER_F_OFFLOAD) - { - vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data; - vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data; - vnet_buffer_offload_flags_set (b0, - outer_packet_csum_offload_flags); - } - /* IPv4 checksum only */ - else if (is_ip4) - { - ip_csum_t sum0 = ip4_0->checksum; - sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t, - length /* changed member */); - if (PREDICT_FALSE (ip4_0_tos)) - { - sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t, - tos /* changed member */); - } - ip4_0->checksum = ip_csum_fold (sum0); - } - /* IPv6 UDP checksum is mandatory */ - else - { - int bogus = 0; - - udp0->checksum = ip6_tcp_udp_icmp_compute_checksum - (vm, b0, ip6_0, &bogus); - ASSERT 
(bogus == 0); - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - } - - /* reuse inner packet flow_hash for load-balance node */ - vnet_buffer (b0)->ip.flow_hash = flow_hash0; - - vlib_increment_combined_counter (tx_counter, thread_index, - sw_if_index0, 1, len0); - pkts_encapsulated++; - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - vxlan_encap_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->tunnel_index = t0 - vxm->tunnels; - tr->vni = t0->vni; - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - /* Do we still need this now that tunnel tx stats is kept? */ - vlib_node_increment_counter (vm, node->node_index, - VXLAN_ENCAP_ERROR_ENCAPSULATED, - pkts_encapsulated); - - return from_frame->n_vectors; -} - -VLIB_NODE_FN (vxlan4_encap_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - /* Disable chksum offload as setup overhead in tx node is not worthwhile - for ip4 header checksum only, unless udp checksum is also required */ - return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1); -} - -VLIB_NODE_FN (vxlan6_encap_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - /* Enable checksum offload for ip6 as udp checksum is mandatory, */ - return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (vxlan4_encap_node) = { - .name = "vxlan4-encap", - .vector_size = sizeof (u32), - .format_trace = format_vxlan_encap_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(vxlan_encap_error_strings), - .error_strings = vxlan_encap_error_strings, - .n_next_nodes = VXLAN_ENCAP_N_NEXT, - .next_nodes = { - [VXLAN_ENCAP_NEXT_DROP] = "error-drop", - }, -}; - -VLIB_REGISTER_NODE (vxlan6_encap_node) = { - .name = "vxlan6-encap", - .vector_size = sizeof (u32), - .format_trace = 
format_vxlan_encap_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(vxlan_encap_error_strings), - .error_strings = vxlan_encap_error_strings, - .n_next_nodes = VXLAN_ENCAP_N_NEXT, - .next_nodes = { - [VXLAN_ENCAP_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api deleted file mode 100644 index b7e678595d8..00000000000 --- a/src/vnet/vxlan/vxlan.api +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -option version = "2.1.0"; - -import "vnet/interface_types.api"; -import "vnet/ip/ip_types.api"; - -/** \brief Create or delete a VXLAN tunnel - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - Use 1 to create the tunnel, 0 to remove it - @param instance - optional unique custom device instance, else ~0. 
- @param src_address - Source IP address - @param dst_address - Destination IP address, can be multicast - @param mcast_sw_if_index - Interface for multicast destination - @param encap_vrf_id - Encap route table FIB index - @param decap_next_index - index of decap next graph node - @param vni - The VXLAN Network Identifier, uint24 -*/ -define vxlan_add_del_tunnel -{ - u32 client_index; - u32 context; - bool is_add [default=true]; - u32 instance; /* If non-~0, specifies a custom dev instance */ - vl_api_address_t src_address; - vl_api_address_t dst_address; - vl_api_interface_index_t mcast_sw_if_index; - u32 encap_vrf_id; - u32 decap_next_index; - u32 vni; -}; - -/** \brief Create or delete a VXLAN tunnel - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - Use 1 to create the tunnel, 0 to remove it - @param instance - optional unique custom device instance, else ~0. - @param src_address - Source IP address - @param dst_address - Destination IP address, can be multicast - @param src_port - Source UDP port. It is not included in sent packets. 
Used only for port registration - @param dst_port - Destination UDP port - @param mcast_sw_if_index - Interface for multicast destination - @param encap_vrf_id - Encap route table FIB index - @param decap_next_index - index of decap next graph node - @param vni - The VXLAN Network Identifier, uint24 -*/ -define vxlan_add_del_tunnel_v2 -{ - u32 client_index; - u32 context; - bool is_add [default=true]; - u32 instance [default=0xffffffff]; /* If non-~0, specifies a custom dev instance */ - vl_api_address_t src_address; - vl_api_address_t dst_address; - u16 src_port; - u16 dst_port; - vl_api_interface_index_t mcast_sw_if_index; - u32 encap_vrf_id; - u32 decap_next_index; - u32 vni; -}; - -/** \brief Create or delete a VXLAN tunnel - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - Use 1 to create the tunnel, 0 to remove it - @param instance - optional unique custom device instance, else ~0. - @param src_address - Source IP address - @param dst_address - Destination IP address, can be multicast - @param src_port - Source UDP port. It is not included in sent packets. 
Used only for port registration - @param dst_port - Destination UDP port - @param mcast_sw_if_index - Interface for multicast destination - @param encap_vrf_id - Encap route table FIB index - @param decap_next_index - index of decap next graph node - @param vni - The VXLAN Network Identifier, uint24 - @param is_l3 - if true, create the interface in L3 mode, w/o MAC -*/ -define vxlan_add_del_tunnel_v3 -{ - u32 client_index; - u32 context; - bool is_add [default=true]; - u32 instance [default=0xffffffff]; /* If non-~0, specifies a custom dev instance */ - vl_api_address_t src_address; - vl_api_address_t dst_address; - u16 src_port; - u16 dst_port; - vl_api_interface_index_t mcast_sw_if_index; - u32 encap_vrf_id; - u32 decap_next_index; - u32 vni; - bool is_l3 [default=false]; -}; - -define vxlan_add_del_tunnel_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; -define vxlan_add_del_tunnel_v2_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; -define vxlan_add_del_tunnel_v3_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; -}; - -define vxlan_tunnel_dump -{ - u32 client_index; - u32 context; - vl_api_interface_index_t sw_if_index; -}; -define vxlan_tunnel_v2_dump -{ - u32 client_index; - u32 context; - vl_api_interface_index_t sw_if_index; -}; - -define vxlan_tunnel_details -{ - u32 context; - vl_api_interface_index_t sw_if_index; - u32 instance; - vl_api_address_t src_address; - vl_api_address_t dst_address; - vl_api_interface_index_t mcast_sw_if_index; - u32 encap_vrf_id; - u32 decap_next_index; - u32 vni; -}; -define vxlan_tunnel_v2_details -{ - u32 context; - vl_api_interface_index_t sw_if_index; - u32 instance; - vl_api_address_t src_address; - vl_api_address_t dst_address; - u16 src_port; - u16 dst_port; - vl_api_interface_index_t mcast_sw_if_index; - u32 encap_vrf_id; - u32 decap_next_index; - u32 vni; -}; - -/** \brief Interface set vxlan-bypass request - @param 
client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - interface used to reach neighbor - @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass - @param enable - if non-zero enable, else disable -*/ -autoreply define sw_interface_set_vxlan_bypass -{ - u32 client_index; - u32 context; - vl_api_interface_index_t sw_if_index; - bool is_ipv6; - bool enable [default=true]; -}; - -/** \brief Offload vxlan rx request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param hw_if_index - rx hw interface - @param sw_if_index - vxlan interface to offload - @param enable - if non-zero enable, else disable -*/ -autoreply define vxlan_offload_rx -{ - u32 client_index; - u32 context; - vl_api_interface_index_t hw_if_index; - vl_api_interface_index_t sw_if_index; - bool enable [default=true]; -}; diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c deleted file mode 100644 index f670ee9c764..00000000000 --- a/src/vnet/vxlan/vxlan.c +++ /dev/null @@ -1,1350 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include <vnet/vxlan/vxlan.h> -#include <vnet/ip/format.h> -#include <vnet/fib/fib_entry.h> -#include <vnet/fib/fib_table.h> -#include <vnet/fib/fib_entry_track.h> -#include <vnet/mfib/mfib_table.h> -#include <vnet/adj/adj_mcast.h> -#include <vnet/adj/rewrite.h> -#include <vnet/dpo/drop_dpo.h> -#include <vnet/interface.h> -#include <vnet/flow/flow.h> -#include <vnet/udp/udp_local.h> -#include <vlib/vlib.h> - -/** - * @file - * @brief VXLAN. - * - * VXLAN provides the features needed to allow L2 bridge domains (BDs) - * to span multiple servers. This is done by building an L2 overlay on - * top of an L3 network underlay using VXLAN tunnels. - * - * This makes it possible for servers to be co-located in the same data - * center or be separated geographically as long as they are reachable - * through the underlay L3 network. - * - * You can refer to this kind of L2 overlay bridge domain as a VXLAN - * (Virtual eXtensible VLAN) segment. - */ - - -vxlan_main_t vxlan_main; - -static u32 -vxlan_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags) -{ - /* nothing for now */ - return 0; -} - -static clib_error_t * -vxlan_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw, - u32 frame_size) -{ - /* nothing for now */ - return 0; -} - -static u8 * -format_decap_next (u8 * s, va_list * args) -{ - u32 next_index = va_arg (*args, u32); - - if (next_index == VXLAN_INPUT_NEXT_DROP) - return format (s, "drop"); - else - return format (s, "index %d", next_index); - return s; -} - -u8 * -format_vxlan_tunnel (u8 * s, va_list * args) -{ - vxlan_tunnel_t *t = va_arg (*args, vxlan_tunnel_t *); - - s = format (s, - "[%d] instance %d src %U dst %U src_port %d dst_port %d vni %d " - "fib-idx %d sw-if-idx %d ", - t->dev_instance, t->user_instance, format_ip46_address, &t->src, - IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY, - t->src_port, t->dst_port, t->vni, t->encap_fib_index, - t->sw_if_index); - - s = format (s, "encap-dpo-idx %d ", 
t->next_dpo.dpoi_index); - - if (PREDICT_FALSE (t->decap_next_index != VXLAN_INPUT_NEXT_L2_INPUT)) - s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index); - - if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst))) - s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index); - - if (t->flow_index != ~0) - s = format (s, "flow-index %d [%U]", t->flow_index, - format_flow_enabled_hw, t->flow_index); - - return s; -} - -static u8 * -format_vxlan_name (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t; - - if (dev_instance == ~0) - return format (s, "<cached-unused>"); - - if (dev_instance >= vec_len (vxm->tunnels)) - return format (s, "<improperly-referenced>"); - - t = pool_elt_at_index (vxm->tunnels, dev_instance); - - return format (s, "vxlan_tunnel%d", t->user_instance); -} - -static clib_error_t * -vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 
- VNET_HW_INTERFACE_FLAG_LINK_UP : 0; - vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); - - return /* no error */ 0; -} - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (vxlan_device_class, static) = { - .name = "VXLAN", - .format_device_name = format_vxlan_name, - .format_tx_trace = format_vxlan_encap_trace, - .admin_up_down_function = vxlan_interface_admin_up_down, -}; -/* *INDENT-ON* */ - -static u8 * -format_vxlan_header_with_length (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - s = format (s, "unimplemented dev %u", dev_instance); - return s; -} - -/* *INDENT-OFF* */ -VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = { - .name = "VXLAN", - .format_header = format_vxlan_header_with_length, - .build_rewrite = default_build_rewrite, -}; -/* *INDENT-ON* */ - -static void -vxlan_tunnel_restack_dpo (vxlan_tunnel_t * t) -{ - u8 is_ip4 = ip46_address_is_ip4 (&t->dst); - dpo_id_t dpo = DPO_INVALID; - fib_forward_chain_type_t forw_type = is_ip4 ? - FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6; - - fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo); - - /* vxlan uses the payload hash as the udp source port - * hence the packet's hash is unknown - * skip single bucket load balance dpo's */ - while (DPO_LOAD_BALANCE == dpo.dpoi_type) - { - const load_balance_t *lb; - const dpo_id_t *choice; - - lb = load_balance_get (dpo.dpoi_index); - if (lb->lb_n_buckets > 1) - break; - - choice = load_balance_get_bucket_i (lb, 0); - - if (DPO_RECEIVE == choice->dpoi_type) - dpo_copy (&dpo, drop_dpo_get (choice->dpoi_proto)); - else - dpo_copy (&dpo, choice); - } - - u32 encap_index = is_ip4 ? 
- vxlan4_encap_node.index : vxlan6_encap_node.index; - dpo_stack_from_node (encap_index, &t->next_dpo, &dpo); - dpo_reset (&dpo); -} - -static vxlan_tunnel_t * -vxlan_tunnel_from_fib_node (fib_node_t * node) -{ - ASSERT (FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type); - return ((vxlan_tunnel_t *) (((char *) node) - - STRUCT_OFFSET_OF (vxlan_tunnel_t, node))); -} - -/** - * Function definition to backwalk a FIB node - - * Here we will restack the new dpo of VXLAN DIP to encap node. - */ -static fib_node_back_walk_rc_t -vxlan_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) -{ - vxlan_tunnel_restack_dpo (vxlan_tunnel_from_fib_node (node)); - return (FIB_NODE_BACK_WALK_CONTINUE); -} - -/** - * Function definition to get a FIB node from its index - */ -static fib_node_t * -vxlan_tunnel_fib_node_get (fib_node_index_t index) -{ - vxlan_tunnel_t *t; - vxlan_main_t *vxm = &vxlan_main; - - t = pool_elt_at_index (vxm->tunnels, index); - - return (&t->node); -} - -/** - * Function definition to inform the FIB node that its last lock has gone. - */ -static void -vxlan_tunnel_last_lock_gone (fib_node_t * node) -{ - /* - * The VXLAN tunnel is a root of the graph. As such - * it never has children and thus is never locked. - */ - ASSERT (0); -} - -/* - * Virtual function table registered by VXLAN tunnels - * for participation in the FIB object graph. - */ -const static fib_node_vft_t vxlan_vft = { - .fnv_get = vxlan_tunnel_fib_node_get, - .fnv_last_lock = vxlan_tunnel_last_lock_gone, - .fnv_back_walk = vxlan_tunnel_back_walk, -}; - -#define foreach_copy_field \ - _ (vni) \ - _ (mcast_sw_if_index) \ - _ (encap_fib_index) \ - _ (decap_next_index) \ - _ (src) \ - _ (dst) \ - _ (src_port) \ - _ (dst_port) - -static void -vxlan_rewrite (vxlan_tunnel_t * t, bool is_ip6) -{ - union - { - ip4_vxlan_header_t h4; - ip6_vxlan_header_t h6; - } h; - int len = is_ip6 ? 
sizeof h.h6 : sizeof h.h4; - - udp_header_t *udp; - vxlan_header_t *vxlan; - /* Fixed portion of the (outer) ip header */ - - clib_memset (&h, 0, sizeof (h)); - if (!is_ip6) - { - ip4_header_t *ip = &h.h4.ip4; - udp = &h.h4.udp, vxlan = &h.h4.vxlan; - ip->ip_version_and_header_length = 0x45; - ip->ttl = 254; - ip->protocol = IP_PROTOCOL_UDP; - - ip->src_address = t->src.ip4; - ip->dst_address = t->dst.ip4; - - /* we fix up the ip4 header length and checksum after-the-fact */ - ip->checksum = ip4_header_checksum (ip); - } - else - { - ip6_header_t *ip = &h.h6.ip6; - udp = &h.h6.udp, vxlan = &h.h6.vxlan; - ip->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (6 << 28); - ip->hop_limit = 255; - ip->protocol = IP_PROTOCOL_UDP; - - ip->src_address = t->src.ip6; - ip->dst_address = t->dst.ip6; - } - - /* UDP header, randomize src port on something, maybe? */ - udp->src_port = clib_host_to_net_u16 (t->src_port); - udp->dst_port = clib_host_to_net_u16 (t->dst_port); - - /* VXLAN header */ - vnet_set_vni_and_flags (vxlan, t->vni); - vnet_rewrite_set_data (*t, &h, len); -} - -static bool -vxlan_decap_next_is_valid (vxlan_main_t * vxm, u32 is_ip6, - u32 decap_next_index) -{ - vlib_main_t *vm = vxm->vlib_main; - u32 input_idx = (!is_ip6) ? 
- vxlan4_input_node.index : vxlan6_input_node.index; - vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx); - - return decap_next_index < r->n_next_nodes; -} - -/* *INDENT-OFF* */ -typedef CLIB_PACKED(union -{ - struct - { - fib_node_index_t mfib_entry_index; - adj_index_t mcast_adj_index; - }; - u64 as_u64; -}) mcast_shared_t; -/* *INDENT-ON* */ - -static inline mcast_shared_t -mcast_shared_get (ip46_address_t * ip) -{ - ASSERT (ip46_address_is_multicast (ip)); - uword *p = hash_get_mem (vxlan_main.mcast_shared, ip); - ALWAYS_ASSERT (p); - mcast_shared_t ret = {.as_u64 = *p }; - return ret; -} - -static inline void -mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai) -{ - mcast_shared_t new_ep = { - .mcast_adj_index = ai, - .mfib_entry_index = mfei, - }; - - hash_set_mem_alloc (&vxlan_main.mcast_shared, dst, new_ep.as_u64); -} - -static inline void -mcast_shared_remove (ip46_address_t * dst) -{ - mcast_shared_t ep = mcast_shared_get (dst); - - adj_unlock (ep.mcast_adj_index); - mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN); - - hash_unset_mem_free (&vxlan_main.mcast_shared, dst); -} - -int vnet_vxlan_add_del_tunnel - (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp) -{ - vxlan_main_t *vxm = &vxlan_main; - vnet_main_t *vnm = vxm->vnet_main; - vxlan_decap_info_t *p; - u32 sw_if_index = ~0; - vxlan4_tunnel_key_t key4; - vxlan6_tunnel_key_t key6; - u32 is_ip6 = a->is_ip6; - vlib_main_t *vm = vlib_get_main (); - u8 hw_addr[6]; - - /* Set udp-ports */ - if (a->src_port == 0) - a->src_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan; - - if (a->dst_port == 0) - a->dst_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan; - - int not_found; - if (!is_ip6) - { - /* ip4 mcast is indexed by mcast addr only */ - key4.key[0] = ip46_address_is_multicast (&a->dst) ? 
- a->dst.ip4.as_u32 : - a->dst.ip4.as_u32 | (((u64) a->src.ip4.as_u32) << 32); - key4.key[1] = ((u64) clib_host_to_net_u16 (a->src_port) << 48) | - (((u64) a->encap_fib_index) << 32) | - clib_host_to_net_u32 (a->vni << 8); - not_found = - clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4); - p = (void *) &key4.value; - } - else - { - key6.key[0] = a->dst.ip6.as_u64[0]; - key6.key[1] = a->dst.ip6.as_u64[1]; - key6.key[2] = (((u64) clib_host_to_net_u16 (a->src_port) << 48) | - ((u64) a->encap_fib_index) << 32) | - clib_host_to_net_u32 (a->vni << 8); - not_found = - clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6); - p = (void *) &key6.value; - } - - if (not_found) - p = 0; - - if (a->is_add) - { - l2input_main_t *l2im = &l2input_main; - u32 dev_instance; /* real dev instance tunnel index */ - u32 user_instance; /* request and actual instance number */ - - /* adding a tunnel: tunnel must not already exist */ - if (p) - return VNET_API_ERROR_TUNNEL_EXIST; - - /*if not set explicitly, default to l2 */ - if (a->decap_next_index == ~0) - a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT; - if (!vxlan_decap_next_is_valid (vxm, is_ip6, a->decap_next_index)) - return VNET_API_ERROR_INVALID_DECAP_NEXT; - - vxlan_tunnel_t *t; - pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES); - clib_memset (t, 0, sizeof (*t)); - dev_instance = t - vxm->tunnels; - - /* copy from arg structure */ -#define _(x) t->x = a->x; - foreach_copy_field; -#undef _ - - vxlan_rewrite (t, is_ip6); - /* - * Reconcile the real dev_instance and a possible requested instance. 
- */ - user_instance = a->instance; - if (user_instance == ~0) - user_instance = dev_instance; - if (hash_get (vxm->instance_used, user_instance)) - { - pool_put (vxm->tunnels, t); - return VNET_API_ERROR_INSTANCE_IN_USE; - } - - hash_set (vxm->instance_used, user_instance, 1); - - t->dev_instance = dev_instance; /* actual */ - t->user_instance = user_instance; /* name */ - t->flow_index = ~0; - - if (a->is_l3) - t->hw_if_index = - vnet_register_interface (vnm, vxlan_device_class.index, dev_instance, - vxlan_hw_class.index, dev_instance); - else - { - vnet_eth_interface_registration_t eir = {}; - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - memcpy (hw_addr + 2, &rnd, sizeof (rnd)); - hw_addr[0] = 2; - hw_addr[1] = 0xfe; - - eir.dev_class_index = vxlan_device_class.index; - eir.dev_instance = dev_instance; - eir.address = hw_addr; - eir.cb.flag_change = vxlan_eth_flag_change; - eir.cb.set_max_frame_size = vxlan_eth_set_max_frame_size; - t->hw_if_index = vnet_eth_register_interface (vnm, &eir); - } - - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index); - - /* Set vxlan tunnel output node */ - u32 encap_index = !is_ip6 ? 
- vxlan4_encap_node.index : vxlan6_encap_node.index; - vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index); - - t->sw_if_index = sw_if_index = hi->sw_if_index; - - /* copy the key */ - int add_failed; - if (is_ip6) - { - key6.value = (u64) dev_instance; - add_failed = clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key, - &key6, 1 /*add */ ); - } - else - { - vxlan_decap_info_t di = {.sw_if_index = t->sw_if_index, }; - if (ip46_address_is_multicast (&t->dst)) - di.local_ip = t->src.ip4; - else - di.next_index = t->decap_next_index; - key4.value = di.as_u64; - add_failed = clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key, - &key4, 1 /*add */ ); - } - - if (add_failed) - { - if (a->is_l3) - vnet_delete_hw_interface (vnm, t->hw_if_index); - else - ethernet_delete_interface (vnm, t->hw_if_index); - hash_unset (vxm->instance_used, t->user_instance); - pool_put (vxm->tunnels, t); - return VNET_API_ERROR_INVALID_REGISTRATION; - } - - vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index, - ~0); - vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance; - - /* setup l2 input config with l2 feature and bd 0 to drop packet */ - vec_validate (l2im->configs, sw_if_index); - l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP; - l2im->configs[sw_if_index].bd_index = 0; - - vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); - si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN; - vnet_sw_interface_set_flags (vnm, sw_if_index, - VNET_SW_INTERFACE_FLAG_ADMIN_UP); - - fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL); - fib_prefix_t tun_dst_pfx; - vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL; - - fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx); - if (!ip46_address_is_multicast (&t->dst)) - { - /* Unicast tunnel - - * source the FIB entry for the tunnel's destination - * and become a child thereof. 
The tunnel will then get poked - * when the forwarding for the entry updates, and the tunnel can - * re-stack accordingly - */ - vtep_addr_ref (&vxm->vtep_table, t->encap_fib_index, &t->src); - t->fib_entry_index = fib_entry_track (t->encap_fib_index, - &tun_dst_pfx, - FIB_NODE_TYPE_VXLAN_TUNNEL, - dev_instance, - &t->sibling_index); - vxlan_tunnel_restack_dpo (t); - } - else - { - /* Multicast tunnel - - * as the same mcast group can be used for multiple mcast tunnels - * with different VNIs, create the output fib adjacency only if - * it does not already exist - */ - fib_protocol_t fp = fib_ip_proto (is_ip6); - - if (vtep_addr_ref (&vxm->vtep_table, - t->encap_fib_index, &t->dst) == 1) - { - fib_node_index_t mfei; - adj_index_t ai; - fib_route_path_t path = { - .frp_proto = fib_proto_to_dpo (fp), - .frp_addr = zero_addr, - .frp_sw_if_index = 0xffffffff, - .frp_fib_index = ~0, - .frp_weight = 1, - .frp_flags = FIB_ROUTE_PATH_LOCAL, - .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD, - }; - const mfib_prefix_t mpfx = { - .fp_proto = fp, - .fp_len = (is_ip6 ? 
128 : 32), - .fp_grp_addr = tun_dst_pfx.fp_addr, - }; - - /* - * Setup the (*,G) to receive traffic on the mcast group - * - the forwarding interface is for-us - * - the accepting interface is that from the API - */ - mfib_table_entry_path_update (t->encap_fib_index, &mpfx, - MFIB_SOURCE_VXLAN, - MFIB_ENTRY_FLAG_NONE, &path); - - path.frp_sw_if_index = a->mcast_sw_if_index; - path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE; - path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT; - mfei = mfib_table_entry_path_update ( - t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN, - MFIB_ENTRY_FLAG_NONE, &path); - - /* - * Create the mcast adjacency to send traffic to the group - */ - ai = adj_mcast_add_or_lock (fp, - fib_proto_to_link (fp), - a->mcast_sw_if_index); - - /* - * create a new end-point - */ - mcast_shared_add (&t->dst, mfei, ai); - } - - dpo_id_t dpo = DPO_INVALID; - mcast_shared_t ep = mcast_shared_get (&t->dst); - - /* Stack shared mcast dst mac addr rewrite on encap */ - dpo_set (&dpo, DPO_ADJACENCY_MCAST, - fib_proto_to_dpo (fp), ep.mcast_adj_index); - - dpo_stack_from_node (encap_index, &t->next_dpo, &dpo); - dpo_reset (&dpo); - flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER; - } - - vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class = - flood_class; - } - else - { - /* deleting a tunnel: tunnel must exist */ - if (!p) - return VNET_API_ERROR_NO_SUCH_ENTRY; - - u32 instance = is_ip6 ? 
key6.value : - vxm->tunnel_index_by_sw_if_index[p->sw_if_index]; - vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, instance); - - sw_if_index = t->sw_if_index; - vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ ); - - vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0; - - if (!is_ip6) - clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key, &key4, - 0 /*del */ ); - else - clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key, &key6, - 0 /*del */ ); - - if (!ip46_address_is_multicast (&t->dst)) - { - if (t->flow_index != ~0) - vnet_flow_del (vnm, t->flow_index); - - vtep_addr_unref (&vxm->vtep_table, t->encap_fib_index, &t->src); - fib_entry_untrack (t->fib_entry_index, t->sibling_index); - } - else if (vtep_addr_unref (&vxm->vtep_table, - t->encap_fib_index, &t->dst) == 0) - { - mcast_shared_remove (&t->dst); - } - - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, t->hw_if_index); - if (hw->dev_class_index == vxlan_device_class.index) - vnet_delete_hw_interface (vnm, t->hw_if_index); - else - ethernet_delete_interface (vnm, t->hw_if_index); - hash_unset (vxm->instance_used, t->user_instance); - - fib_node_deinit (&t->node); - pool_put (vxm->tunnels, t); - } - - if (sw_if_indexp) - *sw_if_indexp = sw_if_index; - - if (a->is_add) - { - /* register udp ports */ - if (!is_ip6 && !udp_is_valid_dst_port (a->src_port, 1)) - udp_register_dst_port (vxm->vlib_main, a->src_port, - vxlan4_input_node.index, 1); - if (is_ip6 && !udp_is_valid_dst_port (a->src_port, 0)) - udp_register_dst_port (vxm->vlib_main, a->src_port, - vxlan6_input_node.index, 0); - } - - return 0; -} - -static uword -get_decap_next_for_node (u32 node_index, u32 ipv4_set) -{ - vxlan_main_t *vxm = &vxlan_main; - vlib_main_t *vm = vxm->vlib_main; - uword input_node = (ipv4_set) ? 
vxlan4_input_node.index : - vxlan6_input_node.index; - - return vlib_node_add_next (vm, input_node, node_index); -} - -static uword -unformat_decap_next (unformat_input_t * input, va_list * args) -{ - u32 *result = va_arg (*args, u32 *); - u32 ipv4_set = va_arg (*args, int); - vxlan_main_t *vxm = &vxlan_main; - vlib_main_t *vm = vxm->vlib_main; - u32 node_index; - u32 tmp; - - if (unformat (input, "l2")) - *result = VXLAN_INPUT_NEXT_L2_INPUT; - else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index)) - *result = get_decap_next_for_node (node_index, ipv4_set); - else if (unformat (input, "%d", &tmp)) - *result = tmp; - else - return 0; - return 1; -} - -static clib_error_t * -vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - ip46_address_t src = ip46_address_initializer, dst = - ip46_address_initializer; - u8 is_add = 1; - u8 src_set = 0; - u8 dst_set = 0; - u8 grp_set = 0; - u8 ipv4_set = 0; - u8 ipv6_set = 0; - u8 is_l3 = 0; - u32 instance = ~0; - u32 encap_fib_index = 0; - u32 mcast_sw_if_index = ~0; - u32 decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT; - u32 vni = 0; - u32 src_port = 0; - u32 dst_port = 0; - u32 table_id; - clib_error_t *parse_error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "del")) - { - is_add = 0; - } - else if (unformat (line_input, "instance %d", &instance)) - ; - else if (unformat (line_input, "src %U", - unformat_ip46_address, &src, IP46_TYPE_ANY)) - { - src_set = 1; - ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1); - } - else if (unformat (line_input, "dst %U", - unformat_ip46_address, &dst, IP46_TYPE_ANY)) - { - dst_set = 1; - ip46_address_is_ip4 (&dst) ? 
(ipv4_set = 1) : (ipv6_set = 1); - } - else if (unformat (line_input, "group %U %U", - unformat_ip46_address, &dst, IP46_TYPE_ANY, - unformat_vnet_sw_interface, - vnet_get_main (), &mcast_sw_if_index)) - { - grp_set = dst_set = 1; - ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1); - } - else if (unformat (line_input, "encap-vrf-id %d", &table_id)) - { - encap_fib_index = - fib_table_find (fib_ip_proto (ipv6_set), table_id); - } - else if (unformat (line_input, "l3")) - is_l3 = 1; - else if (unformat (line_input, "decap-next %U", unformat_decap_next, - &decap_next_index, ipv4_set)) - ; - else if (unformat (line_input, "vni %d", &vni)) - ; - else if (unformat (line_input, "src_port %d", &src_port)) - ; - else if (unformat (line_input, "dst_port %d", &dst_port)) - ; - else - { - parse_error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - break; - } - } - - unformat_free (line_input); - - if (parse_error) - return parse_error; - - if (is_l3 && decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT) - { - vlib_node_t *node = vlib_get_node_by_name ( - vm, (u8 *) (ipv4_set ? 
"ip4-input" : "ip6-input")); - decap_next_index = get_decap_next_for_node (node->index, ipv4_set); - } - - if (encap_fib_index == ~0) - return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id); - - if (src_set == 0) - return clib_error_return (0, "tunnel src address not specified"); - - if (dst_set == 0) - return clib_error_return (0, "tunnel dst address not specified"); - - if (grp_set && !ip46_address_is_multicast (&dst)) - return clib_error_return (0, "tunnel group address not multicast"); - - if (grp_set == 0 && ip46_address_is_multicast (&dst)) - return clib_error_return (0, "dst address must be unicast"); - - if (grp_set && mcast_sw_if_index == ~0) - return clib_error_return (0, "tunnel nonexistent multicast device"); - - if (ipv4_set && ipv6_set) - return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); - - if (ip46_address_cmp (&src, &dst) == 0) - return clib_error_return (0, "src and dst addresses are identical"); - - if (decap_next_index == ~0) - return clib_error_return (0, "next node not found"); - - if (vni == 0) - return clib_error_return (0, "vni not specified"); - - if (vni >> 24) - return clib_error_return (0, "vni %d out of range", vni); - - vnet_vxlan_add_del_tunnel_args_t a = { .is_add = is_add, - .is_ip6 = ipv6_set, - .is_l3 = is_l3, - .instance = instance, -#define _(x) .x = x, - foreach_copy_field -#undef _ - }; - - u32 tunnel_sw_if_index; - int rv = vnet_vxlan_add_del_tunnel (&a, &tunnel_sw_if_index); - - switch (rv) - { - case 0: - if (is_add) - vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, - vnet_get_main (), tunnel_sw_if_index); - break; - - case VNET_API_ERROR_TUNNEL_EXIST: - return clib_error_return (0, "tunnel already exists..."); - - case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "tunnel does not exist..."); - - case VNET_API_ERROR_INSTANCE_IN_USE: - return clib_error_return (0, "Instance is in use"); - - default: - return clib_error_return - (0, "vnet_vxlan_add_del_tunnel 
returned %d", rv); - } - - return 0; -} - -/*? - * Add or delete a VXLAN Tunnel. - * - * VXLAN provides the features needed to allow L2 bridge domains (BDs) - * to span multiple servers. This is done by building an L2 overlay on - * top of an L3 network underlay using VXLAN tunnels. - * - * This makes it possible for servers to be co-located in the same data - * center or be separated geographically as long as they are reachable - * through the underlay L3 network. - * - * You can refer to this kind of L2 overlay bridge domain as a VXLAN - * (Virtual eXtensible VLAN) segment. - * - * @cliexpar - * Example of how to create a VXLAN Tunnel: - * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id - 7} - * Example of how to create a VXLAN Tunnel with a known name, vxlan_tunnel42: - * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 instance 42} - * Example of how to create a multicast VXLAN Tunnel with a known name, - vxlan_tunnel23: - * @cliexcmd{create vxlan tunnel src 10.0.3.1 group 239.1.1.1 - GigabitEthernet0/8/0 instance 23} - * Example of how to create a VXLAN Tunnel with custom udp-ports: - * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 src_port - 59000 dst_port 59001} - * Example of how to delete a VXLAN Tunnel: - * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del} - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = { - .path = "create vxlan tunnel", - .short_help = - "create vxlan tunnel src <local-vtep-addr>" - " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>" - " [instance <id>]" - " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del] [l3]" - " [src_port <local-vtep-udp-port>] [dst_port <remote-vtep-udp-port>]", - .function = vxlan_add_del_tunnel_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_vxlan_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vxlan_main_t *vxm = 
&vxlan_main; - vxlan_tunnel_t *t; - int raw = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "raw")) - raw = 1; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, input); - } - - if (pool_elts (vxm->tunnels) == 0) - vlib_cli_output (vm, "No vxlan tunnels configured..."); - -/* *INDENT-OFF* */ - pool_foreach (t, vxm->tunnels) - { - vlib_cli_output (vm, "%U", format_vxlan_tunnel, t); - } -/* *INDENT-ON* */ - - if (raw) - { - vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n", - format_bihash_16_8, &vxm->vxlan4_tunnel_by_key, - 1 /* verbose */ ); - vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n", - format_bihash_24_8, &vxm->vxlan6_tunnel_by_key, - 1 /* verbose */ ); - } - - return 0; -} - -/*? - * Display all the VXLAN Tunnel entries. - * - * @cliexpar - * Example of how to display the VXLAN Tunnel entries: - * @cliexstart{show vxlan tunnel} - * [0] src 10.0.3.1 dst 10.0.3.3 src_port 4789 dst_port 4789 vni 13 - encap_fib_index 0 sw_if_index 5 decap_next l2 - * @cliexend - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = { - .path = "show vxlan tunnel", - .short_help = "show vxlan tunnel [raw]", - .function = show_vxlan_tunnel_command_fn, -}; -/* *INDENT-ON* */ - - -void -vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable) -{ - vxlan_main_t *vxm = &vxlan_main; - - if (pool_is_free_index (vxm->vnet_main->interface_main.sw_interfaces, - sw_if_index)) - return; - - is_enable = ! 
!is_enable; - - if (is_ip6) - { - if (clib_bitmap_get (vxm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index) - != is_enable) - { - vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-bypass", - sw_if_index, is_enable, 0, 0); - vxm->bm_ip6_bypass_enabled_by_sw_if = - clib_bitmap_set (vxm->bm_ip6_bypass_enabled_by_sw_if, - sw_if_index, is_enable); - } - } - else - { - if (clib_bitmap_get (vxm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index) - != is_enable) - { - vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass", - sw_if_index, is_enable, 0, 0); - vxm->bm_ip4_bypass_enabled_by_sw_if = - clib_bitmap_set (vxm->bm_ip4_bypass_enabled_by_sw_if, - sw_if_index, is_enable); - } - } -} - - -static clib_error_t * -set_ip_vxlan_bypass (u32 is_ip6, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vnet_main_t *vnm = vnet_get_main (); - clib_error_t *error = 0; - u32 sw_if_index, is_enable; - - sw_if_index = ~0; - is_enable = 1; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat_user - (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index)) - ; - else if (unformat (line_input, "del")) - is_enable = 0; - else - { - error = unformat_parse_error (line_input); - goto done; - } - } - - if (~0 == sw_if_index) - { - error = clib_error_return (0, "unknown interface `%U'", - format_unformat_error, line_input); - goto done; - } - - vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable); - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -set_ip4_vxlan_bypass (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - return set_ip_vxlan_bypass (0, input, cmd); -} - -/*? - * This command adds the 'ip4-vxlan-bypass' graph node for a given interface. 
- * By adding the IPv4 vxlan-bypass graph node to an interface, the node checks - * for and validate input vxlan packet and bypass ip4-lookup, ip4-local, - * ip4-udp-lookup nodes to speedup vxlan packet forwarding. This node will - * cause extra overhead to for non-vxlan packets which is kept at a minimum. - * - * @cliexpar - * @parblock - * Example of graph node before ip4-vxlan-bypass is enabled: - * @cliexstart{show vlib graph ip4-vxlan-bypass} - * Name Next Previous - * ip4-vxlan-bypass error-drop [0] - * vxlan4-input [1] - * ip4-lookup [2] - * @cliexend - * - * Example of how to enable ip4-vxlan-bypass on an interface: - * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0} - * - * Example of graph node after ip4-vxlan-bypass is enabled: - * @cliexstart{show vlib graph ip4-vxlan-bypass} - * Name Next Previous - * ip4-vxlan-bypass error-drop [0] ip4-input - * vxlan4-input [1] ip4-input-no-checksum - * ip4-lookup [2] - * @cliexend - * - * Example of how to display the feature enabled on an interface: - * @cliexstart{show ip interface features GigabitEthernet2/0/0} - * IP feature paths configured on GigabitEthernet2/0/0... - * ... - * ipv4 unicast: - * ip4-vxlan-bypass - * ip4-lookup - * ... - * @cliexend - * - * Example of how to disable ip4-vxlan-bypass on an interface: - * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0 del} - * @endparblock -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip_vxlan_bypass_command, static) = { - .path = "set interface ip vxlan-bypass", - .function = set_ip4_vxlan_bypass, - .short_help = "set interface ip vxlan-bypass <interface> [del]", -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_ip6_vxlan_bypass (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - return set_ip_vxlan_bypass (1, input, cmd); -} - -/*? - * This command adds the 'ip6-vxlan-bypass' graph node for a given interface. 
- * By adding the IPv6 vxlan-bypass graph node to an interface, the node checks - * for and validate input vxlan packet and bypass ip6-lookup, ip6-local, - * ip6-udp-lookup nodes to speedup vxlan packet forwarding. This node will - * cause extra overhead to for non-vxlan packets which is kept at a minimum. - * - * @cliexpar - * @parblock - * Example of graph node before ip6-vxlan-bypass is enabled: - * @cliexstart{show vlib graph ip6-vxlan-bypass} - * Name Next Previous - * ip6-vxlan-bypass error-drop [0] - * vxlan6-input [1] - * ip6-lookup [2] - * @cliexend - * - * Example of how to enable ip6-vxlan-bypass on an interface: - * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0} - * - * Example of graph node after ip6-vxlan-bypass is enabled: - * @cliexstart{show vlib graph ip6-vxlan-bypass} - * Name Next Previous - * ip6-vxlan-bypass error-drop [0] ip6-input - * vxlan6-input [1] ip4-input-no-checksum - * ip6-lookup [2] - * @cliexend - * - * Example of how to display the feature enabled on an interface: - * @cliexstart{show ip interface features GigabitEthernet2/0/0} - * IP feature paths configured on GigabitEthernet2/0/0... - * ... - * ipv6 unicast: - * ip6-vxlan-bypass - * ip6-lookup - * ... 
- * @cliexend - * - * Example of how to disable ip6-vxlan-bypass on an interface: - * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0 del} - * @endparblock -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip6_vxlan_bypass_command, static) = { - .path = "set interface ip6 vxlan-bypass", - .function = set_ip6_vxlan_bypass, - .short_help = "set interface ip6 vxlan-bypass <interface> [del]", -}; -/* *INDENT-ON* */ - -int -vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add) -{ - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index); - vnet_main_t *vnm = vnet_get_main (); - if (is_add) - { - if (t->flow_index == ~0) - { - vxlan_main_t *vxm = &vxlan_main; - vnet_flow_t flow = { - .actions = - VNET_FLOW_ACTION_REDIRECT_TO_NODE | VNET_FLOW_ACTION_MARK | - VNET_FLOW_ACTION_BUFFER_ADVANCE, - .mark_flow_id = t->dev_instance + vxm->flow_id_start, - .redirect_node_index = vxlan4_flow_input_node.index, - .buffer_advance = sizeof (ethernet_header_t), - .type = VNET_FLOW_TYPE_IP4_VXLAN, - .ip4_vxlan = { - .protocol.prot = IP_PROTOCOL_UDP, - .src_addr.addr = t->dst.ip4, - .dst_addr.addr = t->src.ip4, - .src_addr.mask.as_u32 = ~0, - .dst_addr.mask.as_u32 = ~0, - .dst_port.port = t->src_port, - .dst_port.mask = 0xFF, - .vni = t->vni, - } - , - }; - vnet_flow_add (vnm, &flow, &t->flow_index); - } - return vnet_flow_enable (vnm, t->flow_index, hw_if_index); - } - /* flow index is removed when the tunnel is deleted */ - return vnet_flow_disable (vnm, t->flow_index, hw_if_index); -} - -u32 -vnet_vxlan_get_tunnel_index (u32 sw_if_index) -{ - vxlan_main_t *vxm = &vxlan_main; - - if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) - return ~0; - return vxm->tunnel_index_by_sw_if_index[sw_if_index]; -} - -static clib_error_t * -vxlan_offload_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - - /* Get a 
line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - vnet_main_t *vnm = vnet_get_main (); - u32 rx_sw_if_index = ~0; - u32 hw_if_index = ~0; - int is_add = 1; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "hw %U", unformat_vnet_hw_interface, vnm, - &hw_if_index)) - continue; - if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, vnm, - &rx_sw_if_index)) - continue; - if (unformat (line_input, "del")) - { - is_add = 0; - continue; - } - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - } - - if (rx_sw_if_index == ~0) - return clib_error_return (0, "missing rx interface"); - if (hw_if_index == ~0) - return clib_error_return (0, "missing hw interface"); - - u32 t_index = vnet_vxlan_get_tunnel_index (rx_sw_if_index);; - if (t_index == ~0) - return clib_error_return (0, "%U is not a vxlan tunnel", - format_vnet_sw_if_index_name, vnm, - rx_sw_if_index); - - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index); - - if (!ip46_address_is_ip4 (&t->dst)) - return clib_error_return (0, "currently only IPV4 tunnels are supported"); - - vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index); - ip4_main_t *im = &ip4_main; - u32 rx_fib_index = - vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index); - - if (t->encap_fib_index != rx_fib_index) - return clib_error_return (0, "interface/tunnel fib mismatch"); - - if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, is_add)) - return clib_error_return (0, "error %s flow", - is_add ? 
"enabling" : "disabling"); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (vxlan_offload_command, static) = { - .path = "set flow-offload vxlan", - .short_help = - "set flow-offload vxlan hw <interface-name> rx <tunnel-name> [del]", - .function = vxlan_offload_command_fn, -}; -/* *INDENT-ON* */ - -#define VXLAN_HASH_NUM_BUCKETS (2 * 1024) -#define VXLAN_HASH_MEMORY_SIZE (1 << 20) - -clib_error_t * -vxlan_init (vlib_main_t * vm) -{ - vxlan_main_t *vxm = &vxlan_main; - - vxm->vnet_main = vnet_get_main (); - vxm->vlib_main = vm; - - vnet_flow_get_range (vxm->vnet_main, "vxlan", 1024 * 1024, - &vxm->flow_id_start); - - vxm->bm_ip4_bypass_enabled_by_sw_if = 0; - vxm->bm_ip6_bypass_enabled_by_sw_if = 0; - - /* initialize the ip6 hash */ - clib_bihash_init_16_8 (&vxm->vxlan4_tunnel_by_key, "vxlan4", - VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE); - clib_bihash_init_24_8 (&vxm->vxlan6_tunnel_by_key, "vxlan6", - VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE); - vxm->vtep_table = vtep_table_create (); - vxm->mcast_shared = hash_create_mem (0, - sizeof (ip46_address_t), - sizeof (mcast_shared_t)); - - fib_node_register_type (FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft); - - return 0; -} - -VLIB_INIT_FUNCTION (vxlan_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/vxlan/vxlan.h b/src/vnet/vxlan/vxlan.h deleted file mode 100644 index fa47605e42d..00000000000 --- a/src/vnet/vxlan/vxlan.h +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef included_vnet_vxlan_h -#define included_vnet_vxlan_h - -#include <vppinfra/error.h> -#include <vppinfra/hash.h> -#include <vppinfra/bihash_16_8.h> -#include <vppinfra/bihash_24_8.h> -#include <vnet/vnet.h> -#include <vnet/ip/ip.h> -#include <vnet/ip/vtep.h> -#include <vnet/l2/l2_input.h> -#include <vnet/l2/l2_output.h> -#include <vnet/l2/l2_bd.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan/vxlan_packet.h> -#include <vnet/ip/ip4_packet.h> -#include <vnet/ip/ip6_packet.h> -#include <vnet/udp/udp_packet.h> -#include <vnet/dpo/dpo.h> -#include <vnet/adj/adj_types.h> - -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { - ip4_header_t ip4; /* 20 bytes */ - udp_header_t udp; /* 8 bytes */ - vxlan_header_t vxlan; /* 8 bytes */ -}) ip4_vxlan_header_t; - -typedef CLIB_PACKED (struct { - ip6_header_t ip6; /* 40 bytes */ - udp_header_t udp; /* 8 bytes */ - vxlan_header_t vxlan; /* 8 bytes */ -}) ip6_vxlan_header_t; -/* *INDENT-ON* */ - -/* -* Key fields: remote ip, vni on incoming VXLAN packet -* all fields in NET byte order -*/ -typedef clib_bihash_kv_16_8_t vxlan4_tunnel_key_t; - -/* -* Key fields: remote ip, vni and fib index on incoming VXLAN packet -* ip, vni fields in NET byte order -* fib index field in host byte order -*/ -typedef clib_bihash_kv_24_8_t vxlan6_tunnel_key_t; - -typedef union -{ - struct - { - u32 sw_if_index; /* unicast - input interface / mcast - stats interface */ - union - { - struct /* unicast action */ - { - u16 next_index; - u8 error; - }; - ip4_address_t local_ip; /* used as dst ip for mcast pkts to 
assign them to unicast tunnel */ - }; - }; - u64 as_u64; -} vxlan_decap_info_t; - -typedef struct -{ - /* Required for pool_get_aligned */ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /* FIB DPO for IP forwarding of VXLAN encap packet */ - dpo_id_t next_dpo; - - /* vxlan VNI in HOST byte order */ - u32 vni; - - /* tunnel src and dst addresses */ - ip46_address_t src; - ip46_address_t dst; - - /* udp-ports */ - u16 src_port; - u16 dst_port; - - /* mcast packet output intfc index (used only if dst is mcast) */ - u32 mcast_sw_if_index; - - /* decap next index */ - u16 decap_next_index; - - /* The FIB index for src/dst addresses */ - u32 encap_fib_index; - - /* vnet intfc index */ - u32 sw_if_index; - u32 hw_if_index; - - /** - * Linkage into the FIB object graph - */ - fib_node_t node; - - /* - * The FIB entry for (depending on VXLAN tunnel is unicast or mcast) - * sending unicast VXLAN encap packets or receiving mcast VXLAN packets - */ - fib_node_index_t fib_entry_index; - adj_index_t mcast_adj_index; - - /** - * The tunnel is a child of the FIB entry for its destination. This is - * so it receives updates when the forwarding information for that entry - * changes. - * The tunnels sibling index on the FIB entry's dependency list. 
- */ - u32 sibling_index; - - u32 flow_index; /* infra flow index */ - u32 dev_instance; /* Real device instance in tunnel vector */ - u32 user_instance; /* Instance name being shown to user */ - - VNET_DECLARE_REWRITE; -} vxlan_tunnel_t; - -#define foreach_vxlan_input_next \ -_(DROP, "error-drop") \ -_(L2_INPUT, "l2-input") - -typedef enum -{ -#define _(s,n) VXLAN_INPUT_NEXT_##s, - foreach_vxlan_input_next -#undef _ - VXLAN_INPUT_N_NEXT, -} vxlan_input_next_t; - -typedef enum -{ -#define vxlan_error(n,s) VXLAN_ERROR_##n, -#include <vnet/vxlan/vxlan_error.def> -#undef vxlan_error - VXLAN_N_ERROR, -} vxlan_input_error_t; - -typedef struct -{ - /* vector of encap tunnel instances */ - vxlan_tunnel_t *tunnels; - - /* lookup tunnel by key */ - clib_bihash_16_8_t - vxlan4_tunnel_by_key; /* keyed on ipv4.dst + src_port + fib + vni */ - clib_bihash_24_8_t - vxlan6_tunnel_by_key; /* keyed on ipv6.dst + src_port + fib + vni */ - - /* local VTEP IPs ref count used by vxlan-bypass node to check if - received VXLAN packet DIP matches any local VTEP address */ - vtep_table_t vtep_table; - - /* mcast shared info */ - uword *mcast_shared; /* keyed on mcast ip46 addr */ - - /* Mapping from sw_if_index to tunnel index */ - u32 *tunnel_index_by_sw_if_index; - - /* graph node state */ - uword *bm_ip4_bypass_enabled_by_sw_if; - uword *bm_ip6_bypass_enabled_by_sw_if; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; - - /* Record used instances */ - uword *instance_used; - u32 flow_id_start; - - /* cache for last 8 vxlan tunnel */ - vtep4_cache_t vtep4_u512; - -} vxlan_main_t; - -extern vxlan_main_t vxlan_main; - -extern vlib_node_registration_t vxlan4_input_node; -extern vlib_node_registration_t vxlan6_input_node; -extern vlib_node_registration_t vxlan4_encap_node; -extern vlib_node_registration_t vxlan6_encap_node; -extern vlib_node_registration_t vxlan4_flow_input_node; - -u8 *format_vxlan_encap_trace (u8 * s, va_list * args); - -typedef struct -{ - u8 
is_add; - - /* we normally use is_ip4, but since this adds to the - * structure, this seems less of a breaking change */ - u8 is_ip6; - u8 is_l3; - u32 instance; - ip46_address_t src, dst; - u32 mcast_sw_if_index; - u32 encap_fib_index; - u32 decap_next_index; - u32 vni; - u16 src_port; - u16 dst_port; -} vnet_vxlan_add_del_tunnel_args_t; - -int vnet_vxlan_add_del_tunnel - (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp); - -void vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable); - -int vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_imdex, int is_add); - -u32 vnet_vxlan_get_tunnel_index (u32 sw_if_index); -#endif /* included_vnet_vxlan_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/vxlan/vxlan_api.c b/src/vnet/vxlan/vxlan_api.c deleted file mode 100644 index 56fd654951f..00000000000 --- a/src/vnet/vxlan/vxlan_api.c +++ /dev/null @@ -1,376 +0,0 @@ -/* - *------------------------------------------------------------------ - * vxlan_api.c - vxlan api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include <vnet/vnet.h> -#include <vlibmemory/api.h> - -#include <vnet/interface.h> -#include <vnet/api_errno.h> -#include <vnet/feature/feature.h> -#include <vnet/vxlan/vxlan.h> -#include <vnet/fib/fib_table.h> -#include <vnet/ip/ip_types_api.h> -#include <vnet/udp/udp_local.h> -#include <vnet/format_fns.h> -#include <vxlan/vxlan.api_enum.h> -#include <vxlan/vxlan.api_types.h> - -static u16 msg_id_base; - -#define REPLY_MSG_ID_BASE msg_id_base -#include <vlibapi/api_helper_macros.h> - -static void -vl_api_vxlan_offload_rx_t_handler (vl_api_vxlan_offload_rx_t * mp) -{ - vl_api_vxlan_offload_rx_reply_t *rmp; - int rv = 0; - u32 hw_if_index = ntohl (mp->hw_if_index); - u32 sw_if_index = ntohl (mp->sw_if_index); - - if (!vnet_hw_interface_is_valid (vnet_get_main (), hw_if_index)) - { - rv = VNET_API_ERROR_NO_SUCH_ENTRY; - goto err; - } - VALIDATE_SW_IF_INDEX (mp); - - u32 t_index = vnet_vxlan_get_tunnel_index (sw_if_index); - if (t_index == ~0) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX_2; - goto err; - } - - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index); - if (!ip46_address_is_ip4 (&t->dst)) - { - rv = VNET_API_ERROR_INVALID_ADDRESS_FAMILY; - goto err; - } - - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index); - ip4_main_t *im = &ip4_main; - u32 rx_fib_index = - vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index); - - if (t->encap_fib_index != rx_fib_index) - { - rv = VNET_API_ERROR_NO_SUCH_FIB; - goto err; - } - - if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, mp->enable)) - { - rv = VNET_API_ERROR_UNSPECIFIED; - goto err; - } - BAD_SW_IF_INDEX_LABEL; -err: - - REPLY_MACRO (VL_API_VXLAN_OFFLOAD_RX_REPLY); -} - -static void - vl_api_sw_interface_set_vxlan_bypass_t_handler - (vl_api_sw_interface_set_vxlan_bypass_t * mp) -{ - 
vl_api_sw_interface_set_vxlan_bypass_reply_t *rmp; - int rv = 0; - u32 sw_if_index = ntohl (mp->sw_if_index); - - VALIDATE_SW_IF_INDEX (mp); - - vnet_int_vxlan_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable); - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_BYPASS_REPLY); -} - -static int -vxlan_add_del_tunnel_clean_input (vnet_vxlan_add_del_tunnel_args_t *a, - u32 encap_vrf_id) -{ - a->is_ip6 = !ip46_address_is_ip4 (&a->src); - - a->encap_fib_index = fib_table_find (fib_ip_proto (a->is_ip6), encap_vrf_id); - if (a->encap_fib_index == ~0) - { - return VNET_API_ERROR_NO_SUCH_FIB; - } - - if (ip46_address_is_ip4 (&a->src) != ip46_address_is_ip4 (&a->dst)) - { - return VNET_API_ERROR_INVALID_VALUE; - } - - /* Check src & dst are different */ - if (ip46_address_cmp (&a->dst, &a->src) == 0) - { - return VNET_API_ERROR_SAME_SRC_DST; - } - if (ip46_address_is_multicast (&a->dst) && - !vnet_sw_if_index_is_api_valid (a->mcast_sw_if_index)) - { - return VNET_API_ERROR_INVALID_SW_IF_INDEX; - } - return 0; -} - -static void -vl_api_vxlan_add_del_tunnel_t_handler (vl_api_vxlan_add_del_tunnel_t *mp) -{ - vl_api_vxlan_add_del_tunnel_reply_t *rmp; - u32 sw_if_index = ~0; - int rv = 0; - - vnet_vxlan_add_del_tunnel_args_t a = { - .is_add = mp->is_add, - .instance = ntohl (mp->instance), - .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index), - .decap_next_index = ntohl (mp->decap_next_index), - .vni = ntohl (mp->vni), - }; - ip_address_decode (&mp->src_address, &a.src); - ip_address_decode (&mp->dst_address, &a.dst); - - rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id)); - if (rv) - goto out; - a.dst_port = a.is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan, - a.src_port = a.is_ip6 ? 
UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan, - rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index); - -out: - REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY, - ({ - rmp->sw_if_index = ntohl (sw_if_index); - })); -} - -static void -vl_api_vxlan_add_del_tunnel_v2_t_handler (vl_api_vxlan_add_del_tunnel_v2_t *mp) -{ - vl_api_vxlan_add_del_tunnel_v2_reply_t *rmp; - u32 sw_if_index = ~0; - int rv = 0; - - vnet_vxlan_add_del_tunnel_args_t a = { - .is_add = mp->is_add, - .instance = ntohl (mp->instance), - .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index), - .decap_next_index = ntohl (mp->decap_next_index), - .vni = ntohl (mp->vni), - .dst_port = ntohs (mp->dst_port), - .src_port = ntohs (mp->src_port), - }; - - ip_address_decode (&mp->src_address, &a.src); - ip_address_decode (&mp->dst_address, &a.dst); - - rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id)); - if (rv) - goto out; - rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index); -out: - REPLY_MACRO2 (VL_API_VXLAN_ADD_DEL_TUNNEL_V2_REPLY, - ({ rmp->sw_if_index = ntohl (sw_if_index); })); -} - -static void -vl_api_vxlan_add_del_tunnel_v3_t_handler (vl_api_vxlan_add_del_tunnel_v3_t *mp) -{ - vl_api_vxlan_add_del_tunnel_v3_reply_t *rmp; - u32 sw_if_index = ~0; - int rv = 0; - - vnet_vxlan_add_del_tunnel_args_t a = { - .is_add = mp->is_add, - .instance = ntohl (mp->instance), - .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index), - .decap_next_index = ntohl (mp->decap_next_index), - .vni = ntohl (mp->vni), - .dst_port = ntohs (mp->dst_port), - .src_port = ntohs (mp->src_port), - .is_l3 = mp->is_l3, - }; - - ip_address_decode (&mp->src_address, &a.src); - ip_address_decode (&mp->dst_address, &a.dst); - - rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id)); - if (rv) - goto out; - rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index); -out: - REPLY_MACRO2 (VL_API_VXLAN_ADD_DEL_TUNNEL_V3_REPLY, - ({ rmp->sw_if_index = ntohl (sw_if_index); })); -} - -static void send_vxlan_tunnel_details - 
(vxlan_tunnel_t * t, vl_api_registration_t * reg, u32 context) -{ - vl_api_vxlan_tunnel_details_t *rmp; - ip4_main_t *im4 = &ip4_main; - ip6_main_t *im6 = &ip6_main; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - clib_memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_TUNNEL_DETAILS); - - ip_address_encode (&t->src, IP46_TYPE_ANY, &rmp->src_address); - ip_address_encode (&t->dst, IP46_TYPE_ANY, &rmp->dst_address); - - if (ip46_address_is_ip4 (&t->dst)) - rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id); - else - rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id); - - rmp->instance = htonl (t->user_instance); - rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index); - rmp->vni = htonl (t->vni); - rmp->decap_next_index = htonl (t->decap_next_index); - rmp->sw_if_index = htonl (t->sw_if_index); - rmp->context = context; - - vl_api_send_msg (reg, (u8 *) rmp); -} - -static void vl_api_vxlan_tunnel_dump_t_handler - (vl_api_vxlan_tunnel_dump_t * mp) -{ - vl_api_registration_t *reg; - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t; - u32 sw_if_index; - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - pool_foreach (t, vxm->tunnels) - send_vxlan_tunnel_details(t, reg, mp->context); - } - else - { - if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) || - (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_vxlan_tunnel_details (t, reg, mp->context); - } -} - -static void -send_vxlan_tunnel_v2_details (vxlan_tunnel_t *t, vl_api_registration_t *reg, - u32 context) -{ - vl_api_vxlan_tunnel_v2_details_t *rmp; - ip4_main_t *im4 = &ip4_main; - ip6_main_t *im6 = &ip6_main; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - clib_memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs 
(REPLY_MSG_ID_BASE + VL_API_VXLAN_TUNNEL_V2_DETAILS); - - ip_address_encode (&t->src, IP46_TYPE_ANY, &rmp->src_address); - ip_address_encode (&t->dst, IP46_TYPE_ANY, &rmp->dst_address); - rmp->src_port = htons (t->src_port); - rmp->dst_port = htons (t->dst_port); - - if (ip46_address_is_ip4 (&t->dst)) - rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id); - else - rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id); - - rmp->instance = htonl (t->user_instance); - rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index); - rmp->vni = htonl (t->vni); - rmp->decap_next_index = htonl (t->decap_next_index); - rmp->sw_if_index = htonl (t->sw_if_index); - rmp->context = context; - - vl_api_send_msg (reg, (u8 *) rmp); -} - -static void -vl_api_vxlan_tunnel_v2_dump_t_handler (vl_api_vxlan_tunnel_v2_dump_t *mp) -{ - vl_api_registration_t *reg; - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t; - u32 sw_if_index; - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - pool_foreach (t, vxm->tunnels) - send_vxlan_tunnel_v2_details (t, reg, mp->context); - } - else - { - if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) || - (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_vxlan_tunnel_v2_details (t, reg, mp->context); - } -} - -#include <vxlan/vxlan.api.c> -static clib_error_t * -vxlan_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = vlibapi_get_main (); - - vl_api_increase_msg_trace_size (am, VL_API_VXLAN_ADD_DEL_TUNNEL, - 16 * sizeof (u32)); - - /* - * Set up the (msg_name, crc, message-id) table - */ - msg_id_base = setup_message_id_table (); - - return 0; -} - -VLIB_API_INIT_FUNCTION (vxlan_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: 
- */ diff --git a/src/vnet/vxlan/vxlan_error.def b/src/vnet/vxlan/vxlan_error.def deleted file mode 100644 index 17f905950f5..00000000000 --- a/src/vnet/vxlan/vxlan_error.def +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -vxlan_error (DECAPSULATED, "good packets decapsulated") -vxlan_error (NO_SUCH_TUNNEL, "no such tunnel packets") -vxlan_error (BAD_FLAGS, "packets with bad flags field in vxlan header") diff --git a/src/vnet/vxlan/vxlan_packet.h b/src/vnet/vxlan/vxlan_packet.h deleted file mode 100644 index d1d1ed813e5..00000000000 --- a/src/vnet/vxlan/vxlan_packet.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __included_vxlan_packet_h__ -#define __included_vxlan_packet_h__ 1 - -/* - * From RFC-7348 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |R|R|R|R|I|R|R|R| Reserved | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | VXLAN Network Identifier (VNI) | Reserved | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * VXLAN Header: This is an 8-byte field that has: - * - * - Flags (8 bits): where the I flag MUST be set to 1 for a valid - * VXLAN Network ID (VNI). The other 7 bits (designated "R") are - * reserved fields and MUST be set to zero on transmission and - * ignored on receipt. - * - * - VXLAN Segment ID/VXLAN Network Identifier (VNI): this is a - * 24-bit value used to designate the individual VXLAN overlay - * network on which the communicating VMs are situated. VMs in - * different VXLAN overlay networks cannot communicate with each - * other. - * - * - Reserved fields (24 bits and 8 bits): MUST be set to zero on - * transmission and ignored on receipt. - * - */ - -typedef struct -{ - u8 flags; - u8 res1; - u8 res2; - u8 res3; - u32 vni_reserved; -} vxlan_header_t; - -#define VXLAN_FLAGS_I 0x08 - -static inline u32 -vnet_get_vni (vxlan_header_t * h) -{ - u32 vni_reserved_host_byte_order; - - vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved); - return vni_reserved_host_byte_order >> 8; -} - -static inline void -vnet_set_vni_and_flags (vxlan_header_t * h, u32 vni) -{ - h->vni_reserved = clib_host_to_net_u32 (vni << 8); - *(u32 *) h = 0; - h->flags = VXLAN_FLAGS_I; -} - -#endif /* __included_vxlan_packet_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ |