diff options
Diffstat (limited to 'src/vnet/udp')
-rw-r--r-- | src/vnet/udp/udp.api | 2 | ||||
-rw-r--r-- | src/vnet/udp/udp.c | 460 | ||||
-rw-r--r-- | src/vnet/udp/udp.h | 121 | ||||
-rw-r--r-- | src/vnet/udp/udp_api.c | 23 | ||||
-rw-r--r-- | src/vnet/udp/udp_cli.c | 231 | ||||
-rw-r--r-- | src/vnet/udp/udp_encap.c | 57 | ||||
-rw-r--r-- | src/vnet/udp/udp_encap.h | 3 | ||||
-rw-r--r-- | src/vnet/udp/udp_encap_node.c | 138 | ||||
-rw-r--r-- | src/vnet/udp/udp_error.def | 23 | ||||
-rw-r--r-- | src/vnet/udp/udp_inlines.h | 111 | ||||
-rw-r--r-- | src/vnet/udp/udp_input.c | 88 | ||||
-rw-r--r-- | src/vnet/udp/udp_local.c | 131 | ||||
-rw-r--r-- | src/vnet/udp/udp_output.c | 254 |
13 files changed, 1232 insertions, 410 deletions
diff --git a/src/vnet/udp/udp.api b/src/vnet/udp/udp.api index 02176be7c2b..6b468be461a 100644 --- a/src/vnet/udp/udp.api +++ b/src/vnet/udp/udp.api @@ -32,7 +32,7 @@ import "vnet/ip/ip_types.api"; * @param dst_ip - Encap destination address * @param src_ip - Encap source address * @param dst_port - Encap destination port - * @param src_port - Encap source port + * @param src_port - Encap source port, 0 for entopy per rfc7510 * @param id - VPP assigned id; ignored in add message, set in dump */ typedef udp_encap diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c index 40e0053bb96..9c1121f7cfb 100644 --- a/src/vnet/udp/udp.c +++ b/src/vnet/udp/udp.c @@ -23,97 +23,63 @@ udp_main_t udp_main; static void -udp_connection_register_port (vlib_main_t * vm, u16 lcl_port, u8 is_ip4) +udp_connection_register_port (u16 lcl_port, u8 is_ip4) { udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; u16 *n; - pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - if (!pi) - { - udp_add_dst_port (um, lcl_port, 0, is_ip4); - pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - pi->n_connections = 1; - } - else - { - pi->n_connections += 1; - /* Do not return. The fact that the pi is valid does not mean - * it's up to date */ - } + /* Setup udp protocol -> next index sparse vector mapping. Do not setup + * udp_dst_port_info_t as that is used to distinguish between external + * and transport consumed ports */ - pi->node_index = is_ip4 ? udp4_input_node.index : udp6_input_node.index; - pi->next_index = um->local_to_input_edge[is_ip4]; - - /* Setup udp protocol -> next index sparse vector mapping. */ if (is_ip4) - n = sparse_vec_validate (um->next_by_dst_port4, - clib_host_to_net_u16 (lcl_port)); + n = sparse_vec_validate (um->next_by_dst_port4, lcl_port); else - n = sparse_vec_validate (um->next_by_dst_port6, - clib_host_to_net_u16 (lcl_port)); + n = sparse_vec_validate (um->next_by_dst_port6, lcl_port); + + n[0] = um->local_to_input_edge[is_ip4]; - n[0] = pi->next_index; + __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1, + __ATOMIC_RELAXED); +} + +void +udp_connection_share_port (u16 lcl_port, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1, + __ATOMIC_RELAXED); } static void udp_connection_unregister_port (u16 lcl_port, u8 is_ip4) { udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; + u16 *n; - pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - if (!pi) + /* Needed because listeners are not tracked as local endpoints */ + if (__atomic_sub_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1, + __ATOMIC_RELAXED)) return; - if (!pi->n_connections) - { - clib_warning ("no connections using port %u", lcl_port); - return; - } - - if (!clib_atomic_sub_fetch (&pi->n_connections, 1)) - udp_unregister_dst_port (0, lcl_port, is_ip4); -} - -void -udp_connection_share_port (u16 lcl_port, u8 is_ip4) -{ - udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; + if (is_ip4) + n = sparse_vec_validate (um->next_by_dst_port4, lcl_port); + else + n = sparse_vec_validate (um->next_by_dst_port6, lcl_port); - /* Done without a lock but the operation is atomic. Writers to pi hash - * table and vector should be guarded by a barrier sync */ - pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - clib_atomic_fetch_add_rel (&pi->n_connections, 1); + n[0] = UDP_NO_NODE_SET; } udp_connection_t * udp_connection_alloc (u32 thread_index) { - udp_main_t *um = &udp_main; + udp_worker_t *wrk = udp_worker_get (thread_index); udp_connection_t *uc; - u32 will_expand = 0; - pool_get_aligned_will_expand (um->connections[thread_index], will_expand, - CLIB_CACHE_LINE_BYTES); - if (PREDICT_FALSE (will_expand)) - { - clib_spinlock_lock_if_init (&udp_main.peekers_write_locks - [thread_index]); - pool_get_aligned (udp_main.connections[thread_index], uc, - CLIB_CACHE_LINE_BYTES); - clib_spinlock_unlock_if_init (&udp_main.peekers_write_locks - [thread_index]); - } - else - { - pool_get_aligned (um->connections[thread_index], uc, - CLIB_CACHE_LINE_BYTES); - } + pool_get_aligned_safe (wrk->connections, uc, CLIB_CACHE_LINE_BYTES); + clib_memset (uc, 0, sizeof (*uc)); - uc->c_c_index = uc - um->connections[thread_index]; + uc->c_c_index = uc - wrk->connections; uc->c_thread_index = thread_index; uc->c_proto = TRANSPORT_PROTO_UDP; return uc; @@ -122,20 +88,20 @@ udp_connection_alloc (u32 thread_index) void udp_connection_free (udp_connection_t * uc) { - u32 thread_index = uc->c_thread_index; + udp_worker_t *wrk = udp_worker_get (uc->c_thread_index); + clib_spinlock_free (&uc->rx_lock); if (CLIB_DEBUG) clib_memset (uc, 0xFA, sizeof (*uc)); - pool_put (udp_main.connections[thread_index], uc); + pool_put (wrk->connections, uc); } static void udp_connection_cleanup (udp_connection_t * uc) { - transport_endpoint_cleanup (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip, - uc->c_lcl_port); - udp_connection_unregister_port (clib_net_to_host_u16 (uc->c_lcl_port), - uc->c_is_ip4); + transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip, + uc->c_lcl_port); + udp_connection_unregister_port (uc->c_lcl_port, uc->c_is_ip4); udp_connection_free (uc); } @@ -146,6 +112,38 @@ udp_connection_delete (udp_connection_t * uc) udp_connection_cleanup (uc); } +static void +udp_handle_cleanups (void *args) +{ + u32 thread_index = (u32) pointer_to_uword (args); + udp_connection_t *uc; + udp_worker_t *wrk; + u32 *uc_index; + + wrk = udp_worker_get (thread_index); + vec_foreach (uc_index, wrk->pending_cleanups) + { + uc = udp_connection_get (*uc_index, thread_index); + udp_connection_delete (uc); + } + vec_reset_length (wrk->pending_cleanups); +} + +static void +udp_connection_program_cleanup (udp_connection_t *uc) +{ + uword thread_index = uc->c_thread_index; + udp_worker_t *wrk; + + wrk = udp_worker_get (uc->c_thread_index); + vec_add1 (wrk->pending_cleanups, uc->c_c_index); + + if (vec_len (wrk->pending_cleanups) == 1) + session_send_rpc_evt_to_thread_force ( + thread_index, udp_handle_cleanups, + uword_to_pointer (thread_index, void *)); +} + static u8 udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4) { @@ -153,8 +151,7 @@ udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4) udp_dst_port_info_t *pi; pi = udp_get_dst_port_info (um, lcl_port, is_ip4); - return (pi && !pi->n_connections - && udp_is_valid_dst_port (lcl_port, is_ip4)); + return (pi && udp_is_valid_dst_port (lcl_port, is_ip4)); } static u16 @@ -165,18 +162,15 @@ udp_default_mtu (udp_main_t * um, u8 is_ip4) } static u32 -udp_session_bind (u32 session_index, transport_endpoint_t * lcl) +udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl) { udp_main_t *um = vnet_get_udp_main (); - vlib_main_t *vm = vlib_get_main (); transport_endpoint_cfg_t *lcl_ext; udp_connection_t *listener; - u16 lcl_port_ho; void *iface_ip; - lcl_port_ho = clib_net_to_host_u16 (lcl->port); - - if (udp_connection_port_used_extern (lcl_port_ho, lcl->is_ip4)) + if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl->port), + lcl->is_ip4)) { clib_warning ("port already used"); return SESSION_E_PORTINUSE; @@ -200,7 +194,8 @@ udp_session_bind (u32 session_index, transport_endpoint_t * lcl) listener->c_proto = TRANSPORT_PROTO_UDP; listener->c_s_index = session_index; listener->c_fib_index = lcl->fib_index; - listener->mss = udp_default_mtu (um, listener->c_is_ip4); + listener->mss = + lcl->mss ? lcl->mss : udp_default_mtu (um, listener->c_is_ip4); listener->flags |= UDP_CONN_F_OWNS_PORT | UDP_CONN_F_LISTEN; lcl_ext = (transport_endpoint_cfg_t *) lcl; if (lcl_ext->transport_flags & TRANSPORT_CFG_F_CONNECTED) @@ -208,8 +203,10 @@ udp_session_bind (u32 session_index, transport_endpoint_t * lcl) else listener->c_flags |= TRANSPORT_CONNECTION_F_CLESS; clib_spinlock_init (&listener->rx_lock); + if (!um->csum_offload) + listener->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD; - udp_connection_register_port (vm, lcl_port_ho, lcl->is_ip4); + udp_connection_register_port (listener->c_lcl_port, lcl->is_ip4); return listener->c_c_index; } @@ -220,8 +217,7 @@ udp_session_unbind (u32 listener_index) udp_connection_t *listener; listener = udp_listener_get (listener_index); - udp_connection_unregister_port (clib_net_to_host_u16 (listener->c_lcl_port), - listener->c_is_ip4); + udp_connection_unregister_port (listener->c_lcl_port, listener->c_is_ip4); clib_spinlock_free (&listener->rx_lock); pool_put (um->listener_pool, listener); return 0; @@ -236,30 +232,127 @@ udp_session_get_listener (u32 listener_index) return &us->connection; } +always_inline u16 +udp_compute_checksum (vlib_main_t *vm, vlib_buffer_t *b, u8 csum_offload, + u8 is_ip4) +{ + u16 csum = 0; + + if (csum_offload) + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); + else + { + if (is_ip4) + csum = + ip4_tcp_udp_compute_checksum (vm, b, vlib_buffer_get_current (b)); + else + { + int bogus = 0; + csum = ip6_tcp_udp_icmp_compute_checksum ( + vm, b, vlib_buffer_get_current (b), &bogus); + } + } + + return csum; +} + +always_inline u32 +udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, + u8 is_cless) +{ + udp_header_t *uh; + + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + /* reuse tcp medatada for now */ + vnet_buffer (b)->tcp.connection_index = uc->c_c_index; + + if (!is_cless) + { + uh = vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port); + + if (uc->c_is_ip4) + vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4, + IP_PROTOCOL_UDP, udp_csum_offload (uc), + 0 /* is_df */, uc->c_dscp); + else + vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6, + IP_PROTOCOL_UDP); + + vnet_buffer (b)->tcp.flags = 0; + } + else + { + u8 *data = vlib_buffer_get_current (b); + session_dgram_hdr_t hdr; + + hdr = *(session_dgram_hdr_t *) (data - sizeof (hdr)); + + /* Local port assumed to be bound, not overwriting it */ + uh = vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port); + + if (uc->c_is_ip4) + vlib_buffer_push_ip4_custom (vm, b, &hdr.lcl_ip.ip4, &hdr.rmt_ip.ip4, + IP_PROTOCOL_UDP, udp_csum_offload (uc), + 0 /* is_df */, uc->c_dscp); + else + vlib_buffer_push_ip6 (vm, b, &hdr.lcl_ip.ip6, &hdr.rmt_ip.ip6, + IP_PROTOCOL_UDP); + + /* Not connected udp session. Mark buffer for custom handling in + * udp_output */ + vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN; + } + + uh->checksum = + udp_compute_checksum (vm, b, udp_csum_offload (uc), uc->c_is_ip4); + + return 0; +} + +always_inline void +udp_push_header_batch (udp_connection_t *uc, vlib_buffer_t **bs, u32 n_bufs, + u8 is_cless) +{ + vlib_main_t *vm = vlib_get_main (); + + while (n_bufs >= 4) + { + vlib_prefetch_buffer_header (bs[2], STORE); + vlib_prefetch_buffer_header (bs[3], STORE); + + udp_push_one_header (vm, uc, bs[0], is_cless); + udp_push_one_header (vm, uc, bs[1], is_cless); + + n_bufs -= 2; + bs += 2; + } + while (n_bufs) + { + if (n_bufs > 1) + vlib_prefetch_buffer_header (bs[1], STORE); + + udp_push_one_header (vm, uc, bs[0], is_cless); + + n_bufs -= 1; + bs += 1; + } +} + static u32 -udp_push_header (transport_connection_t * tc, vlib_buffer_t * b) +udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs) { udp_connection_t *uc; - vlib_main_t *vm = vlib_get_main (); uc = udp_connection_from_transport (tc); - - vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, 1); - if (tc->is_ip4) - vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4, - IP_PROTOCOL_UDP, 1 /* csum offload */ , - 0 /* is_df */ ); + if (uc->flags & UDP_CONN_F_CONNECTED) + udp_push_header_batch (uc, bs, n_bufs, 0 /* is_cless */); else - vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6, - IP_PROTOCOL_UDP); - vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; - vnet_buffer (b)->sw_if_index[VLIB_TX] = uc->c_fib_index; - b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + udp_push_header_batch (uc, bs, n_bufs, 1 /* is_cless */); if (PREDICT_FALSE (uc->flags & UDP_CONN_F_CLOSING)) { - if (!transport_max_tx_dequeue (&uc->connection)) - udp_connection_delete (uc); + if (!transport_tx_fifo_has_dgram (&uc->connection)) + udp_connection_program_cleanup (uc); } return 0; @@ -281,11 +374,11 @@ udp_session_close (u32 connection_index, u32 thread_index) udp_connection_t *uc; uc = udp_connection_get (connection_index, thread_index); - if (!uc) + if (!uc || (uc->flags & UDP_CONN_F_MIGRATED)) return; - if (!transport_max_tx_dequeue (&uc->connection)) - udp_connection_delete (uc); + if (!transport_tx_fifo_has_dgram (&uc->connection)) + udp_connection_program_cleanup (uc); else uc->flags |= UDP_CONN_F_CLOSING; } @@ -323,57 +416,42 @@ udp_session_send_params (transport_connection_t * tconn, static int udp_open_connection (transport_endpoint_cfg_t * rmt) { - vlib_main_t *vm = vlib_get_main (); - u32 thread_index = vm->thread_index; udp_main_t *um = &udp_main; ip46_address_t lcl_addr; udp_connection_t *uc; + u32 thread_index; u16 lcl_port; int rv; rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_UDP, rmt, &lcl_addr, &lcl_port); if (rv) - { - if (rv != SESSION_E_PORTINUSE) - return rv; - - if (udp_connection_port_used_extern (lcl_port, rmt->is_ip4)) - return SESSION_E_PORTINUSE; - - /* If port in use, check if 5-tuple is also in use */ - if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip, - lcl_port, rmt->port, TRANSPORT_PROTO_UDP, - rmt->is_ip4)) - return SESSION_E_PORTINUSE; - - /* 5-tuple is available so increase lcl endpoint refcount and proceed - * with connection allocation */ - transport_share_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, - lcl_port); - goto conn_alloc; - } + return rv; - if (udp_is_valid_dst_port (lcl_port, rmt->is_ip4)) + if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port), + rmt->is_ip4)) { /* If specific source port was requested abort */ if (rmt->peer.port) - return SESSION_E_PORTINUSE; + { + transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, + lcl_port); + return SESSION_E_PORTINUSE; + } /* Try to find a port that's not used */ - while (udp_is_valid_dst_port (lcl_port, rmt->is_ip4)) + while (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port), + rmt->is_ip4)) { - lcl_port = transport_alloc_local_port (TRANSPORT_PROTO_UDP, - &lcl_addr); - if (lcl_port < 1) + transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, + lcl_port); + lcl_port = + transport_alloc_local_port (TRANSPORT_PROTO_UDP, &lcl_addr, rmt); + if ((int) lcl_port < 1) return SESSION_E_PORTINUSE; } } -conn_alloc: - - udp_connection_register_port (vm, lcl_port, rmt->is_ip4); - /* We don't poll main thread if we have workers */ thread_index = transport_cl_thread (); @@ -381,11 +459,14 @@ conn_alloc: ip_copy (&uc->c_rmt_ip, &rmt->ip, rmt->is_ip4); ip_copy (&uc->c_lcl_ip, &lcl_addr, rmt->is_ip4); uc->c_rmt_port = rmt->port; - uc->c_lcl_port = clib_host_to_net_u16 (lcl_port); + uc->c_lcl_port = lcl_port; uc->c_is_ip4 = rmt->is_ip4; uc->c_proto = TRANSPORT_PROTO_UDP; uc->c_fib_index = rmt->fib_index; + uc->c_dscp = rmt->dscp; uc->mss = rmt->mss ? rmt->mss : udp_default_mtu (um, uc->c_is_ip4); + if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX) + uc->sw_if_index = rmt->peer.sw_if_index; uc->flags |= UDP_CONN_F_OWNS_PORT; if (rmt->transport_flags & TRANSPORT_CFG_F_CONNECTED) { @@ -396,6 +477,12 @@ conn_alloc: clib_spinlock_init (&uc->rx_lock); uc->c_flags |= TRANSPORT_CONNECTION_F_CLESS; } + if (!um->csum_offload) + uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD; + uc->next_node_index = rmt->next_node_index; + uc->next_node_opaque = rmt->next_node_opaque; + + udp_connection_register_port (uc->c_lcl_port, rmt->is_ip4); return uc->c_c_index; } @@ -445,8 +532,90 @@ format_udp_listener_session (u8 * s, va_list * args) return format (s, "%U", format_udp_connection, uc, verbose); } -/* *INDENT-OFF* */ +static void +udp_realloc_ports_sv (u16 **ports_nh_svp) +{ + u16 port, port_no, *ports_nh_sv, *mc; + u32 *ports = 0, *nh = 0, msum, i; + sparse_vec_header_t *h; + uword sv_index, *mb; + + ports_nh_sv = *ports_nh_svp; + + for (port = 1; port < 65535; port++) + { + port_no = clib_host_to_net_u16 (port); + + sv_index = sparse_vec_index (ports_nh_sv, port_no); + if (sv_index != SPARSE_VEC_INVALID_INDEX) + { + vec_add1 (ports, port_no); + vec_add1 (nh, ports_nh_sv[sv_index]); + } + } + + sparse_vec_free (ports_nh_sv); + + ports_nh_sv = + sparse_vec_new (/* elt bytes */ sizeof (ports_nh_sv[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + vec_resize (ports_nh_sv, 65535); + + for (port = 1; port < 65535; port++) + ports_nh_sv[port] = UDP_NO_NODE_SET; + + for (i = 0; i < vec_len (ports); i++) + ports_nh_sv[ports[i]] = nh[i]; + + h = sparse_vec_header (ports_nh_sv); + vec_foreach (mb, h->is_member_bitmap) + *mb = (uword) ~0; + + msum = 0; + vec_foreach (mc, h->member_counts) + { + *mc = msum; + msum += msum == 0 ? 63 : 64; + } + + vec_free (ports); + vec_free (nh); + + *ports_nh_svp = ports_nh_sv; +} + +static clib_error_t * +udp_enable_disable (vlib_main_t *vm, u8 is_en) +{ + udp_main_t *um = &udp_main; + + /* Not ideal. The sparse vector used to map ports to next nodes assumes + * only a few ports are ever used. When udp transport is enabled this does + * not hold and, to make matters worse, ports are consumed in a random + * order. + * + * This can lead to a lot of slow updates to internal data structures + * which in turn can slow udp connection allocations until all ports are + * eventually consumed. + * + * Consequently, reallocate sparse vector, preallocate all ports and have + * them point to UDP_NO_NODE_SET. We could consider switching the sparse + * vector to a preallocated vector but that would increase memory + * consumption for vpp deployments that do not rely on host stack. + */ + + udp_realloc_ports_sv (&um->next_by_dst_port4); + udp_realloc_ports_sv (&um->next_by_dst_port6); + + vec_validate (um->transport_ports_refcnt[0], 65535); + vec_validate (um->transport_ports_refcnt[1], 65535); + + return 0; +} + static const transport_proto_vft_t udp_proto = { + .enable = udp_enable_disable, .start_listen = udp_session_bind, .connect = udp_open_connection, .stop_listen = udp_session_unbind, @@ -467,7 +636,6 @@ static const transport_proto_vft_t udp_proto = { .service_type = TRANSPORT_SERVICE_CL, }, }; -/* *INDENT-ON* */ static clib_error_t * udp_init (vlib_main_t * vm) @@ -477,7 +645,6 @@ udp_init (vlib_main_t * vm) vlib_thread_main_t *tm = vlib_get_thread_main (); u32 num_threads; ip_protocol_info_t *pi; - int i; /* * Registrations @@ -490,28 +657,18 @@ udp_init (vlib_main_t * vm) pi->format_header = format_udp_header; pi->unformat_pg_edit = unformat_pg_udp_header; - /* Register as transport with URI */ + /* Register as transport with session layer */ transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto, - FIB_PROTOCOL_IP4, ip4_lookup_node.index); + FIB_PROTOCOL_IP4, udp4_output_node.index); transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto, - FIB_PROTOCOL_IP6, ip6_lookup_node.index); + FIB_PROTOCOL_IP6, udp6_output_node.index); /* * Initialize data structures */ num_threads = 1 /* main thread */ + tm->n_threads; - vec_validate (um->connections, num_threads - 1); - vec_validate (um->connection_peekers, num_threads - 1); - vec_validate (um->peekers_readers_locks, num_threads - 1); - vec_validate (um->peekers_write_locks, num_threads - 1); - - if (num_threads > 1) - for (i = 0; i < num_threads; i++) - { - clib_spinlock_init (&um->peekers_readers_locks[i]); - clib_spinlock_init (&um->peekers_write_locks[i]); - } + vec_validate (um->wrk, num_threads - 1); um->local_to_input_edge[UDP_IP4] = vlib_node_add_next (vm, udp4_local_node.index, udp4_input_node.index); @@ -519,16 +676,15 @@ udp_init (vlib_main_t * vm) vlib_node_add_next (vm, udp6_local_node.index, udp6_input_node.index); um->default_mtu = 1500; + um->csum_offload = 1; return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (udp_init) = { .runs_after = VLIB_INITS("ip_main_init", "ip4_lookup_init", "ip6_lookup_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h index 89539e58c6d..8e4e87f85a8 100644 --- a/src/vnet/udp/udp.h +++ b/src/vnet/udp/udp.h @@ -25,9 +25,11 @@ #include <vnet/ip/ip.h> #include <vnet/session/transport.h> +#define UDP_NO_NODE_SET ((u16) ~0) + typedef enum { -#define udp_error(n,s) UDP_ERROR_##n, +#define udp_error(f, n, s, d) UDP_ERROR_##f, #include <vnet/udp/udp_error.def> #undef udp_error UDP_N_ERROR, @@ -55,6 +57,24 @@ typedef enum udp_conn_flags_ #undef _ } udp_conn_flags_t; +#define foreach_udp_cfg_flag _ (NO_CSUM_OFFLOAD, "no-csum-offload") + +typedef enum udp_cfg_flag_bits_ +{ +#define _(sym, str) UDP_CFG_F_##sym##_BIT, + foreach_udp_cfg_flag +#undef _ + UDP_CFG_N_FLAG_BITS +} udp_cfg_flag_bits_e; + +typedef enum udp_cfg_flag_ +{ +#define _(sym, str) UDP_CFG_F_##sym = 1 << UDP_CFG_F_##sym##_BIT, + foreach_udp_cfg_flag +#undef _ + UDP_CFG_N_FLAGS +} __clib_packed udp_cfg_flags_t; + typedef struct { /** Required for pool_get_aligned */ @@ -62,9 +82,15 @@ typedef struct transport_connection_t connection; /**< must be first */ clib_spinlock_t rx_lock; /**< rx fifo lock */ u8 flags; /**< connection flags */ + udp_cfg_flags_t cfg_flags; /**< configuration flags */ u16 mss; /**< connection mss */ + u32 sw_if_index; /**< connection sw_if_index */ + u32 next_node_index; /**< Can be used to control next node in output */ + u32 next_node_opaque; /**< Opaque to pass to next node */ } udp_connection_t; +#define udp_csum_offload(uc) (!((uc)->cfg_flags & UDP_CFG_F_NO_CSUM_OFFLOAD)) + typedef struct { /* Name (a c string). */ @@ -79,9 +105,6 @@ typedef struct /* Next index for this type. */ u32 next_index; - /* UDP sessions refcount (not tunnels) */ - u32 n_connections; - /* Parser for packet generator edits for this protocol */ unformat_function_t *unformat_pg_edit; } udp_dst_port_info_t; @@ -93,6 +116,12 @@ typedef enum N_UDP_AF, } udp_af_t; +typedef struct udp_worker_ +{ + udp_connection_t *connections; + u32 *pending_cleanups; +} udp_worker_t; + typedef struct { udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; @@ -112,16 +141,19 @@ typedef struct u32 local_to_input_edge[N_UDP_AF]; /* - * Per-worker thread udp connection pools used with session layer + * UDP transport layer per-thread context */ - udp_connection_t **connections; - u32 *connection_peekers; - clib_spinlock_t *peekers_readers_locks; - clib_spinlock_t *peekers_write_locks; + + udp_worker_t *wrk; udp_connection_t *listener_pool; + /* Refcounts for ports consumed by udp transports to handle + * both passive and active opens using the same port */ + u16 *transport_ports_refcnt[N_UDP_AF]; + u16 default_mtu; u16 msg_id_base; + u8 csum_offload; u8 icmp_send_unreachable_disabled; } udp_main_t; @@ -131,16 +163,26 @@ extern vlib_node_registration_t udp4_input_node; extern vlib_node_registration_t udp6_input_node; extern vlib_node_registration_t udp4_local_node; extern vlib_node_registration_t udp6_local_node; +extern vlib_node_registration_t udp4_output_node; +extern vlib_node_registration_t udp6_output_node; void udp_add_dst_port (udp_main_t * um, udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4); +always_inline udp_worker_t * +udp_worker_get (u32 thread_index) +{ + return vec_elt_at_index (udp_main.wrk, thread_index); +} + always_inline udp_connection_t * udp_connection_get (u32 conn_index, u32 thread_index) { - if (pool_is_free_index (udp_main.connections[thread_index], conn_index)) + udp_worker_t *wrk = udp_worker_get (thread_index); + + if (pool_is_free_index (wrk->connections, conn_index)) return 0; - return pool_elt_at_index (udp_main.connections[thread_index], conn_index); + return pool_elt_at_index (wrk->connections, conn_index); } always_inline udp_connection_t * @@ -161,65 +203,24 @@ udp_connection_from_transport (transport_connection_t * tc) return ((udp_connection_t *) tc); } -always_inline u32 -udp_connection_index (udp_connection_t * uc) -{ - return (uc - udp_main.connections[uc->c_thread_index]); -} - void udp_connection_free (udp_connection_t * uc); udp_connection_t *udp_connection_alloc (u32 thread_index); - -/** - * Acquires a lock that blocks a connection pool from expanding. - */ -always_inline void -udp_pool_add_peeker (u32 thread_index) -{ - if (thread_index != vlib_get_thread_index ()) - return; - clib_spinlock_lock_if_init (&udp_main.peekers_readers_locks[thread_index]); - udp_main.connection_peekers[thread_index] += 1; - if (udp_main.connection_peekers[thread_index] == 1) - clib_spinlock_lock_if_init (&udp_main.peekers_write_locks[thread_index]); - clib_spinlock_unlock_if_init (&udp_main.peekers_readers_locks - [thread_index]); -} - -always_inline void -udp_pool_remove_peeker (u32 thread_index) -{ - if (thread_index != vlib_get_thread_index ()) - return; - ASSERT (udp_main.connection_peekers[thread_index] > 0); - clib_spinlock_lock_if_init (&udp_main.peekers_readers_locks[thread_index]); - udp_main.connection_peekers[thread_index] -= 1; - if (udp_main.connection_peekers[thread_index] == 0) - clib_spinlock_unlock_if_init (&udp_main.peekers_write_locks - [thread_index]); - clib_spinlock_unlock_if_init (&udp_main.peekers_readers_locks - [thread_index]); -} +void udp_connection_share_port (u16 lcl_port, u8 is_ip4); always_inline udp_connection_t * udp_connection_clone_safe (u32 connection_index, u32 thread_index) { + u32 current_thread_index = vlib_get_thread_index (), new_index; udp_connection_t *old_c, *new_c; - u32 current_thread_index = vlib_get_thread_index (); - new_c = udp_connection_alloc (current_thread_index); - /* If during the memcpy pool is reallocated AND the memory allocator - * decides to give the old chunk of memory to somebody in a hurry to - * scribble something on it, we have a problem. So add this thread as - * a session pool peeker. - */ - udp_pool_add_peeker (thread_index); - old_c = udp_main.connections[thread_index] + connection_index; + new_c = udp_connection_alloc (current_thread_index); + new_index = new_c->c_c_index; + /* Connection pool always realloced with barrier */ + old_c = udp_main.wrk[thread_index].connections + connection_index; clib_memcpy_fast (new_c, old_c, sizeof (*new_c)); old_c->flags |= UDP_CONN_F_MIGRATED; - udp_pool_remove_peeker (thread_index); new_c->c_thread_index = current_thread_index; - new_c->c_c_index = udp_connection_index (new_c); + new_c->c_c_index = new_index; new_c->c_fib_index = old_c->c_fib_index; /* Assume cloned sessions don't need lock */ new_c->rx_lock = 0; @@ -239,8 +240,6 @@ format_function_t format_udp_connection; unformat_function_t unformat_udp_header; unformat_function_t unformat_udp_port; -void udp_connection_share_port (u16 lcl_port, u8 is_ip4); - void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); /* diff --git a/src/vnet/udp/udp_api.c b/src/vnet/udp/udp_api.c index 0f2d014946f..1f952aa36ea 100644 --- a/src/vnet/udp/udp_api.c +++ b/src/vnet/udp/udp_api.c @@ -86,12 +86,10 @@ vl_api_udp_encap_dump_t_handler (vl_api_udp_encap_dump_t *mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (ue, udp_encap_pool) { send_udp_encap_details(ue, reg, mp->context); } - /* *INDENT-ON* */ } static void @@ -99,6 +97,7 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp) { vl_api_udp_encap_add_reply_t *rmp; ip46_address_t src_ip, dst_ip; + udp_encap_fixup_flags_t flags; u32 fib_index, table_id; fib_protocol_t fproto; ip46_type_t itype; @@ -119,19 +118,19 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp) goto done; } - uei = udp_encap_add_and_lock (fproto, fib_index, - &src_ip, &dst_ip, + flags = UDP_ENCAP_FIXUP_NONE; + if (mp->udp_encap.src_port == 0) + flags |= UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY; + + uei = udp_encap_add_and_lock (fproto, fib_index, &src_ip, &dst_ip, ntohs (mp->udp_encap.src_port), - ntohs (mp->udp_encap.dst_port), - UDP_ENCAP_FIXUP_NONE); + ntohs (mp->udp_encap.dst_port), flags); done: - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_UDP_ENCAP_ADD_REPLY, ({ rmp->id = ntohl (uei); })); - /* *INDENT-ON* */ } @@ -189,11 +188,19 @@ vl_api_udp_decap_add_del_t_handler (vl_api_udp_decap_add_del_t *mp) static clib_error_t * udp_api_hookup (vlib_main_t * vm) { + api_main_t *am = vlibapi_get_main (); + /* * Set up the (msg_name, crc, message-id) table */ REPLY_MSG_ID_BASE = setup_message_id_table (); + /* Mark these APIs as mp safe */ + vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_ADD, 1); + vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DEL, 1); + vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DUMP, + 1); + return 0; } diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c index 97760f4c4f8..6c8992cd0de 100644 --- a/src/vnet/udp/udp_cli.c +++ b/src/vnet/udp/udp_cli.c @@ -13,6 +13,9 @@ * limitations under the License. */ +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/format_table.h> #include <vnet/udp/udp.h> #include <vnet/session/session_types.h> @@ -35,6 +38,33 @@ format_udp_connection_id (u8 * s, va_list * args) return s; } +static const char *udp_cfg_flags_str[] = { +#define _(sym, str) str, + foreach_udp_cfg_flag +#undef _ +}; + +static u8 * +format_udp_cfg_flags (u8 *s, va_list *args) +{ + udp_connection_t *tc = va_arg (*args, udp_connection_t *); + int i, last = -1; + + for (i = 0; i < UDP_CFG_N_FLAG_BITS; i++) + if (tc->cfg_flags & (1 << i)) + last = i; + if (last >= 0) + s = format (s, " cfg: "); + for (i = 0; i < last; i++) + { + if (tc->cfg_flags & (1 << i)) + s = format (s, "%s, ", udp_cfg_flags_str[i]); + } + if (last >= 0) + s = format (s, "%s", udp_cfg_flags_str[last]); + return s; +} + static const char *udp_connection_flags_str[] = { #define _(sym, str) str, foreach_udp_connection_flag @@ -64,11 +94,15 @@ static u8 * format_udp_vars (u8 * s, va_list * args) { udp_connection_t *uc = va_arg (*args, udp_connection_t *); - s = format (s, " index %u flags: %U", uc->c_c_index, - format_udp_connection_flags, uc); + s = format (s, " index %u%U flags: %U\n", uc->c_c_index, + format_udp_cfg_flags, uc, format_udp_connection_flags, uc); + s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index); if (!(uc->flags & UDP_CONN_F_LISTEN)) + s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss); + else s = format (s, "\n"); + return s; } @@ -102,6 +136,8 @@ udp_config_fn (vlib_main_t * vm, unformat_input_t * input) um->default_mtu = tmp; else if (unformat (input, "icmp-unreachable-disabled")) um->icmp_send_unreachable_disabled = 1; + else if (unformat (input, "no-csum-offload")) + um->csum_offload = 0; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -151,7 +187,7 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input, u8 *s = NULL; vec_foreach (port_info, um->dst_port_infos[UDP_IP6]) { - if (udp_is_valid_dst_port (port_info->dst_port, 01)) + if (udp_is_valid_dst_port (port_info->dst_port, 0)) { s = format (s, (!s) ? "%d" : ", %d", port_info->dst_port); } @@ -162,14 +198,199 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input, return (error); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_tcp_punt_command, static) = { .path = "show udp punt", .short_help = "show udp punt [ipv4|ipv6]", .function = show_udp_punt_fn, }; -/* *INDENT-ON* */ + +static void +table_format_udp_port_ (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c, + int port, int bind, int is_ip4) +{ + const udp_dst_port_info_t *pi; + + if (bind && !udp_is_valid_dst_port (port, is_ip4)) + return; + + pi = udp_get_dst_port_info (um, port, is_ip4); + if (!pi) + return; + + table_format_cell (t, *c, 0, "%d", pi->dst_port); + table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6"); + table_format_cell (t, *c, 2, ~0 == pi->node_index ? "none" : "%U", + format_vlib_node_name, vm, pi->node_index); + table_format_cell (t, *c, 3, "%s", pi->name); + + (*c)++; +} + +static void +table_format_udp_port (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c, + int port, int bind, int ip4, int ip6) +{ + if (ip4) + table_format_udp_port_ (vm, um, t, c, port, bind, 1 /* is_ip4 */); + if (ip6) + table_format_udp_port_ (vm, um, t, c, port, bind, 0 /* is_ip4 */); +} + +static clib_error_t * +show_udp_ports (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + table_t table = {}, *t = &table; + udp_main_t *um = &udp_main; + clib_error_t *err = 0; + int ip4 = 1, ip6 = 1; + int port = -1; + int bind = 1; + int c = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "ip4")) + ip6 = 0; + else if (unformat (input, "ip6")) + ip4 = 0; + else if (unformat (input, "bind")) + bind = 1; + else if (unformat (input, "all")) + bind = 0; + else if (unformat (input, "%d", &port)) + ; + else + { + err = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + table_add_header_col (t, 4, "port", "proto", "node", "desc"); + + if (port > 65535) + { + err = clib_error_return (0, "wrong port %d", port); + goto out; + } + else if (port < 0) + { + for (port = 0; port < 65536; port++) + table_format_udp_port (vm, um, t, &c, port, bind, ip4, ip6); + } + else + { + table_format_udp_port (vm, um, t, &c, port, bind, ip4, ip6); + } + + vlib_cli_output (vm, "%U", format_table, t); + +out: + table_free (t); + return err; +} + +VLIB_CLI_COMMAND (show_udp_ports_cmd, static) = { + .path = "show udp ports", + .function = show_udp_ports, + .short_help = "show udp ports [ip4|ip6] [bind|all|<port>]", + .is_mp_safe = 1, +}; + +static void +table_format_udp_transport_port_ (vlib_main_t *vm, table_t *t, int *c, + int port, int is_ip4) +{ + udp_main_t *um = &udp_main; + u32 refcnt; + u16 port_ne; + + port_ne = clib_host_to_net_u16 (port); + refcnt = um->transport_ports_refcnt[is_ip4][port_ne]; + if (!refcnt) + return; + + if (!udp_is_valid_dst_port (port, is_ip4)) + { + clib_warning ("Port %u is not registered refcnt %u!", port, refcnt); + return; + } + + table_format_cell (t, *c, 0, "%d", port); + table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6"); + table_format_cell (t, *c, 2, "%d", refcnt); + + (*c)++; +} + +static void +table_format_udp_transport_port (vlib_main_t *vm, table_t *t, int *c, int port, + int ipv) +{ + if (ipv == -1 || ipv == 0) + table_format_udp_transport_port_ (vm, t, c, port, 1 /* is_ip4 */); + if (ipv == -1 || ipv == 1) + table_format_udp_transport_port_ (vm, t, c, port, 0 /* is_ip4 */); +} + +static clib_error_t * +show_udp_transport_ports (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + table_t table = {}, *t = &table; + int ipv = -1, port = -1, c = 0; + clib_error_t *err = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "ip4")) + ipv = 0; + else if (unformat (input, "ip6")) + ipv = 1; + else if (unformat (input, "%d", &port)) + ; + else + { + err = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + table_add_header_col (t, 3, "port", "proto", "ref-cnt"); + + if (port > 65535) + { + err = clib_error_return (0, "wrong port %d", port); + goto out; + } + + if (port < 0) + { + for (port = 0; port < 65536; port++) + table_format_udp_transport_port (vm, t, &c, port, ipv); + } + else + { + table_format_udp_transport_port (vm, t, &c, port, ipv); + } + + vlib_cli_output (vm, "%U\n", format_table, t); + +out: + table_free (t); + return err; +} + +VLIB_CLI_COMMAND (show_udp_transport_ports_cmd, static) = { + .path = "show udp transport ports", + .function = show_udp_transport_ports, + .short_help = "show udp transport ports [ip4|ip6] [<port>]", + .is_mp_safe = 1, +}; /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/udp/udp_encap.c b/src/vnet/udp/udp_encap.c index cb93adb8d39..e4e5271da63 100644 --- a/src/vnet/udp/udp_encap.c +++ b/src/vnet/udp/udp_encap.c @@ -47,8 +47,7 @@ static void udp_encap_restack (udp_encap_t * ue) { dpo_stack (udp_encap_dpo_types[ue->ue_ip_proto], - fib_proto_to_dpo (ue->ue_ip_proto), - &ue->ue_dpo, + fib_proto_to_dpo (ue->ue_ip_proto), &ue->ue_dpo, fib_entry_contribute_ip_forwarding (ue->ue_fib_entry_index)); } @@ -196,6 +195,20 @@ udp_encap_dpo_unlock (dpo_id_t * dpo) fib_node_unlock (&ue->ue_fib_node); } +u8 * +format_udp_encap_fixup_flags (u8 *s, va_list *args) +{ + udp_encap_fixup_flags_t flags = va_arg (*args, udp_encap_fixup_flags_t); + + if (flags == UDP_ENCAP_FIXUP_NONE) + return format (s, "none"); + + if (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) + s = format (s, "%s", "src-port-is-entropy"); + + return (s); +} + static u8 * format_udp_encap_i (u8 * s, va_list * args) { @@ -211,23 +224,21 @@ format_udp_encap_i (u8 * s, va_list * args) s = format (s, "udp-encap:[%d]: ip-fib-index:%d ", uei, ue->ue_fib_index); if (FIB_PROTOCOL_IP4 == ue->ue_ip_proto) { - s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d]", - format_ip4_address, - &ue->ue_hdrs.ip4.ue_ip4.src_address, - format_ip4_address, - &ue->ue_hdrs.ip4.ue_ip4.dst_address, + s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d] flags:%U", + format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.src_address, + format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.dst_address, clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.src_port), - clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port)); + clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port), + format_udp_encap_fixup_flags, ue->ue_flags); } else { - s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d]", - format_ip6_address, - &ue->ue_hdrs.ip6.ue_ip6.src_address, - format_ip6_address, - &ue->ue_hdrs.ip6.ue_ip6.dst_address, + s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d] flags:%U", + format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.src_address, + format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.dst_address, clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.src_port), - clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port)); + clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port), + format_udp_encap_fixup_flags, ue->ue_flags); } vlib_get_combined_counter (&(udp_encap_counters), uei, &to); s = format (s, " to:[%Ld:%Ld]]", to.packets, to.bytes); @@ -325,12 +336,12 @@ udp_encap_fib_last_lock_gone (fib_node_t * node) } const static char *const udp4_encap_ip4_nodes[] = { - "udp4-encap", + "udp4o4-encap", NULL, }; const static char *const udp4_encap_ip6_nodes[] = { - "udp4-encap", + "udp6o4-encap", NULL, }; @@ -345,12 +356,12 @@ const static char *const udp4_encap_bier_nodes[] = { }; const static char *const udp6_encap_ip4_nodes[] = { - "udp6-encap", + "udp4o6-encap", NULL, }; const static char *const udp6_encap_ip6_nodes[] = { - "udp6-encap", + "udp6o6-encap", NULL, }; @@ -507,13 +518,11 @@ udp_encap_walk (udp_encap_walk_cb_t cb, void *ctx) { index_t uei; - /* *INDENT-OFF* */ pool_foreach_index (uei, udp_encap_pool) { if (WALK_STOP == cb(uei, ctx)) break; } - /* *INDENT-ON* */ } clib_error_t * @@ -536,12 +545,10 @@ udp_encap_show (vlib_main_t * vm, if (INDEX_INVALID == uei) { - /* *INDENT-OFF* */ pool_foreach_index (uei, udp_encap_pool) { vlib_cli_output(vm, "%U", format_udp_encap, uei, 0); } - /* *INDENT-ON* */ } else { @@ -551,20 +558,20 @@ udp_encap_show (vlib_main_t * vm, return NULL; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (udp_encap_add_command, static) = { .path = "udp encap", - .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] <dst-port> [src-port-is-entropy] [table-id <table>]", + .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] " + "<dst-port> [src-port-is-entropy] [table-id <table>]", .function = udp_encap_cli, .is_mp_safe = 1, }; + VLIB_CLI_COMMAND (udp_encap_show_command, static) = { .path = "show udp encap", .short_help = "show udp encap [ID]", .function = udp_encap_show, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/udp/udp_encap.h b/src/vnet/udp/udp_encap.h index b096e0f5c09..c8b42ffa92c 100644 --- a/src/vnet/udp/udp_encap.h +++ b/src/vnet/udp/udp_encap.h @@ -85,7 +85,7 @@ typedef struct udp_encap_t_ /** * The second cacheline contains control-plane data */ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); /** * linkage into the FIB graph @@ -115,6 +115,7 @@ extern index_t udp_encap_add_and_lock (fib_protocol_t proto, extern void udp_encap_lock (index_t uei); extern void udp_encap_unlock (index_t uei); extern u8 *format_udp_encap (u8 * s, va_list * args); +extern u8 *format_udp_encap_fixup_flags (u8 *s, va_list *args); extern void udp_encap_contribute_forwarding (index_t uei, dpo_proto_t proto, dpo_id_t * dpo); diff --git a/src/vnet/udp/udp_encap_node.c b/src/vnet/udp/udp_encap_node.c index 5b9fc0bf34b..a86614f5475 100644 --- a/src/vnet/udp/udp_encap_node.c +++ b/src/vnet/udp/udp_encap_node.c @@ -20,12 +20,16 @@ typedef struct udp4_encap_trace_t_ { udp_header_t udp; ip4_header_t ip; + u32 flow_hash; + udp_encap_fixup_flags_t flags; } udp4_encap_trace_t; typedef struct udp6_encap_trace_t_ { udp_header_t udp; ip6_header_t ip; + u32 flow_hash; + udp_encap_fixup_flags_t flags; } udp6_encap_trace_t; extern vlib_combined_counter_main_t udp_encap_counters; @@ -35,13 +39,16 @@ format_udp4_encap_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + u32 indent = format_get_indent (s); udp4_encap_trace_t *t; t = va_arg (*args, udp4_encap_trace_t *); - s = format (s, "%U\n %U", - format_ip4_header, &t->ip, sizeof (t->ip), - format_udp_header, &t->udp, sizeof (t->udp)); + s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U", + format_udp_encap_fixup_flags, t->flags, t->flow_hash, + format_white_space, indent, format_ip4_header, &t->ip, + sizeof (t->ip), format_white_space, indent, format_udp_header, + &t->udp, sizeof (t->udp)); return (s); } @@ -50,20 +57,23 @@ format_udp6_encap_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + u32 indent = format_get_indent (s); udp6_encap_trace_t *t; t = va_arg (*args, udp6_encap_trace_t *); - s = format (s, "%U\n %U", - format_ip6_header, &t->ip, sizeof (t->ip), - format_udp_header, &t->udp, sizeof (t->udp)); + s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U", + format_udp_encap_fixup_flags, t->flags, t->flow_hash, + format_white_space, indent, format_ip6_header, &t->ip, + sizeof (t->ip), format_white_space, indent, format_udp_header, + &t->udp, sizeof (t->udp)); return (s); } always_inline uword -udp_encap_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_encap_v6) +udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, ip_address_family_t encap_family, + ip_address_family_t payload_family) { vlib_combined_counter_main_t *cm = &udp_encap_counters; u32 *from = vlib_frame_vector_args (frame); @@ -121,18 +131,22 @@ udp_encap_inline (vlib_main_t * vm, ue1 = udp_encap_get (uei1); /* Paint */ - if (is_encap_v6) + if (encap_family == AF_IP6) { const u8 n_bytes = sizeof (udp_header_t) + sizeof (ip6_header_t); - ip_udp_encap_two (vm, b0, b1, (u8 *) & ue0->ue_hdrs, - (u8 *) & ue1->ue_hdrs, n_bytes, 0); + ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs, + (u8 *) &ue1->ue_hdrs, n_bytes, encap_family, + payload_family, ue0->ue_flags, ue1->ue_flags); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { udp6_encap_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->udp = ue0->ue_hdrs.ip6.ue_udp; tr->ip = ue0->ue_hdrs.ip6.ue_ip6; + tr->flags = ue0->ue_flags; + tr->flow_hash = vnet_buffer (b0)->ip.flow_hash; } if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) { @@ -140,6 +154,8 @@ udp_encap_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->udp = ue1->ue_hdrs.ip6.ue_udp; tr->ip = ue1->ue_hdrs.ip6.ue_ip6; + tr->flags = ue1->ue_flags; + tr->flow_hash = vnet_buffer (b1)->ip.flow_hash; } } else @@ -147,9 +163,9 @@ udp_encap_inline (vlib_main_t * vm, const u8 n_bytes = sizeof (udp_header_t) + sizeof (ip4_header_t); - ip_udp_encap_two (vm, b0, b1, - (u8 *) & ue0->ue_hdrs, - (u8 *) & ue1->ue_hdrs, n_bytes, 1); + ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs, + (u8 *) &ue1->ue_hdrs, n_bytes, encap_family, + payload_family, ue0->ue_flags, ue1->ue_flags); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -157,6 +173,8 @@ udp_encap_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->udp = ue0->ue_hdrs.ip4.ue_udp; tr->ip = ue0->ue_hdrs.ip4.ue_ip4; + tr->flags = ue0->ue_flags; + tr->flow_hash = vnet_buffer (b0)->ip.flow_hash; } if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) { @@ -164,6 +182,8 @@ udp_encap_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->udp = ue1->ue_hdrs.ip4.ue_udp; tr->ip = ue1->ue_hdrs.ip4.ue_ip4; + tr->flags = ue1->ue_flags; + tr->flow_hash = vnet_buffer (b1)->ip.flow_hash; } } @@ -202,12 +222,12 @@ udp_encap_inline (vlib_main_t * vm, b0)); /* Paint */ - if (is_encap_v6) + if (encap_family == AF_IP6) { const u8 n_bytes = sizeof (udp_header_t) + sizeof (ip6_header_t); - ip_udp_encap_one (vm, b0, (u8 *) & ue0->ue_hdrs.ip6, n_bytes, - 0); + ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip6, n_bytes, + encap_family, payload_family, ue0->ue_flags); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -215,6 +235,8 @@ udp_encap_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->udp = ue0->ue_hdrs.ip6.ue_udp; tr->ip = ue0->ue_hdrs.ip6.ue_ip6; + tr->flags = ue0->ue_flags; + tr->flow_hash = vnet_buffer (b0)->ip.flow_hash; } } else @@ -222,8 +244,8 @@ udp_encap_inline (vlib_main_t * vm, const u8 n_bytes = sizeof (udp_header_t) + sizeof (ip4_header_t); - ip_udp_encap_one (vm, b0, (u8 *) & ue0->ue_hdrs.ip4, n_bytes, - 1); + ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip4, n_bytes, + encap_family, payload_family, ue0->ue_flags); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -231,6 +253,8 @@ udp_encap_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->udp = ue0->ue_hdrs.ip4.ue_udp; tr->ip = ue0->ue_hdrs.ip4.ue_ip4; + tr->flags = ue0->ue_flags; + tr->flow_hash = vnet_buffer (b0)->ip.flow_hash; } } @@ -248,39 +272,87 @@ udp_encap_inline (vlib_main_t * vm, return frame->n_vectors; } -VLIB_NODE_FN (udp4_encap_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (udp4o4_encap_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return udp_encap_inline (vm, node, frame, AF_IP4, AF_IP4); +} + +VLIB_NODE_FN (udp6o4_encap_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return udp_encap_inline (vm, node, frame, AF_IP4, AF_IP6); +} + +VLIB_NODE_FN (udp4_encap_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return udp_encap_inline (vm, node, frame, AF_IP4, N_AF); +} + +VLIB_NODE_FN (udp6o6_encap_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return udp_encap_inline (vm, node, frame, 0); + return udp_encap_inline (vm, node, frame, AF_IP6, AF_IP6); } -VLIB_NODE_FN (udp6_encap_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (udp4o6_encap_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return udp_encap_inline (vm, node, frame, 1); + return udp_encap_inline (vm, node, frame, AF_IP6, AF_IP4); } -/* *INDENT-OFF* */ +VLIB_NODE_FN (udp6_encap_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return udp_encap_inline (vm, node, frame, AF_IP6, N_AF); +} + +VLIB_REGISTER_NODE (udp4o4_encap_node) = { + .name = "udp4o4-encap", + .vector_size = sizeof (u32), + .format_trace = format_udp4_encap_trace, + .n_next_nodes = 0, +}; + +VLIB_REGISTER_NODE (udp6o4_encap_node) = { + .name = "udp6o4-encap", + .vector_size = sizeof (u32), + .format_trace = format_udp4_encap_trace, + .n_next_nodes = 0, + .sibling_of = "udp4o4-encap", +}; + VLIB_REGISTER_NODE (udp4_encap_node) = { .name = "udp4-encap", .vector_size = sizeof (u32), - .format_trace = format_udp4_encap_trace, + .n_next_nodes = 0, + .sibling_of = "udp4o4-encap", +}; +VLIB_REGISTER_NODE (udp6o6_encap_node) = { + .name = "udp6o6-encap", + .vector_size = sizeof (u32), + .format_trace = format_udp6_encap_trace, + .n_next_nodes = 0, +}; + +VLIB_REGISTER_NODE (udp4o6_encap_node) = { + .name = "udp4o6-encap", + .vector_size = sizeof (u32), + .format_trace = format_udp6_encap_trace, .n_next_nodes = 0, + .sibling_of = "udp6o6-encap", }; VLIB_REGISTER_NODE (udp6_encap_node) = { .name = "udp6-encap", .vector_size = sizeof (u32), - .format_trace = format_udp6_encap_trace, - .n_next_nodes = 0, + .sibling_of = "udp6o6-encap", }; -/* *INDENT-ON* */ /* diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def index 776d94a8ec1..ef19970ce72 100644 --- a/src/vnet/udp/udp_error.def +++ b/src/vnet/udp/udp_error.def @@ -15,13 +15,16 @@ * limitations under the License. */ -udp_error (NONE, "No error") -udp_error (NO_LISTENER, "No listener for dst port") -udp_error (LENGTH_ERROR, "Packets with length errors") -udp_error (PUNT, "No listener punt") -udp_error (ENQUEUED, "Packets enqueued") -udp_error (FIFO_FULL, "Fifo full") -udp_error (NOT_READY, "Connection not ready") -udp_error (ACCEPT, "Accepted session") -udp_error (CREATE_SESSION, "Failed to create session") -udp_error (MQ_FULL, "Application msg queue full") +udp_error (NONE, none, INFO, "No error") +udp_error (NO_LISTENER, no_listener, ERROR, "No listener for dst port") +udp_error (LENGTH_ERROR, length_error, ERROR, "Packets with length errors") +udp_error (PUNT, punt, ERROR, "No listener punt") +udp_error (ENQUEUED, enqueued, INFO, "Packets enqueued") +udp_error (FIFO_FULL, fifo_full, ERROR, "Fifo full") +udp_error (FIFO_NOMEM, fifo_nomem, ERROR, "Fifo no mem") +udp_error (NOT_READY, not_ready, ERROR, "Connection not ready") +udp_error (ACCEPT, accept, INFO, "Accepted session") +udp_error (CREATE_SESSION, create_session, ERROR, "Failed to create session") +udp_error (MQ_FULL, mq_full, ERROR, "Application msg queue full") +udp_error (INVALID_CONNECTION, invalid_connection, ERROR, "Invalid connection") +udp_error (PKTS_SENT, pkts_sent, INFO, "Packets sent") diff --git a/src/vnet/udp/udp_inlines.h b/src/vnet/udp/udp_inlines.h index e4eb0c88e83..ceec0b191b1 100644 --- a/src/vnet/udp/udp_inlines.h +++ b/src/vnet/udp/udp_inlines.h @@ -21,9 +21,12 @@ #include <vnet/ip/ip6.h> #include <vnet/udp/udp_packet.h> #include <vnet/interface_output.h> +#include <vnet/ip/ip4_inlines.h> +#include <vnet/ip/ip6_inlines.h> +#include <vnet/udp/udp_encap.h> always_inline void * -vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum) +vlib_buffer_push_udp (vlib_buffer_t *b, u16 sp, u16 dp) { udp_header_t *uh; u16 udp_len = sizeof (udp_header_t) + b->current_length; @@ -35,15 +38,44 @@ vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum) uh->dst_port = dp; uh->checksum = 0; uh->length = clib_host_to_net_u16 (udp_len); - if (offload_csum) - vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); vnet_buffer (b)->l4_hdr_offset = (u8 *) uh - b->data; b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; return uh; } +/* + * Encode udp source port entropy value per + * https://datatracker.ietf.org/doc/html/rfc7510#section-3 + */ +always_inline u16 +ip_udp_sport_entropy (vlib_buffer_t *b0) +{ + u16 port = clib_host_to_net_u16 (0x03 << 14); + port |= vnet_buffer (b0)->ip.flow_hash & 0xffff; + return port; +} + +always_inline u32 +ip_udp_compute_flow_hash (vlib_buffer_t *b0, u8 is_ip4) +{ + ip4_header_t *ip4; + ip6_header_t *ip6; + + if (is_ip4) + { + ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset); + return ip4_compute_flow_hash (ip4, IP_FLOW_HASH_DEFAULT); + } + else + { + ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset); + return ip6_compute_flow_hash (ip6, IP_FLOW_HASH_DEFAULT); + } +} + always_inline void -ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) +ip_udp_fixup_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 is_ip4, + u8 sport_entropy) { u16 new_l0; udp_header_t *udp0; @@ -71,6 +103,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - sizeof (*ip0)); udp0->length = new_l0; + + if (sport_entropy) + udp0->src_port = ip_udp_sport_entropy (b0); } else { @@ -87,6 +122,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) udp0 = (udp_header_t *) (ip0 + 1); udp0->length = new_l0; + if (sport_entropy) + udp0->src_port = ip_udp_sport_entropy (b0); + udp0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); ASSERT (bogus0 == 0); @@ -97,14 +135,27 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) } always_inline void -ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, - u8 is_ip4) +ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len, + ip_address_family_t encap_family, + ip_address_family_t payload_family, + udp_encap_fixup_flags_t flags) { - vnet_calc_checksums_inline (vm, b0, is_ip4, !is_ip4); + u8 sport_entropy = (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0; + + if (payload_family < N_AF) + { + vnet_calc_checksums_inline (vm, b0, payload_family == AF_IP4, + payload_family == AF_IP6); + + /* Сalculate flow hash to be used for entropy */ + if (sport_entropy && 0 == vnet_buffer (b0)->ip.flow_hash) + vnet_buffer (b0)->ip.flow_hash = + ip_udp_compute_flow_hash (b0, payload_family == AF_IP4); + } vlib_buffer_advance (b0, -ec_len); - if (is_ip4) + if (encap_family == AF_IP4) { ip4_header_t *ip0; @@ -112,7 +163,7 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, /* Apply the encap string. */ clib_memcpy_fast (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 1); + ip_udp_fixup_one (vm, b0, 1, sport_entropy); } else { @@ -122,26 +173,42 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, /* Apply the encap string. */ clib_memcpy_fast (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 0); + ip_udp_fixup_one (vm, b0, 0, sport_entropy); } } always_inline void -ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, - u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) +ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1, + u8 *ec0, u8 *ec1, word ec_len, + ip_address_family_t encap_family, + ip_address_family_t payload_family, + udp_encap_fixup_flags_t flags0, + udp_encap_fixup_flags_t flags1) { u16 new_l0, new_l1; udp_header_t *udp0, *udp1; + int payload_ip4 = (payload_family == AF_IP4); + int sport_entropy0 = (flags0 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0; + int sport_entropy1 = (flags1 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0; - ASSERT (_vec_len (ec0) == _vec_len (ec1)); - - vnet_calc_checksums_inline (vm, b0, is_v4, !is_v4); - vnet_calc_checksums_inline (vm, b1, is_v4, !is_v4); + if (payload_family < N_AF) + { + vnet_calc_checksums_inline (vm, b0, payload_ip4, !payload_ip4); + vnet_calc_checksums_inline (vm, b1, payload_ip4, !payload_ip4); + + /* Сalculate flow hash to be used for entropy */ + if (sport_entropy0 && 0 == vnet_buffer (b0)->ip.flow_hash) + vnet_buffer (b0)->ip.flow_hash = + ip_udp_compute_flow_hash (b0, payload_ip4); + if (sport_entropy1 && 0 == vnet_buffer (b1)->ip.flow_hash) + vnet_buffer (b1)->ip.flow_hash = + ip_udp_compute_flow_hash (b1, payload_ip4); + } vlib_buffer_advance (b0, -ec_len); vlib_buffer_advance (b1, -ec_len); - if (is_v4) + if (encap_family == AF_IP4) { ip4_header_t *ip0, *ip1; ip_csum_t sum0, sum1; @@ -185,6 +252,11 @@ ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, sizeof (*ip1)); udp0->length = new_l0; udp1->length = new_l1; + + if (sport_entropy0) + udp0->src_port = ip_udp_sport_entropy (b0); + if (sport_entropy1) + udp1->src_port = ip_udp_sport_entropy (b1); } else { @@ -212,6 +284,11 @@ ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, udp0->length = new_l0; udp1->length = new_l1; + if (sport_entropy0) + udp0->src_port = ip_udp_sport_entropy (b0); + if (sport_entropy1) + udp1->src_port = ip_udp_sport_entropy (b1); + udp0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); udp1->checksum = diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c index c76c1b796bd..a90461186c1 100644 --- a/src/vnet/udp/udp_input.c +++ b/src/vnet/udp/udp_input.c @@ -26,8 +26,8 @@ #include <vnet/udp/udp_packet.h> #include <vnet/session/session.h> -static char *udp_error_strings[] = { -#define udp_error(n,s) s, +static vlib_error_desc_t udp_error_counters[] = { +#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s }, #include "udp_error.def" #undef udp_error }; @@ -115,6 +115,7 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr, uc->c_fib_index = listener->c_fib_index; uc->mss = listener->mss; uc->flags |= UDP_CONN_F_CONNECTED; + uc->cfg_flags = listener->cfg_flags; if (session_dgram_accept (&uc->connection, listener->c_s_index, listener->c_thread_index)) @@ -122,8 +123,8 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr, udp_connection_free (uc); return 0; } - udp_connection_share_port (clib_net_to_host_u16 - (uc->c_lcl_port), uc->c_is_ip4); + + udp_connection_share_port (uc->c_lcl_port, uc->c_is_ip4); return uc; } @@ -135,37 +136,46 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0, int wrote0; if (!(uc0->flags & UDP_CONN_F_CONNECTED)) - clib_spinlock_lock (&uc0->rx_lock); + { + clib_spinlock_lock (&uc0->rx_lock); + + wrote0 = session_enqueue_dgram_connection_cl ( + s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event); + + clib_spinlock_unlock (&uc0->rx_lock); + + /* Expect cl udp enqueue to fail because fifo enqueue */ + if (PREDICT_FALSE (wrote0 == 0)) + *error0 = UDP_ERROR_FIFO_FULL; + + return; + } if (svm_fifo_max_enqueue_prod (s0->rx_fifo) < hdr0->data_length + sizeof (session_dgram_hdr_t)) { *error0 = UDP_ERROR_FIFO_FULL; - goto unlock_rx_lock; + return; } /* If session is owned by another thread and rx event needed, * enqueue event now while we still have the peeker lock */ if (s0->thread_index != thread_index) { - wrote0 = session_enqueue_dgram_connection (s0, hdr0, b, - TRANSPORT_PROTO_UDP, - /* queue event */ 0); - if (queue_event && !svm_fifo_has_event (s0->rx_fifo)) - session_enqueue_notify (s0); + wrote0 = session_enqueue_dgram_connection2 ( + s0, hdr0, b, TRANSPORT_PROTO_UDP, + queue_event && !svm_fifo_has_event (s0->rx_fifo)); } else { - wrote0 = session_enqueue_dgram_connection (s0, hdr0, b, - TRANSPORT_PROTO_UDP, - queue_event); + wrote0 = session_enqueue_dgram_connection ( + s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event); } - ASSERT (wrote0 > 0); - -unlock_rx_lock: - if (!(uc0->flags & UDP_CONN_F_CONNECTED)) - clib_spinlock_unlock (&uc0->rx_lock); + /* In some rare cases, session_enqueue_dgram_connection can fail because a + * chunk cannot be allocated in the RX FIFO */ + if (PREDICT_FALSE (wrote0 == 0)) + *error0 = UDP_ERROR_FIFO_NOMEM; } always_inline session_t * @@ -184,6 +194,7 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr, hdr->lcl_port = udp->dst_port; hdr->rmt_port = udp->src_port; hdr->is_ip4 = is_ip4; + hdr->gso_size = 0; if (is_ip4) { @@ -213,6 +224,10 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr, udp->src_port, TRANSPORT_PROTO_UDP); } + /* Set the sw_if_index[VLIB_RX] to the interface we received + * the connection on (the local interface) */ + vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index; + if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) b->current_length = hdr->data_length; else @@ -226,10 +241,9 @@ always_inline uword udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, u8 is_ip4) { - u32 n_left_from, *from, errors, *first_buffer; + u32 thread_index = vm->thread_index, n_left_from, *from, *first_buffer; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 err_counters[UDP_N_ERROR] = { 0 }; - u32 thread_index = vm->thread_index; from = first_buffer = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -251,15 +265,11 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto done; } - /* - * If session exists pool peeker lock is taken at this point unless - * the session is already on the right thread or is a listener - */ - if (s0->session_state == SESSION_STATE_OPENED) { u8 queue_event = 1; uc0 = udp_connection_from_transport (session_get_transport (s0)); + uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; if (uc0->flags & UDP_CONN_F_CONNECTED) { if (s0->thread_index != thread_index) @@ -273,10 +283,8 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ASSERT (s0->session_index == uc0->c_s_index); /* - * Drop the peeker lock on pool resize and ask session - * layer for a new session. + * Ask session layer for a new session. */ - session_pool_remove_peeker (s0->thread_index); session_dgram_connect_notify (&uc0->connection, s0->thread_index, &s0); queue_event = 0; @@ -286,9 +294,9 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], queue_event, &error0); - session_pool_remove_peeker (s0->thread_index); } - else if (s0->session_state == SESSION_STATE_READY) + else if (s0->session_state == SESSION_STATE_READY || + s0->session_state == SESSION_STATE_ACCEPTING) { uc0 = udp_connection_from_transport (session_get_transport (s0)); udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1, @@ -306,6 +314,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto done; } s0 = session_get (uc0->c_s_index, uc0->c_thread_index); + uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; error0 = UDP_ERROR_ACCEPT; } udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1, @@ -314,7 +323,6 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, else { error0 = UDP_ERROR_NOT_READY; - session_pool_remove_peeker (s0->thread_index); } done: @@ -328,9 +336,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } vlib_buffer_free (vm, first_buffer, frame->n_vectors); - errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, - thread_index); - err_counters[UDP_ERROR_MQ_FULL] = errors; + session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, thread_index); udp_store_err_counters (vm, is_ip4, err_counters); return frame->n_vectors; } @@ -342,7 +348,6 @@ udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node, return udp46_input_inline (vm, node, frame, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4_input_node) = { .function = udp4_input, @@ -350,8 +355,8 @@ VLIB_REGISTER_NODE (udp4_input_node) = .vector_size = sizeof (u32), .format_trace = format_udp_input_trace, .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN (udp_error_strings), - .error_strings = udp_error_strings, + .n_errors = UDP_N_ERROR, + .error_counters = udp_error_counters, .n_next_nodes = UDP_INPUT_N_NEXT, .next_nodes = { #define _(s, n) [UDP_INPUT_NEXT_##s] = n, @@ -359,7 +364,6 @@ VLIB_REGISTER_NODE (udp4_input_node) = #undef _ }, }; -/* *INDENT-ON* */ static uword udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -368,7 +372,6 @@ udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node, return udp46_input_inline (vm, node, frame, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp6_input_node) = { .function = udp6_input, @@ -376,8 +379,8 @@ VLIB_REGISTER_NODE (udp6_input_node) = .vector_size = sizeof (u32), .format_trace = format_udp_input_trace, .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN (udp_error_strings), - .error_strings = udp_error_strings, + .n_errors = UDP_N_ERROR, + .error_counters = udp_error_counters, .n_next_nodes = UDP_INPUT_N_NEXT, .next_nodes = { #define _(s, n) [UDP_INPUT_NEXT_##s] = n, @@ -385,7 +388,6 @@ VLIB_REGISTER_NODE (udp6_input_node) = #undef _ }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c index edfec3359c3..6531b73cd11 100644 --- a/src/vnet/udp/udp_local.c +++ b/src/vnet/udp/udp_local.c @@ -36,7 +36,11 @@ typedef struct u8 bound; } udp_local_rx_trace_t; -#define UDP_NO_NODE_SET ((u16) ~0) +static vlib_error_desc_t udp_error_counters[] = { +#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s }, +#include "udp_error.def" +#undef udp_error +}; #ifndef CLIB_MARCH_VARIANT u8 * @@ -121,9 +125,8 @@ udp46_local_inline (vlib_main_t * vm, u32 bi0, bi1; vlib_buffer_t *b0, *b1; udp_header_t *h0 = 0, *h1 = 0; - u32 i0, i1, dst_port0, dst_port1; + u32 i0, i1, next0, next1; u32 advance0, advance1; - u32 error0, next0, error1, next1; /* Prefetch next iteration. */ { @@ -165,72 +168,106 @@ udp46_local_inline (vlib_main_t * vm, if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) { - error0 = UDP_ERROR_LENGTH_ERROR; + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; next0 = UDP_LOCAL_NEXT_DROP; } else { vlib_buffer_advance (b0, advance0); h0 = vlib_buffer_get_current (b0); - error0 = UDP_ERROR_NONE; next0 = UDP_LOCAL_NEXT_PUNT; if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > vlib_buffer_length_in_chain (vm, b0))) { - error0 = UDP_ERROR_LENGTH_ERROR; + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; next0 = UDP_LOCAL_NEXT_DROP; } } if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) { - error1 = UDP_ERROR_LENGTH_ERROR; + b1->error = node->errors[UDP_ERROR_LENGTH_ERROR]; next1 = UDP_LOCAL_NEXT_DROP; } else { vlib_buffer_advance (b1, advance1); h1 = vlib_buffer_get_current (b1); - error1 = UDP_ERROR_NONE; next1 = UDP_LOCAL_NEXT_PUNT; if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > vlib_buffer_length_in_chain (vm, b1))) { - error1 = UDP_ERROR_LENGTH_ERROR; + b1->error = node->errors[UDP_ERROR_LENGTH_ERROR]; next1 = UDP_LOCAL_NEXT_DROP; } } /* Index sparse array with network byte order. */ - dst_port0 = (error0 == 0) ? h0->dst_port : 0; - dst_port1 = (error1 == 0) ? h1->dst_port : 0; - sparse_vec_index2 (next_by_dst_port, dst_port0, dst_port1, &i0, - &i1); - next0 = (error0 == 0) ? vec_elt (next_by_dst_port, i0) : next0; - next1 = (error1 == 0) ? vec_elt (next_by_dst_port, i1) : next1; - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX || - next0 == UDP_NO_NODE_SET)) + if (PREDICT_TRUE (next0 == UDP_LOCAL_NEXT_PUNT && + next1 == UDP_LOCAL_NEXT_PUNT)) { - udp_dispatch_error (node, b0, advance0, is_ip4, &next0); + sparse_vec_index2 (next_by_dst_port, h0->dst_port, h1->dst_port, + &i0, &i1); + next0 = vec_elt (next_by_dst_port, i0); + next1 = vec_elt (next_by_dst_port, i1); + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX || + next0 == UDP_NO_NODE_SET)) + { + udp_dispatch_error (node, b0, advance0, is_ip4, &next0); + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + + if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX || + next1 == UDP_NO_NODE_SET)) + { + udp_dispatch_error (node, b1, advance1, is_ip4, &next1); + } + else + { + b1->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b1, sizeof (*h1)); + } } - else + else if (next0 == UDP_LOCAL_NEXT_PUNT) { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } + i0 = sparse_vec_index (next_by_dst_port, h0->dst_port); + next0 = vec_elt (next_by_dst_port, i0); - if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX || - next1 == UDP_NO_NODE_SET)) - { - udp_dispatch_error (node, b1, advance1, is_ip4, &next1); + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX || + next0 == UDP_NO_NODE_SET)) + { + udp_dispatch_error (node, b0, advance0, is_ip4, &next0); + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } } - else + else if (next1 == UDP_LOCAL_NEXT_PUNT) { - b1->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b1, sizeof (*h1)); + i1 = sparse_vec_index (next_by_dst_port, h1->dst_port); + next1 = vec_elt (next_by_dst_port, i1); + + if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX || + next1 == UDP_NO_NODE_SET)) + { + udp_dispatch_error (node, b1, advance1, is_ip4, &next1); + } + else + { + b1->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b1, sizeof (*h1)); + } } if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -342,12 +379,6 @@ udp46_local_inline (vlib_main_t * vm, return from_frame->n_vectors; } -static char *udp_error_strings[] = { -#define udp_error(n,s) s, -#include "udp_error.def" -#undef udp_error -}; - VLIB_NODE_FN (udp4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) @@ -362,14 +393,13 @@ VLIB_NODE_FN (udp6_local_node) (vlib_main_t * vm, return udp46_local_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4_local_node) = { .name = "ip4-udp-lookup", /* Takes a vector of packets. */ .vector_size = sizeof (u32), .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, + .error_counters = udp_error_counters, .n_next_nodes = UDP_LOCAL_N_NEXT, .next_nodes = { @@ -382,16 +412,14 @@ VLIB_REGISTER_NODE (udp4_local_node) = { .format_trace = format_udp_rx_trace, .unformat_buffer = unformat_udp_header, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp6_local_node) = { .name = "ip6-udp-lookup", /* Takes a vector of packets. */ .vector_size = sizeof (u32), .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, + .error_counters = udp_error_counters, .n_next_nodes = UDP_LOCAL_N_NEXT, .next_nodes = { @@ -404,7 +432,6 @@ VLIB_REGISTER_NODE (udp6_local_node) = { .format_trace = format_udp_rx_trace, .unformat_buffer = unformat_udp_header, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT void @@ -492,16 +519,12 @@ u8 udp_is_valid_dst_port (udp_dst_port_t dst_port, u8 is_ip4) { udp_main_t *um = &udp_main; - u16 *n; - - if (is_ip4) - n = sparse_vec_validate (um->next_by_dst_port4, - clib_host_to_net_u16 (dst_port)); - else - n = sparse_vec_validate (um->next_by_dst_port6, - clib_host_to_net_u16 (dst_port)); - - return (n[0] != SPARSE_VEC_INVALID_INDEX && n[0] != UDP_NO_NODE_SET); + u16 *next_by_dst_port = + is_ip4 ? um->next_by_dst_port4 : um->next_by_dst_port6; + uword index = + sparse_vec_index (next_by_dst_port, clib_host_to_net_u16 (dst_port)); + return (index != SPARSE_VEC_INVALID_INDEX && + vec_elt (next_by_dst_port, index) != UDP_NO_NODE_SET); } void diff --git a/src/vnet/udp/udp_output.c b/src/vnet/udp/udp_output.c new file mode 100644 index 00000000000..22b94141365 --- /dev/null +++ b/src/vnet/udp/udp_output.c @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + */ + +#include <vnet/udp/udp.h> +#include <vnet/ip/ip4_inlines.h> +#include <vnet/ip/ip6_inlines.h> + +#define udp_node_index(node_id, is_ip4) \ + ((is_ip4) ? udp4_##node_id##_node.index : udp6_##node_id##_node.index) + +typedef enum udp_output_next_ +{ + UDP_OUTPUT_NEXT_DROP, + UDP_OUTPUT_NEXT_IP_LOOKUP, + UDP_OUTPUT_N_NEXT +} udp_output_next_t; + +#define foreach_udp4_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip4-lookup") + +#define foreach_udp6_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip6-lookup") + +static vlib_error_desc_t udp_output_error_counters[] = { +#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s }, +#include <vnet/udp/udp_error.def> +#undef udp_error +}; + +typedef struct udp_tx_trace_ +{ + udp_header_t udp_header; + udp_connection_t udp_connection; +} udp_tx_trace_t; + +static u8 * +format_udp_tx_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp_tx_trace_t *t = va_arg (*args, udp_tx_trace_t *); + udp_connection_t *uc = &t->udp_connection; + u32 indent = format_get_indent (s); + + s = format (s, "%U\n%U%U", format_udp_connection, uc, 1, format_white_space, + indent, format_udp_header, &t->udp_header, 128); + + return s; +} + +always_inline udp_connection_t * +udp_output_get_connection (vlib_buffer_t *b, u32 thread_index) +{ + if (PREDICT_FALSE (vnet_buffer (b)->tcp.flags & UDP_CONN_F_LISTEN)) + return udp_listener_get (vnet_buffer (b)->tcp.connection_index); + + return udp_connection_get (vnet_buffer (b)->tcp.connection_index, + thread_index); +} + +static void +udp46_output_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 *to_next, u32 n_bufs) +{ + udp_connection_t *uc; + udp_tx_trace_t *t; + vlib_buffer_t *b; + udp_header_t *uh; + int i; + + for (i = 0; i < n_bufs; i++) + { + b = vlib_get_buffer (vm, to_next[i]); + if (!(b->flags & VLIB_BUFFER_IS_TRACED)) + continue; + uh = vlib_buffer_get_current (b); + uc = udp_output_get_connection (b, vm->thread_index); + t = vlib_add_trace (vm, node, b, sizeof (*t)); + clib_memcpy_fast (&t->udp_header, uh, sizeof (t->udp_header)); + clib_memcpy_fast (&t->udp_connection, uc, sizeof (t->udp_connection)); + } +} + +always_inline void +udp_output_handle_packet (udp_connection_t *uc0, vlib_buffer_t *b0, + vlib_node_runtime_t *error_node, u16 *next0, + u8 is_ip4) +{ + /* If next_index is not drop use it */ + if (uc0->next_node_index) + { + *next0 = uc0->next_node_index; + vnet_buffer (b0)->tcp.next_node_opaque = uc0->next_node_opaque; + } + else + { + *next0 = UDP_OUTPUT_NEXT_IP_LOOKUP; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = uc0->c_fib_index; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = uc0->sw_if_index; +} + +always_inline uword +udp46_output_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, int is_ip4) +{ + u32 n_left_from, *from, thread_index = vm->thread_index; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u16 nexts[VLIB_FRAME_SIZE], *next; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + udp46_output_trace_frame (vm, node, from, n_left_from); + + vlib_get_buffers (vm, from, bufs, n_left_from); + b = bufs; + next = nexts; + + while (n_left_from >= 4) + { + udp_connection_t *uc0, *uc1; + + vlib_prefetch_buffer_header (b[2], STORE); + CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); + + vlib_prefetch_buffer_header (b[3], STORE); + CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); + + uc0 = udp_output_get_connection (b[0], thread_index); + uc1 = udp_output_get_connection (b[1], thread_index); + + if (PREDICT_TRUE (!uc0 + !uc1 == 0)) + { + udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4); + udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4); + } + else + { + if (uc0 != 0) + { + udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4); + } + else + { + b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION]; + next[0] = UDP_OUTPUT_NEXT_DROP; + } + if (uc1 != 0) + { + udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4); + } + else + { + b[1]->error = node->errors[UDP_ERROR_INVALID_CONNECTION]; + next[1] = UDP_OUTPUT_NEXT_DROP; + } + } + + b += 2; + next += 2; + n_left_from -= 2; + } + while (n_left_from > 0) + { + udp_connection_t *uc0; + + if (n_left_from > 1) + { + vlib_prefetch_buffer_header (b[1], STORE); + CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); + } + + uc0 = udp_output_get_connection (b[0], thread_index); + + if (PREDICT_TRUE (uc0 != 0)) + { + udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4); + } + else + { + b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION]; + next[0] = UDP_OUTPUT_NEXT_DROP; + } + + b += 1; + next += 1; + n_left_from -= 1; + } + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + vlib_node_increment_counter (vm, udp_node_index (output, is_ip4), + UDP_ERROR_PKTS_SENT, frame->n_vectors); + return frame->n_vectors; +} + +VLIB_NODE_FN (udp4_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + return udp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */); +} + +VLIB_NODE_FN (udp6_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + return udp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */); +} + +VLIB_REGISTER_NODE (udp4_output_node) = +{ + .name = "udp4-output", + .vector_size = sizeof (u32), + .n_errors = UDP_N_ERROR, + .protocol_hint = VLIB_NODE_PROTO_HINT_UDP, + .error_counters = udp_output_error_counters, + .n_next_nodes = UDP_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n, + foreach_udp4_output_next +#undef _ + }, + .format_buffer = format_udp_header, + .format_trace = format_udp_tx_trace, +}; + +VLIB_REGISTER_NODE (udp6_output_node) = +{ + .name = "udp6-output", + .vector_size = sizeof (u32), + .n_errors = UDP_N_ERROR, + .protocol_hint = VLIB_NODE_PROTO_HINT_UDP, + .error_counters = udp_output_error_counters, + .n_next_nodes = UDP_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n, + foreach_udp6_output_next +#undef _ + }, + .format_buffer = format_udp_header, + .format_trace = format_udp_tx_trace, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |