path: root/src/vnet/udp
Diffstat (limited to 'src/vnet/udp')
-rw-r--r--  src/vnet/udp/udp.api          |   2
-rw-r--r--  src/vnet/udp/udp.c            | 460
-rw-r--r--  src/vnet/udp/udp.h            | 119
-rw-r--r--  src/vnet/udp/udp_api.c        |  23
-rw-r--r--  src/vnet/udp/udp_cli.c        | 231
-rw-r--r--  src/vnet/udp/udp_encap.c      |  57
-rw-r--r--  src/vnet/udp/udp_encap.h      |   3
-rw-r--r--  src/vnet/udp/udp_encap_node.c | 138
-rw-r--r--  src/vnet/udp/udp_error.def    |   3
-rw-r--r--  src/vnet/udp/udp_inlines.h    | 111
-rw-r--r--  src/vnet/udp/udp_input.c      |  76
-rw-r--r--  src/vnet/udp/udp_local.c      | 117
-rw-r--r--  src/vnet/udp/udp_output.c     | 254
13 files changed, 1208 insertions(+), 386 deletions(-)
diff --git a/src/vnet/udp/udp.api b/src/vnet/udp/udp.api
index 02176be7c2b..6b468be461a 100644
--- a/src/vnet/udp/udp.api
+++ b/src/vnet/udp/udp.api
@@ -32,7 +32,7 @@ import "vnet/ip/ip_types.api";
* @param dst_ip - Encap destination address
* @param src_ip - Encap source address
* @param dst_port - Encap destination port
- * @param src_port - Encap source port
+ * @param src_port - Encap source port, 0 for entropy per rfc7510
* @param id - VPP assigned id; ignored in add message, set in dump
*/
typedef udp_encap
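Note: a source port of 0 selects per-packet source-port entropy as defined in RFC 7510. The sketch below mirrors the ip_udp_sport_entropy helper added to udp_inlines.h later in this diff; the function name and standalone form are illustrative only.

/* Sketch: derive an entropy source port from a flow hash (RFC 7510).
 * Forcing the two top bits of the on-wire port keeps it in the dynamic
 * range 49152-65535; the remaining bits carry flow-hash entropy that
 * downstream ECMP/LAG hashing can use. */
static inline u16
example_entropy_src_port (u32 flow_hash)
{
  u16 port = clib_host_to_net_u16 (0x03 << 14);
  port |= flow_hash & 0xffff;
  return port; /* written as-is into udp_header_t.src_port */
}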
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
index 40e0053bb96..9c1121f7cfb 100644
--- a/src/vnet/udp/udp.c
+++ b/src/vnet/udp/udp.c
@@ -23,97 +23,63 @@
udp_main_t udp_main;
static void
-udp_connection_register_port (vlib_main_t * vm, u16 lcl_port, u8 is_ip4)
+udp_connection_register_port (u16 lcl_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
u16 *n;
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- if (!pi)
- {
- udp_add_dst_port (um, lcl_port, 0, is_ip4);
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- pi->n_connections = 1;
- }
- else
- {
- pi->n_connections += 1;
- /* Do not return. The fact that the pi is valid does not mean
- * it's up to date */
- }
+ /* Setup udp protocol -> next index sparse vector mapping. Do not setup
+ * udp_dst_port_info_t as that is used to distinguish between external
+ * and transport consumed ports */
- pi->node_index = is_ip4 ? udp4_input_node.index : udp6_input_node.index;
- pi->next_index = um->local_to_input_edge[is_ip4];
-
- /* Setup udp protocol -> next index sparse vector mapping. */
if (is_ip4)
- n = sparse_vec_validate (um->next_by_dst_port4,
- clib_host_to_net_u16 (lcl_port));
+ n = sparse_vec_validate (um->next_by_dst_port4, lcl_port);
else
- n = sparse_vec_validate (um->next_by_dst_port6,
- clib_host_to_net_u16 (lcl_port));
+ n = sparse_vec_validate (um->next_by_dst_port6, lcl_port);
+
+ n[0] = um->local_to_input_edge[is_ip4];
- n[0] = pi->next_index;
+ __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED);
+}
+
+void
+udp_connection_share_port (u16 lcl_port, u8 is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED);
}
static void
udp_connection_unregister_port (u16 lcl_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
+ u16 *n;
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- if (!pi)
+ /* Needed because listeners are not tracked as local endpoints */
+ if (__atomic_sub_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED))
return;
- if (!pi->n_connections)
- {
- clib_warning ("no connections using port %u", lcl_port);
- return;
- }
-
- if (!clib_atomic_sub_fetch (&pi->n_connections, 1))
- udp_unregister_dst_port (0, lcl_port, is_ip4);
-}
-
-void
-udp_connection_share_port (u16 lcl_port, u8 is_ip4)
-{
- udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
+ if (is_ip4)
+ n = sparse_vec_validate (um->next_by_dst_port4, lcl_port);
+ else
+ n = sparse_vec_validate (um->next_by_dst_port6, lcl_port);
- /* Done without a lock but the operation is atomic. Writers to pi hash
- * table and vector should be guarded by a barrier sync */
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- clib_atomic_fetch_add_rel (&pi->n_connections, 1);
+ n[0] = UDP_NO_NODE_SET;
}
udp_connection_t *
udp_connection_alloc (u32 thread_index)
{
- udp_main_t *um = &udp_main;
+ udp_worker_t *wrk = udp_worker_get (thread_index);
udp_connection_t *uc;
- u32 will_expand = 0;
- pool_get_aligned_will_expand (um->connections[thread_index], will_expand,
- CLIB_CACHE_LINE_BYTES);
- if (PREDICT_FALSE (will_expand))
- {
- clib_spinlock_lock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- pool_get_aligned (udp_main.connections[thread_index], uc,
- CLIB_CACHE_LINE_BYTES);
- clib_spinlock_unlock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- }
- else
- {
- pool_get_aligned (um->connections[thread_index], uc,
- CLIB_CACHE_LINE_BYTES);
- }
+ pool_get_aligned_safe (wrk->connections, uc, CLIB_CACHE_LINE_BYTES);
+
clib_memset (uc, 0, sizeof (*uc));
- uc->c_c_index = uc - um->connections[thread_index];
+ uc->c_c_index = uc - wrk->connections;
uc->c_thread_index = thread_index;
uc->c_proto = TRANSPORT_PROTO_UDP;
return uc;
@@ -122,20 +88,20 @@ udp_connection_alloc (u32 thread_index)
void
udp_connection_free (udp_connection_t * uc)
{
- u32 thread_index = uc->c_thread_index;
+ udp_worker_t *wrk = udp_worker_get (uc->c_thread_index);
+
clib_spinlock_free (&uc->rx_lock);
if (CLIB_DEBUG)
clib_memset (uc, 0xFA, sizeof (*uc));
- pool_put (udp_main.connections[thread_index], uc);
+ pool_put (wrk->connections, uc);
}
static void
udp_connection_cleanup (udp_connection_t * uc)
{
- transport_endpoint_cleanup (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
- uc->c_lcl_port);
- udp_connection_unregister_port (clib_net_to_host_u16 (uc->c_lcl_port),
- uc->c_is_ip4);
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
+ uc->c_lcl_port);
+ udp_connection_unregister_port (uc->c_lcl_port, uc->c_is_ip4);
udp_connection_free (uc);
}
@@ -146,6 +112,38 @@ udp_connection_delete (udp_connection_t * uc)
udp_connection_cleanup (uc);
}
+static void
+udp_handle_cleanups (void *args)
+{
+ u32 thread_index = (u32) pointer_to_uword (args);
+ udp_connection_t *uc;
+ udp_worker_t *wrk;
+ u32 *uc_index;
+
+ wrk = udp_worker_get (thread_index);
+ vec_foreach (uc_index, wrk->pending_cleanups)
+ {
+ uc = udp_connection_get (*uc_index, thread_index);
+ udp_connection_delete (uc);
+ }
+ vec_reset_length (wrk->pending_cleanups);
+}
+
+static void
+udp_connection_program_cleanup (udp_connection_t *uc)
+{
+ uword thread_index = uc->c_thread_index;
+ udp_worker_t *wrk;
+
+ wrk = udp_worker_get (uc->c_thread_index);
+ vec_add1 (wrk->pending_cleanups, uc->c_c_index);
+
+ if (vec_len (wrk->pending_cleanups) == 1)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, udp_handle_cleanups,
+ uword_to_pointer (thread_index, void *));
+}
+
static u8
udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4)
{
@@ -153,8 +151,7 @@ udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4)
udp_dst_port_info_t *pi;
pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- return (pi && !pi->n_connections
- && udp_is_valid_dst_port (lcl_port, is_ip4));
+ return (pi && udp_is_valid_dst_port (lcl_port, is_ip4));
}
static u16
@@ -165,18 +162,15 @@ udp_default_mtu (udp_main_t * um, u8 is_ip4)
}
static u32
-udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
+udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
{
udp_main_t *um = vnet_get_udp_main ();
- vlib_main_t *vm = vlib_get_main ();
transport_endpoint_cfg_t *lcl_ext;
udp_connection_t *listener;
- u16 lcl_port_ho;
void *iface_ip;
- lcl_port_ho = clib_net_to_host_u16 (lcl->port);
-
- if (udp_connection_port_used_extern (lcl_port_ho, lcl->is_ip4))
+ if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl->port),
+ lcl->is_ip4))
{
clib_warning ("port already used");
return SESSION_E_PORTINUSE;
@@ -200,7 +194,8 @@ udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
listener->c_proto = TRANSPORT_PROTO_UDP;
listener->c_s_index = session_index;
listener->c_fib_index = lcl->fib_index;
- listener->mss = udp_default_mtu (um, listener->c_is_ip4);
+ listener->mss =
+ lcl->mss ? lcl->mss : udp_default_mtu (um, listener->c_is_ip4);
listener->flags |= UDP_CONN_F_OWNS_PORT | UDP_CONN_F_LISTEN;
lcl_ext = (transport_endpoint_cfg_t *) lcl;
if (lcl_ext->transport_flags & TRANSPORT_CFG_F_CONNECTED)
@@ -208,8 +203,10 @@ udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
else
listener->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
clib_spinlock_init (&listener->rx_lock);
+ if (!um->csum_offload)
+ listener->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
- udp_connection_register_port (vm, lcl_port_ho, lcl->is_ip4);
+ udp_connection_register_port (listener->c_lcl_port, lcl->is_ip4);
return listener->c_c_index;
}
@@ -220,8 +217,7 @@ udp_session_unbind (u32 listener_index)
udp_connection_t *listener;
listener = udp_listener_get (listener_index);
- udp_connection_unregister_port (clib_net_to_host_u16 (listener->c_lcl_port),
- listener->c_is_ip4);
+ udp_connection_unregister_port (listener->c_lcl_port, listener->c_is_ip4);
clib_spinlock_free (&listener->rx_lock);
pool_put (um->listener_pool, listener);
return 0;
@@ -236,30 +232,127 @@ udp_session_get_listener (u32 listener_index)
return &us->connection;
}
+always_inline u16
+udp_compute_checksum (vlib_main_t *vm, vlib_buffer_t *b, u8 csum_offload,
+ u8 is_ip4)
+{
+ u16 csum = 0;
+
+ if (csum_offload)
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
+ else
+ {
+ if (is_ip4)
+ csum =
+ ip4_tcp_udp_compute_checksum (vm, b, vlib_buffer_get_current (b));
+ else
+ {
+ int bogus = 0;
+ csum = ip6_tcp_udp_icmp_compute_checksum (
+ vm, b, vlib_buffer_get_current (b), &bogus);
+ }
+ }
+
+ return csum;
+}
+
+always_inline u32
+udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b,
+ u8 is_cless)
+{
+ udp_header_t *uh;
+
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ /* reuse tcp metadata for now */
+ vnet_buffer (b)->tcp.connection_index = uc->c_c_index;
+
+ if (!is_cless)
+ {
+ uh = vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port);
+
+ if (uc->c_is_ip4)
+ vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4,
+ IP_PROTOCOL_UDP, udp_csum_offload (uc),
+ 0 /* is_df */, uc->c_dscp);
+ else
+ vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6,
+ IP_PROTOCOL_UDP);
+
+ vnet_buffer (b)->tcp.flags = 0;
+ }
+ else
+ {
+ u8 *data = vlib_buffer_get_current (b);
+ session_dgram_hdr_t hdr;
+
+ hdr = *(session_dgram_hdr_t *) (data - sizeof (hdr));
+
+ /* Local port assumed to be bound, not overwriting it */
+ uh = vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port);
+
+ if (uc->c_is_ip4)
+ vlib_buffer_push_ip4_custom (vm, b, &hdr.lcl_ip.ip4, &hdr.rmt_ip.ip4,
+ IP_PROTOCOL_UDP, udp_csum_offload (uc),
+ 0 /* is_df */, uc->c_dscp);
+ else
+ vlib_buffer_push_ip6 (vm, b, &hdr.lcl_ip.ip6, &hdr.rmt_ip.ip6,
+ IP_PROTOCOL_UDP);
+
+ /* Not connected udp session. Mark buffer for custom handling in
+ * udp_output */
+ vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN;
+ }
+
+ uh->checksum =
+ udp_compute_checksum (vm, b, udp_csum_offload (uc), uc->c_is_ip4);
+
+ return 0;
+}
+
+always_inline void
+udp_push_header_batch (udp_connection_t *uc, vlib_buffer_t **bs, u32 n_bufs,
+ u8 is_cless)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ while (n_bufs >= 4)
+ {
+ vlib_prefetch_buffer_header (bs[2], STORE);
+ vlib_prefetch_buffer_header (bs[3], STORE);
+
+ udp_push_one_header (vm, uc, bs[0], is_cless);
+ udp_push_one_header (vm, uc, bs[1], is_cless);
+
+ n_bufs -= 2;
+ bs += 2;
+ }
+ while (n_bufs)
+ {
+ if (n_bufs > 1)
+ vlib_prefetch_buffer_header (bs[1], STORE);
+
+ udp_push_one_header (vm, uc, bs[0], is_cless);
+
+ n_bufs -= 1;
+ bs += 1;
+ }
+}
+
static u32
-udp_push_header (transport_connection_t * tc, vlib_buffer_t * b)
+udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs)
{
udp_connection_t *uc;
- vlib_main_t *vm = vlib_get_main ();
uc = udp_connection_from_transport (tc);
-
- vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, 1);
- if (tc->is_ip4)
- vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4,
- IP_PROTOCOL_UDP, 1 /* csum offload */ ,
- 0 /* is_df */ );
+ if (uc->flags & UDP_CONN_F_CONNECTED)
+ udp_push_header_batch (uc, bs, n_bufs, 0 /* is_cless */);
else
- vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6,
- IP_PROTOCOL_UDP);
- vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b)->sw_if_index[VLIB_TX] = uc->c_fib_index;
- b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ udp_push_header_batch (uc, bs, n_bufs, 1 /* is_cless */);
if (PREDICT_FALSE (uc->flags & UDP_CONN_F_CLOSING))
{
- if (!transport_max_tx_dequeue (&uc->connection))
- udp_connection_delete (uc);
+ if (!transport_tx_fifo_has_dgram (&uc->connection))
+ udp_connection_program_cleanup (uc);
}
return 0;
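Note: udp_push_header_batch above uses the usual vpp pattern of processing two buffers per iteration while prefetching the headers of the next two. A generic restatement of that loop shape, with an illustrative callback in place of udp_push_one_header:

/* Sketch: 2-wide loop with look-ahead prefetch of buffer headers. */
static inline void
example_batch (vlib_main_t *vm, vlib_buffer_t **bs, u32 n_bufs,
	       void (*do_one) (vlib_main_t *, vlib_buffer_t *))
{
  while (n_bufs >= 4)
    {
      /* warm up the headers of the pair after the one processed now */
      vlib_prefetch_buffer_header (bs[2], STORE);
      vlib_prefetch_buffer_header (bs[3], STORE);
      do_one (vm, bs[0]);
      do_one (vm, bs[1]);
      n_bufs -= 2;
      bs += 2;
    }
  while (n_bufs)
    {
      if (n_bufs > 1)
	vlib_prefetch_buffer_header (bs[1], STORE);
      do_one (vm, bs[0]);
      n_bufs -= 1;
      bs += 1;
    }
}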
@@ -281,11 +374,11 @@ udp_session_close (u32 connection_index, u32 thread_index)
udp_connection_t *uc;
uc = udp_connection_get (connection_index, thread_index);
- if (!uc)
+ if (!uc || (uc->flags & UDP_CONN_F_MIGRATED))
return;
- if (!transport_max_tx_dequeue (&uc->connection))
- udp_connection_delete (uc);
+ if (!transport_tx_fifo_has_dgram (&uc->connection))
+ udp_connection_program_cleanup (uc);
else
uc->flags |= UDP_CONN_F_CLOSING;
}
@@ -323,57 +416,42 @@ udp_session_send_params (transport_connection_t * tconn,
static int
udp_open_connection (transport_endpoint_cfg_t * rmt)
{
- vlib_main_t *vm = vlib_get_main ();
- u32 thread_index = vm->thread_index;
udp_main_t *um = &udp_main;
ip46_address_t lcl_addr;
udp_connection_t *uc;
+ u32 thread_index;
u16 lcl_port;
int rv;
rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_UDP, rmt, &lcl_addr,
&lcl_port);
if (rv)
- {
- if (rv != SESSION_E_PORTINUSE)
- return rv;
-
- if (udp_connection_port_used_extern (lcl_port, rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* If port in use, check if 5-tuple is also in use */
- if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip,
- lcl_port, rmt->port, TRANSPORT_PROTO_UDP,
- rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* 5-tuple is available so increase lcl endpoint refcount and proceed
- * with connection allocation */
- transport_share_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
- lcl_port);
- goto conn_alloc;
- }
+ return rv;
- if (udp_is_valid_dst_port (lcl_port, rmt->is_ip4))
+ if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
+ rmt->is_ip4))
{
/* If specific source port was requested abort */
if (rmt->peer.port)
- return SESSION_E_PORTINUSE;
+ {
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ return SESSION_E_PORTINUSE;
+ }
/* Try to find a port that's not used */
- while (udp_is_valid_dst_port (lcl_port, rmt->is_ip4))
+ while (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
+ rmt->is_ip4))
{
- lcl_port = transport_alloc_local_port (TRANSPORT_PROTO_UDP,
- &lcl_addr);
- if (lcl_port < 1)
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ lcl_port =
+ transport_alloc_local_port (TRANSPORT_PROTO_UDP, &lcl_addr, rmt);
+ if ((int) lcl_port < 1)
return SESSION_E_PORTINUSE;
}
}
-conn_alloc:
-
- udp_connection_register_port (vm, lcl_port, rmt->is_ip4);
-
/* We don't poll main thread if we have workers */
thread_index = transport_cl_thread ();
@@ -381,11 +459,14 @@ conn_alloc:
ip_copy (&uc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
ip_copy (&uc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
uc->c_rmt_port = rmt->port;
- uc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
+ uc->c_lcl_port = lcl_port;
uc->c_is_ip4 = rmt->is_ip4;
uc->c_proto = TRANSPORT_PROTO_UDP;
uc->c_fib_index = rmt->fib_index;
+ uc->c_dscp = rmt->dscp;
uc->mss = rmt->mss ? rmt->mss : udp_default_mtu (um, uc->c_is_ip4);
+ if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX)
+ uc->sw_if_index = rmt->peer.sw_if_index;
uc->flags |= UDP_CONN_F_OWNS_PORT;
if (rmt->transport_flags & TRANSPORT_CFG_F_CONNECTED)
{
@@ -396,6 +477,12 @@ conn_alloc:
clib_spinlock_init (&uc->rx_lock);
uc->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
}
+ if (!um->csum_offload)
+ uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
+ uc->next_node_index = rmt->next_node_index;
+ uc->next_node_opaque = rmt->next_node_opaque;
+
+ udp_connection_register_port (uc->c_lcl_port, rmt->is_ip4);
return uc->c_c_index;
}
@@ -445,8 +532,90 @@ format_udp_listener_session (u8 * s, va_list * args)
return format (s, "%U", format_udp_connection, uc, verbose);
}
-/* *INDENT-OFF* */
+static void
+udp_realloc_ports_sv (u16 **ports_nh_svp)
+{
+ u16 port, port_no, *ports_nh_sv, *mc;
+ u32 *ports = 0, *nh = 0, msum, i;
+ sparse_vec_header_t *h;
+ uword sv_index, *mb;
+
+ ports_nh_sv = *ports_nh_svp;
+
+ for (port = 1; port < 65535; port++)
+ {
+ port_no = clib_host_to_net_u16 (port);
+
+ sv_index = sparse_vec_index (ports_nh_sv, port_no);
+ if (sv_index != SPARSE_VEC_INVALID_INDEX)
+ {
+ vec_add1 (ports, port_no);
+ vec_add1 (nh, ports_nh_sv[sv_index]);
+ }
+ }
+
+ sparse_vec_free (ports_nh_sv);
+
+ ports_nh_sv =
+ sparse_vec_new (/* elt bytes */ sizeof (ports_nh_sv[0]),
+ /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+ vec_resize (ports_nh_sv, 65535);
+
+ for (port = 1; port < 65535; port++)
+ ports_nh_sv[port] = UDP_NO_NODE_SET;
+
+ for (i = 0; i < vec_len (ports); i++)
+ ports_nh_sv[ports[i]] = nh[i];
+
+ h = sparse_vec_header (ports_nh_sv);
+ vec_foreach (mb, h->is_member_bitmap)
+ *mb = (uword) ~0;
+
+ msum = 0;
+ vec_foreach (mc, h->member_counts)
+ {
+ *mc = msum;
+ msum += msum == 0 ? 63 : 64;
+ }
+
+ vec_free (ports);
+ vec_free (nh);
+
+ *ports_nh_svp = ports_nh_sv;
+}
+
+static clib_error_t *
+udp_enable_disable (vlib_main_t *vm, u8 is_en)
+{
+ udp_main_t *um = &udp_main;
+
+ /* Not ideal. The sparse vector used to map ports to next nodes assumes
+ * only a few ports are ever used. When udp transport is enabled this does
+ * not hold and, to make matters worse, ports are consumed in a random
+ * order.
+ *
+ * This can lead to a lot of slow updates to internal data structures
+ * which in turn can slow udp connection allocations until all ports are
+ * eventually consumed.
+ *
+ * Consequently, reallocate sparse vector, preallocate all ports and have
+ * them point to UDP_NO_NODE_SET. We could consider switching the sparse
+ * vector to a preallocated vector but that would increase memory
+ * consumption for vpp deployments that do not rely on host stack.
+ */
+
+ udp_realloc_ports_sv (&um->next_by_dst_port4);
+ udp_realloc_ports_sv (&um->next_by_dst_port6);
+
+ vec_validate (um->transport_ports_refcnt[0], 65535);
+ vec_validate (um->transport_ports_refcnt[1], 65535);
+
+ return 0;
+}
+
static const transport_proto_vft_t udp_proto = {
+ .enable = udp_enable_disable,
.start_listen = udp_session_bind,
.connect = udp_open_connection,
.stop_listen = udp_session_unbind,
@@ -467,7 +636,6 @@ static const transport_proto_vft_t udp_proto = {
.service_type = TRANSPORT_SERVICE_CL,
},
};
-/* *INDENT-ON* */
static clib_error_t *
udp_init (vlib_main_t * vm)
@@ -477,7 +645,6 @@ udp_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
u32 num_threads;
ip_protocol_info_t *pi;
- int i;
/*
* Registrations
@@ -490,28 +657,18 @@ udp_init (vlib_main_t * vm)
pi->format_header = format_udp_header;
pi->unformat_pg_edit = unformat_pg_udp_header;
- /* Register as transport with URI */
+ /* Register as transport with session layer */
transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto,
- FIB_PROTOCOL_IP4, ip4_lookup_node.index);
+ FIB_PROTOCOL_IP4, udp4_output_node.index);
transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto,
- FIB_PROTOCOL_IP6, ip6_lookup_node.index);
+ FIB_PROTOCOL_IP6, udp6_output_node.index);
/*
* Initialize data structures
*/
num_threads = 1 /* main thread */ + tm->n_threads;
- vec_validate (um->connections, num_threads - 1);
- vec_validate (um->connection_peekers, num_threads - 1);
- vec_validate (um->peekers_readers_locks, num_threads - 1);
- vec_validate (um->peekers_write_locks, num_threads - 1);
-
- if (num_threads > 1)
- for (i = 0; i < num_threads; i++)
- {
- clib_spinlock_init (&um->peekers_readers_locks[i]);
- clib_spinlock_init (&um->peekers_write_locks[i]);
- }
+ vec_validate (um->wrk, num_threads - 1);
um->local_to_input_edge[UDP_IP4] =
vlib_node_add_next (vm, udp4_local_node.index, udp4_input_node.index);
@@ -519,16 +676,15 @@ udp_init (vlib_main_t * vm)
vlib_node_add_next (vm, udp6_local_node.index, udp6_input_node.index);
um->default_mtu = 1500;
+ um->csum_offload = 1;
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (udp_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip4_lookup_init",
"ip6_lookup_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
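Note: connection deletion in udp.c is now deferred. udp_connection_program_cleanup queues the connection index on the owning worker, and only the transition from an empty to a non-empty pending vector schedules a drain RPC, so a burst of closes costs a single RPC. A condensed restatement of that idiom (the example_ name is illustrative):

/* Sketch: batch per-worker cleanups; one drain RPC per batch. */
static void
example_program_cleanup (udp_worker_t *wrk, u32 thread_index, u32 conn_index)
{
  vec_add1 (wrk->pending_cleanups, conn_index);

  /* first pending entry schedules the drain on the owning thread;
   * later entries ride along in the same batch */
  if (vec_len (wrk->pending_cleanups) == 1)
    session_send_rpc_evt_to_thread_force (
      thread_index, udp_handle_cleanups,
      uword_to_pointer ((uword) thread_index, void *));
}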
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
index f157711ba2b..8e4e87f85a8 100644
--- a/src/vnet/udp/udp.h
+++ b/src/vnet/udp/udp.h
@@ -25,6 +25,8 @@
#include <vnet/ip/ip.h>
#include <vnet/session/transport.h>
+#define UDP_NO_NODE_SET ((u16) ~0)
+
typedef enum
{
#define udp_error(f, n, s, d) UDP_ERROR_##f,
@@ -55,6 +57,24 @@ typedef enum udp_conn_flags_
#undef _
} udp_conn_flags_t;
+#define foreach_udp_cfg_flag _ (NO_CSUM_OFFLOAD, "no-csum-offload")
+
+typedef enum udp_cfg_flag_bits_
+{
+#define _(sym, str) UDP_CFG_F_##sym##_BIT,
+ foreach_udp_cfg_flag
+#undef _
+ UDP_CFG_N_FLAG_BITS
+} udp_cfg_flag_bits_e;
+
+typedef enum udp_cfg_flag_
+{
+#define _(sym, str) UDP_CFG_F_##sym = 1 << UDP_CFG_F_##sym##_BIT,
+ foreach_udp_cfg_flag
+#undef _
+ UDP_CFG_N_FLAGS
+} __clib_packed udp_cfg_flags_t;
+
typedef struct
{
/** Required for pool_get_aligned */
@@ -62,9 +82,15 @@ typedef struct
transport_connection_t connection; /**< must be first */
clib_spinlock_t rx_lock; /**< rx fifo lock */
u8 flags; /**< connection flags */
+ udp_cfg_flags_t cfg_flags; /**< configuration flags */
u16 mss; /**< connection mss */
+ u32 sw_if_index; /**< connection sw_if_index */
+ u32 next_node_index; /**< Can be used to control next node in output */
+ u32 next_node_opaque; /**< Opaque to pass to next node */
} udp_connection_t;
+#define udp_csum_offload(uc) (!((uc)->cfg_flags & UDP_CFG_F_NO_CSUM_OFFLOAD))
+
typedef struct
{
/* Name (a c string). */
@@ -79,9 +105,6 @@ typedef struct
/* Next index for this type. */
u32 next_index;
- /* UDP sessions refcount (not tunnels) */
- u32 n_connections;
-
/* Parser for packet generator edits for this protocol */
unformat_function_t *unformat_pg_edit;
} udp_dst_port_info_t;
@@ -93,6 +116,12 @@ typedef enum
N_UDP_AF,
} udp_af_t;
+typedef struct udp_worker_
+{
+ udp_connection_t *connections;
+ u32 *pending_cleanups;
+} udp_worker_t;
+
typedef struct
{
udp_dst_port_info_t *dst_port_infos[N_UDP_AF];
@@ -112,16 +141,19 @@ typedef struct
u32 local_to_input_edge[N_UDP_AF];
/*
- * Per-worker thread udp connection pools used with session layer
+ * UDP transport layer per-thread context
*/
- udp_connection_t **connections;
- u32 *connection_peekers;
- clib_spinlock_t *peekers_readers_locks;
- clib_spinlock_t *peekers_write_locks;
+
+ udp_worker_t *wrk;
udp_connection_t *listener_pool;
+ /* Refcounts for ports consumed by udp transports to handle
+ * both passive and active opens using the same port */
+ u16 *transport_ports_refcnt[N_UDP_AF];
+
u16 default_mtu;
u16 msg_id_base;
+ u8 csum_offload;
u8 icmp_send_unreachable_disabled;
} udp_main_t;
@@ -131,16 +163,26 @@ extern vlib_node_registration_t udp4_input_node;
extern vlib_node_registration_t udp6_input_node;
extern vlib_node_registration_t udp4_local_node;
extern vlib_node_registration_t udp6_local_node;
+extern vlib_node_registration_t udp4_output_node;
+extern vlib_node_registration_t udp6_output_node;
void udp_add_dst_port (udp_main_t * um, udp_dst_port_t dst_port,
char *dst_port_name, u8 is_ip4);
+always_inline udp_worker_t *
+udp_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (udp_main.wrk, thread_index);
+}
+
always_inline udp_connection_t *
udp_connection_get (u32 conn_index, u32 thread_index)
{
- if (pool_is_free_index (udp_main.connections[thread_index], conn_index))
+ udp_worker_t *wrk = udp_worker_get (thread_index);
+
+ if (pool_is_free_index (wrk->connections, conn_index))
return 0;
- return pool_elt_at_index (udp_main.connections[thread_index], conn_index);
+ return pool_elt_at_index (wrk->connections, conn_index);
}
always_inline udp_connection_t *
@@ -161,65 +203,24 @@ udp_connection_from_transport (transport_connection_t * tc)
return ((udp_connection_t *) tc);
}
-always_inline u32
-udp_connection_index (udp_connection_t * uc)
-{
- return (uc - udp_main.connections[uc->c_thread_index]);
-}
-
void udp_connection_free (udp_connection_t * uc);
udp_connection_t *udp_connection_alloc (u32 thread_index);
-
-/**
- * Acquires a lock that blocks a connection pool from expanding.
- */
-always_inline void
-udp_pool_add_peeker (u32 thread_index)
-{
- if (thread_index != vlib_get_thread_index ())
- return;
- clib_spinlock_lock_if_init (&udp_main.peekers_readers_locks[thread_index]);
- udp_main.connection_peekers[thread_index] += 1;
- if (udp_main.connection_peekers[thread_index] == 1)
- clib_spinlock_lock_if_init (&udp_main.peekers_write_locks[thread_index]);
- clib_spinlock_unlock_if_init (&udp_main.peekers_readers_locks
- [thread_index]);
-}
-
-always_inline void
-udp_pool_remove_peeker (u32 thread_index)
-{
- if (thread_index != vlib_get_thread_index ())
- return;
- ASSERT (udp_main.connection_peekers[thread_index] > 0);
- clib_spinlock_lock_if_init (&udp_main.peekers_readers_locks[thread_index]);
- udp_main.connection_peekers[thread_index] -= 1;
- if (udp_main.connection_peekers[thread_index] == 0)
- clib_spinlock_unlock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- clib_spinlock_unlock_if_init (&udp_main.peekers_readers_locks
- [thread_index]);
-}
+void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
always_inline udp_connection_t *
udp_connection_clone_safe (u32 connection_index, u32 thread_index)
{
+ u32 current_thread_index = vlib_get_thread_index (), new_index;
udp_connection_t *old_c, *new_c;
- u32 current_thread_index = vlib_get_thread_index ();
- new_c = udp_connection_alloc (current_thread_index);
- /* If during the memcpy pool is reallocated AND the memory allocator
- * decides to give the old chunk of memory to somebody in a hurry to
- * scribble something on it, we have a problem. So add this thread as
- * a session pool peeker.
- */
- udp_pool_add_peeker (thread_index);
- old_c = udp_main.connections[thread_index] + connection_index;
+ new_c = udp_connection_alloc (current_thread_index);
+ new_index = new_c->c_c_index;
+ /* Connection pool always realloced with barrier */
+ old_c = udp_main.wrk[thread_index].connections + connection_index;
clib_memcpy_fast (new_c, old_c, sizeof (*new_c));
old_c->flags |= UDP_CONN_F_MIGRATED;
- udp_pool_remove_peeker (thread_index);
new_c->c_thread_index = current_thread_index;
- new_c->c_c_index = udp_connection_index (new_c);
+ new_c->c_c_index = new_index;
new_c->c_fib_index = old_c->c_fib_index;
/* Assume cloned sessions don't need lock */
new_c->rx_lock = 0;
@@ -239,8 +240,6 @@ format_function_t format_udp_connection;
unformat_function_t unformat_udp_header;
unformat_function_t unformat_udp_port;
-void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
-
void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
/*
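Note: the new configuration flags in udp.h are generated from a single X-macro. For reference, expanding foreach_udp_cfg_flag with its one current entry yields the following (expansion shown for illustration only; resulting values in comments):

/* Illustrative expansion of the foreach_udp_cfg_flag X-macro. */
typedef enum udp_cfg_flag_bits_
{
  UDP_CFG_F_NO_CSUM_OFFLOAD_BIT, /* = 0 */
  UDP_CFG_N_FLAG_BITS		 /* = 1 */
} udp_cfg_flag_bits_e;

typedef enum udp_cfg_flag_
{
  UDP_CFG_F_NO_CSUM_OFFLOAD = 1 << UDP_CFG_F_NO_CSUM_OFFLOAD_BIT, /* = 0x1 */
  UDP_CFG_N_FLAGS						  /* = 0x2 */
} __clib_packed udp_cfg_flags_t;

/* The same macro builds the string table ("no-csum-offload") consumed by
 * format_udp_cfg_flags in udp_cli.c. */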
diff --git a/src/vnet/udp/udp_api.c b/src/vnet/udp/udp_api.c
index 0f2d014946f..1f952aa36ea 100644
--- a/src/vnet/udp/udp_api.c
+++ b/src/vnet/udp/udp_api.c
@@ -86,12 +86,10 @@ vl_api_udp_encap_dump_t_handler (vl_api_udp_encap_dump_t *mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (ue, udp_encap_pool)
{
send_udp_encap_details(ue, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -99,6 +97,7 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp)
{
vl_api_udp_encap_add_reply_t *rmp;
ip46_address_t src_ip, dst_ip;
+ udp_encap_fixup_flags_t flags;
u32 fib_index, table_id;
fib_protocol_t fproto;
ip46_type_t itype;
@@ -119,19 +118,19 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp)
goto done;
}
- uei = udp_encap_add_and_lock (fproto, fib_index,
- &src_ip, &dst_ip,
+ flags = UDP_ENCAP_FIXUP_NONE;
+ if (mp->udp_encap.src_port == 0)
+ flags |= UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY;
+
+ uei = udp_encap_add_and_lock (fproto, fib_index, &src_ip, &dst_ip,
ntohs (mp->udp_encap.src_port),
- ntohs (mp->udp_encap.dst_port),
- UDP_ENCAP_FIXUP_NONE);
+ ntohs (mp->udp_encap.dst_port), flags);
done:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_UDP_ENCAP_ADD_REPLY,
({
rmp->id = ntohl (uei);
}));
- /* *INDENT-ON* */
}
@@ -189,11 +188,19 @@ vl_api_udp_decap_add_del_t_handler (vl_api_udp_decap_add_del_t *mp)
static clib_error_t *
udp_api_hookup (vlib_main_t * vm)
{
+ api_main_t *am = vlibapi_get_main ();
+
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark these APIs as mp safe */
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_ADD, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DEL, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DUMP,
+ 1);
+
return 0;
}
diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c
index 97760f4c4f8..6c8992cd0de 100644
--- a/src/vnet/udp/udp_cli.c
+++ b/src/vnet/udp/udp_cli.c
@@ -13,6 +13,9 @@
* limitations under the License.
*/
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/format_table.h>
#include <vnet/udp/udp.h>
#include <vnet/session/session_types.h>
@@ -35,6 +38,33 @@ format_udp_connection_id (u8 * s, va_list * args)
return s;
}
+static const char *udp_cfg_flags_str[] = {
+#define _(sym, str) str,
+ foreach_udp_cfg_flag
+#undef _
+};
+
+static u8 *
+format_udp_cfg_flags (u8 *s, va_list *args)
+{
+ udp_connection_t *tc = va_arg (*args, udp_connection_t *);
+ int i, last = -1;
+
+ for (i = 0; i < UDP_CFG_N_FLAG_BITS; i++)
+ if (tc->cfg_flags & (1 << i))
+ last = i;
+ if (last >= 0)
+ s = format (s, " cfg: ");
+ for (i = 0; i < last; i++)
+ {
+ if (tc->cfg_flags & (1 << i))
+ s = format (s, "%s, ", udp_cfg_flags_str[i]);
+ }
+ if (last >= 0)
+ s = format (s, "%s", udp_cfg_flags_str[last]);
+ return s;
+}
+
static const char *udp_connection_flags_str[] = {
#define _(sym, str) str,
foreach_udp_connection_flag
@@ -64,11 +94,15 @@ static u8 *
format_udp_vars (u8 * s, va_list * args)
{
udp_connection_t *uc = va_arg (*args, udp_connection_t *);
- s = format (s, " index %u flags: %U", uc->c_c_index,
- format_udp_connection_flags, uc);
+ s = format (s, " index %u%U flags: %U\n", uc->c_c_index,
+ format_udp_cfg_flags, uc, format_udp_connection_flags, uc);
+ s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index,
+	      uc->next_node_index, uc->next_node_opaque);
if (!(uc->flags & UDP_CONN_F_LISTEN))
+ s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss);
+ else
s = format (s, "\n");
+
return s;
}
@@ -102,6 +136,8 @@ udp_config_fn (vlib_main_t * vm, unformat_input_t * input)
um->default_mtu = tmp;
else if (unformat (input, "icmp-unreachable-disabled"))
um->icmp_send_unreachable_disabled = 1;
+ else if (unformat (input, "no-csum-offload"))
+ um->csum_offload = 0;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
@@ -151,7 +187,7 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
u8 *s = NULL;
vec_foreach (port_info, um->dst_port_infos[UDP_IP6])
{
- if (udp_is_valid_dst_port (port_info->dst_port, 01))
+ if (udp_is_valid_dst_port (port_info->dst_port, 0))
{
s = format (s, (!s) ? "%d" : ", %d", port_info->dst_port);
}
@@ -162,14 +198,199 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
return (error);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
{
.path = "show udp punt",
.short_help = "show udp punt [ipv4|ipv6]",
.function = show_udp_punt_fn,
};
-/* *INDENT-ON* */
+
+static void
+table_format_udp_port_ (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c,
+ int port, int bind, int is_ip4)
+{
+ const udp_dst_port_info_t *pi;
+
+ if (bind && !udp_is_valid_dst_port (port, is_ip4))
+ return;
+
+ pi = udp_get_dst_port_info (um, port, is_ip4);
+ if (!pi)
+ return;
+
+ table_format_cell (t, *c, 0, "%d", pi->dst_port);
+ table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6");
+ table_format_cell (t, *c, 2, ~0 == pi->node_index ? "none" : "%U",
+ format_vlib_node_name, vm, pi->node_index);
+ table_format_cell (t, *c, 3, "%s", pi->name);
+
+ (*c)++;
+}
+
+static void
+table_format_udp_port (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c,
+ int port, int bind, int ip4, int ip6)
+{
+ if (ip4)
+ table_format_udp_port_ (vm, um, t, c, port, bind, 1 /* is_ip4 */);
+ if (ip6)
+ table_format_udp_port_ (vm, um, t, c, port, bind, 0 /* is_ip4 */);
+}
+
+static clib_error_t *
+show_udp_ports (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ table_t table = {}, *t = &table;
+ udp_main_t *um = &udp_main;
+ clib_error_t *err = 0;
+ int ip4 = 1, ip6 = 1;
+ int port = -1;
+ int bind = 1;
+ int c = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4"))
+ ip6 = 0;
+ else if (unformat (input, "ip6"))
+ ip4 = 0;
+ else if (unformat (input, "bind"))
+ bind = 1;
+ else if (unformat (input, "all"))
+ bind = 0;
+ else if (unformat (input, "%d", &port))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ table_add_header_col (t, 4, "port", "proto", "node", "desc");
+
+ if (port > 65535)
+ {
+ err = clib_error_return (0, "wrong port %d", port);
+ goto out;
+ }
+ else if (port < 0)
+ {
+ for (port = 0; port < 65536; port++)
+ table_format_udp_port (vm, um, t, &c, port, bind, ip4, ip6);
+ }
+ else
+ {
+ table_format_udp_port (vm, um, t, &c, port, bind, ip4, ip6);
+ }
+
+ vlib_cli_output (vm, "%U", format_table, t);
+
+out:
+ table_free (t);
+ return err;
+}
+
+VLIB_CLI_COMMAND (show_udp_ports_cmd, static) = {
+ .path = "show udp ports",
+ .function = show_udp_ports,
+ .short_help = "show udp ports [ip4|ip6] [bind|all|<port>]",
+ .is_mp_safe = 1,
+};
+
+static void
+table_format_udp_transport_port_ (vlib_main_t *vm, table_t *t, int *c,
+ int port, int is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ u32 refcnt;
+ u16 port_ne;
+
+ port_ne = clib_host_to_net_u16 (port);
+ refcnt = um->transport_ports_refcnt[is_ip4][port_ne];
+ if (!refcnt)
+ return;
+
+ if (!udp_is_valid_dst_port (port, is_ip4))
+ {
+ clib_warning ("Port %u is not registered refcnt %u!", port, refcnt);
+ return;
+ }
+
+ table_format_cell (t, *c, 0, "%d", port);
+ table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6");
+ table_format_cell (t, *c, 2, "%d", refcnt);
+
+ (*c)++;
+}
+
+static void
+table_format_udp_transport_port (vlib_main_t *vm, table_t *t, int *c, int port,
+ int ipv)
+{
+ if (ipv == -1 || ipv == 0)
+ table_format_udp_transport_port_ (vm, t, c, port, 1 /* is_ip4 */);
+ if (ipv == -1 || ipv == 1)
+ table_format_udp_transport_port_ (vm, t, c, port, 0 /* is_ip4 */);
+}
+
+static clib_error_t *
+show_udp_transport_ports (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ table_t table = {}, *t = &table;
+ int ipv = -1, port = -1, c = 0;
+ clib_error_t *err = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4"))
+ ipv = 0;
+ else if (unformat (input, "ip6"))
+ ipv = 1;
+ else if (unformat (input, "%d", &port))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ table_add_header_col (t, 3, "port", "proto", "ref-cnt");
+
+ if (port > 65535)
+ {
+ err = clib_error_return (0, "wrong port %d", port);
+ goto out;
+ }
+
+ if (port < 0)
+ {
+ for (port = 0; port < 65536; port++)
+ table_format_udp_transport_port (vm, t, &c, port, ipv);
+ }
+ else
+ {
+ table_format_udp_transport_port (vm, t, &c, port, ipv);
+ }
+
+ vlib_cli_output (vm, "%U\n", format_table, t);
+
+out:
+ table_free (t);
+ return err;
+}
+
+VLIB_CLI_COMMAND (show_udp_transport_ports_cmd, static) = {
+ .path = "show udp transport ports",
+ .function = show_udp_transport_ports,
+ .short_help = "show udp transport ports [ip4|ip6] [<port>]",
+ .is_mp_safe = 1,
+};
/*
* fd.io coding-style-patch-verification: ON
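Note: example invocations of the two CLIs added above, following their short help; the port number is illustrative and the output depends on what is currently registered:

vpp# show udp ports ip4 bind
vpp# show udp ports 4789
vpp# show udp transport ports ip4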
diff --git a/src/vnet/udp/udp_encap.c b/src/vnet/udp/udp_encap.c
index cb93adb8d39..e4e5271da63 100644
--- a/src/vnet/udp/udp_encap.c
+++ b/src/vnet/udp/udp_encap.c
@@ -47,8 +47,7 @@ static void
udp_encap_restack (udp_encap_t * ue)
{
dpo_stack (udp_encap_dpo_types[ue->ue_ip_proto],
- fib_proto_to_dpo (ue->ue_ip_proto),
- &ue->ue_dpo,
+ fib_proto_to_dpo (ue->ue_ip_proto), &ue->ue_dpo,
fib_entry_contribute_ip_forwarding (ue->ue_fib_entry_index));
}
@@ -196,6 +195,20 @@ udp_encap_dpo_unlock (dpo_id_t * dpo)
fib_node_unlock (&ue->ue_fib_node);
}
+u8 *
+format_udp_encap_fixup_flags (u8 *s, va_list *args)
+{
+ udp_encap_fixup_flags_t flags = va_arg (*args, udp_encap_fixup_flags_t);
+
+ if (flags == UDP_ENCAP_FIXUP_NONE)
+ return format (s, "none");
+
+ if (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY)
+ s = format (s, "%s", "src-port-is-entropy");
+
+ return (s);
+}
+
static u8 *
format_udp_encap_i (u8 * s, va_list * args)
{
@@ -211,23 +224,21 @@ format_udp_encap_i (u8 * s, va_list * args)
s = format (s, "udp-encap:[%d]: ip-fib-index:%d ", uei, ue->ue_fib_index);
if (FIB_PROTOCOL_IP4 == ue->ue_ip_proto)
{
- s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d]",
- format_ip4_address,
- &ue->ue_hdrs.ip4.ue_ip4.src_address,
- format_ip4_address,
- &ue->ue_hdrs.ip4.ue_ip4.dst_address,
+ s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d] flags:%U",
+ format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.src_address,
+ format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.dst_address,
clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.src_port),
- clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port));
+ clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port),
+ format_udp_encap_fixup_flags, ue->ue_flags);
}
else
{
- s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d]",
- format_ip6_address,
- &ue->ue_hdrs.ip6.ue_ip6.src_address,
- format_ip6_address,
- &ue->ue_hdrs.ip6.ue_ip6.dst_address,
+ s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d] flags:%U",
+ format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.src_address,
+ format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.dst_address,
clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.src_port),
- clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port));
+ clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port),
+ format_udp_encap_fixup_flags, ue->ue_flags);
}
vlib_get_combined_counter (&(udp_encap_counters), uei, &to);
s = format (s, " to:[%Ld:%Ld]]", to.packets, to.bytes);
@@ -325,12 +336,12 @@ udp_encap_fib_last_lock_gone (fib_node_t * node)
}
const static char *const udp4_encap_ip4_nodes[] = {
- "udp4-encap",
+ "udp4o4-encap",
NULL,
};
const static char *const udp4_encap_ip6_nodes[] = {
- "udp4-encap",
+ "udp6o4-encap",
NULL,
};
@@ -345,12 +356,12 @@ const static char *const udp4_encap_bier_nodes[] = {
};
const static char *const udp6_encap_ip4_nodes[] = {
- "udp6-encap",
+ "udp4o6-encap",
NULL,
};
const static char *const udp6_encap_ip6_nodes[] = {
- "udp6-encap",
+ "udp6o6-encap",
NULL,
};
@@ -507,13 +518,11 @@ udp_encap_walk (udp_encap_walk_cb_t cb, void *ctx)
{
index_t uei;
- /* *INDENT-OFF* */
pool_foreach_index (uei, udp_encap_pool)
{
if (WALK_STOP == cb(uei, ctx))
break;
}
- /* *INDENT-ON* */
}
clib_error_t *
@@ -536,12 +545,10 @@ udp_encap_show (vlib_main_t * vm,
if (INDEX_INVALID == uei)
{
- /* *INDENT-OFF* */
pool_foreach_index (uei, udp_encap_pool)
{
vlib_cli_output(vm, "%U", format_udp_encap, uei, 0);
}
- /* *INDENT-ON* */
}
else
{
@@ -551,20 +558,20 @@ udp_encap_show (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (udp_encap_add_command, static) = {
.path = "udp encap",
- .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] <dst-port> [src-port-is-entropy] [table-id <table>]",
+ .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] "
+ "<dst-port> [src-port-is-entropy] [table-id <table>]",
.function = udp_encap_cli,
.is_mp_safe = 1,
};
+
VLIB_CLI_COMMAND (udp_encap_show_command, static) = {
.path = "show udp encap",
.short_help = "show udp encap [ID]",
.function = udp_encap_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
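Note: with the entropy flag now printed by format_udp_encap_fixup_flags, a tunnel that uses it can be created and inspected from the CLI per the short help above; the addresses and the MPLS-in-UDP port 6635 are illustrative:

vpp# udp encap add id 0 192.0.2.1 192.0.2.2 6635 src-port-is-entropy
vpp# show udp encap 0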
diff --git a/src/vnet/udp/udp_encap.h b/src/vnet/udp/udp_encap.h
index b096e0f5c09..c8b42ffa92c 100644
--- a/src/vnet/udp/udp_encap.h
+++ b/src/vnet/udp/udp_encap.h
@@ -85,7 +85,7 @@ typedef struct udp_encap_t_
/**
* The second cacheline contains control-plane data
*/
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
/**
* linkage into the FIB graph
@@ -115,6 +115,7 @@ extern index_t udp_encap_add_and_lock (fib_protocol_t proto,
extern void udp_encap_lock (index_t uei);
extern void udp_encap_unlock (index_t uei);
extern u8 *format_udp_encap (u8 * s, va_list * args);
+extern u8 *format_udp_encap_fixup_flags (u8 *s, va_list *args);
extern void udp_encap_contribute_forwarding (index_t uei,
dpo_proto_t proto,
dpo_id_t * dpo);
diff --git a/src/vnet/udp/udp_encap_node.c b/src/vnet/udp/udp_encap_node.c
index 5b9fc0bf34b..a86614f5475 100644
--- a/src/vnet/udp/udp_encap_node.c
+++ b/src/vnet/udp/udp_encap_node.c
@@ -20,12 +20,16 @@ typedef struct udp4_encap_trace_t_
{
udp_header_t udp;
ip4_header_t ip;
+ u32 flow_hash;
+ udp_encap_fixup_flags_t flags;
} udp4_encap_trace_t;
typedef struct udp6_encap_trace_t_
{
udp_header_t udp;
ip6_header_t ip;
+ u32 flow_hash;
+ udp_encap_fixup_flags_t flags;
} udp6_encap_trace_t;
extern vlib_combined_counter_main_t udp_encap_counters;
@@ -35,13 +39,16 @@ format_udp4_encap_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ u32 indent = format_get_indent (s);
udp4_encap_trace_t *t;
t = va_arg (*args, udp4_encap_trace_t *);
- s = format (s, "%U\n %U",
- format_ip4_header, &t->ip, sizeof (t->ip),
- format_udp_header, &t->udp, sizeof (t->udp));
+ s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U",
+ format_udp_encap_fixup_flags, t->flags, t->flow_hash,
+ format_white_space, indent, format_ip4_header, &t->ip,
+ sizeof (t->ip), format_white_space, indent, format_udp_header,
+ &t->udp, sizeof (t->udp));
return (s);
}
@@ -50,20 +57,23 @@ format_udp6_encap_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ u32 indent = format_get_indent (s);
udp6_encap_trace_t *t;
t = va_arg (*args, udp6_encap_trace_t *);
- s = format (s, "%U\n %U",
- format_ip6_header, &t->ip, sizeof (t->ip),
- format_udp_header, &t->udp, sizeof (t->udp));
+ s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U",
+ format_udp_encap_fixup_flags, t->flags, t->flow_hash,
+ format_white_space, indent, format_ip6_header, &t->ip,
+ sizeof (t->ip), format_white_space, indent, format_udp_header,
+ &t->udp, sizeof (t->udp));
return (s);
}
always_inline uword
-udp_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, int is_encap_v6)
+udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, ip_address_family_t encap_family,
+ ip_address_family_t payload_family)
{
vlib_combined_counter_main_t *cm = &udp_encap_counters;
u32 *from = vlib_frame_vector_args (frame);
@@ -121,18 +131,22 @@ udp_encap_inline (vlib_main_t * vm,
ue1 = udp_encap_get (uei1);
/* Paint */
- if (is_encap_v6)
+ if (encap_family == AF_IP6)
{
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip6_header_t);
- ip_udp_encap_two (vm, b0, b1, (u8 *) & ue0->ue_hdrs,
- (u8 *) & ue1->ue_hdrs, n_bytes, 0);
+ ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs,
+ (u8 *) &ue1->ue_hdrs, n_bytes, encap_family,
+ payload_family, ue0->ue_flags, ue1->ue_flags);
+
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
udp6_encap_trace_t *tr =
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip6.ue_udp;
tr->ip = ue0->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -140,6 +154,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->udp = ue1->ue_hdrs.ip6.ue_udp;
tr->ip = ue1->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue1->ue_flags;
+ tr->flow_hash = vnet_buffer (b1)->ip.flow_hash;
}
}
else
@@ -147,9 +163,9 @@ udp_encap_inline (vlib_main_t * vm,
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip4_header_t);
- ip_udp_encap_two (vm, b0, b1,
- (u8 *) & ue0->ue_hdrs,
- (u8 *) & ue1->ue_hdrs, n_bytes, 1);
+ ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs,
+ (u8 *) &ue1->ue_hdrs, n_bytes, encap_family,
+ payload_family, ue0->ue_flags, ue1->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -157,6 +173,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip4.ue_udp;
tr->ip = ue0->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -164,6 +182,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->udp = ue1->ue_hdrs.ip4.ue_udp;
tr->ip = ue1->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue1->ue_flags;
+ tr->flow_hash = vnet_buffer (b1)->ip.flow_hash;
}
}
@@ -202,12 +222,12 @@ udp_encap_inline (vlib_main_t * vm,
b0));
/* Paint */
- if (is_encap_v6)
+ if (encap_family == AF_IP6)
{
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip6_header_t);
- ip_udp_encap_one (vm, b0, (u8 *) & ue0->ue_hdrs.ip6, n_bytes,
- 0);
+ ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip6, n_bytes,
+ encap_family, payload_family, ue0->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -215,6 +235,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip6.ue_udp;
tr->ip = ue0->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
}
else
@@ -222,8 +244,8 @@ udp_encap_inline (vlib_main_t * vm,
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip4_header_t);
- ip_udp_encap_one (vm, b0, (u8 *) & ue0->ue_hdrs.ip4, n_bytes,
- 1);
+ ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip4, n_bytes,
+ encap_family, payload_family, ue0->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -231,6 +253,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip4.ue_udp;
tr->ip = ue0->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
}
@@ -248,39 +272,87 @@ udp_encap_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-VLIB_NODE_FN (udp4_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (udp4o4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, AF_IP4);
+}
+
+VLIB_NODE_FN (udp6o4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, AF_IP6);
+}
+
+VLIB_NODE_FN (udp4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, N_AF);
+}
+
+VLIB_NODE_FN (udp6o6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return udp_encap_inline (vm, node, frame, 0);
+ return udp_encap_inline (vm, node, frame, AF_IP6, AF_IP6);
}
-VLIB_NODE_FN (udp6_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (udp4o6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return udp_encap_inline (vm, node, frame, 1);
+ return udp_encap_inline (vm, node, frame, AF_IP6, AF_IP4);
}
-/* *INDENT-OFF* */
+VLIB_NODE_FN (udp6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP6, N_AF);
+}
+
+VLIB_REGISTER_NODE (udp4o4_encap_node) = {
+ .name = "udp4o4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+};
+
+VLIB_REGISTER_NODE (udp6o4_encap_node) = {
+ .name = "udp6o4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+ .sibling_of = "udp4o4-encap",
+};
+
VLIB_REGISTER_NODE (udp4_encap_node) = {
.name = "udp4-encap",
.vector_size = sizeof (u32),
-
.format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+ .sibling_of = "udp4o4-encap",
+};
+VLIB_REGISTER_NODE (udp6o6_encap_node) = {
+ .name = "udp6o6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp6_encap_trace,
+ .n_next_nodes = 0,
+};
+
+VLIB_REGISTER_NODE (udp4o6_encap_node) = {
+ .name = "udp4o6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp6_encap_trace,
.n_next_nodes = 0,
+ .sibling_of = "udp6o6-encap",
};
VLIB_REGISTER_NODE (udp6_encap_node) = {
.name = "udp6-encap",
.vector_size = sizeof (u32),
-
.format_trace = format_udp6_encap_trace,
-
.n_next_nodes = 0,
+ .sibling_of = "udp6o6-encap",
};
-/* *INDENT-ON* */
/*
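Note: the encap node is now specialized per (encap family, payload family) pair so that payload checksums and the entropy flow hash are only computed when the payload type is known. The mapping implied by the node functions and registrations above, summarized for reference:

/* node           encap  payload  payload csum / flow-hash fixup
 * udp4o4-encap   ip4    ip4      yes
 * udp6o4-encap   ip4    ip6      yes
 * udp4-encap     ip4    unknown  no (payload_family == N_AF)
 * udp4o6-encap   ip6    ip4      yes
 * udp6o6-encap   ip6    ip6      yes
 * udp6-encap     ip6    unknown  no
 */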
diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def
index 178d5c96b2c..ef19970ce72 100644
--- a/src/vnet/udp/udp_error.def
+++ b/src/vnet/udp/udp_error.def
@@ -21,7 +21,10 @@ udp_error (LENGTH_ERROR, length_error, ERROR, "Packets with length errors")
udp_error (PUNT, punt, ERROR, "No listener punt")
udp_error (ENQUEUED, enqueued, INFO, "Packets enqueued")
udp_error (FIFO_FULL, fifo_full, ERROR, "Fifo full")
+udp_error (FIFO_NOMEM, fifo_nomem, ERROR, "Fifo no mem")
udp_error (NOT_READY, not_ready, ERROR, "Connection not ready")
udp_error (ACCEPT, accept, INFO, "Accepted session")
udp_error (CREATE_SESSION, create_session, ERROR, "Failed to create session")
udp_error (MQ_FULL, mq_full, ERROR, "Application msg queue full")
+udp_error (INVALID_CONNECTION, invalid_connection, ERROR, "Invalid connection")
+udp_error (PKTS_SENT, pkts_sent, INFO, "Packets sent")
diff --git a/src/vnet/udp/udp_inlines.h b/src/vnet/udp/udp_inlines.h
index e4eb0c88e83..ceec0b191b1 100644
--- a/src/vnet/udp/udp_inlines.h
+++ b/src/vnet/udp/udp_inlines.h
@@ -21,9 +21,12 @@
#include <vnet/ip/ip6.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/interface_output.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+#include <vnet/udp/udp_encap.h>
always_inline void *
-vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum)
+vlib_buffer_push_udp (vlib_buffer_t *b, u16 sp, u16 dp)
{
udp_header_t *uh;
u16 udp_len = sizeof (udp_header_t) + b->current_length;
@@ -35,15 +38,44 @@ vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum)
uh->dst_port = dp;
uh->checksum = 0;
uh->length = clib_host_to_net_u16 (udp_len);
- if (offload_csum)
- vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
vnet_buffer (b)->l4_hdr_offset = (u8 *) uh - b->data;
b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
return uh;
}
+/*
+ * Encode udp source port entropy value per
+ * https://datatracker.ietf.org/doc/html/rfc7510#section-3
+ */
+always_inline u16
+ip_udp_sport_entropy (vlib_buffer_t *b0)
+{
+ u16 port = clib_host_to_net_u16 (0x03 << 14);
+ port |= vnet_buffer (b0)->ip.flow_hash & 0xffff;
+ return port;
+}
+
+always_inline u32
+ip_udp_compute_flow_hash (vlib_buffer_t *b0, u8 is_ip4)
+{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+
+ if (is_ip4)
+ {
+ ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ return ip4_compute_flow_hash (ip4, IP_FLOW_HASH_DEFAULT);
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ return ip6_compute_flow_hash (ip6, IP_FLOW_HASH_DEFAULT);
+ }
+}
+
always_inline void
-ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
+ip_udp_fixup_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 is_ip4,
+ u8 sport_entropy)
{
u16 new_l0;
udp_header_t *udp0;
@@ -71,6 +103,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
- sizeof (*ip0));
udp0->length = new_l0;
+
+ if (sport_entropy)
+ udp0->src_port = ip_udp_sport_entropy (b0);
}
else
{
@@ -87,6 +122,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
udp0 = (udp_header_t *) (ip0 + 1);
udp0->length = new_l0;
+ if (sport_entropy)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+
udp0->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
ASSERT (bogus0 == 0);
@@ -97,14 +135,27 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
}
always_inline void
-ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
- u8 is_ip4)
+ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len,
+ ip_address_family_t encap_family,
+ ip_address_family_t payload_family,
+ udp_encap_fixup_flags_t flags)
{
- vnet_calc_checksums_inline (vm, b0, is_ip4, !is_ip4);
+ u8 sport_entropy = (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
+
+ if (payload_family < N_AF)
+ {
+ vnet_calc_checksums_inline (vm, b0, payload_family == AF_IP4,
+ payload_family == AF_IP6);
+
+ /* Calculate flow hash to be used for entropy */
+ if (sport_entropy && 0 == vnet_buffer (b0)->ip.flow_hash)
+ vnet_buffer (b0)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b0, payload_family == AF_IP4);
+ }
vlib_buffer_advance (b0, -ec_len);
- if (is_ip4)
+ if (encap_family == AF_IP4)
{
ip4_header_t *ip0;
@@ -112,7 +163,7 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
/* Apply the encap string. */
clib_memcpy_fast (ip0, ec0, ec_len);
- ip_udp_fixup_one (vm, b0, 1);
+ ip_udp_fixup_one (vm, b0, 1, sport_entropy);
}
else
{
@@ -122,26 +173,42 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
/* Apply the encap string. */
clib_memcpy_fast (ip0, ec0, ec_len);
- ip_udp_fixup_one (vm, b0, 0);
+ ip_udp_fixup_one (vm, b0, 0, sport_entropy);
}
}
always_inline void
-ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
- u8 * ec0, u8 * ec1, word ec_len, u8 is_v4)
+ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1,
+ u8 *ec0, u8 *ec1, word ec_len,
+ ip_address_family_t encap_family,
+ ip_address_family_t payload_family,
+ udp_encap_fixup_flags_t flags0,
+ udp_encap_fixup_flags_t flags1)
{
u16 new_l0, new_l1;
udp_header_t *udp0, *udp1;
+ int payload_ip4 = (payload_family == AF_IP4);
+ int sport_entropy0 = (flags0 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
+ int sport_entropy1 = (flags1 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
- ASSERT (_vec_len (ec0) == _vec_len (ec1));
-
- vnet_calc_checksums_inline (vm, b0, is_v4, !is_v4);
- vnet_calc_checksums_inline (vm, b1, is_v4, !is_v4);
+ if (payload_family < N_AF)
+ {
+ vnet_calc_checksums_inline (vm, b0, payload_ip4, !payload_ip4);
+ vnet_calc_checksums_inline (vm, b1, payload_ip4, !payload_ip4);
+
+ /* Calculate flow hash to be used for entropy */
+ if (sport_entropy0 && 0 == vnet_buffer (b0)->ip.flow_hash)
+ vnet_buffer (b0)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b0, payload_ip4);
+ if (sport_entropy1 && 0 == vnet_buffer (b1)->ip.flow_hash)
+ vnet_buffer (b1)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b1, payload_ip4);
+ }
vlib_buffer_advance (b0, -ec_len);
vlib_buffer_advance (b1, -ec_len);
- if (is_v4)
+ if (encap_family == AF_IP4)
{
ip4_header_t *ip0, *ip1;
ip_csum_t sum0, sum1;
@@ -185,6 +252,11 @@ ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
sizeof (*ip1));
udp0->length = new_l0;
udp1->length = new_l1;
+
+ if (sport_entropy0)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+ if (sport_entropy1)
+ udp1->src_port = ip_udp_sport_entropy (b1);
}
else
{
@@ -212,6 +284,11 @@ ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
udp0->length = new_l0;
udp1->length = new_l1;
+ if (sport_entropy0)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+ if (sport_entropy1)
+ udp1->src_port = ip_udp_sport_entropy (b1);
+
udp0->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
udp1->checksum =
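Note: callers of the reworked encap helpers now pass both address families and per-buffer fixup flags explicitly. A minimal caller sketch, matching how udp_encap_node.c invokes the single-buffer variant for an ip4 encap of an ip4 payload (the function name and rewrite argument are illustrative):

/* Sketch: encap one buffer with a prebuilt ip4+udp rewrite string while
 * requesting RFC 7510 source-port entropy. */
static inline void
example_encap_ip4_over_ip4 (vlib_main_t *vm, vlib_buffer_t *b,
			    u8 *rewrite /* ip4 + udp header template */)
{
  const u8 n_bytes = sizeof (udp_header_t) + sizeof (ip4_header_t);

  ip_udp_encap_one (vm, b, rewrite, n_bytes, AF_IP4, AF_IP4,
		    UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY);
}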
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index d14bdb8a298..a90461186c1 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -115,6 +115,7 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
uc->c_fib_index = listener->c_fib_index;
uc->mss = listener->mss;
uc->flags |= UDP_CONN_F_CONNECTED;
+ uc->cfg_flags = listener->cfg_flags;
if (session_dgram_accept (&uc->connection, listener->c_s_index,
listener->c_thread_index))
@@ -122,8 +123,8 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
udp_connection_free (uc);
return 0;
}
- udp_connection_share_port (clib_net_to_host_u16
- (uc->c_lcl_port), uc->c_is_ip4);
+
+ udp_connection_share_port (uc->c_lcl_port, uc->c_is_ip4);
return uc;
}
@@ -135,37 +136,46 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
int wrote0;
if (!(uc0->flags & UDP_CONN_F_CONNECTED))
- clib_spinlock_lock (&uc0->rx_lock);
+ {
+ clib_spinlock_lock (&uc0->rx_lock);
+
+ wrote0 = session_enqueue_dgram_connection_cl (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
+
+ clib_spinlock_unlock (&uc0->rx_lock);
+
+ /* Expect cl udp enqueue to fail only when the fifo enqueue fails, i.e., the fifo is full */
+ if (PREDICT_FALSE (wrote0 == 0))
+ *error0 = UDP_ERROR_FIFO_FULL;
+
+ return;
+ }
if (svm_fifo_max_enqueue_prod (s0->rx_fifo)
< hdr0->data_length + sizeof (session_dgram_hdr_t))
{
*error0 = UDP_ERROR_FIFO_FULL;
- goto unlock_rx_lock;
+ return;
}
/* If session is owned by another thread and rx event needed,
* enqueue event now while we still have the peeker lock */
if (s0->thread_index != thread_index)
{
- wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
- TRANSPORT_PROTO_UDP,
- /* queue event */ 0);
- if (queue_event && !svm_fifo_has_event (s0->rx_fifo))
- session_enqueue_notify (s0);
+ wrote0 = session_enqueue_dgram_connection2 (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP,
+ queue_event && !svm_fifo_has_event (s0->rx_fifo));
}
else
{
- wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
- TRANSPORT_PROTO_UDP,
- queue_event);
+ wrote0 = session_enqueue_dgram_connection (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
}
- ASSERT (wrote0 > 0);
-
-unlock_rx_lock:
- if (!(uc0->flags & UDP_CONN_F_CONNECTED))
- clib_spinlock_unlock (&uc0->rx_lock);
+ /* In some rare cases, session_enqueue_dgram_connection can fail because a
+ * chunk cannot be allocated in the RX FIFO */
+ if (PREDICT_FALSE (wrote0 == 0))
+ *error0 = UDP_ERROR_FIFO_NOMEM;
}
always_inline session_t *
@@ -184,6 +194,7 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
hdr->lcl_port = udp->dst_port;
hdr->rmt_port = udp->src_port;
hdr->is_ip4 = is_ip4;
+ hdr->gso_size = 0;
if (is_ip4)
{
@@ -213,6 +224,10 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
udp->src_port, TRANSPORT_PROTO_UDP);
}
+ /* Set sw_if_index[VLIB_RX] to the local interface the
+ * packet was received on */
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index;
+
if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
b->current_length = hdr->data_length;
else
@@ -226,10 +241,9 @@ always_inline uword
udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, u8 is_ip4)
{
- u32 n_left_from, *from, errors, *first_buffer;
+ u32 thread_index = vm->thread_index, n_left_from, *from, *first_buffer;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[UDP_N_ERROR] = { 0 };
- u32 thread_index = vm->thread_index;
from = first_buffer = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -251,15 +265,11 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto done;
}
- /*
- * If session exists pool peeker lock is taken at this point unless
- * the session is already on the right thread or is a listener
- */
-
if (s0->session_state == SESSION_STATE_OPENED)
{
u8 queue_event = 1;
uc0 = udp_connection_from_transport (session_get_transport (s0));
+ uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
if (uc0->flags & UDP_CONN_F_CONNECTED)
{
if (s0->thread_index != thread_index)
@@ -273,10 +283,8 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ASSERT (s0->session_index == uc0->c_s_index);
/*
- * Drop the peeker lock on pool resize and ask session
- * layer for a new session.
+ * Ask session layer for a new session.
*/
- session_pool_remove_peeker (s0->thread_index);
session_dgram_connect_notify (&uc0->connection,
s0->thread_index, &s0);
queue_event = 0;
@@ -286,9 +294,9 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0],
queue_event, &error0);
- session_pool_remove_peeker (s0->thread_index);
}
- else if (s0->session_state == SESSION_STATE_READY)
+ else if (s0->session_state == SESSION_STATE_READY ||
+ s0->session_state == SESSION_STATE_ACCEPTING)
{
uc0 = udp_connection_from_transport (session_get_transport (s0));
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
@@ -306,6 +314,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto done;
}
s0 = session_get (uc0->c_s_index, uc0->c_thread_index);
+ uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
error0 = UDP_ERROR_ACCEPT;
}
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
@@ -314,7 +323,6 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
error0 = UDP_ERROR_NOT_READY;
- session_pool_remove_peeker (s0->thread_index);
}
done:
@@ -328,9 +336,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vlib_buffer_free (vm, first_buffer, frame->n_vectors);
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP,
- thread_index);
- err_counters[UDP_ERROR_MQ_FULL] = errors;
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, thread_index);
udp_store_err_counters (vm, is_ip4, err_counters);
return frame->n_vectors;
}
@@ -342,7 +348,6 @@ udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return udp46_input_inline (vm, node, frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_input_node) =
{
.function = udp4_input,
@@ -359,7 +364,6 @@ VLIB_REGISTER_NODE (udp4_input_node) =
#undef _
},
};
-/* *INDENT-ON* */
static uword
udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -368,7 +372,6 @@ udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return udp46_input_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp6_input_node) =
{
.function = udp6_input,
@@ -385,7 +388,6 @@ VLIB_REGISTER_NODE (udp6_input_node) =
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
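
The rewritten udp_connection_enqueue above fully separates the connection-less case: it takes the per-connection rx_lock, calls session_enqueue_dgram_connection_cl, and reports UDP_ERROR_FIFO_FULL when nothing was written, since several workers may deliver datagrams to the same listener-backed session concurrently. A minimal standalone sketch of that producer-side locking pattern, with hypothetical names and a plain byte buffer standing in for the svm fifo:

#include <pthread.h>
#include <string.h>

/* Not the VPP session layer: several worker threads enqueue datagrams
 * into one shared fifo under a spinlock, which is the role rx_lock
 * plays on the connection-less path above. */
typedef struct
{
  pthread_spinlock_t rx_lock;
  unsigned char data[4096];
  unsigned int tail;
} cl_rx_fifo_t;

static void
cl_rx_fifo_init (cl_rx_fifo_t *f)
{
  pthread_spin_init (&f->rx_lock, PTHREAD_PROCESS_PRIVATE);
  f->tail = 0;
}

/* Returns bytes written, 0 when the fifo is full; the caller maps that
 * to a FIFO_FULL style error, as the node does above. */
static int
cl_rx_fifo_enqueue (cl_rx_fifo_t *f, const void *dgram, unsigned int len)
{
  int wrote = 0;

  pthread_spin_lock (&f->rx_lock);
  if (f->tail + len <= sizeof (f->data))
    {
      memcpy (f->data + f->tail, dgram, len);
      f->tail += len;
      wrote = (int) len;
    }
  pthread_spin_unlock (&f->rx_lock);

  return wrote;
}
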
diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c
index 06bafbb4be8..6531b73cd11 100644
--- a/src/vnet/udp/udp_local.c
+++ b/src/vnet/udp/udp_local.c
@@ -42,8 +42,6 @@ static vlib_error_desc_t udp_error_counters[] = {
#undef udp_error
};
-#define UDP_NO_NODE_SET ((u16) ~0)
-
#ifndef CLIB_MARCH_VARIANT
u8 *
format_udp_rx_trace (u8 * s, va_list * args)
@@ -127,9 +125,8 @@ udp46_local_inline (vlib_main_t * vm,
u32 bi0, bi1;
vlib_buffer_t *b0, *b1;
udp_header_t *h0 = 0, *h1 = 0;
- u32 i0, i1, dst_port0, dst_port1;
+ u32 i0, i1, next0, next1;
u32 advance0, advance1;
- u32 error0, next0, error1, next1;
/* Prefetch next iteration. */
{
@@ -171,72 +168,106 @@ udp46_local_inline (vlib_main_t * vm,
if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0)))
{
- error0 = UDP_ERROR_LENGTH_ERROR;
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next0 = UDP_LOCAL_NEXT_DROP;
}
else
{
vlib_buffer_advance (b0, advance0);
h0 = vlib_buffer_get_current (b0);
- error0 = UDP_ERROR_NONE;
next0 = UDP_LOCAL_NEXT_PUNT;
if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) >
vlib_buffer_length_in_chain (vm, b0)))
{
- error0 = UDP_ERROR_LENGTH_ERROR;
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next0 = UDP_LOCAL_NEXT_DROP;
}
}
if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1)))
{
- error1 = UDP_ERROR_LENGTH_ERROR;
+ b1->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next1 = UDP_LOCAL_NEXT_DROP;
}
else
{
vlib_buffer_advance (b1, advance1);
h1 = vlib_buffer_get_current (b1);
- error1 = UDP_ERROR_NONE;
next1 = UDP_LOCAL_NEXT_PUNT;
if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) >
vlib_buffer_length_in_chain (vm, b1)))
{
- error1 = UDP_ERROR_LENGTH_ERROR;
+ b1->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next1 = UDP_LOCAL_NEXT_DROP;
}
}
/* Index sparse array with network byte order. */
- dst_port0 = (error0 == 0) ? h0->dst_port : 0;
- dst_port1 = (error1 == 0) ? h1->dst_port : 0;
- sparse_vec_index2 (next_by_dst_port, dst_port0, dst_port1, &i0,
- &i1);
- next0 = (error0 == 0) ? vec_elt (next_by_dst_port, i0) : next0;
- next1 = (error1 == 0) ? vec_elt (next_by_dst_port, i1) : next1;
-
- if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
- next0 == UDP_NO_NODE_SET))
+ if (PREDICT_TRUE (next0 == UDP_LOCAL_NEXT_PUNT &&
+ next1 == UDP_LOCAL_NEXT_PUNT))
{
- udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ sparse_vec_index2 (next_by_dst_port, h0->dst_port, h1->dst_port,
+ &i0, &i1);
+ next0 = vec_elt (next_by_dst_port, i0);
+ next1 = vec_elt (next_by_dst_port, i1);
+
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
+ next0 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
+ next1 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
}
- else
+ else if (next0 == UDP_LOCAL_NEXT_PUNT)
{
- b0->error = node->errors[UDP_ERROR_NONE];
- // advance to the payload
- vlib_buffer_advance (b0, sizeof (*h0));
- }
+ i0 = sparse_vec_index (next_by_dst_port, h0->dst_port);
+ next0 = vec_elt (next_by_dst_port, i0);
- if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
- next1 == UDP_NO_NODE_SET))
- {
- udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
+ next0 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
}
- else
+ else if (next1 == UDP_LOCAL_NEXT_PUNT)
{
- b1->error = node->errors[UDP_ERROR_NONE];
- // advance to the payload
- vlib_buffer_advance (b1, sizeof (*h1));
+ i1 = sparse_vec_index (next_by_dst_port, h1->dst_port);
+ next1 = vec_elt (next_by_dst_port, i1);
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
+ next1 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
}
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -362,7 +393,6 @@ VLIB_NODE_FN (udp6_local_node) (vlib_main_t * vm,
return udp46_local_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_local_node) = {
.name = "ip4-udp-lookup",
/* Takes a vector of packets. */
@@ -382,9 +412,7 @@ VLIB_REGISTER_NODE (udp4_local_node) = {
.format_trace = format_udp_rx_trace,
.unformat_buffer = unformat_udp_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp6_local_node) = {
.name = "ip6-udp-lookup",
/* Takes a vector of packets. */
@@ -404,7 +432,6 @@ VLIB_REGISTER_NODE (udp6_local_node) = {
.format_trace = format_udp_rx_trace,
.unformat_buffer = unformat_udp_header,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -492,16 +519,12 @@ u8
udp_is_valid_dst_port (udp_dst_port_t dst_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- u16 *n;
-
- if (is_ip4)
- n = sparse_vec_validate (um->next_by_dst_port4,
- clib_host_to_net_u16 (dst_port));
- else
- n = sparse_vec_validate (um->next_by_dst_port6,
- clib_host_to_net_u16 (dst_port));
-
- return (n[0] != SPARSE_VEC_INVALID_INDEX && n[0] != UDP_NO_NODE_SET);
+ u16 *next_by_dst_port =
+ is_ip4 ? um->next_by_dst_port4 : um->next_by_dst_port6;
+ uword index =
+ sparse_vec_index (next_by_dst_port, clib_host_to_net_u16 (dst_port));
+ return (index != SPARSE_VEC_INVALID_INDEX &&
+ vec_elt (next_by_dst_port, index) != UDP_NO_NODE_SET);
}
void
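
The udp_local changes above keep two distinct "cannot dispatch" cases for a destination port: no sparse-vector entry at all (SPARSE_VEC_INVALID_INDEX) and an entry equal to the UDP_NO_NODE_SET sentinel. The sketch below restates that check with a hypothetical dense table, purely to make the two sentinels explicit; it is not how the sparse vector itself is implemented.

#include <stdbool.h>
#include <stdint.h>

#define EX_NO_NODE_SET ((uint16_t) ~0)

/* Illustrative stand-in for next_by_dst_port4/6: a dense table of
 * next-node indices keyed by destination port in network byte order,
 * where 0 plays the "never registered" role and EX_NO_NODE_SET the
 * "registered but no node attached" role. */
static uint16_t ex_next_by_dst_port[65536];

static bool
ex_is_valid_dst_port (uint16_t dst_port_net)
{
  uint16_t next = ex_next_by_dst_port[dst_port_net];

  return next != 0 && next != EX_NO_NODE_SET;
}
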
diff --git a/src/vnet/udp/udp_output.c b/src/vnet/udp/udp_output.c
new file mode 100644
index 00000000000..22b94141365
--- /dev/null
+++ b/src/vnet/udp/udp_output.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/udp/udp.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+
+#define udp_node_index(node_id, is_ip4) \
+ ((is_ip4) ? udp4_##node_id##_node.index : udp6_##node_id##_node.index)
+
+typedef enum udp_output_next_
+{
+ UDP_OUTPUT_NEXT_DROP,
+ UDP_OUTPUT_NEXT_IP_LOOKUP,
+ UDP_OUTPUT_N_NEXT
+} udp_output_next_t;
+
+#define foreach_udp4_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip4-lookup")
+
+#define foreach_udp6_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip6-lookup")
+
+static vlib_error_desc_t udp_output_error_counters[] = {
+#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
+#include <vnet/udp/udp_error.def>
+#undef udp_error
+};
+
+typedef struct udp_tx_trace_
+{
+ udp_header_t udp_header;
+ udp_connection_t udp_connection;
+} udp_tx_trace_t;
+
+static u8 *
+format_udp_tx_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ udp_tx_trace_t *t = va_arg (*args, udp_tx_trace_t *);
+ udp_connection_t *uc = &t->udp_connection;
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "%U\n%U%U", format_udp_connection, uc, 1, format_white_space,
+ indent, format_udp_header, &t->udp_header, 128);
+
+ return s;
+}
+
+always_inline udp_connection_t *
+udp_output_get_connection (vlib_buffer_t *b, u32 thread_index)
+{
+ if (PREDICT_FALSE (vnet_buffer (b)->tcp.flags & UDP_CONN_F_LISTEN))
+ return udp_listener_get (vnet_buffer (b)->tcp.connection_index);
+
+ return udp_connection_get (vnet_buffer (b)->tcp.connection_index,
+ thread_index);
+}
+
+static void
+udp46_output_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 *to_next, u32 n_bufs)
+{
+ udp_connection_t *uc;
+ udp_tx_trace_t *t;
+ vlib_buffer_t *b;
+ udp_header_t *uh;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ b = vlib_get_buffer (vm, to_next[i]);
+ if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+ uh = vlib_buffer_get_current (b);
+ uc = udp_output_get_connection (b, vm->thread_index);
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ clib_memcpy_fast (&t->udp_header, uh, sizeof (t->udp_header));
+ clib_memcpy_fast (&t->udp_connection, uc, sizeof (t->udp_connection));
+ }
+}
+
+always_inline void
+udp_output_handle_packet (udp_connection_t *uc0, vlib_buffer_t *b0,
+ vlib_node_runtime_t *error_node, u16 *next0,
+ u8 is_ip4)
+{
+ /* If next_index is not drop use it */
+ if (uc0->next_node_index)
+ {
+ *next0 = uc0->next_node_index;
+ vnet_buffer (b0)->tcp.next_node_opaque = uc0->next_node_opaque;
+ }
+ else
+ {
+ *next0 = UDP_OUTPUT_NEXT_IP_LOOKUP;
+ }
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = uc0->c_fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = uc0->sw_if_index;
+}
+
+always_inline uword
+udp46_output_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_ip4)
+{
+ u32 n_left_from, *from, thread_index = vm->thread_index;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ udp46_output_trace_frame (vm, node, from, n_left_from);
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ b = bufs;
+ next = nexts;
+
+ while (n_left_from >= 4)
+ {
+ udp_connection_t *uc0, *uc1;
+
+ vlib_prefetch_buffer_header (b[2], STORE);
+ CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+ vlib_prefetch_buffer_header (b[3], STORE);
+ CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+ uc0 = udp_output_get_connection (b[0], thread_index);
+ uc1 = udp_output_get_connection (b[1], thread_index);
+
+ if (PREDICT_TRUE (!uc0 + !uc1 == 0))
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4);
+ }
+ else
+ {
+ if (uc0 != 0)
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ }
+ else
+ {
+ b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[0] = UDP_OUTPUT_NEXT_DROP;
+ }
+ if (uc1 != 0)
+ {
+ udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4);
+ }
+ else
+ {
+ b[1]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[1] = UDP_OUTPUT_NEXT_DROP;
+ }
+ }
+
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+ while (n_left_from > 0)
+ {
+ udp_connection_t *uc0;
+
+ if (n_left_from > 1)
+ {
+ vlib_prefetch_buffer_header (b[1], STORE);
+ CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ uc0 = udp_output_get_connection (b[0], thread_index);
+
+ if (PREDICT_TRUE (uc0 != 0))
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ }
+ else
+ {
+ b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[0] = UDP_OUTPUT_NEXT_DROP;
+ }
+
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ vlib_node_increment_counter (vm, udp_node_index (output, is_ip4),
+ UDP_ERROR_PKTS_SENT, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (udp4_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return udp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */);
+}
+
+VLIB_NODE_FN (udp6_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return udp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */);
+}
+
+VLIB_REGISTER_NODE (udp4_output_node) =
+{
+ .name = "udp4-output",
+ .vector_size = sizeof (u32),
+ .n_errors = UDP_N_ERROR,
+ .protocol_hint = VLIB_NODE_PROTO_HINT_UDP,
+ .error_counters = udp_output_error_counters,
+ .n_next_nodes = UDP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n,
+ foreach_udp4_output_next
+#undef _
+ },
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_tx_trace,
+};
+
+VLIB_REGISTER_NODE (udp6_output_node) =
+{
+ .name = "udp6-output",
+ .vector_size = sizeof (u32),
+ .n_errors = UDP_N_ERROR,
+ .protocol_hint = VLIB_NODE_PROTO_HINT_UDP,
+ .error_counters = udp_output_error_counters,
+ .n_next_nodes = UDP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n,
+ foreach_udp6_output_next
+#undef _
+ },
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_tx_trace,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */