aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Florin Coras <fcoras@cisco.com> 2018-12-02 13:36:00 -0800
committer Dave Barach <openvpp@barachs.net> 2018-12-03 15:49:27 +0000
commit adc74d7b3da5a48cd346ce14694415e7c5c307bb (patch)
tree 52168b4b95aa1c76e0814c2290beb4a4883ceced
parent b4a7a7dcf81f85ca3a22c791fb57d7eddc111661 (diff)
svm: use explicit svm fifo segment main for slaves
Change-Id: Id39d64bf1b49345a3dc31c63360569212aba6865
Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r-- src/svm/svm_fifo_segment.c 32
-rw-r--r-- src/svm/svm_fifo_segment.h 30
-rw-r--r-- src/svm/test_svm_fifo1.c 28
-rw-r--r-- src/tests/vnet/session/tcp_echo.c 13
-rw-r--r-- src/tests/vnet/session/udp_echo.c 24
-rw-r--r-- src/vcl/vcl_bapi.c 7
-rw-r--r-- src/vcl/vcl_private.h 2
-rw-r--r-- src/vcl/vppcom.c 2
8 files changed, 76 insertions, 62 deletions
diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c
index 6cb402f1a6b..c72de406633 100644
--- a/src/svm/svm_fifo_segment.c
+++ b/src/svm/svm_fifo_segment.c
@@ -15,8 +15,6 @@
#include <svm/svm_fifo_segment.h>
-svm_fifo_segment_main_t svm_fifo_segment_main;
-
static void
allocate_new_fifo_chunk (svm_fifo_segment_header_t * fsh,
u32 data_size_in_bytes, int chunk_size)
@@ -203,9 +201,9 @@ svm_fifo_segment_init (svm_fifo_segment_private_t * s)
* Create an svm fifo segment and initialize as master
*/
int
-svm_fifo_segment_create (svm_fifo_segment_create_args_t * a)
+svm_fifo_segment_create (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_create_args_t * a)
{
- svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
svm_fifo_segment_private_t *s;
int rv;
@@ -237,9 +235,9 @@ svm_fifo_segment_create (svm_fifo_segment_create_args_t * a)
* Create an svm fifo segment in process-private memory
*/
int
-svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t * a)
+svm_fifo_segment_create_process_private (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_create_args_t * a)
{
- svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
svm_fifo_segment_private_t *s;
ssvm_shared_header_t *sh;
u32 rnd_size = 0;
@@ -291,9 +289,9 @@ svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t * a)
* Attach as slave to an svm fifo segment
*/
int
-svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a)
+svm_fifo_segment_attach (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_create_args_t * a)
{
- svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
svm_fifo_segment_private_t *s;
int rv;
@@ -324,10 +322,9 @@ svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a)
}
void
-svm_fifo_segment_delete (svm_fifo_segment_private_t * s)
+svm_fifo_segment_delete (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_private_t * s)
{
- svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
-
ssvm_delete (&s->ssvm);
clib_memset (s, 0xfe, sizeof (*s));
pool_put (sm->segments, s);
@@ -495,27 +492,26 @@ svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f,
}
void
-svm_fifo_segment_main_init (u64 baseva, u32 timeout_in_seconds)
+svm_fifo_segment_main_init (svm_fifo_segment_main_t * sm, u64 baseva,
+ u32 timeout_in_seconds)
{
- svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
-
sm->next_baseva = baseva;
sm->timeout_in_seconds = timeout_in_seconds;
}
u32
-svm_fifo_segment_index (svm_fifo_segment_private_t * s)
+svm_fifo_segment_index (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_private_t * s)
{
- return s - svm_fifo_segment_main.segments;
+ return s - sm->segments;
}
/**
* Retrieve svm segments pool. Used only for debug purposes.
*/
svm_fifo_segment_private_t *
-svm_fifo_segment_segments_pool (void)
+svm_fifo_segment_segments_pool (svm_fifo_segment_main_t * sm)
{
- svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
return sm->segments;
}
diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h
index cbc327e8e9b..a7695549d1c 100644
--- a/src/svm/svm_fifo_segment.h
+++ b/src/svm/svm_fifo_segment.h
@@ -59,8 +59,6 @@ typedef struct
u32 timeout_in_seconds;
} svm_fifo_segment_main_t;
-extern svm_fifo_segment_main_t svm_fifo_segment_main;
-
typedef struct
{
ssvm_segment_type_t segment_type;
@@ -73,10 +71,9 @@ typedef struct
#define svm_fifo_segment_flags(_seg) _seg->h->flags
static inline svm_fifo_segment_private_t *
-svm_fifo_segment_get_segment (u32 segment_index)
+svm_fifo_segment_get_segment (svm_fifo_segment_main_t * sm, u32 segment_index)
{
- svm_fifo_segment_main_t *ssm = &svm_fifo_segment_main;
- return pool_elt_at_index (ssm->segments, segment_index);
+ return pool_elt_at_index (sm->segments, segment_index);
}
static inline u8
@@ -92,15 +89,19 @@ svm_fifo_segment_get_fifo_list (svm_fifo_segment_private_t * fifo_segment)
}
int svm_fifo_segment_init (svm_fifo_segment_private_t * s);
-int svm_fifo_segment_create (svm_fifo_segment_create_args_t * a);
-int svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t
- * a);
+int svm_fifo_segment_create (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_create_args_t * a);
+int svm_fifo_segment_create_process_private (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_create_args_t
+ *);
void svm_fifo_segment_preallocate_fifo_pairs (svm_fifo_segment_private_t * s,
u32 rx_fifo_size,
u32 tx_fifo_size,
u32 * n_fifo_pairs);
-int svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a);
-void svm_fifo_segment_delete (svm_fifo_segment_private_t * s);
+int svm_fifo_segment_attach (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_create_args_t * a);
+void svm_fifo_segment_delete (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_private_t * s);
svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s,
u32 data_size_in_bytes,
@@ -108,15 +109,18 @@ svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s,
void svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s,
svm_fifo_t * f,
svm_fifo_segment_freelist_t index);
-void svm_fifo_segment_main_init (u64 baseva, u32 timeout_in_seconds);
-u32 svm_fifo_segment_index (svm_fifo_segment_private_t * s);
+void svm_fifo_segment_main_init (svm_fifo_segment_main_t * sm, u64 baseva,
+ u32 timeout_in_seconds);
+u32 svm_fifo_segment_index (svm_fifo_segment_main_t * sm,
+ svm_fifo_segment_private_t * s);
u32 svm_fifo_segment_num_fifos (svm_fifo_segment_private_t * fifo_segment);
u32 svm_fifo_segment_num_free_fifos (svm_fifo_segment_private_t *
fifo_segment, u32 fifo_size_in_bytes);
void svm_fifo_segment_info (svm_fifo_segment_private_t * seg, uword * address,
u64 * size);
-svm_fifo_segment_private_t *svm_fifo_segment_segments_pool (void);
+svm_fifo_segment_private_t
+ * svm_fifo_segment_segments_pool (svm_fifo_segment_main_t * sm);
format_function_t format_svm_fifo_segment;
format_function_t format_svm_fifo_segment_type;
diff --git a/src/svm/test_svm_fifo1.c b/src/svm/test_svm_fifo1.c
index 243f8b61673..0a09916f3bf 100644
--- a/src/svm/test_svm_fifo1.c
+++ b/src/svm/test_svm_fifo1.c
@@ -15,10 +15,13 @@
#include "svm_fifo_segment.h"
+svm_fifo_segment_main_t segment_main;
+
clib_error_t *
hello_world (int verbose)
{
svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_main_t *sm = &segment_main;
svm_fifo_segment_private_t *sp;
svm_fifo_t *f;
int rv;
@@ -31,12 +34,12 @@ hello_world (int verbose)
a->segment_name = "fifo-test1";
a->segment_size = 256 << 10;
- rv = svm_fifo_segment_create (a);
+ rv = svm_fifo_segment_create (sm, a);
if (rv)
return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
- sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+ sp = svm_fifo_segment_get_segment (sm, a->new_segment_indices[0]);
f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST);
@@ -72,6 +75,7 @@ clib_error_t *
master (int verbose)
{
svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_main_t *sm = &segment_main;
svm_fifo_segment_private_t *sp;
svm_fifo_t *f;
int rv;
@@ -84,12 +88,12 @@ master (int verbose)
a->segment_name = "fifo-test1";
a->segment_size = 256 << 10;
- rv = svm_fifo_segment_create (a);
+ rv = svm_fifo_segment_create (sm, a);
if (rv)
return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
- sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+ sp = svm_fifo_segment_get_segment (sm, a->new_segment_indices[0]);
f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST);
@@ -109,6 +113,7 @@ clib_error_t *
mempig (int verbose)
{
svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_main_t *sm = &segment_main;
svm_fifo_segment_private_t *sp;
svm_fifo_t *f;
svm_fifo_t **flist = 0;
@@ -120,12 +125,12 @@ mempig (int verbose)
a->segment_name = "fifo-test1";
a->segment_size = 256 << 10;
- rv = svm_fifo_segment_create (a);
+ rv = svm_fifo_segment_create (sm, a);
if (rv)
return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
- sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+ sp = svm_fifo_segment_get_segment (sm, a->new_segment_indices[0]);
for (i = 0; i < 1000; i++)
{
@@ -166,6 +171,7 @@ clib_error_t *
offset (int verbose)
{
svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_main_t *sm = &segment_main;
svm_fifo_segment_private_t *sp;
svm_fifo_t *f;
int rv;
@@ -178,12 +184,12 @@ offset (int verbose)
a->segment_name = "fifo-test1";
a->segment_size = 256 << 10;
- rv = svm_fifo_segment_create (a);
+ rv = svm_fifo_segment_create (sm, a);
if (rv)
return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
- sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+ sp = svm_fifo_segment_get_segment (sm, a->new_segment_indices[0]);
f = svm_fifo_segment_alloc_fifo (sp, 200 << 10, FIFO_SEGMENT_RX_FREELIST);
@@ -225,6 +231,7 @@ clib_error_t *
slave (int verbose)
{
svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_main_t *sm = &segment_main;
svm_fifo_segment_private_t *sp;
svm_fifo_t *f;
ssvm_shared_header_t *sh;
@@ -243,7 +250,7 @@ slave (int verbose)
if (rv)
return clib_error_return (0, "svm_fifo_segment_attach returned %d", rv);
- sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+ sp = svm_fifo_segment_get_segment (sm, a->new_segment_indices[0]);
sh = sp->ssvm.sh;
fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
@@ -269,11 +276,12 @@ slave (int verbose)
int
test_ssvm_fifo1 (unformat_input_t * input)
{
+ svm_fifo_segment_main_t *sm = &segment_main;
clib_error_t *error = 0;
int verbose = 0;
int test_id = 0;
- svm_fifo_segment_main_init (0x200000000ULL, 20);
+ svm_fifo_segment_main_init (sm, 0x200000000ULL, 20);
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
diff --git a/src/tests/vnet/session/tcp_echo.c b/src/tests/vnet/session/tcp_echo.c
index bde55ad37f4..e553a3ac9fa 100644
--- a/src/tests/vnet/session/tcp_echo.c
+++ b/src/tests/vnet/session/tcp_echo.c
@@ -142,8 +142,7 @@ typedef struct
* and all other messages are exchanged using shm IPC. */
u8 use_sock_api;
- /* convenience */
- svm_fifo_segment_main_t *segment_main;
+ svm_fifo_segment_main_t segment_main;
} echo_main_t;
echo_main_t echo_main;
@@ -350,6 +349,7 @@ static int
ssvm_segment_attach (char *name, ssvm_segment_type_t type, int fd)
{
svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_main_t *sm = &echo_main.segment_main;
int rv;
clib_memset (a, 0, sizeof (*a));
@@ -359,7 +359,7 @@ ssvm_segment_attach (char *name, ssvm_segment_type_t type, int fd)
if (type == SSVM_SEGMENT_MEMFD)
a->memfd_fd = fd;
- if ((rv = svm_fifo_segment_attach (a)))
+ if ((rv = svm_fifo_segment_attach (sm, a)))
{
clib_warning ("svm_fifo_segment_attach ('%s') failed", name);
return rv;
@@ -514,6 +514,7 @@ disconnect_from_vpp (echo_main_t * em)
static void
vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp)
{
+ svm_fifo_segment_main_t *sm = &echo_main.segment_main;
svm_fifo_segment_create_args_t _a, *a = &_a;
int rv;
@@ -521,7 +522,7 @@ vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp)
a->segment_name = (char *) mp->segment_name;
a->segment_size = mp->segment_size;
/* Attach to the segment vpp created */
- rv = svm_fifo_segment_attach (a);
+ rv = svm_fifo_segment_attach (sm, a);
if (rv)
{
clib_warning ("svm_fifo_segment_attach ('%s') failed",
@@ -1384,6 +1385,7 @@ main (int argc, char **argv)
{
int i_am_server = 1, test_return_packets = 0;
echo_main_t *em = &echo_main;
+ svm_fifo_segment_main_t *sm = &em->segment_main;
unformat_input_t _argv, *a = &_argv;
u8 *chroot_prefix;
u8 *uri = 0;
@@ -1406,7 +1408,7 @@ main (int argc, char **argv)
clib_time_init (&em->clib_time);
init_error_string_table (em);
- svm_fifo_segment_main_init (0x200000000ULL, 20);
+ svm_fifo_segment_main_init (sm, 0x200000000ULL, 20);
unformat_init_command_line (a, argv);
while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT)
@@ -1467,7 +1469,6 @@ main (int argc, char **argv)
}
em->i_am_master = i_am_server;
- em->segment_main = &svm_fifo_segment_main;
em->test_return_packets = test_return_packets;
em->bytes_to_send = bytes_to_send;
em->time_to_stop = 0;
diff --git a/src/tests/vnet/session/udp_echo.c b/src/tests/vnet/session/udp_echo.c
index ea68ee162d1..b8c4c5daf31 100644
--- a/src/tests/vnet/session/udp_echo.c
+++ b/src/tests/vnet/session/udp_echo.c
@@ -121,8 +121,7 @@ typedef struct
/* VNET_API_ERROR_FOO -> "Foo" hash table */
uword *error_string_by_error_number;
- /* convenience */
- svm_fifo_segment_main_t *segment_main;
+ svm_fifo_segment_main_t segment_main;
u8 *connect_test_data;
@@ -336,8 +335,9 @@ static void
vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t *
mp)
{
- udp_echo_main_t *utm = &udp_echo_main;
svm_fifo_segment_create_args_t _a = { 0 }, *a = &_a;
+ udp_echo_main_t *utm = &udp_echo_main;
+ svm_fifo_segment_main_t *sm = &utm->segment_main;
int rv;
if (mp->retval)
@@ -359,7 +359,7 @@ vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t *
ASSERT (mp->app_event_queue_address);
/* Attach to the segment vpp created */
- rv = svm_fifo_segment_attach (a);
+ rv = svm_fifo_segment_attach (sm, a);
if (rv)
{
clib_warning ("svm_fifo_segment_attach ('%s') failed",
@@ -912,8 +912,9 @@ vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp)
static void
vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp)
{
- udp_echo_main_t *utm = &udp_echo_main;
svm_fifo_segment_create_args_t _a, *a = &_a;
+ udp_echo_main_t *utm = &udp_echo_main;
+ svm_fifo_segment_main_t *sm = &utm->segment_main;
svm_fifo_segment_private_t *seg;
int rv;
@@ -921,14 +922,14 @@ vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp)
a->segment_name = (char *) mp->segment_name;
a->segment_size = mp->segment_size;
/* Attach to the segment vpp created */
- rv = svm_fifo_segment_attach (a);
+ rv = svm_fifo_segment_attach (sm, a);
if (rv)
{
clib_warning ("svm_fifo_segment_attach ('%s') failed",
mp->segment_name);
return;
}
- seg = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+ seg = svm_fifo_segment_get_segment (sm, a->new_segment_indices[0]);
clib_warning ("Mapped new segment '%s' size %d", seg->ssvm.name,
seg->ssvm.ssvm_size);
hash_set (utm->segments_table, clib_net_to_host_u64 (mp->segment_handle),
@@ -939,6 +940,7 @@ static void
vl_api_unmap_segment_t_handler (vl_api_unmap_segment_t * mp)
{
udp_echo_main_t *utm = &udp_echo_main;
+ svm_fifo_segment_main_t *sm = &utm->segment_main;
svm_fifo_segment_private_t *seg;
u64 *seg_indexp, segment_handle;
@@ -950,8 +952,8 @@ vl_api_unmap_segment_t_handler (vl_api_unmap_segment_t * mp)
return;
}
hash_unset (utm->segments_table, segment_handle);
- seg = svm_fifo_segment_get_segment ((u32) seg_indexp[0]);
- svm_fifo_segment_delete (seg);
+ seg = svm_fifo_segment_get_segment (sm, (u32) seg_indexp[0]);
+ svm_fifo_segment_delete (sm, seg);
clib_warning ("Unmapped segment '%s'", segment_handle);
}
@@ -1206,6 +1208,7 @@ int
main (int argc, char **argv)
{
udp_echo_main_t *utm = &udp_echo_main;
+ svm_fifo_segment_main_t *sm = &utm->segment_main;
u8 *uri = (u8 *) "udp://6.0.1.1/1234";
unformat_input_t _argv, *a = &_argv;
int i_am_server = 1;
@@ -1217,7 +1220,7 @@ main (int argc, char **argv)
clib_mem_init_thread_safe (0, 256 << 20);
- svm_fifo_segment_main_init (0x200000000ULL, 20);
+ svm_fifo_segment_main_init (sm, 0x200000000ULL, 20);
vec_validate (utm->rx_buf, 128 << 10);
utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
@@ -1226,7 +1229,6 @@ main (int argc, char **argv)
utm->have_return = 1;
utm->bytes_to_send = 1024;
utm->fifo_size = 128 << 10;
- utm->segment_main = &svm_fifo_segment_main;
utm->cut_through_session_index = ~0;
clib_time_init (&utm->clib_time);
diff --git a/src/vcl/vcl_bapi.c b/src/vcl/vcl_bapi.c
index c6e7fdf369d..cd3aaaa3d4e 100644
--- a/src/vcl/vcl_bapi.c
+++ b/src/vcl/vcl_bapi.c
@@ -75,7 +75,7 @@ vcl_segment_attach (u64 segment_handle, char *name, ssvm_segment_type_t type,
if (type == SSVM_SEGMENT_MEMFD)
a->memfd_fd = fd;
- if ((rv = svm_fifo_segment_attach (a)))
+ if ((rv = svm_fifo_segment_attach (&vcm->segment_main, a)))
{
clib_warning ("svm_fifo_segment_attach ('%s') failed", name);
return rv;
@@ -88,14 +88,15 @@ vcl_segment_attach (u64 segment_handle, char *name, ssvm_segment_type_t type,
static void
vcl_segment_detach (u64 segment_handle)
{
+ svm_fifo_segment_main_t *sm = &vcm->segment_main;
svm_fifo_segment_private_t *segment;
u32 segment_index;
segment_index = vcl_segment_table_lookup (segment_handle);
if (segment_index == (u32) ~ 0)
return;
- segment = svm_fifo_segment_get_segment (segment_index);
- svm_fifo_segment_delete (segment);
+ segment = svm_fifo_segment_get_segment (sm, segment_index);
+ svm_fifo_segment_delete (sm, segment);
vcl_segment_table_del (segment_handle);
}
diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h
index 6ca0471e965..6a289d640e0 100644
--- a/src/vcl/vcl_private.h
+++ b/src/vcl/vcl_private.h
@@ -326,6 +326,8 @@ typedef struct vppcom_main_t_
/** Mapped segments table */
uword *segment_table;
+ svm_fifo_segment_main_t segment_main;
+
#ifdef VCL_ELOG
/* VPP Event-logger */
elog_main_t elog_main;
diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c
index 34b663b9831..db767e9e713 100644
--- a/src/vcl/vppcom.c
+++ b/src/vcl/vppcom.c
@@ -767,7 +767,7 @@ vppcom_app_create (char *app_name)
vcm->main_pid = getpid ();
vcm->app_name = format (0, "%s", app_name);
vppcom_init_error_string_table ();
- svm_fifo_segment_main_init (vcl_cfg->segment_baseva,
+ svm_fifo_segment_main_init (&vcm->segment_main, vcl_cfg->segment_baseva,
20 /* timeout in secs */ );
pool_alloc (vcm->workers, vcl_cfg->max_workers);
clib_spinlock_init (&vcm->workers_lock);
} /* Keyword */ .highlight .l { color: #ae81ff } /* Literal */ .highlight .n { color: #f8f8f2 } /* Name */ .highlight .o { color: #f92672 } /* Operator */ .highlight .p { color: #f8f8f2 } /* Punctuation */ .highlight .ch { color: #75715e } /* Comment.Hashbang */ .highlight .cm { color: #75715e } /* Comment.Multiline */ .highlight .cp { color: #75715e } /* Comment.Preproc */ .highlight .cpf { color: #75715e } /* Comment.PreprocFile */ .highlight .c1 { color: #75715e } /* Comment.Single */ .highlight .cs { color: #75715e } /* Comment.Special */ .highlight .gd { color: #f92672 } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .gi { color: #a6e22e } /* Generic.Inserted */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #75715e } /* Generic.Subheading */ .highlight .kc { color: #66d9ef } /* Keyword.Constant */ .highlight .kd { color: #66d9ef } /* Keyword.Declaration */ .highlight .kn { color: #f92672 } /* Keyword.Namespace */ .highlight .kp { color: #66d9ef } /* Keyword.Pseudo */ .highlight .kr { color: #66d9ef } /* Keyword.Reserved */ .highlight .kt { color: #66d9ef } /* Keyword.Type */ .highlight .ld { color: #e6db74 } /* Literal.Date */ .highlight .m { color: #ae81ff } /* Literal.Number */ .highlight .s { color: #e6db74 } /* Literal.String */ .highlight .na { color: #a6e22e } /* Name.Attribute */ .highlight .nb { color: #f8f8f2 } /* Name.Builtin */ .highlight .nc { color: #a6e22e } /* Name.Class */ .highlight .no { color: #66d9ef } /* Name.Constant */ .highlight .nd { color: #a6e22e } /* Name.Decorator */ .highlight .ni { color: #f8f8f2 } /* Name.Entity */ .highlight .ne { color: #a6e22e } /* Name.Exception */ .highlight .nf { color: #a6e22e } /* Name.Function */ .highlight .nl { color: #f8f8f2 } /* Name.Label */ .highlight .nn { color: #f8f8f2 } /* Name.Namespace */ .highlight .nx { color: #a6e22e } /* Name.Other */ .highlight .py { color: #f8f8f2 } /* Name.Property */ .highlight .nt { 
color: #f92672 } /* Name.Tag */ .highlight .nv { color: #f8f8f2 } /* Name.Variable */ .highlight .ow { color: #f92672 } /* Operator.Word */ .highlight .w { color: #f8f8f2 } /* Text.Whitespace */ .highlight .mb { color: #ae81ff } /* Literal.Number.Bin */ .highlight .mf { color: #ae81ff } /* Literal.Number.Float */ .highlight .mh { color: #ae81ff } /* Literal.Number.Hex */ .highlight .mi { color: #ae81ff } /* Literal.Number.Integer */ .highlight .mo { color: #ae81ff } /* Literal.Number.Oct */ .highlight .sa { color: #e6db74 } /* Literal.String.Affix */ .highlight .sb { color: #e6db74 } /* Literal.String.Backtick */ .highlight .sc { color: #e6db74 } /* Literal.String.Char */ .highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */ .highlight .sd { color: #e6db74 } /* Literal.String.Doc */ .highlight .s2 { color: #e6db74 } /* Literal.String.Double */ .highlight .se { color: #ae81ff } /* Literal.String.Escape */ .highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */ .highlight .si { color: #e6db74 } /* Literal.String.Interpol */ .highlight .sx { color: #e6db74 } /* Literal.String.Other */ .highlight .sr { color: #e6db74 } /* Literal.String.Regex */ .highlight .s1 { color: #e6db74 } /* Literal.String.Single */ .highlight .ss { color: #e6db74 } /* Literal.String.Symbol */ .highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #a6e22e } /* Name.Function.Magic */ .highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */ .highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */ .highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */ .highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */ .highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */ } @media (prefers-color-scheme: light) { .highlight .hll { background-color: #ffffcc } .highlight .c { color: #888888 } /* Comment */ .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ .highlight .k { color: #008800; font-weight: bold } /* 
Keyword */ .highlight .ch { color: #888888 } /* Comment.Hashbang */ .highlight .cm { color: #888888 } /* Comment.Multiline */ .highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */ .highlight .cpf { color: #888888 } /* Comment.PreprocFile */ .highlight .c1 { color: #888888 } /* Comment.Single */ .highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */ .highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .gr { color: #aa0000 } /* Generic.Error */ .highlight .gh { color: #333333 } /* Generic.Heading */ .highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ .highlight .go { color: #888888 } /* Generic.Output */ .highlight .gp { color: #555555 } /* Generic.Prompt */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #666666 } /* Generic.Subheading */ .highlight .gt { color: #aa0000 } /* Generic.Traceback */ .highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */ .highlight .kp { color: #008800 } /* Keyword.Pseudo */ .highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */ .highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */ .highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */ .highlight .na { color: #336699 } /* Name.Attribute */ .highlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* 
Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* 
Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file
 * @brief Local TCP/IP stack punt infrastructure.
 *
 * Provides a set of VPP nodes together with the relevant APIs and CLI
 * commands in order to adjust and dispatch packets from the VPP data plane
 * to the local TCP/IP stack.
 */

#include <vnet/ip/ip.h>
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
#include <vnet/udp/udp.h>
#include <vnet/tcp/tcp.h>
#include <vnet/sctp/sctp.h>
#include <vnet/ip/punt.h>
#include <vppinfra/sparse_vec.h>
#include <vlib/unix/unix.h>

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdlib.h>

/* X-macro listing the next nodes of the UDP punt nodes.  Each entry is
   _ (enum suffix, graph node name); users define _ before expanding it. */
#define foreach_punt_next			\
  _ (PUNT4, "ip4-punt")                         \
  _ (PUNT6, "ip6-punt")

/* Next-node indices generated from foreach_punt_next (PUNT_NEXT_PUNT4,
   PUNT_NEXT_PUNT6).  PUNT_N_NEXT follows the generated entries and so
   equals the number of next nodes. */
typedef enum
{
#define _(s,n) PUNT_NEXT_##s,
  foreach_punt_next
#undef _
    PUNT_N_NEXT,
} punt_next_t;

/* Next-node indices for the punt socket rx path.
   NOTE(review): presumably consumed by punt_socket_rx_node, whose
   registration is outside this view -- confirm.  PUNT_SOCKET_RX_N_NEXT is
   the number of entries. */
enum punt_socket_rx_next_e
{
  PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT,
  PUNT_SOCKET_RX_NEXT_IP4_LOOKUP,
  PUNT_SOCKET_RX_NEXT_IP6_LOOKUP,
  PUNT_SOCKET_RX_N_NEXT
};

/* Select the punt next index for an address family: PUNT_NEXT_PUNT4 when
   is_ip4 is non-zero, PUNT_NEXT_PUNT6 otherwise.  The argument is
   parenthesized so that a low-precedence argument expression (e.g. an
   assignment) is evaluated as a whole before being tested. */
#define punt_next_punt(is_ip4) ((is_ip4) ? PUNT_NEXT_PUNT4 : PUNT_NEXT_PUNT6)

/* Forward declarations of the graph node registrations and of the global
   punt state used in this file.  udp4_punt_node and udp6_punt_node are
   registered further down; the socket node registrations are not visible
   in this part of the file. */
extern vlib_node_registration_t udp4_punt_node;
extern vlib_node_registration_t udp6_punt_node;
extern vlib_node_registration_t udp4_punt_socket_node;
extern vlib_node_registration_t udp6_punt_socket_node;
static vlib_node_registration_t punt_socket_rx_node;

extern punt_main_t punt_main;

#ifndef CLIB_MARCH_VARIANT
/* Global punt state.  Defined only when CLIB_MARCH_VARIANT is not set, so
   march-variant builds of this file refer to it through the extern
   declaration instead of defining it again. */
punt_main_t punt_main;

/* Return the server socket pathname stored in the global punt state. */
char *
vnet_punt_get_server_pathname (void)
{
  return punt_main.sun_path;
}
#endif /* CLIB_MARCH_VARIANT */

/** @brief Shared IPv4/IPv6 UDP punt worker.

    Common body of the "ip4-udp-punt" and "ip6-udp-punt" nodes.  Every
    buffer of the frame is moved back by the size of the IP + UDP headers,
    tagged with the PUNT_ERROR_UDP_PORT error and enqueued to the punt next
    node of the matching address family.

    @param vm vlib_main_t corresponding to the current thread
    @param node vlib_node_runtime_t
    @param from_frame vlib_frame_t whose contents should be dispatched
    @param is_ip4 non-zero when called for the IPv4 node, zero for IPv6
    @return number of buffers in from_frame
*/
always_inline uword
udp46_punt_inline (vlib_main_t * vm,
		   vlib_node_runtime_t * node,
		   vlib_frame_t * from_frame, int is_ip4)
{
  u32 *from = vlib_frame_vector_args (from_frame);
  u32 n_left_from = from_frame->n_vectors;
  u32 next_index = punt_next_punt (is_ip4);
  u32 *to_next;
  word rewind;

  /* udp[46]_lookup hands us the data payload, not the IP header */
  rewind = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
  rewind += sizeof (udp_header_t);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      for (; n_left_from > 0 && n_left_to_next > 0;
	   n_left_from--, n_left_to_next--)
	{
	  u32 bi0 = *from++;
	  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);

	  *to_next++ = bi0;

	  /* Step back over the UDP and IP headers. */
	  vlib_buffer_advance (b0, -rewind);
	  b0->error = node->errors[PUNT_ERROR_UDP_PORT];
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return from_frame->n_vectors;
}

/* Error strings shared by the punt nodes: one entry per punt_error (n, s)
   line of punt_error.def, keeping only the string argument s. */
static char *punt_error_strings[] = {
#define punt_error(n,s) s,
#include "punt_error.def"
#undef punt_error
};

/** @brief IPv4 UDP punt node.
    @node ip4-udp-punt

    This is the IPv4 UDP punt transition node. It is registered as a next
    node for the "ip4-udp-lookup" handling UDP port(s) requested for punt.
    The buffer's current data pointer is moved back to the original packet
    IPv4 header and the buffer error is set to PUNT_ERROR_UDP_PORT.
    All buffers are dispatched to "ip4-punt".

    @param vm vlib_main_t corresponding to the current thread
    @param node vlib_node_runtime_t
    @param from_frame vlib_frame_t whose contents should be dispatched
    @return number of buffers in from_frame

    @par Graph mechanics: next index usage

    @em Sets:
    - the buffer's current data position, through vlib_buffer_advance()
    - <code>b->error</code>

    <em>Next Index:</em>
    - Dispatches the packet to the "ip4-punt" node
*/
VLIB_NODE_FN (udp4_punt_node) (vlib_main_t * vm,
			       vlib_node_runtime_t * node,
			       vlib_frame_t * from_frame)
{
  return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ );
}

/** @brief IPv6 UDP punt node.
    @node ip6-udp-punt

    This is the IPv6 UDP punt transition node. It is registered as a next
    node for the "ip6-udp-lookup" handling UDP port(s) requested for punt.
    The buffer's current data pointer is moved back to the original packet
    IPv6 header and the buffer error is set to PUNT_ERROR_UDP_PORT.
    All buffers are dispatched to "ip6-punt".

    @param vm vlib_main_t corresponding to the current thread
    @param node vlib_node_runtime_t
    @param from_frame vlib_frame_t whose contents should be dispatched
    @return number of buffers in from_frame

    @par Graph mechanics: next index usage

    @em Sets:
    - the buffer's current data position, through vlib_buffer_advance()
    - <code>b->error</code>

    <em>Next Index:</em>
    - Dispatches the packet to the "ip6-punt" node
*/
VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm,
			       vlib_node_runtime_t * node,
			       vlib_frame_t * from_frame)
{
  return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}

/* *INDENT-OFF* */
/* Graph node registration for "ip4-udp-punt".  It uses the shared punt
   error strings and takes its next nodes from foreach_punt_next, indexed
   by the matching PUNT_NEXT_* value. */
VLIB_REGISTER_NODE (udp4_punt_node) = {
  .name = "ip4-udp-punt",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),

  .n_errors = PUNT_N_ERROR,
  .error_strings = punt_error_strings,

  .n_next_nodes = PUNT_N_NEXT,
  .next_nodes = {
#define _(s,n) [PUNT_NEXT_##s] = n,
     foreach_punt_next
#undef _
  },
};

/* Registration of the "ip6-udp-punt" graph node.  Error strings come from
   punt_error_strings and the next-node table is expanded from the
   foreach_punt_next list, one entry per PUNT_NEXT_* index. */
VLIB_REGISTER_NODE (udp6_punt_node) = {
  .name = "ip6-udp-punt",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),

  .n_errors = PUNT_N_ERROR,
  .error_strings = punt_error_strings,

  .n_next_nodes = PUNT_N_NEXT,
  .next_nodes = {
#define _(s,n) [PUNT_NEXT_##s] = n,
     foreach_punt_next
#undef _
  },
};

/* *INDENT-ON* */

/* Look up the punt client stored for a destination port in the IPv4 or
   IPv6 sparse vector.  Returns 0 when the port has no entry. */
static punt_client_t *
punt_client_get (bool is_ip4, u16 port)
{
  punt_main_t *pm = &punt_main;
  punt_client_t *clients;
  u16 slot;

  if (is_ip4)
    clients = pm->clients_by_dst_port4;
  else
    clients = pm->clients_by_dst_port6;

  slot = sparse_vec_index (clients, port);

  return (slot != SPARSE_VEC_INVALID_INDEX) ? &vec_elt (clients, slot) : 0;
}

/* Return the UNIX socket address of the client stored for a destination
   port, or NULL when no client entry exists. */
static struct sockaddr_un *
punt_socket_get (bool is_ip4, u16 port)
{
  punt_client_t *client = punt_client_get (is_ip4, port);

  return client ? &client->caddr : NULL;
}

#ifndef CLIB_MARCH_VARIANT
/**
 * Store a client's UNIX socket path for a punted destination port.
 *
 * @param is_ip4          selects the IPv4 (true) or IPv6 (false) client table
 * @param protocol        L4 protocol recorded in the client entry
 * @param port            destination port used as the sparse-vector index
 * @param client_pathname client socket path; must not be NULL
 *
 * @returns 0 on success, -1 if the path is NULL or equal to the server's
 *          own socket path
 */
static int
punt_socket_register (bool is_ip4, u8 protocol, u16 port,
		      char *client_pathname)
{
  punt_main_t *pm = &punt_main;
  punt_client_t c, *n;
  punt_client_t **table = is_ip4 ? &pm->clients_by_dst_port4 :
    &pm->clients_by_dst_port6;
  punt_client_t *v = *table;

  /* strncmp() below must never be handed a NULL pointer. */
  if (client_pathname == 0)
    return -1;

  if (strncmp (client_pathname, vnet_punt_get_server_pathname (),
	       UNIX_PATH_MAX) == 0)
    return -1;

  clib_memset (&c, 0, sizeof (c));
  if (client_pathname[0] != '\0')
    {
      /* Ordinary path: copy the string only, so that a caller passing a
         short C string is not read beyond its terminator.  The rest of
         sun_path stays zero from the memset above. */
      memcpy (c.caddr.sun_path, client_pathname,
	      strnlen (client_pathname, sizeof (c.caddr.sun_path)));
    }
  else
    {
      /* Name starting with a NUL byte: keep the historical full-field
         copy.  The caller must supply a buffer at least as large as
         sun_path in this case. */
      memcpy (c.caddr.sun_path, client_pathname, sizeof (c.caddr.sun_path));
    }
  c.caddr.sun_family = AF_UNIX;
  c.port = port;
  c.protocol = protocol;
  n = sparse_vec_validate (v, port);
  n[0] = c;

  /* sparse_vec_validate() takes the vector as an lvalue and may move it
     when inserting; store the (possibly new) pointer back so later lookups
     through punt_main do not use a stale vector. */
  *table = v;
  return 0;
}

/* $$$$ Placeholder: the client mapping is left in place for now, so none
   of the arguments are consulted. */
static void
punt_socket_unregister (bool is_ip4, u8 protocol, u16 port)
{
}
#endif /* CLIB_MARCH_VARIANT */

/* Trace record added by the UDP punt socket nodes for traced buffers. */
typedef struct
{
  /* Copy of the client entry found for the packet's destination port. */
  punt_client_t client;
  /* Set to 1 for traces added on follow-on buffers of a buffer chain. */
  u8 is_midchain;
} udp_punt_trace_t;

/* Format a udp_punt_trace_t: the client socket path, plus a note on a new
   indented line when the trace belongs to a mid-chain buffer. */
static u8 *
format_udp_punt_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  udp_punt_trace_t *trace = va_arg (*args, udp_punt_trace_t *);
  u32 indent = format_get_indent (s);

  s = format (s, "to: %s", trace->client.caddr.sun_path);
  if (!trace->is_midchain)
    return s;

  return format (s, "\n%U(buffer is part of chain)", format_white_space,
		 indent);
}

/**
 * Send every packet of a frame to the UNIX socket of the client found for
 * the packet's UDP destination port, then free all buffers of the frame.
 *
 * Each datagram consists of a punt_packetdesc_t followed by the packet data,
 * starting sizeof (ethernet_header_t) bytes before the IP header; chained
 * buffers are appended as additional iovec entries.
 *
 * A packet whose port has no client entry is counted as
 * PUNT_ERROR_SOCKET_TX_ERROR and skipped; the remaining packets of the
 * frame are still processed.
 *
 * @param vm     vlib_main_t corresponding to the current thread
 * @param node   vlib_node_runtime_t
 * @param frame  frame whose buffers are sent and then freed
 * @param is_ip4 true for the IPv4 node, false for the IPv6 node
 *
 * @returns the number of buffers in the frame
 */
always_inline uword
udp46_punt_socket_inline (vlib_main_t * vm,
			  vlib_node_runtime_t * node,
			  vlib_frame_t * frame, bool is_ip4)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  uword n_packets = frame->n_vectors;
  struct iovec *iovecs = 0;
  punt_main_t *pm = &punt_main;
  uword i;

  u32 node_index = is_ip4 ? udp4_punt_socket_node.index :
    udp6_punt_socket_node.index;

  for (i = 0; i < n_packets; i++)
    {
      struct iovec *iov;
      vlib_buffer_t *b;
      uword l;
      punt_packetdesc_t packetdesc;

      b = vlib_get_buffer (vm, buffers[i]);

      /* Reverse UDP Punt advance */
      udp_header_t *udp;
      if (is_ip4)
	{
	  vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
				    sizeof (udp_header_t)));
	  ip4_header_t *ip = vlib_buffer_get_current (b);
	  udp = (udp_header_t *) (ip + 1);
	}
      else
	{
	  vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
				    sizeof (udp_header_t)));
	  ip6_header_t *ip = vlib_buffer_get_current (b);
	  udp = (udp_header_t *) (ip + 1);
	}

      u16 port = clib_net_to_host_u16 (udp->dst_port);

      /*
       * Find registered client
       * If no registered client, drop packet and count
       */
      struct sockaddr_un *caddr;
      caddr = punt_socket_get (is_ip4, port);
      if (!caddr)
	{
	  vlib_node_increment_counter (vm, node_index,
				       PUNT_ERROR_SOCKET_TX_ERROR, 1);
	  /* Only this packet is skipped; every buffer of the frame is
	     freed after the loop. */
	  continue;
	}

      punt_client_t *c = NULL;
      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
	{
	  c = punt_client_get (is_ip4, port);
	  udp_punt_trace_t *t;
	  t = vlib_add_trace (vm, node, b, sizeof (t[0]));
	  clib_memcpy_fast (&t->client, c, sizeof (t->client));
	  /* The formatter reads this flag, so give it a defined value. */
	  t->is_midchain = 0;
	}

      /* Re-set iovecs if present. */
      if (iovecs)
	_vec_len (iovecs) = 0;

      /* Add packet descriptor */
      packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
      packetdesc.action = 0;
      vec_add2 (iovecs, iov, 1);
      iov->iov_base = &packetdesc;
      iov->iov_len = sizeof (packetdesc);

      /** VLIB buffer chain -> Unix iovec(s). */
      vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
      vec_add2 (iovecs, iov, 1);
      iov->iov_base = b->data + b->current_data;
      iov->iov_len = l = b->current_length;

      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
	{
	  do
	    {
	      b = vlib_get_buffer (vm, b->next_buffer);
	      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
		{
		  if (PREDICT_FALSE (!c))
		    {
		      c = punt_client_get (is_ip4, port);
		    }
		  udp_punt_trace_t *t;
		  t = vlib_add_trace (vm, node, b, sizeof (t[0]));
		  clib_memcpy_fast (&t->client, c, sizeof (t->client));
		  t->is_midchain = 1;
		}

	      vec_add2 (iovecs, iov, 1);

	      iov->iov_base = b->data + b->current_data;
	      iov->iov_len = b->current_length;
	      l += b->current_length;
	    }
	  while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
	}

      struct msghdr msg = {
	.msg_name = caddr,
	.msg_namelen = sizeof (*caddr),
	.msg_iov = iovecs,
	.msg_iovlen = vec_len (iovecs),
      };

      /* A return value shorter than the packet data is counted as a TX
         error. */
      if (sendmsg (pm->socket_fd, &msg, 0) < (ssize_t) l)
	vlib_node_increment_counter (vm, node_index,
				     PUNT_ERROR_SOCKET_TX_ERROR, 1);
      else
	vlib_node_increment_counter (vm, node_index, PUNT_ERROR_SOCKET_TX, 1);

    }

  /* The scratch iovec vector is local to this call; release it. */
  vec_free (iovecs);
  vlib_buffer_free (vm, buffers, n_packets);

  return n_packets;
}

/* Node function of "ip4-udp-punt-socket": IPv4 flavour of the shared
   punt-to-socket worker. */
static uword
udp4_punt_socket (vlib_main_t * vm,
		  vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
  const bool is_ip4 = true;

  return udp46_punt_socket_inline (vm, node, from_frame, is_ip4);
}

/* Node function of "ip6-udp-punt-socket": IPv6 flavour of the shared
   punt-to-socket worker. */
static uword
udp6_punt_socket (vlib_main_t * vm,
		  vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
  const bool is_ip4 = false;

  return udp46_punt_socket_inline (vm, node, from_frame, is_ip4);
}


/* *INDENT-OFF* */
/* Registration of "ip4-udp-punt-socket".  The node is flagged
   VLIB_NODE_FLAG_IS_DROP and declares no next nodes; its function frees
   every buffer it receives after sending it to the client socket. */
VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
  .function = udp4_punt_socket,
  .name = "ip4-udp-punt-socket",
  .format_trace = format_udp_punt_trace,
  .flags = VLIB_NODE_FLAG_IS_DROP,
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),
  .n_errors = PUNT_N_ERROR,
  .error_strings = punt_error_strings,
};
/* Registration of "ip6-udp-punt-socket"; IPv6 counterpart of
   "ip4-udp-punt-socket" with the same flags, trace formatter and error
   strings. */
VLIB_REGISTER_NODE (udp6_punt_socket_node) = {
  .function = udp6_punt_socket,
  .name = "ip6-udp-punt-socket",
  .format_trace = format_udp_punt_trace,
  .flags = VLIB_NODE_FLAG_IS_DROP,
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),
  .n_errors = PUNT_N_ERROR,
  .error_strings = punt_error_strings,
};
/* *INDENT-ON* */

/* Trace record filled in by punt_socket_rx_fd for packets read from the
   punt socket. */
typedef struct
{
  /* Action taken from the received packet descriptor. */
  enum punt_action_e action;
  /* Interface index taken from the received packet descriptor. */
  u32 sw_if_index;
} punt_trace_t;

/* Format a punt_trace_t as "<interface name> Action: <action>". */
static u8 *
format_punt_trace (u8 * s, va_list * va)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
  punt_trace_t *trace = va_arg (*va, punt_trace_t *);

  return format (s, "%U Action: %d", format_vnet_sw_if_index_name,
		 vnet_get_main (), trace->sw_if_index, trace->action);
}

/**
 * Read one datagram from a punt socket file descriptor and inject it into
 * the graph.
 *
 * The datagram is read as a punt_packetdesc_t followed by packet data that
 * lands directly in a newly allocated buffer.  The descriptor's action
 * selects the next node (interface-output, ip4-lookup or ip6-lookup) and
 * which sw_if_index slots are filled in.
 *
 * @param vm   vlib_main_t corresponding to the current thread
 * @param node runtime of the rx node
 * @param fd   file descriptor to read from
 *
 * @returns 1 when a packet was enqueued, 0 on failure (the matching
 *          PUNT_ERROR_* counter of punt_socket_rx_node is incremented)
 */
static uword
punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd)
{
  const uword buffer_size = vlib_buffer_get_default_data_size (vm);
  u32 n_trace = vlib_get_trace_count (vm, node);
  u32 next = node->cached_next_index;
  u32 n_left_to_next, next_index;
  u32 *to_next;
  u32 error = PUNT_ERROR_NONE;
  /* NOTE(review): the error paths below return without a matching
     vlib_put_next_frame for this frame - confirm that is intended. */
  vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);

  /* $$$$ Only dealing with one buffer at the time for now */

  u32 bi;
  vlib_buffer_t *b;
  punt_packetdesc_t packetdesc;
  ssize_t size;
  struct iovec io[2];

  if (vlib_buffer_alloc (vm, &bi, 1) != 1)
    {
      error = PUNT_ERROR_NOBUFFER;
      goto error;
    }

  /* Scatter read: descriptor into a local, payload into the buffer data. */
  b = vlib_get_buffer (vm, bi);
  io[0].iov_base = &packetdesc;
  io[0].iov_len = sizeof (packetdesc);
  io[1].iov_base = b->data;
  io[1].iov_len = buffer_size;

  size = readv (fd, io, 2);
  /* We need at least the packet descriptor plus a header */
  if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t)))
    {
      vlib_buffer_free (vm, &bi, 1);
      error = PUNT_ERROR_READV;
      goto error;
    }

  /* Payload length is whatever was read beyond the descriptor. */
  b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
  b->current_length = size - sizeof (packetdesc);

  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);

  /* The descriptor's action picks the next node and the interface slots. */
  switch (packetdesc.action)
    {
    case PUNT_L2:
      vnet_buffer (b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index;
      next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT;
      break;

    case PUNT_IP4_ROUTED:
      vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
      vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
      next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP;
      break;

    case PUNT_IP6_ROUTED:
      vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
      vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
      next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP;
      break;

    default:
      /* Unknown action: release the buffer and count the error. */
      error = PUNT_ERROR_ACTION;
      vlib_buffer_free (vm, &bi, 1);
      goto error;
    }

  if (PREDICT_FALSE (n_trace > 0))
    {
      punt_trace_t *t;
      vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ );
      vlib_set_trace_count (vm, node, --n_trace);
      t = vlib_add_trace (vm, node, b, sizeof (*t));
      t->sw_if_index = packetdesc.sw_if_index;
      t->action = packetdesc.action;
    }

  /* Single-buffer enqueue towards next_index. */
  to_next[0] = bi;
  to_next++;
  n_left_to_next--;

  vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next,
				   bi, next_index);
  vlib_put_next_frame (vm, node, next, n_left_to_next);
  return 1;

error:
  vlib_node_increment_counter (vm, punt_socket_rx_node.index, error, 1);
  return 0;
}

/**
 * Input node function of "punt-socket-rx": service every file descriptor
 * queued in punt_main.ready_fds, then empty the queue.
 *
 * @param vm    vlib_main_t corresponding to the current thread
 * @param node  vlib_node_runtime_t
 * @param frame unused
 *
 * @returns the number of packets injected into the graph
 */
static uword
punt_socket_rx (vlib_main_t * vm,
		vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  punt_main_t *pm = &punt_main;
  u32 total_count = 0;
  int i;

  /* Visit every queued descriptor first and clear the queue afterwards.
     Deleting entry i with vec_del1 inside the loop moves the last entry
     into slot i, which the following i++ would then skip. */
  for (i = 0; i < vec_len (pm->ready_fds); i++)
    total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[i]);

  vec_reset_length (pm->ready_fds);

  return total_count;
}

/* *INDENT-OFF* */
/* Registration of "punt-socket-rx": an input node in interrupt state whose
   next nodes match the PUNT_SOCKET_RX_NEXT_* indices chosen in
   punt_socket_rx_fd. */
VLIB_REGISTER_NODE (punt_socket_rx_node, static) =
{
 .function = punt_socket_rx,
 .name = "punt-socket-rx",
 .type = VLIB_NODE_TYPE_INPUT,
 .state = VLIB_NODE_STATE_INTERRUPT,
 .vector_size = 1,
 .n_errors = PUNT_N_ERROR,
 .error_strings = punt_error_strings,
 .n_next_nodes = PUNT_SOCKET_RX_N_NEXT,
 .next_nodes = {
		[PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output",
		[PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup",
		[PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup",
		},
 .format_trace = format_punt_trace,
};
/* *INDENT-ON* */

/* File read callback for the punt socket: mark the rx node as having a
   pending interrupt and queue the descriptor for it. */
static clib_error_t *
punt_socket_read_ready (clib_file_t * uf)
{
  punt_main_t *pm = &punt_main;

  /** Schedule the rx node */
  vlib_node_set_interrupt_pending (vlib_get_main (),
				   punt_socket_rx_node.index);
  vec_add1 (pm->ready_fds, uf->file_descriptor);

  return 0;
}

#ifndef CLIB_MARCH_VARIANT
/**
 * Register a client socket for a UDP destination port and steer that port
 * to the matching punt-socket node.
 *
 * @param vm              vlib_main_t corresponding to the current thread
 * @param header_version  must equal PUNT_PACKETDESC_VERSION
 * @param is_ip4          true for IPv4, false for IPv6
 * @param protocol        L4 protocol; only IP_PROTOCOL_UDP is accepted
 * @param port            UDP destination port; (u16) ~0 is rejected
 * @param client_pathname path of the client's UNIX socket
 *
 * @returns 0 on success, a clib_error_t otherwise
 */
clib_error_t *
vnet_punt_socket_add (vlib_main_t * vm, u32 header_version,
		      bool is_ip4, u8 protocol, u16 port,
		      char *client_pathname)
{
  punt_main_t *pm = &punt_main;
  u32 target_node;

  if (!pm->is_configured)
    return clib_error_return (0, "socket is not configured");

  if (header_version != PUNT_PACKETDESC_VERSION)
    return clib_error_return (0, "Invalid packet descriptor version");

  /* For now we only support UDP punt */
  if (protocol != IP_PROTOCOL_UDP)
    return clib_error_return (0,
			      "only UDP protocol (%d) is supported, got %d",
			      IP_PROTOCOL_UDP, protocol);

  if (port == (u16) ~ 0)
    return clib_error_return (0, "UDP port number required");

  /* Register client */
  if (punt_socket_register (is_ip4, protocol, port, client_pathname) < 0)
    return clib_error_return (0,
			      "Punt socket: Invalid client path: %s",
			      client_pathname);

  if (is_ip4)
    target_node = udp4_punt_socket_node.index;
  else
    target_node = udp6_punt_socket_node.index;

  udp_register_dst_port (vm, port, target_node, is_ip4);

  return 0;
}

/**
 * Remove a punt socket registration and unregister the UDP destination
 * port.
 *
 * @returns 0 on success, a clib_error_t when the punt socket is not
 *          configured
 */
clib_error_t *
vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, u8 l4_protocol, u16 port)
{
  if (!punt_main.is_configured)
    return clib_error_return (0, "socket is not configured");

  punt_socket_unregister (is_ip4, l4_protocol, port);
  udp_unregister_dst_port (vm, port, is_ip4);

  return 0;
}

/**
 * @brief Request IP traffic punt to the local TCP/IP stack.
 *
 * @em Note
 * - UDP, TCP and SCTP are accepted; punting a specific port is only
 *   implemented for UDP
 *
 * @param vm       vlib_main_t corresponding to the current thread
 * @param ipv      IP protocol version.
 *                 4 - IPv4, 6 - IPv6, ~0 for both IPv6 and IPv4
 * @param protocol 8-bits L4 protocol value
 *                 UDP is 17
 *                 TCP is 6
 * @param port     16-bits L4 port number, or ~0 to select the protocol's
 *                 "punt unknown" handling instead of a single port
 * @param is_add   true to add the punt, false to remove it
 *
 * @returns 0 on success, a clib_error_t describing the failure otherwise
 */
clib_error_t *
vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port,
		   bool is_add)
{
  const u8 ipv_both = (u8) ~ 0;
  bool want_ip4, want_ip6;
  int pass;

  /* For now we only support TCP, UDP and SCTP punt */
  if (!(protocol == IP_PROTOCOL_UDP ||
	protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_SCTP))
    return clib_error_return (0,
			      "only UDP (%d), TCP (%d) and SCTP (%d) protocols are supported, got %d",
			      IP_PROTOCOL_UDP, IP_PROTOCOL_TCP,
			      IP_PROTOCOL_SCTP, protocol);

  if (!(ipv == ipv_both || ipv == 4 || ipv == 6))
    return clib_error_return (0, "IP version must be 4 or 6, got %d", ipv);

  want_ip4 = (ipv == 4) || (ipv == ipv_both);
  want_ip6 = (ipv == 6) || (ipv == ipv_both);

  if (port == (u16) ~ 0)
    {
      /* No specific port: toggle the per-protocol "punt unknown" hook,
         IPv4 first (pass 0), then IPv6 (pass 1). */
      for (pass = 0; pass < 2; pass++)
	{
	  const u8 is_ip4 = (pass == 0);

	  if (is_ip4 ? !want_ip4 : !want_ip6)
	    continue;

	  if (protocol == IP_PROTOCOL_UDP)
	    udp_punt_unknown (vm, is_ip4, is_add);
	  else if (protocol == IP_PROTOCOL_TCP)
	    tcp_punt_unknown (vm, is_ip4, is_add);
	  else if (protocol == IP_PROTOCOL_SCTP)
	    sctp_punt_unknown (vm, is_ip4, is_add);
	}

      return 0;
    }

  /* A specific port was given: only UDP can be punted per port, for both
     add and delete. */
  if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_SCTP)
    return clib_error_return (0, "punt TCP/SCTP ports is not supported yet");

  if (want_ip4)
    {
      if (is_add)
	udp_register_dst_port (vm, port, udp4_punt_node.index, 1);
      else
	udp_unregister_dst_port (vm, port, 1);
    }

  if (want_ip6)
    {
      if (is_add)
	udp_register_dst_port (vm, port, udp6_punt_node.index, 0);
      else
	udp_unregister_dst_port (vm, port, 0);
    }

  return 0;
}
#endif /* CLIB_MARCH_VARIANT */

/* Handler of the "set punt" CLI: parse [udp|tcp] [del] <all | port> and
   apply the request to both IPv4 and IPv6. */
static clib_error_t *
punt_cli (vlib_main_t * vm,
	  unformat_input_t * input, vlib_cli_command_t * cmd)
{
  clib_error_t *err = NULL;
  u32 l4_proto = ~0;
  u32 l4_port = ~0;
  bool adding = true;

  for (;;)
    {
      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
	break;

      if (unformat (input, "del"))
	adding = false;
      else if (unformat (input, "all"))
	;
      else if (unformat (input, "%d", &l4_port))
	;
      else if (unformat (input, "udp"))
	l4_proto = IP_PROTOCOL_UDP;
      else if (unformat (input, "tcp"))
	l4_proto = IP_PROTOCOL_TCP;
      else
	return clib_error_return (0, "parse error: '%U'",
				  format_unformat_error, input);
    }

  /* punt both IPv6 and IPv4 when used in CLI */
  err = vnet_punt_add_del (vm, ~0, l4_proto, l4_port, adding);
  if (err)
    {
      clib_error_report (err);
    }

  return err;
}

/*?
 * The set of '<em>set punt</em>' commands allows specific IP traffic to
 * be punted to the host TCP/IP stack
 *
 * @em Note
 * - UDP is the only protocol supported in the current implementation
 * - All TCP traffic is currently punted to the host by default
 *
 * @cliexpar
 * @parblock
 * Example of how to request NTP traffic to be punted
 * @cliexcmd{set punt udp 123}
 *
 * Example of how to request all 'unknown' UDP traffic to be punted
 * @cliexcmd{set punt udp all}
 *
 * Example of how to stop all 'unknown' UDP traffic to be punted
 * @cliexcmd{set punt udp del all}
 * @endparblock
?*/
/* *INDENT-OFF* */
/* CLI registration: "set punt" is handled by punt_cli. */
VLIB_CLI_COMMAND (punt_command, static) = {
  .path = "set punt",
  .short_help = "set punt [udp|tcp] [del] <all | port-num1 [port-num2 ...]>",
  .function = punt_cli,
};
/* *INDENT-ON* */

#ifndef CLIB_MARCH_VARIANT
/**
 * Handler of the "punt socket register" CLI.
 *
 * Accepted tokens, in any order: "ipv4" | "ipv6", "udp" | "tcp", a port
 * number and "socket <path>".  The parsed values are handed to
 * vnet_punt_socket_add, which performs the remaining validation.
 *
 * @returns NULL on success, a clib_error_t otherwise
 */
static clib_error_t *
punt_socket_register_cmd (vlib_main_t * vm,
			  unformat_input_t * input, vlib_cli_command_t * cmd)
{
  bool is_ipv4 = true;
  u32 protocol = ~0;
  u32 port = ~0;
  u8 *socket_name = 0;
  clib_error_t *error = NULL;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "ipv4"))
	;
      else if (unformat (input, "ipv6"))
	is_ipv4 = false;
      else if (unformat (input, "udp"))
	protocol = IP_PROTOCOL_UDP;
      else if (unformat (input, "tcp"))
	protocol = IP_PROTOCOL_TCP;
      else if (unformat (input, "%d", &port))
	;
      else if (unformat (input, "socket %s", &socket_name))
	;
      else
	{
	  error = clib_error_return (0, "parse error: '%U'",
				     format_unformat_error, input);
	  goto done;
	}
    }

  /* Without this check a missing "socket <path>" would hand a NULL
     pathname to the registration code. */
  if (socket_name == 0)
    {
      error = clib_error_return (0, "socket name required");
      goto done;
    }

  error =
    vnet_punt_socket_add (vm, 1, is_ipv4, protocol, port,
			  (char *) socket_name);
done:
  /* The registration keeps its own copy of the path, so the parsed vector
     can be released on every exit path. */
  vec_free (socket_name);
  return error;
}

/*?
 *
 * @cliexpar
 * @cliexcmd{punt socket register}
 ?*/
/* *INDENT-OFF* */
/* CLI registration: "punt socket register" is handled by
   punt_socket_register_cmd.  The help text mirrors the tokens that handler
   parses: an optional address family, the protocol, a port number and the
   "socket" keyword followed by the client path. */
VLIB_CLI_COMMAND (punt_socket_register_command, static) =
{
  .path = "punt socket register",
  .function = punt_socket_register_cmd,
  .short_help = "punt socket register [ipv4|ipv6] [udp|tcp] <port-num> socket <socket-path>",
  .is_mp_safe = 1,
};
/* *INDENT-ON* */

/* Handler of the "punt socket deregister" CLI: parse the address family,
   protocol and port, then call vnet_punt_socket_del. */
static clib_error_t *
punt_socket_deregister_cmd (vlib_main_t * vm,
			    unformat_input_t * input,
			    vlib_cli_command_t * cmd)
{
  u32 l4_proto = ~0;
  u32 l4_port = ~0;
  bool use_ip4 = true;

  for (;;)
    {
      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
	break;

      if (unformat (input, "ipv4"))
	;
      else if (unformat (input, "ipv6"))
	use_ip4 = false;
      else if (unformat (input, "udp"))
	l4_proto = IP_PROTOCOL_UDP;
      else if (unformat (input, "tcp"))
	l4_proto = IP_PROTOCOL_TCP;
      else if (unformat (input, "%d", &l4_port))
	;
      else
	return clib_error_return (0, "parse error: '%U'",
				  format_unformat_error, input);
    }

  return vnet_punt_socket_del (vm, use_ip4, l4_proto, l4_port);
}

/*?
 *
 * @cliexpar
 * @cliexcmd{punt socket deregister}
 ?*/
/* *INDENT-OFF* */
/* CLI registration: "punt socket deregister" is handled by
   punt_socket_deregister_cmd.  The help text mirrors the tokens that
   handler parses: an optional address family, the protocol and a port
   number. */
VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
{
  .path = "punt socket deregister",
  .function = punt_socket_deregister_cmd,
  .short_help = "punt socket deregister [ipv4|ipv6] [udp|tcp] <port-num>",
  .is_mp_safe = 1,
};
/* *INDENT-ON* */

/**
 * Build a vector describing the punt socket registrations of one address
 * family.
 *
 * @param ipv 0 selects the IPv4 client table, any other value the IPv6 one
 *
 * @returns a vector of punt_socket_detail_t (0 when nothing qualifies).
 *          Only UDP entries with a non-zero port that
 *          udp_is_valid_dst_port accepts are included.
 */
punt_socket_detail_t *
punt_socket_entries (u8 ipv)
{
  punt_main_t *pm = &punt_main;
  punt_socket_detail_t *entries = 0;
  punt_client_t *clients, *client;

  if (ipv)
    clients = pm->clients_by_dst_port6;
  else
    clients = pm->clients_by_dst_port4;

  vec_foreach (client, clients)
  {
    /* Skip slots that carry no port. */
    if (!client || client->port == 0)
      continue;

    /* Only UDP registrations can be validated. */
    if (client->protocol != IP_PROTOCOL_UDP)
      continue;

    if (!udp_is_valid_dst_port (client->port, !ipv))
      continue;

    punt_socket_detail_t detail = {
      .ipv = ipv,
      .l4_protocol = client->protocol,
      .l4_port = client->port
    };
    memcpy (detail.pathname, client->caddr.sun_path,
	    sizeof (client->caddr.sun_path));
    vec_add1 (entries, detail);
  }

  return entries;
}

/**
 * Format the punt socket registrations of a client table, one line each.
 *
 * Variadic arguments: the punt_client_t vector to walk, then a pointer to
 * a u8 that is non-zero when the table is the IPv6 one.  Only UDP entries
 * with a non-zero port that udp_is_valid_dst_port accepts are printed.
 */
u8 *
format_punt_socket (u8 * s, va_list * args)
{
  punt_client_t *clients = va_arg (*args, punt_client_t *);
  u8 *is_ipv6 = va_arg (*args, u8 *);
  punt_client_t *client;

  vec_foreach (client, clients)
  {
    if (!client || client->port == 0)
      continue;

    if (client->protocol != IP_PROTOCOL_UDP)
      continue;

    if (!udp_is_valid_dst_port (client->port, !(*is_ipv6)))
      continue;

    s = format (s, " punt %s port %d to socket %s \n",
		(client->protocol == IP_PROTOCOL_UDP) ? "UDP" : "TCP",
		client->port, client->caddr.sun_path);
  }

  return (s);
}

/**
 * Handler of the "show punt socket registrations" CLI.
 *
 * Accepts an optional "ipv4" or "ipv6" token; when neither is given the
 * IPv4 registrations are shown.
 *
 * @returns NULL on success, a clib_error_t on a parse error
 */
static clib_error_t *
punt_socket_show_cmd (vlib_main_t * vm,
		      unformat_input_t * input, vlib_cli_command_t * cmd)
{
  /* Default to IPv4 so the flag is never read uninitialised when the
     command is issued without an address family. */
  u8 is_ipv6 = 0;
  punt_main_t *pm = &punt_main;
  clib_error_t *error = NULL;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "ipv4"))
	is_ipv6 = 0;
      else if (unformat (input, "ipv6"))
	is_ipv6 = 1;
      else
	{
	  error = clib_error_return (0, "parse error: '%U'",
				     format_unformat_error, input);
	  goto done;
	}
    }

  punt_client_t *v =
    is_ipv6 ? pm->clients_by_dst_port6 : pm->clients_by_dst_port4;
  vlib_cli_output (vm, "%U", format_punt_socket, v, &is_ipv6);

done:
  return (error);
}

/*?
 *
 * @cliexpar
 * @cliexcmd{show punt socket registrations ipv4}
 ?*/
/* *INDENT-OFF* */
/* CLI registration: "show punt socket registrations" is handled by
   punt_socket_show_cmd. */
VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
{
  .path = "show punt socket registrations",
  .function = punt_socket_show_cmd,
  .short_help = "show punt socket registrations [ipv4|ipv6]",
  .is_mp_safe = 1,
};
/* *INDENT-ON* */

/**
 * Init function of the punt module: create the IPv6 and IPv4 client sparse
 * vectors (indexed by a value as wide as the UDP destination port field),
 * mark the punt socket as not configured and cache the "interface-output"
 * node.
 *
 * @returns 0
 */
clib_error_t *
punt_init (vlib_main_t * vm)
{
  punt_main_t *pm = &punt_main;
  const uword port_bits = BITS (((udp_header_t *) 0)->dst_port);

  pm->clients_by_dst_port6 =
    sparse_vec_new (sizeof (pm->clients_by_dst_port6[0]), port_bits);
  pm->clients_by_dst_port4 =
    sparse_vec_new (sizeof (pm->clients_by_dst_port4[0]), port_bits);

  pm->is_configured = false;
  pm->interface_output_node =
    vlib_get_node_by_name (vm, (u8 *) "interface-output");

  return 0;
}

VLIB_INIT_FUNCTION (punt_init);
#endif /* CLIB_MARCH_VARIANT */

/**
 * Startup configuration handler for the "punt" section.
 *
 * Recognises "socket <path>".  When a path is given, a non-blocking
 * AF_UNIX datagram socket is created, bound to the path and registered
 * with the file poller so that punt_socket_read_ready runs when it becomes
 * readable.  Without a "socket" entry nothing is set up.
 *
 * @param vm    vlib_main_t corresponding to the current thread
 * @param input configuration input for the "punt" section
 *
 * @returns 0 on success, a clib_error_t on unknown input or when the
 *          socket cannot be created or bound
 */
static clib_error_t *
punt_config (vlib_main_t * vm, unformat_input_t * input)
{
  punt_main_t *pm = &punt_main;
  char *socket_path = 0;
  clib_error_t *error = 0;
  struct sockaddr_un addr;
  clib_file_main_t *fm = &file_main;
  clib_file_t template = { 0 };

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "socket %s", &socket_path))
	strncpy (pm->sun_path, socket_path, UNIX_PATH_MAX - 1);
      else
	{
	  error = clib_error_return (0, "unknown input `%U'",
				     format_unformat_error, input);
	  goto done;
	}
    }

  if (socket_path == 0)
    return 0;

  /* UNIX domain socket */
  if ((pm->socket_fd = socket (AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0)) == -1)
    {
      error = clib_error_return (0, "socket error");
      goto done;
    }

  clib_memset (&addr, 0, sizeof (addr));
  addr.sun_family = AF_UNIX;
  if (*socket_path == '\0')
    {
      /* Path starting with a NUL byte: keep the leading NUL and copy the
         remainder of the name after it. */
      *addr.sun_path = '\0';
      strncpy (addr.sun_path + 1, socket_path + 1,
	       sizeof (addr.sun_path) - 2);
    }
  else
    {
      strncpy (addr.sun_path, socket_path, sizeof (addr.sun_path) - 1);
      /* Remove a stale socket file so that bind() can succeed. */
      unlink (socket_path);
    }

  if (bind (pm->socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1)
    {
      /* Do not leak the descriptor when the socket cannot be bound. */
      close (pm->socket_fd);
      pm->socket_fd = -1;
      error = clib_error_return (0, "bind error");
      goto done;
    }

  /* Register socket */
  template.read_function = punt_socket_read_ready;
  template.file_descriptor = pm->socket_fd;
  template.description = format (0, "%s", socket_path);
  pm->clib_file_index = clib_file_add (fm, &template);

  pm->is_configured = true;

done:
  /* Every consumer above copied the path, so the parsed vector can be
     released. */
  vec_free (socket_path);
  return error;
}

VLIB_CONFIG_FUNCTION (punt_config, "punt");

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */