From 50f0ac0f097e5495da1f2b1816106e3d420ff34b Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 15 May 2019 02:13:37 -0700 Subject: Punt: socket register for exception dispatched/punted packets based on reason - add to the Punt API to allow different descriptions of the desired packets: UDP or exceptions - move the punt nodes into punt_node.c - improve tests (test that the correct packets are punted to the registered socket) Change-Id: I1a133dec88106874993cba1f5a439cd26b2fef72 Signed-off-by: Neale Ranns --- src/vnet/CMakeLists.txt | 3 +- src/vnet/ethernet/ethernet_types_api.c | 1 + src/vnet/ip/ip.c | 16 + src/vnet/ip/ip.h | 13 + src/vnet/ip/ip_types.api | 5 + src/vnet/ip/ip_types_api.c | 67 +++ src/vnet/ip/ip_types_api.h | 12 + src/vnet/ip/punt.api | 92 ++- src/vnet/ip/punt.c | 999 ++++++++++----------------------- src/vnet/ip/punt.h | 123 +++- src/vnet/ip/punt.md | 82 +++ src/vnet/ip/punt_api.c | 274 +++++++-- src/vnet/ip/punt_node.c | 587 +++++++++++++++++++ src/vnet/ipsec/ipsec_punt.h | 10 +- 14 files changed, 1482 insertions(+), 802 deletions(-) create mode 100644 src/vnet/ip/punt.md create mode 100644 src/vnet/ip/punt_node.c (limited to 'src/vnet') diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index 1940abe03c2..dcbdb73d841 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -449,6 +449,7 @@ list(APPEND VNET_SOURCES ip/ping.c ip/punt_api.c ip/punt.c + ip/punt_node.c ) list(APPEND VNET_MULTIARCH_SOURCES @@ -459,7 +460,7 @@ list(APPEND VNET_MULTIARCH_SOURCES ip/ip6_reassembly.c ip/ip6_input.c ip/ip6_punt_drop.c - ip/punt.c + ip/punt_node.c ip/ip_in_out_acl.c ) diff --git a/src/vnet/ethernet/ethernet_types_api.c b/src/vnet/ethernet/ethernet_types_api.c index 90b630d46be..145cf6ca40b 100644 --- a/src/vnet/ethernet/ethernet_types_api.c +++ b/src/vnet/ethernet/ethernet_types_api.c @@ -13,6 +13,7 @@ * limitations under the License. 
*/ +#include #include #define vl_typedefs /* define message structures */ diff --git a/src/vnet/ip/ip.c b/src/vnet/ip/ip.c index cab0cd0315a..6e8ac7c437d 100644 --- a/src/vnet/ip/ip.c +++ b/src/vnet/ip/ip.c @@ -278,6 +278,22 @@ ip6_mask_to_preflen (ip6_address_t * mask) return 64 - first0; } +u8 * +format_ip_address_family (u8 * s, va_list * args) +{ + ip_address_family_t af = va_arg (*args, ip_address_family_t); + + switch (af) + { + case AF_IP4: + return (format (s, "ip4")); + case AF_IP6: + return (format (s, "ip6")); + } + + return (format (s, "unknown")); +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index b114fcbaaf6..7a82dcf8f2b 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -65,6 +65,19 @@ #include #include +typedef enum ip_address_family_t_ +{ + AF_IP4, + AF_IP6, +} ip_address_family_t; + +extern uword unformat_ip_address_family (unformat_input_t * input, + va_list * args); +extern u8 *format_ip_address_family (u8 * s, va_list * args); + +#define FOR_EACH_IP_ADDRESS_FAMILY(_af) \ + for (_af = AF_IP4; _af <= AF_IP6; _af++) + #define u8_ptr_add(ptr, index) (((u8 *)ptr) + index) #define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val)) diff --git a/src/vnet/ip/ip_types.api b/src/vnet/ip/ip_types.api index 29bd8ad37f5..a9e66470665 100644 --- a/src/vnet/ip/ip_types.api +++ b/src/vnet/ip/ip_types.api @@ -22,6 +22,11 @@ enum address_family { ADDRESS_IP6, }; +enum ip_proto { + IP_API_PROTO_TCP = 6, + IP_API_PROTO_UDP = 17, +}; + union address_union { vl_api_ip4_address_t ip4; vl_api_ip6_address_t ip6; diff --git a/src/vnet/ip/ip_types_api.c b/src/vnet/ip/ip_types_api.c index b56101c355f..d84c1ff9126 100644 --- a/src/vnet/ip/ip_types_api.c +++ b/src/vnet/ip/ip_types_api.c @@ -13,6 +13,7 @@ * limitations under the License. 
*/ +#include #include #define vl_typedefs /* define message structures */ @@ -29,6 +30,72 @@ #include #undef vl_printfun +int +ip_address_family_decode (int _af, ip_address_family_t * out) +{ + vl_api_address_family_t af = clib_host_to_net_u32 (_af); + + switch (af) + { + case ADDRESS_IP4: + *out = AF_IP4; + return (0); + case ADDRESS_IP6: + *out = AF_IP6; + return (0); + } + return (-1); +} + +int +ip_address_family_encode (ip_address_family_t af) +{ + switch (af) + { + case AF_IP4: + return (clib_host_to_net_u32 (ADDRESS_IP4)); + case AF_IP6: + return (clib_host_to_net_u32 (ADDRESS_IP6)); + } + + ASSERT (0); + return (clib_host_to_net_u32 (ADDRESS_IP4)); +} + +int +ip_proto_decode (int _ipp, ip_protocol_t * out) +{ + vl_api_ip_proto_t ipp = clib_host_to_net_u32 (_ipp); + + switch (ipp) + { + case IP_API_PROTO_TCP: + *out = IP_PROTOCOL_TCP; + return (0); + case IP_API_PROTO_UDP: + *out = IP_PROTOCOL_UDP; + return (0); + } + return (-1); +} + +int +ip_proto_encode (ip_protocol_t ipp) +{ + switch (ipp) + { + case IP_PROTOCOL_UDP: + return (clib_host_to_net_u32 (IP_API_PROTO_UDP)); + case IP_PROTOCOL_TCP: + return (clib_host_to_net_u32 (IP_API_PROTO_TCP)); + default: + break; + } + + ASSERT (0); + return (clib_host_to_net_u32 (IP_API_PROTO_TCP)); +} + void ip6_address_encode (const ip6_address_t * in, vl_api_ip6_address_t out) { diff --git a/src/vnet/ip/ip_types_api.h b/src/vnet/ip/ip_types_api.h index a67134c9a86..4c79bf138e3 100644 --- a/src/vnet/ip/ip_types_api.h +++ b/src/vnet/ip/ip_types_api.h @@ -33,6 +33,18 @@ struct _vl_api_address; struct _vl_api_prefix; struct _vl_api_mprefix; +/** + * These enum decode/encodes use 'int' as the type for the enum because + * one cannot forward declare an enum + */ +extern int ip_address_family_decode (int _af, ip_address_family_t * out); +extern int ip_address_family_encode (ip_address_family_t af); +extern int ip_proto_decode (int _af, ip_protocol_t * out); +extern int ip_proto_encode (ip_protocol_t af); + +/** + * 
Decode/Encode for struct/union types + */ extern ip46_type_t ip_address_decode (const struct _vl_api_address *in, ip46_address_t * out); extern void ip_address_encode (const ip46_address_t * in, diff --git a/src/vnet/ip/punt.api b/src/vnet/ip/punt.api index c8b222a211b..cedddc5601d 100644 --- a/src/vnet/ip/punt.api +++ b/src/vnet/ip/punt.api @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Copyright (c) 2015-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -13,18 +13,55 @@ * limitations under the License. */ -option version = "2.0.0"; +option version = "2.1.0"; +import "vnet/ip/ip_types.api"; -/** \brief Punt definition - @param ipv - L3 protocol 4 - IPv4, 6 - IPv6, ~0 - All - @param l4_protocol - L4 protocol to be punted - @param l4_port - TCP/UDP port to be punted +/** \brief The types of packets to be punted */ -typeonly define punt +enum punt_type { - u8 ipv; - u8 l4_protocol; - u16 l4_port; + /* L4 (UDP) packets */ + PUNT_API_TYPE_L4, + /* Exception packets handled by the VLIB punt infra */ + PUNT_API_TYPE_EXCEPTION, +}; + +/** \brief Punt L4 traffic definition + @param af - Address Family, IPv4 or IPV6 + @param protocol - L4 protocol to be punted + @param port - TCP/UDP port to be punted +*/ +typedef punt_l4 +{ + vl_api_address_family_t af; + vl_api_ip_proto_t protocol; + u16 port; +}; + +/** \brief The ID of the punt exception reason + Dump all the reasons to obtain this +*/ +typedef punt_exception +{ + u32 id; +}; + +/** \brief Union of the different punt packet descriptions +*/ +union punt_union +{ + vl_api_punt_exception_t exception; + vl_api_punt_l4_t l4; +}; + +/** \brief Full description of which packets are requested to be punted + @param type - Which packet type + @param punt - Description corresponding to the type +*/ +typedef punt +{ + vl_api_punt_type_t 
type; + vl_api_punt_union_t punt; }; /** \brief Punt traffic to the host @@ -40,19 +77,6 @@ autoreply define set_punt { vl_api_punt_t punt; }; -define punt_dump -{ - u32 client_index; - u32 context; - u8 is_ipv6; -}; - -define punt_details -{ - u32 context; - vl_api_punt_t punt; -}; - /** \brief Punt traffic to the host via socket @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -78,7 +102,7 @@ define punt_socket_dump { u32 client_index; u32 context; - u8 is_ipv6; + vl_api_punt_type_t type; }; define punt_socket_details @@ -94,6 +118,26 @@ autoreply define punt_socket_deregister { vl_api_punt_t punt; }; +/** \brief Dump all of the exception punt reasons +*/ +define punt_reason_dump +{ + u32 client_index; + u32 context; +}; + +typedef punt_reason +{ + u32 id; + string name; +}; + +define punt_reason_details +{ + u32 context; + vl_api_punt_reason_t reason; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 743df1fef0b..d4d502887d7 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -38,37 +37,6 @@ #include #include -#define foreach_punt_next \ - _ (PUNT4, "ip4-punt") \ - _ (PUNT6, "ip6-punt") - -typedef enum -{ -#define _(s,n) PUNT_NEXT_##s, - foreach_punt_next -#undef _ - PUNT_N_NEXT, -} punt_next_t; - -enum punt_socket_rx_next_e -{ - PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT, - PUNT_SOCKET_RX_NEXT_IP4_LOOKUP, - PUNT_SOCKET_RX_NEXT_IP6_LOOKUP, - PUNT_SOCKET_RX_N_NEXT -}; - -#define punt_next_punt(is_ip4) (is_ip4 ? 
PUNT_NEXT_PUNT4 : PUNT_NEXT_PUNT6) - -extern vlib_node_registration_t udp4_punt_node; -extern vlib_node_registration_t udp6_punt_node; -extern vlib_node_registration_t udp4_punt_socket_node; -extern vlib_node_registration_t udp6_punt_socket_node; -static vlib_node_registration_t punt_socket_rx_node; - -extern punt_main_t punt_main; - -#ifndef CLIB_MARCH_VARIANT punt_main_t punt_main; char * @@ -77,571 +45,179 @@ vnet_punt_get_server_pathname (void) punt_main_t *pm = &punt_main; return pm->sun_path; } -#endif /* CLIB_MARCH_VARIANT */ - -/** @brief IPv4/IPv6 UDP punt node main loop. - - This is the main loop inline function for IPv4/IPv6 UDP punt - transition node. - - @param vm vlib_main_t corresponding to the current thread - @param node vlib_node_runtime_t - @param frame vlib_frame_t whose contents should be dispatched - @param is_ipv4 indicates if called for IPv4 or IPv6 node -*/ -always_inline uword -udp46_punt_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame, int is_ip4) -{ - u32 n_left_from, *from, *to_next; - word advance; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - /* udp[46]_lookup hands us the data payload, not the IP header */ - if (is_ip4) - advance = -(sizeof (ip4_header_t) + sizeof (udp_header_t)); - else - advance = -(sizeof (ip6_header_t) + sizeof (udp_header_t)); - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, punt_next_punt (is_ip4), to_next, - n_left_to_next); - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - vlib_buffer_advance (b0, advance); - b0->error = node->errors[PUNT_ERROR_UDP_PORT]; - } - - vlib_put_next_frame (vm, node, punt_next_punt (is_ip4), n_left_to_next); - } - - return from_frame->n_vectors; -} - -static char *punt_error_strings[] 
= { -#define punt_error(n,s) s, -#include "punt_error.def" -#undef punt_error -}; - -/** @brief IPv4 UDP punt node. - @node ip4-udp-punt - - This is the IPv4 UDP punt transition node. It is registered as a next - node for the "ip4-udp-lookup" handling UDP port(s) requested for punt. - The buffer's current data pointer is adjusted to the original packet - IPv4 header. All buffers are dispatched to "error-punt". - - @param vm vlib_main_t corresponding to the current thread - @param node vlib_node_runtime_t - @param frame vlib_frame_t whose contents should be dispatched - - @par Graph mechanics: next index usage - - @em Sets: - - vnet_buffer(b)->current_data - - vnet_buffer(b)->current_len - - Next Index: - - Dispatches the packet to the "error-punt" node -*/ -VLIB_NODE_FN (udp4_punt_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) +static void +punt_client_l4_db_add (ip_address_family_t af, u16 port, u32 index) { - return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ ); -} - -/** @brief IPv6 UDP punt node. - @node ip6-udp-punt - - This is the IPv6 UDP punt transition node. It is registered as a next - node for the "ip6-udp-lookup" handling UDP port(s) requested for punt. - The buffer's current data pointer is adjusted to the original packet - IPv6 header. All buffers are dispatched to "error-punt". 
- - @param vm vlib_main_t corresponding to the current thread - @param node vlib_node_runtime_t - @param frame vlib_frame_t whose contents should be dispatched - - @par Graph mechanics: next index usage - - @em Sets: - - vnet_buffer(b)->current_data - - vnet_buffer(b)->current_len + punt_main_t *pm = &punt_main; - Next Index: - - Dispatches the packet to the "error-punt" node -*/ -VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ ); + pm->db.clients_by_l4_port = hash_set (pm->db.clients_by_l4_port, + punt_client_l4_mk_key (af, port), + index); } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp4_punt_node) = { - .name = "ip4-udp-punt", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .n_errors = PUNT_N_ERROR, - .error_strings = punt_error_strings, - - .n_next_nodes = PUNT_N_NEXT, - .next_nodes = { -#define _(s,n) [PUNT_NEXT_##s] = n, - foreach_punt_next -#undef _ - }, -}; - -VLIB_REGISTER_NODE (udp6_punt_node) = { - .name = "ip6-udp-punt", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .n_errors = PUNT_N_ERROR, - .error_strings = punt_error_strings, - - .n_next_nodes = PUNT_N_NEXT, - .next_nodes = { -#define _(s,n) [PUNT_NEXT_##s] = n, - foreach_punt_next -#undef _ - }, -}; - -/* *INDENT-ON* */ - -static punt_client_t * -punt_client_get (bool is_ip4, u16 port) +static u32 +punt_client_l4_db_remove (ip_address_family_t af, u16 port) { punt_main_t *pm = &punt_main; - punt_client_t *v = - is_ip4 ? 
pm->clients_by_dst_port4 : pm->clients_by_dst_port6; + u32 key, index = ~0; + uword *p; - u16 i = sparse_vec_index (v, port); - if (i == SPARSE_VEC_INVALID_INDEX) - return 0; + key = punt_client_l4_mk_key (af, port); + p = hash_get (pm->db.clients_by_l4_port, key); - return &vec_elt (v, i); -} + if (p) + index = p[0]; -static struct sockaddr_un * -punt_socket_get (bool is_ip4, u16 port) -{ - punt_client_t *v = punt_client_get (is_ip4, port); - if (v) - return &v->caddr; + hash_unset (pm->db.clients_by_l4_port, key); - return NULL; + return (index); } -#ifndef CLIB_MARCH_VARIANT -static int -punt_socket_register (bool is_ip4, u8 protocol, u16 port, - char *client_pathname) +static void +punt_client_exception_db_add (vlib_punt_reason_t reason, u32 pci) { punt_main_t *pm = &punt_main; - punt_client_t c, *n; - punt_client_t *v = is_ip4 ? pm->clients_by_dst_port4 : - pm->clients_by_dst_port6; - if (strncmp (client_pathname, vnet_punt_get_server_pathname (), - UNIX_PATH_MAX) == 0) - return -1; - - clib_memset (&c, 0, sizeof (c)); - memcpy (c.caddr.sun_path, client_pathname, sizeof (c.caddr.sun_path)); - c.caddr.sun_family = AF_UNIX; - c.port = port; - c.protocol = protocol; - n = sparse_vec_validate (v, port); - n[0] = c; - return 0; -} + vec_validate_init_empty (pm->db.clients_by_exception, reason, ~0); -/* $$$$ Just leaves the mapping in place for now */ -static void -punt_socket_unregister (bool is_ip4, u8 protocol, u16 port) -{ - return; + pm->db.clients_by_exception[reason] = pci; } -#endif /* CLIB_MARCH_VARIANT */ -typedef struct +static u32 +punt_client_exception_db_remove (vlib_punt_reason_t reason) { - punt_client_t client; - u8 is_midchain; -} udp_punt_trace_t; + punt_main_t *pm = &punt_main; + u32 pci = ~0; -static u8 * -format_udp_punt_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - udp_punt_trace_t *t = va_arg (*args, udp_punt_trace_t *); 
- u32 indent = format_get_indent (s); - s = format (s, "to: %s", t->client.caddr.sun_path); - if (t->is_midchain) + if (punt_client_exception_get (reason)) { - s = format (s, "\n%U(buffer is part of chain)", format_white_space, - indent); + pci = pm->db.clients_by_exception[reason]; + pm->db.clients_by_exception[reason] = ~0; } - return s; + + return pci; } -always_inline uword -udp46_punt_socket_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_ip4) +static clib_error_t * +punt_socket_read_ready (clib_file_t * uf) { - u32 *buffers = vlib_frame_vector_args (frame); - uword n_packets = frame->n_vectors; - struct iovec *iovecs = 0; + vlib_main_t *vm = vlib_get_main (); punt_main_t *pm = &punt_main; - int i; - - u32 node_index = is_ip4 ? udp4_punt_socket_node.index : - udp6_punt_socket_node.index; - - for (i = 0; i < n_packets; i++) - { - struct iovec *iov; - vlib_buffer_t *b; - uword l; - punt_packetdesc_t packetdesc; - - b = vlib_get_buffer (vm, buffers[i]); - - /* Reverse UDP Punt advance */ - udp_header_t *udp; - if (is_ip4) - { - vlib_buffer_advance (b, -(sizeof (ip4_header_t) + - sizeof (udp_header_t))); - ip4_header_t *ip = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip + 1); - } - else - { - vlib_buffer_advance (b, -(sizeof (ip6_header_t) + - sizeof (udp_header_t))); - ip6_header_t *ip = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip + 1); - } - - u16 port = clib_net_to_host_u16 (udp->dst_port); - - /* - * Find registerered client - * If no registered client, drop packet and count - */ - struct sockaddr_un *caddr; - caddr = punt_socket_get (is_ip4, port); - if (!caddr) - { - vlib_node_increment_counter (vm, node_index, - PUNT_ERROR_SOCKET_TX_ERROR, 1); - goto error; - } - - punt_client_t *c = NULL; - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) - { - c = punt_client_get (is_ip4, port); - udp_punt_trace_t *t; - t = vlib_add_trace (vm, node, b, sizeof (t[0])); - clib_memcpy_fast (&t->client, c, 
sizeof (t->client)); - } - - /* Re-set iovecs if present. */ - if (iovecs) - _vec_len (iovecs) = 0; - - /* Add packet descriptor */ - packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; - packetdesc.action = 0; - vec_add2 (iovecs, iov, 1); - iov->iov_base = &packetdesc; - iov->iov_len = sizeof (packetdesc); - - /** VLIB buffer chain -> Unix iovec(s). */ - vlib_buffer_advance (b, -(sizeof (ethernet_header_t))); - vec_add2 (iovecs, iov, 1); - iov->iov_base = b->data + b->current_data; - iov->iov_len = l = b->current_length; - - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - do - { - b = vlib_get_buffer (vm, b->next_buffer); - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) - { - if (PREDICT_FALSE (!c)) - { - c = punt_client_get (is_ip4, port); - } - udp_punt_trace_t *t; - t = vlib_add_trace (vm, node, b, sizeof (t[0])); - clib_memcpy_fast (&t->client, c, sizeof (t->client)); - t->is_midchain = 1; - } - - vec_add2 (iovecs, iov, 1); - - iov->iov_base = b->data + b->current_data; - iov->iov_len = b->current_length; - l += b->current_length; - } - while (b->flags & VLIB_BUFFER_NEXT_PRESENT); - } - - struct msghdr msg = { - .msg_name = caddr, - .msg_namelen = sizeof (*caddr), - .msg_iov = iovecs, - .msg_iovlen = vec_len (iovecs), - }; - - if (sendmsg (pm->socket_fd, &msg, 0) < (ssize_t) l) - vlib_node_increment_counter (vm, node_index, - PUNT_ERROR_SOCKET_TX_ERROR, 1); - else - vlib_node_increment_counter (vm, node_index, PUNT_ERROR_SOCKET_TX, 1); - } - -error: - vlib_buffer_free (vm, buffers, n_packets); - - return n_packets; -} + /** Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, punt_socket_rx_node.index); + vec_add1 (pm->ready_fds, uf->file_descriptor); -static uword -udp4_punt_socket (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_punt_socket_inline (vm, node, from_frame, true /* is_ip4 */ ); + return 0; } -static uword -udp6_punt_socket (vlib_main_t * vm, - 
vlib_node_runtime_t * node, vlib_frame_t * from_frame) +static clib_error_t * +punt_socket_register_l4 (vlib_main_t * vm, + ip_address_family_t af, + u8 protocol, u16 port, char *client_pathname) { - return udp46_punt_socket_inline (vm, node, from_frame, false /* is_ip4 */ ); -} - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp4_punt_socket_node) = { - .function = udp4_punt_socket, - .name = "ip4-udp-punt-socket", - .format_trace = format_udp_punt_trace, - .flags = VLIB_NODE_FLAG_IS_DROP, - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - .n_errors = PUNT_N_ERROR, - .error_strings = punt_error_strings, -}; -VLIB_REGISTER_NODE (udp6_punt_socket_node) = { - .function = udp6_punt_socket, - .name = "ip6-udp-punt-socket", - .format_trace = format_udp_punt_trace, - .flags = VLIB_NODE_FLAG_IS_DROP, - .vector_size = sizeof (u32), - .n_errors = PUNT_N_ERROR, - .error_strings = punt_error_strings, -}; -/* *INDENT-ON* */ + punt_main_t *pm = &punt_main; + punt_client_t *c; -typedef struct -{ - enum punt_action_e action; - u32 sw_if_index; -} punt_trace_t; + /* For now we only support UDP punt */ + if (protocol != IP_PROTOCOL_UDP) + return clib_error_return (0, + "only UDP protocol (%d) is supported, got %d", + IP_PROTOCOL_UDP, protocol); -static u8 * -format_punt_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - vnet_main_t *vnm = vnet_get_main (); - punt_trace_t *t = va_arg (*va, punt_trace_t *); - s = format (s, "%U Action: %d", format_vnet_sw_if_index_name, - vnm, t->sw_if_index, t->action); - return s; -} + if (port == (u16) ~ 0) + return clib_error_return (0, "UDP port number required"); -static uword -punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd) -{ - const uword buffer_size = vlib_buffer_get_default_data_size (vm); - u32 n_trace = vlib_get_trace_count (vm, node); - u32 next = node->cached_next_index; - u32 
n_left_to_next, next_index; - u32 *to_next; - u32 error = PUNT_ERROR_NONE; - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - - /* $$$$ Only dealing with one buffer at the time for now */ - - u32 bi; - vlib_buffer_t *b; - punt_packetdesc_t packetdesc; - ssize_t size; - struct iovec io[2]; - - if (vlib_buffer_alloc (vm, &bi, 1) != 1) - { - error = PUNT_ERROR_NOBUFFER; - goto error; - } + if (strncmp (client_pathname, vnet_punt_get_server_pathname (), + UNIX_PATH_MAX) == 0) + return clib_error_return (0, + "Punt socket: Invalid client path: %s", + client_pathname); - b = vlib_get_buffer (vm, bi); - io[0].iov_base = &packetdesc; - io[0].iov_len = sizeof (packetdesc); - io[1].iov_base = b->data; - io[1].iov_len = buffer_size; + c = punt_client_l4_get (af, port); - size = readv (fd, io, 2); - /* We need at least the packet descriptor plus a header */ - if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t))) + if (NULL == c) { - vlib_buffer_free (vm, &bi, 1); - error = PUNT_ERROR_READV; - goto error; + pool_get_zero (pm->punt_client_pool, c); + punt_client_l4_db_add (af, port, c - pm->punt_client_pool); } - b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; - b->current_length = size - sizeof (packetdesc); + memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path)); + c->caddr.sun_family = AF_UNIX; + c->reg.type = PUNT_TYPE_L4; + c->reg.punt.l4.port = port; + c->reg.punt.l4.protocol = protocol; + c->reg.punt.l4.af = af; - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); + u32 node_index = (af == AF_IP4 ? 
+ udp4_punt_socket_node.index : + udp6_punt_socket_node.index); - switch (packetdesc.action) - { - case PUNT_L2: - vnet_buffer (b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index; - next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT; - break; + udp_register_dst_port (vm, port, node_index, af == AF_IP4); - case PUNT_IP4_ROUTED: - vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index; - vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; - next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP; - break; + return (NULL); +} - case PUNT_IP6_ROUTED: - vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index; - vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; - next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP; - break; +static clib_error_t * +punt_socket_register_exception (vlib_main_t * vm, + vlib_punt_reason_t reason, + char *client_pathname) +{ + punt_main_t *pm = &punt_main; + punt_client_t *pc; - default: - error = PUNT_ERROR_ACTION; - vlib_buffer_free (vm, &bi, 1); - goto error; - } + pc = punt_client_exception_get (reason); - if (PREDICT_FALSE (n_trace > 0)) + if (NULL == pc) { - punt_trace_t *t; - vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ ); - vlib_set_trace_count (vm, node, --n_trace); - t = vlib_add_trace (vm, node, b, sizeof (*t)); - t->sw_if_index = packetdesc.sw_if_index; - t->action = packetdesc.action; + pool_get_zero (pm->punt_client_pool, pc); + punt_client_exception_db_add (reason, pc - pm->punt_client_pool); } - to_next[0] = bi; - to_next++; - n_left_to_next--; + memcpy (pc->caddr.sun_path, client_pathname, sizeof (pc->caddr.sun_path)); + pc->caddr.sun_family = AF_UNIX; + pc->reg.type = PUNT_TYPE_EXCEPTION; + pc->reg.punt.exception.reason = reason; - vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, - bi, next_index); - vlib_put_next_frame (vm, node, next, n_left_to_next); - return 1; + vlib_punt_register (pm->hdl, + pc->reg.punt.exception.reason, "exception-punt-socket"); -error: - vlib_node_increment_counter (vm, 
punt_socket_rx_node.index, error, 1); - return 0; + return (NULL); } -static uword -punt_socket_rx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +static clib_error_t * +punt_socket_unregister_l4 (ip_address_family_t af, + ip_protocol_t protocol, u16 port) { - punt_main_t *pm = &punt_main; - u32 total_count = 0; - int i; + u32 pci; - for (i = 0; i < vec_len (pm->ready_fds); i++) - { - total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[i]); - vec_del1 (pm->ready_fds, i); - } - return total_count; -} + udp_unregister_dst_port (vlib_get_main (), port, af == AF_IP4); -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (punt_socket_rx_node, static) = -{ - .function = punt_socket_rx, - .name = "punt-socket-rx", - .type = VLIB_NODE_TYPE_INPUT, - .state = VLIB_NODE_STATE_INTERRUPT, - .vector_size = 1, - .n_errors = PUNT_N_ERROR, - .error_strings = punt_error_strings, - .n_next_nodes = PUNT_SOCKET_RX_N_NEXT, - .next_nodes = { - [PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output", - [PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup", - [PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup", - }, - .format_trace = format_punt_trace, -}; -/* *INDENT-ON* */ + pci = punt_client_l4_db_remove (af, port); + + if (~0 != pci) + pool_put_index (punt_main.punt_client_pool, pci); + + return (NULL); +} static clib_error_t * -punt_socket_read_ready (clib_file_t * uf) +punt_socket_unregister_exception (vlib_punt_reason_t reason) { - vlib_main_t *vm = vlib_get_main (); - punt_main_t *pm = &punt_main; + u32 pci; - /** Schedule the rx node */ - vlib_node_set_interrupt_pending (vm, punt_socket_rx_node.index); - vec_add1 (pm->ready_fds, uf->file_descriptor); + pci = punt_client_exception_db_remove (reason); - return 0; + if (~0 != pci) + pool_put_index (punt_main.punt_client_pool, pci); + + return (NULL); } -#ifndef CLIB_MARCH_VARIANT clib_error_t * vnet_punt_socket_add (vlib_main_t * vm, u32 header_version, - bool is_ip4, u8 protocol, u16 port, - char *client_pathname) + const 
punt_reg_t * pr, char *client_pathname) { punt_main_t *pm = &punt_main; @@ -651,39 +227,40 @@ vnet_punt_socket_add (vlib_main_t * vm, u32 header_version, if (header_version != PUNT_PACKETDESC_VERSION) return clib_error_return (0, "Invalid packet descriptor version"); - /* For now we only support UDP punt */ - if (protocol != IP_PROTOCOL_UDP) - return clib_error_return (0, - "only UDP protocol (%d) is supported, got %d", - IP_PROTOCOL_UDP, protocol); - - if (port == (u16) ~ 0) - return clib_error_return (0, "UDP port number required"); - /* Register client */ - if (punt_socket_register (is_ip4, protocol, port, client_pathname) < 0) - return clib_error_return (0, - "Punt socket: Invalid client path: %s", - client_pathname); - - u32 node_index = is_ip4 ? udp4_punt_socket_node.index : - udp6_punt_socket_node.index; - - udp_register_dst_port (vm, port, node_index, is_ip4); + switch (pr->type) + { + case PUNT_TYPE_L4: + return (punt_socket_register_l4 (vm, + pr->punt.l4.af, + pr->punt.l4.protocol, + pr->punt.l4.port, client_pathname)); + case PUNT_TYPE_EXCEPTION: + return (punt_socket_register_exception (vm, + pr->punt.exception.reason, + client_pathname)); + } return 0; } clib_error_t * -vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, u8 l4_protocol, u16 port) +vnet_punt_socket_del (vlib_main_t * vm, const punt_reg_t * pr) { punt_main_t *pm = &punt_main; if (!pm->is_configured) return clib_error_return (0, "socket is not configured"); - punt_socket_unregister (is_ip4, l4_protocol, port); - udp_unregister_dst_port (vm, port, is_ip4); + switch (pr->type) + { + case PUNT_TYPE_L4: + return (punt_socket_unregister_l4 (pr->punt.l4.af, + pr->punt.l4.protocol, + pr->punt.l4.port)); + case PUNT_TYPE_EXCEPTION: + return (punt_socket_unregister_exception (pr->punt.exception.reason)); + } return 0; } @@ -692,11 +269,10 @@ vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, u8 l4_protocol, u16 port) * @brief Request IP traffic punt to the local TCP/IP stack. 
* * @em Note - * - UDP and TCP are the only protocols supported in the current implementation + * - UDP, TCP and SCTP are the only protocols supported in the current implementation * * @param vm vlib_main_t corresponding to the current thread - * @param ipv IP protcol version. - * 4 - IPv4, 6 - IPv6, ~0 for both IPv6 and IPv4 + * @param af IP address family. * @param protocol 8-bits L4 protocol value * UDP is 17 * TCP is 1 @@ -704,11 +280,11 @@ vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, u8 l4_protocol, u16 port) * * @returns 0 on success, non-zero value otherwise */ -clib_error_t * -vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, - bool is_add) +static clib_error_t * +punt_l4_add_del (vlib_main_t * vm, + ip_address_family_t af, + ip_protocol_t protocol, u16 port, bool is_add) { - /* For now we only support TCP, UDP and SCTP punt */ if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP && protocol != IP_PROTOCOL_SCTP) @@ -717,30 +293,14 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, IP_PROTOCOL_UDP, IP_PROTOCOL_TCP, IP_PROTOCOL_SCTP, protocol); - if (ipv != (u8) ~ 0 && ipv != 4 && ipv != 6) - return clib_error_return (0, "IP version must be 4 or 6, got %d", ipv); - if (port == (u16) ~ 0) { - if ((ipv == 4) || (ipv == (u8) ~ 0)) - { - if (protocol == IP_PROTOCOL_UDP) - udp_punt_unknown (vm, 1, is_add); - else if (protocol == IP_PROTOCOL_TCP) - tcp_punt_unknown (vm, 1, is_add); - else if (protocol == IP_PROTOCOL_SCTP) - sctp_punt_unknown (vm, 1, is_add); - } - - if ((ipv == 6) || (ipv == (u8) ~ 0)) - { - if (protocol == IP_PROTOCOL_UDP) - udp_punt_unknown (vm, 0, is_add); - else if (protocol == IP_PROTOCOL_TCP) - tcp_punt_unknown (vm, 0, is_add); - else if (protocol == IP_PROTOCOL_SCTP) - sctp_punt_unknown (vm, 0, is_add); - } + if (protocol == IP_PROTOCOL_UDP) + udp_punt_unknown (vm, af == AF_IP4, is_add); + else if (protocol == IP_PROTOCOL_TCP) + tcp_punt_unknown (vm, af == AF_IP4, is_add); + else if 
(protocol == IP_PROTOCOL_SCTP) + sctp_punt_unknown (vm, af == AF_IP4, is_add); return 0; } @@ -751,11 +311,10 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, return clib_error_return (0, "punt TCP/SCTP ports is not supported yet"); - if (ipv == 4 || ipv == (u8) ~ 0) - udp_register_dst_port (vm, port, udp4_punt_node.index, 1); + if (!udp_is_valid_dst_port (port, af == AF_IP4)) + return clib_error_return (0, "invalid port: %d", port); - if (ipv == 6 || ipv == (u8) ~ 0) - udp_register_dst_port (vm, port, udp6_punt_node.index, 0); + udp_register_dst_port (vm, port, udp4_punt_node.index, af == AF_IP4); return 0; } @@ -764,38 +323,66 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_SCTP) return clib_error_return (0, "punt TCP/SCTP ports is not supported yet"); - if (ipv == 4 || ipv == (u8) ~ 0) - udp_unregister_dst_port (vm, port, 1); - if (ipv == 6 || ipv == (u8) ~ 0) - udp_unregister_dst_port (vm, port, 0); + udp_unregister_dst_port (vm, port, af == AF_IP4); return 0; } } -#endif /* CLIB_MARCH_VARIANT */ + +static clib_error_t * +punt_exception_add_del (vlib_main_t * vm, + vlib_punt_reason_t reason, bool is_add) +{ + return (NULL); +} + +clib_error_t * +vnet_punt_add_del (vlib_main_t * vm, const punt_reg_t * pr, bool is_add) +{ + switch (pr->type) + { + case PUNT_TYPE_L4: + return (punt_l4_add_del (vm, pr->punt.l4.af, pr->punt.l4.protocol, + pr->punt.l4.port, is_add)); + case PUNT_TYPE_EXCEPTION: + return (punt_exception_add_del (vm, pr->punt.exception.reason, is_add)); + } + + return (clib_error_return (0, "Unsupported punt type: %d", pr->type)); +} static clib_error_t * punt_cli (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - u32 port = ~0; - bool is_add = true; - u32 protocol = ~0; clib_error_t *error = NULL; + bool is_add = true; + punt_reg_t pr = { + .punt = { + .l4 = { + .af = AF_IP4, + .port = ~0, + .protocol = ~0, + }, + }, + .type 
= PUNT_TYPE_L4, + }; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "del")) is_add = false; - else if (unformat (input, "all")) - ; - else if (unformat (input, "%d", &port)) + else if (unformat (input, "ipv6")) + pr.punt.l4.af = AF_IP6; + else if (unformat (input, "ip6")) + pr.punt.l4.af = AF_IP6; + else if (unformat (input, "%d", &pr.punt.l4.port)) ; else if (unformat (input, "udp")) - protocol = IP_PROTOCOL_UDP; + pr.punt.l4.protocol = IP_PROTOCOL_UDP; else if (unformat (input, "tcp")) - protocol = IP_PROTOCOL_TCP; + pr.punt.l4.protocol = IP_PROTOCOL_TCP; else { error = clib_error_return (0, "parse error: '%U'", @@ -805,7 +392,7 @@ punt_cli (vlib_main_t * vm, } /* punt both IPv6 and IPv4 when used in CLI */ - error = vnet_punt_add_del (vm, ~0, protocol, port, is_add); + error = vnet_punt_add_del (vm, &pr, is_add); if (error) { clib_error_report (error); @@ -843,28 +430,36 @@ VLIB_CLI_COMMAND (punt_command, static) = { }; /* *INDENT-ON* */ -#ifndef CLIB_MARCH_VARIANT static clib_error_t * punt_socket_register_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - bool is_ipv4 = true; - u32 protocol = ~0; - u32 port = ~0; u8 *socket_name = 0; clib_error_t *error = NULL; + /* *INDENT-OFF* */ + punt_reg_t pr = { + .punt = { + .l4 = { + .af = AF_IP4, + .port = ~0, + .protocol = ~0, + }, + }, + .type = PUNT_TYPE_L4, + }; + /* *INDENT-ON* */ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "ipv4")) ; else if (unformat (input, "ipv6")) - is_ipv4 = false; + pr.punt.l4.af = AF_IP6; else if (unformat (input, "udp")) - protocol = IP_PROTOCOL_UDP; + pr.punt.l4.protocol = IP_PROTOCOL_UDP; else if (unformat (input, "tcp")) - protocol = IP_PROTOCOL_TCP; - else if (unformat (input, "%d", &port)) + pr.punt.l4.protocol = IP_PROTOCOL_TCP; + else if (unformat (input, "%d", &pr.punt.l4.port)) ; else if (unformat (input, "socket %s", &socket_name)) ; @@ -876,9 +471,8 @@ 
punt_socket_register_cmd (vlib_main_t * vm, } } - error = - vnet_punt_socket_add (vm, 1, is_ipv4, protocol, port, - (char *) socket_name); + error = vnet_punt_socket_add (vm, 1, &pr, (char *) socket_name); + done: return error; } @@ -903,22 +497,31 @@ punt_socket_deregister_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - bool is_ipv4 = true; - u32 protocol = ~0; - u32 port = ~0; clib_error_t *error = NULL; + /* *INDENT-OFF* */ + punt_reg_t pr = { + .punt = { + .l4 = { + .af = AF_IP4, + .port = ~0, + .protocol = ~0, + }, + }, + .type = PUNT_TYPE_L4, + }; + /* *INDENT-ON* */ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "ipv4")) ; else if (unformat (input, "ipv6")) - is_ipv4 = false; + pr.punt.l4.af = AF_IP6; else if (unformat (input, "udp")) - protocol = IP_PROTOCOL_UDP; + pr.punt.l4.protocol = IP_PROTOCOL_UDP; else if (unformat (input, "tcp")) - protocol = IP_PROTOCOL_TCP; - else if (unformat (input, "%d", &port)) + pr.punt.l4.protocol = IP_PROTOCOL_TCP; + else if (unformat (input, "%d", &pr.punt.l4.port)) ; else { @@ -928,7 +531,7 @@ punt_socket_deregister_cmd (vlib_main_t * vm, } } - error = vnet_punt_socket_del (vm, is_ipv4, protocol, port); + error = vnet_punt_socket_del (vm, &pr); done: return error; } @@ -948,85 +551,90 @@ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) = }; /* *INDENT-ON* */ -punt_socket_detail_t * -punt_socket_entries (u8 ipv) +void +punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx) { punt_main_t *pm = &punt_main; - punt_client_t *pc; - punt_socket_detail_t *ps = 0; - bool is_valid; - - punt_client_t *v = !ipv ? 
pm->clients_by_dst_port4 : - pm->clients_by_dst_port6; - vec_foreach (pc, v) - { - if (pc && pc->port != 0) + switch (pt) + { + case PUNT_TYPE_L4: + { + u32 pci; + u16 port; + + /* *INDENT-OFF* */ + hash_foreach(port, pci, pm->db.clients_by_l4_port, + ({ + cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx); + })); + /* *INDENT-ON* */ + break; + } + case PUNT_TYPE_EXCEPTION: { - is_valid = false; - if (pc->protocol == IP_PROTOCOL_UDP) - { - is_valid = udp_is_valid_dst_port (pc->port, !ipv); - } - if (is_valid) - { - punt_socket_detail_t detail = { - .ipv = ipv, - .l4_protocol = pc->protocol, - .l4_port = pc->port - }; - memcpy (detail.pathname, pc->caddr.sun_path, - sizeof (pc->caddr.sun_path)); - vec_add1 (ps, detail); - } + u32 *pci; + + vec_foreach (pci, pm->db.clients_by_exception) + { + if (~0 != *pci) + cb (pool_elt_at_index (pm->punt_client_pool, *pci), ctx); + } + + break; } - } - return ps; + } } -u8 * -format_punt_socket (u8 * s, va_list * args) +static u8 * +format_punt_client (u8 * s, va_list * args) { - punt_client_t *clients = va_arg (*args, punt_client_t *); - u8 *is_ipv6 = va_arg (*args, u8 *); - punt_client_t *pc; - bool is_valid; + punt_client_t *pc = va_arg (*args, punt_client_t *); - vec_foreach (pc, clients) - { - if (pc && pc->port != 0) - { - is_valid = false; - if (pc->protocol == IP_PROTOCOL_UDP) - { - is_valid = udp_is_valid_dst_port (pc->port, !(*is_ipv6)); - } - if (is_valid) - { - s = format (s, " punt %s port %d to socket %s \n", - (pc->protocol == IP_PROTOCOL_UDP) ? 
"UDP" : "TCP", - pc->port, pc->caddr.sun_path); - } - } - } + s = format (s, " punt "); + + switch (pc->reg.type) + { + case PUNT_TYPE_L4: + s = format (s, "%U %U port %d", + format_ip_address_family, pc->reg.punt.l4.af, + format_ip_protocol, pc->reg.punt.l4.protocol, + pc->reg.punt.l4.port); + break; + case PUNT_TYPE_EXCEPTION: + s = format (s, " %U", format_vlib_punt_reason, + pc->reg.punt.exception.reason); + break; + } + + s = format (s, " to socket %s \n", pc->caddr.sun_path); return (s); } +static walk_rc_t +punt_client_show_one (const punt_client_t * pc, void *ctx) +{ + vlib_cli_output (ctx, "%U", format_punt_client, pc); + + return (WALK_CONTINUE); +} + static clib_error_t * punt_socket_show_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - u8 is_ipv6; - punt_main_t *pm = &punt_main; clib_error_t *error = NULL; + punt_type_t pt; + + pt = PUNT_TYPE_L4; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "ipv4")) - is_ipv6 = 0; - else if (unformat (input, "ipv6")) - is_ipv6 = 1; + if (unformat (input, "exception")) + pt = PUNT_TYPE_EXCEPTION; + else if (unformat (input, "l4")) + pt = PUNT_TYPE_L4; else { error = clib_error_return (0, "parse error: '%U'", @@ -1035,9 +643,7 @@ punt_socket_show_cmd (vlib_main_t * vm, } } - punt_client_t *v = - is_ipv6 ? 
pm->clients_by_dst_port6 : pm->clients_by_dst_port4; - vlib_cli_output (vm, "%U", format_punt_socket, v, &is_ipv6); + punt_client_walk (pt, punt_client_show_one, vm); done: return (error); @@ -1053,7 +659,7 @@ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) = { .path = "show punt socket registrations", .function = punt_socket_show_cmd, - .short_help = "show punt socket registrations [ipv4|ipv6]", + .short_help = "show punt socket registrations [l4|exception]", .is_mp_safe = 1, }; /* *INDENT-ON* */ @@ -1061,24 +667,22 @@ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) = clib_error_t * ip_punt_init (vlib_main_t * vm) { + clib_error_t *error = NULL; punt_main_t *pm = &punt_main; - pm->clients_by_dst_port6 = sparse_vec_new - (sizeof (pm->clients_by_dst_port6[0]), - BITS (((udp_header_t *) 0)->dst_port)); - pm->clients_by_dst_port4 = sparse_vec_new - (sizeof (pm->clients_by_dst_port4[0]), - BITS (((udp_header_t *) 0)->dst_port)); - pm->is_configured = false; - pm->interface_output_node = vlib_get_node_by_name (vm, - (u8 *) - "interface-output"); - return 0; + pm->interface_output_node = + vlib_get_node_by_name (vm, (u8 *) "interface-output"); + + if ((error = vlib_call_init_function (vm, punt_init))) + return error; + + pm->hdl = vlib_punt_client_register ("ip-punt"); + + return (error); } VLIB_INIT_FUNCTION (ip_punt_init); -#endif /* CLIB_MARCH_VARIANT */ static clib_error_t * punt_config (vlib_main_t * vm, unformat_input_t * input) @@ -1124,6 +728,15 @@ punt_config (vlib_main_t * vm, unformat_input_t * input) return clib_error_return (0, "bind error"); } + int n_bytes = 0x10000; + + if (setsockopt + (pm->socket_fd, SOL_SOCKET, SO_SNDBUF, &n_bytes, + sizeof (n_bytes)) == -1) + { + return clib_error_return (0, "setsockopt error"); + } + /* Register socket */ clib_file_main_t *fm = &file_main; clib_file_t template = { 0 }; diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h index 0518b2b91ef..a77e6330813 100644 --- a/src/vnet/ip/punt.h 
+++ b/src/vnet/ip/punt.h @@ -22,23 +22,51 @@ #include #include +#include -typedef enum +#define foreach_punt_type \ + _(L4, "l4") \ + _(EXCEPTION, "exception") + +typedef enum punt_type_t_ +{ +#define _(v, s) PUNT_TYPE_##v, + foreach_punt_type +#undef _ +} punt_type_t; + +typedef struct punt_l4_t_ +{ + ip_address_family_t af; + ip_protocol_t protocol; + u16 port; +} punt_l4_t; + +typedef struct punt_exception_t_ { -#define punt_error(n,s) PUNT_ERROR_##n, -#include -#undef punt_error - PUNT_N_ERROR, -} punt_error_t; + vlib_punt_reason_t reason; +} punt_exception_t; +typedef struct punt_union_t_ +{ + punt_exception_t exception; + punt_l4_t l4; +} punt_union_t; -clib_error_t *vnet_punt_add_del (vlib_main_t * vm, u8 ipv, - u8 protocol, u16 port, bool is_add); -clib_error_t *vnet_punt_socket_add (vlib_main_t * vm, u32 header_version, - bool is_ip4, u8 protocol, u16 port, +typedef struct punt_reg_t_ +{ + punt_type_t type; + punt_union_t punt; +} punt_reg_t; + + +clib_error_t *vnet_punt_add_del (vlib_main_t * vm, + const punt_reg_t * pr, bool is_add); +clib_error_t *vnet_punt_socket_add (vlib_main_t * vm, + u32 header_version, + const punt_reg_t * pr, char *client_pathname); -clib_error_t *vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, - u8 l4_protocol, u16 port); +clib_error_t *vnet_punt_socket_del (vlib_main_t * vm, const punt_reg_t * pr); char *vnet_punt_get_server_pathname (void); enum punt_action_e @@ -64,34 +92,83 @@ typedef struct __attribute__ ((packed)) */ typedef struct { - u8 protocol; - u16 port; + punt_reg_t reg; struct sockaddr_un caddr; } punt_client_t; +typedef struct punt_client_db_t_ +{ + void *clients_by_l4_port; + u32 *clients_by_exception; +} punt_client_db_t; + typedef struct { int socket_fd; char sun_path[sizeof (struct sockaddr_un)]; - punt_client_t *clients_by_dst_port4; - punt_client_t *clients_by_dst_port6; + punt_client_db_t db; + punt_client_t *punt_client_pool; u32 clib_file_index; bool is_configured; vlib_node_t *interface_output_node; 
u32 *ready_fds; u32 *rx_buffers; + vlib_punt_hdl_t hdl; } punt_main_t; + extern punt_main_t punt_main; -typedef struct punt_socket_detail_t_ +typedef walk_rc_t (*punt_client_walk_cb_t) (const punt_client_t * pc, + void *ctx); +extern void punt_client_walk (punt_type_t pt, + punt_client_walk_cb_t cb, void *ctx); + +/* + * inlines for the data-plane + */ +static_always_inline u32 +punt_client_l4_mk_key (ip_address_family_t af, u16 port) +{ + return (af << BITS (port) | port); +} + +static_always_inline punt_client_t * +punt_client_l4_get (ip_address_family_t af, u16 port) +{ + punt_main_t *pm = &punt_main; + uword *p; + + p = hash_get (pm->db.clients_by_l4_port, punt_client_l4_mk_key (af, port)); + + if (p) + return (pool_elt_at_index (pm->punt_client_pool, p[0])); + + return (NULL); +} + +static_always_inline punt_client_t * +punt_client_exception_get (vlib_punt_reason_t reason) { - u8 ipv; - u8 l4_protocol; - u16 l4_port; - u8 pathname[108]; -} punt_socket_detail_t; + punt_main_t *pm = &punt_main; + u32 pci; + + if (reason >= vec_len (pm->db.clients_by_exception)) + return (NULL); + + pci = pm->db.clients_by_exception[reason]; + + if (~0 != pci) + return (pool_elt_at_index (pm->punt_client_pool, pci)); + + return (NULL); +} + +extern vlib_node_registration_t udp4_punt_node; +extern vlib_node_registration_t udp6_punt_node; +extern vlib_node_registration_t udp4_punt_socket_node; +extern vlib_node_registration_t udp6_punt_socket_node; +extern vlib_node_registration_t punt_socket_rx_node; -punt_socket_detail_t *punt_socket_entries (u8 ipv); #endif /* diff --git a/src/vnet/ip/punt.md b/src/vnet/ip/punt.md new file mode 100644 index 00000000000..2bbb5f6b97e --- /dev/null +++ b/src/vnet/ip/punt.md @@ -0,0 +1,82 @@ +.. _punt: + +Punting Packets +=============== + +.. toctree:: + +Overview +________ + +To 'punt' can mean different things to different people. In VPP the +data-plane punts when a packet cannot be handled by any further +nodes. 
Punt differs from drop, in that VPP is giving other elements of +the system the opportunity to handle this packet. + +A popular meaning of punt is to send packets to the user/control-plane. +This is a specific option of the more general case above, where VPP is +handing the packet to the control-plane for further processing. + +The Punt Infrastructure +----------------------- + +Exception packets are those that a given node cannot handle via normal +mechanisms. +Punting of exception packets is handled via the VLIB 'punt +infra'. There are two types of nodes; sources and sinks. Sources +allocate a punt 'reason' from the infrastructure at load time. When +they encounter an exception during switch time they will tag the packet +with the reason and ship the packet off to the punt-dispatch node. A +sink will register with the punt infra at load time so it can receive +packets that were punted for that reason. If no sinks are registered +for a given reason the packet is dropped; if multiple sinks register +the packets are replicated. + +This mechanism allows us to extend the system to deal with packets +that the source node would otherwise drop. + + +Punting to the Control Plane +---------------------------- + +Active Punt +----------- + +The user/control-plane specifies that this is the type of packet I +want to receive and this is where I want it sent. + +Currently there exist 3 ways to describe how to match/classify the +packets to be punted: +... + 1) a matching UDP port + 2) a matching IP protocol (e.g. OSPF) + 3) a matching punt exception reason (see above) +... + +Depending on the type/classification of the packet to be punted, that +active punt will register itself into the VLIB graph to receive those +packets. For example, if it's a packet matching a UDP port then it +will hook into the UDP port dispatch functions; udp_register_dst_port(). + +There exists only one sink for passive punt, a unix domain socket. But +more work is underway in this area.
+ +see the API in: vnet/ip/punt.api + + + +Passive Punt +------------ + +VPP input packet processing can be described as a series of +classifiers. For example, a sequence of input classifications could +be, is it IP? is it for-us? is it UDP? is it a known UDP-port? If at +some point in this pipline VPP has no further classifications to make, +then the packet can be punted, which means sent to ipX-punt node. This +is described as passive since the control-plane is thus receiving +every packet that VPP does not itself handle. +For passive punt the user can specify where the packets should be +sent and whether/how they should be policed/rate-limited. + +see the API in: vnet/ip/ip.api + diff --git a/src/vnet/ip/punt_api.c b/src/vnet/ip/punt_api.c index ecb461bcdda..95fff714b46 100644 --- a/src/vnet/ip/punt_api.c +++ b/src/vnet/ip/punt_api.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -43,107 +44,226 @@ _(SET_PUNT, set_punt) \ _(PUNT_SOCKET_REGISTER, punt_socket_register) \ _(PUNT_SOCKET_DEREGISTER, punt_socket_deregister) \ -_(PUNT_DUMP, punt_dump) \ -_(PUNT_SOCKET_DUMP, punt_socket_dump) +_(PUNT_SOCKET_DUMP, punt_socket_dump) \ +_(PUNT_REASON_DUMP, punt_reason_dump) + +static int +vl_api_punt_type_decode (vl_api_punt_type_t in, punt_type_t * out) +{ + in = clib_net_to_host_u32 (in); + + switch (in) + { +#define _(v, s) \ + case PUNT_API_TYPE_##v: \ + *out = PUNT_TYPE_##v; \ + return (0); + foreach_punt_type +#undef _ + } + + return (-1); +} + +static vl_api_punt_type_t +vl_api_punt_type_encode (punt_type_t in) +{ + vl_api_punt_type_t pt = PUNT_API_TYPE_L4; + + switch (in) + { +#define _(v, s) \ + case PUNT_TYPE_##v: \ + pt = PUNT_API_TYPE_##v; \ + break; + foreach_punt_type +#undef _ + } + + return (clib_host_to_net_u32 (pt)); +} + +static int +vl_api_punt_l4_decode (const vl_api_punt_l4_t * in, punt_l4_t * out) +{ + int rv; + + rv = ip_address_family_decode (in->af, &out->af); + rv += ip_proto_decode (in->protocol, &out->protocol); + out->port 
= clib_net_to_host_u16 (in->port); + + return (rv); +} + +static int +vl_api_punt_exception_decode (const vl_api_punt_exception_t * in, + punt_exception_t * out) +{ + int rv; + + out->reason = clib_net_to_host_u32 (in->id); + rv = vlib_punt_reason_validate (out->reason); + + return (rv); +} + +static int +vl_api_punt_decode (const vl_api_punt_t * in, punt_reg_t * out) +{ + int rv; + + rv = vl_api_punt_type_decode (in->type, &out->type); + + if (rv) + return (rv); + + switch (out->type) + { + case PUNT_TYPE_L4: + return (vl_api_punt_l4_decode (&in->punt.l4, &out->punt.l4)); + case PUNT_TYPE_EXCEPTION: + return (vl_api_punt_exception_decode (&in->punt.exception, + &out->punt.exception)); + } + + return (-1); +} + +static void +vl_api_punt_l4_encode (const punt_l4_t * in, vl_api_punt_l4_t * out) +{ + out->af = ip_address_family_encode (in->af); + out->protocol = ip_proto_encode (in->protocol); + out->port = clib_net_to_host_u16 (in->port); +} + +static void +vl_api_punt_exception_encode (const punt_exception_t * in, + vl_api_punt_exception_t * out) +{ + out->id = clib_host_to_net_u32 (in->reason); +} + +static void +vl_api_punt_encode (const punt_reg_t * in, vl_api_punt_t * out) +{ + out->type = vl_api_punt_type_encode (in->type); + + switch (in->type) + { + case PUNT_TYPE_L4: + vl_api_punt_l4_encode (&in->punt.l4, &out->punt.l4); + break; + case PUNT_TYPE_EXCEPTION: + vl_api_punt_exception_encode (&in->punt.exception, + &out->punt.exception); + break; + } +} static void vl_api_set_punt_t_handler (vl_api_set_punt_t * mp) { vl_api_set_punt_reply_t *rmp; vlib_main_t *vm = vlib_get_main (); - int rv = 0; clib_error_t *error; + punt_reg_t pr; + int rv; + + rv = vl_api_punt_decode (&mp->punt, &pr); - error = vnet_punt_add_del (vm, mp->punt.ipv, mp->punt.l4_protocol, - ntohs (mp->punt.l4_port), mp->is_add); + if (rv) + goto out; + + error = vnet_punt_add_del (vm, &pr, mp->is_add); if (error) { rv = -1; clib_error_report (error); } +out: REPLY_MACRO (VL_API_SET_PUNT_REPLY); 
} -static void -vl_api_punt_dump_t_handler (vl_api_punt_dump_t * mp) -{ - -} - static void vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp) { vl_api_punt_socket_register_reply_t *rmp; vlib_main_t *vm = vlib_get_main (); - int rv = 0; clib_error_t *error; - vl_api_registration_t *reg; + punt_reg_t pr; + int rv; + + rv = vl_api_punt_decode (&mp->punt, &pr); + + if (rv) + return; error = vnet_punt_socket_add (vm, ntohl (mp->header_version), - mp->punt.ipv, mp->punt.l4_protocol, - ntohs (mp->punt.l4_port), - (char *) mp->pathname); + &pr, (char *) mp->pathname); if (error) { rv = -1; clib_error_report (error); } - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - rmp->_vl_msg_id = htons (VL_API_PUNT_SOCKET_REGISTER_REPLY); - rmp->context = mp->context; - rmp->retval = htonl (rv); char *p = vnet_punt_get_server_pathname (); - /* Abstract pathnames start with \0 */ - memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname)); - vl_api_send_msg (reg, (u8 *) rmp); + + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_PUNT_SOCKET_REGISTER_REPLY, + ({ + memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname)); + })); + /* *INDENT-ON* */ } -void -send_punt_socket_details (vl_api_registration_t * reg, - u32 context, punt_socket_detail_t * p) +typedef struct punt_socket_send_ctx_t_ +{ + vl_api_registration_t *reg; + u32 context; +} punt_socket_send_ctx_t; + +static walk_rc_t +vl_api_punt_socket_send_details (const punt_client_t * pc, void *args) { + punt_socket_send_ctx_t *ctx = args; vl_api_punt_socket_details_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); if (!mp) - return; + return (WALK_STOP); clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_PUNT_SOCKET_DETAILS); - mp->context = context; - mp->punt.ipv = p->ipv; - mp->punt.l4_protocol = p->l4_protocol; - mp->punt.l4_port = htons (p->l4_port); - memcpy (mp->pathname, p->pathname, sizeof (p->pathname)); + 
mp->context = ctx->context; + vl_api_punt_encode (&pc->reg, &mp->punt); + memcpy (mp->pathname, pc->caddr.sun_path, sizeof (pc->caddr.sun_path)); + + vl_api_send_msg (ctx->reg, (u8 *) mp); - vl_api_send_msg (reg, (u8 *) mp); + return (WALK_CONTINUE); } static void vl_api_punt_socket_dump_t_handler (vl_api_punt_socket_dump_t * mp) { vl_api_registration_t *reg; - punt_socket_detail_t *p, *ps; - int rv __attribute__ ((unused)) = 0; + punt_type_t pt; + + if (0 != vl_api_punt_type_decode (mp->type, &pt)) + return; reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) return; - ps = punt_socket_entries (mp->is_ipv6); - /* *INDENT-OFF* */ - vec_foreach (p, ps) - { - send_punt_socket_details (reg, mp->context, p); - } - /* *INDENT-ON* */ - vec_free (ps); + punt_socket_send_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + punt_client_walk (pt, vl_api_punt_socket_send_details, &ctx); } static void @@ -151,27 +271,69 @@ vl_api_punt_socket_deregister_t_handler (vl_api_punt_socket_deregister_t * mp) { vl_api_punt_socket_deregister_reply_t *rmp; vlib_main_t *vm = vlib_get_main (); - int rv = 0; clib_error_t *error; - vl_api_registration_t *reg; + punt_reg_t pr; + int rv; + + rv = vl_api_punt_decode (&mp->punt, &pr); - error = vnet_punt_socket_del (vm, mp->punt.ipv, mp->punt.l4_protocol, - ntohs (mp->punt.l4_port)); + if (rv) + goto out; + + error = vnet_punt_socket_del (vm, &pr); if (error) { rv = -1; clib_error_report (error); } +out: + REPLY_MACRO (VL_API_PUNT_SOCKET_DEREGISTER_REPLY); +} + +typedef struct punt_reason_dump_walk_ctx_t_ +{ + vl_api_registration_t *reg; + u32 context; +} punt_reason_dump_walk_ctx_t; + +static int +punt_reason_dump_walk_cb (vlib_punt_reason_t id, const u8 * name, void *args) +{ + punt_reason_dump_walk_ctx_t *ctx = args; + vl_api_punt_reason_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp) + vec_len (name)); + if (!mp) + return (0); + + clib_memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs 
(VL_API_PUNT_REASON_DETAILS); + + mp->context = ctx->context; + mp->reason.id = clib_host_to_net_u32 (id); + vl_api_to_api_string (vec_len (name), (char *) name, &mp->reason.name); + + vl_api_send_msg (ctx->reg, (u8 *) mp); + + return (1); +} + +static void +vl_api_punt_reason_dump_t_handler (vl_api_punt_reason_dump_t * mp) +{ + vl_api_registration_t *reg; + reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) return; - rmp = vl_msg_api_alloc (sizeof (*rmp)); - rmp->_vl_msg_id = htons (VL_API_PUNT_SOCKET_DEREGISTER_REPLY); - rmp->context = mp->context; - rmp->retval = htonl (rv); - vl_api_send_msg (reg, (u8 *) rmp); + punt_reason_dump_walk_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + punt_reason_walk (punt_reason_dump_walk_cb, &ctx); } #define vl_msg_name_crc_list diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c new file mode 100644 index 00000000000..53c8199342b --- /dev/null +++ b/src/vnet/ip/punt_node.c @@ -0,0 +1,587 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Local TCP/IP stack punt infrastructure. 
+ * + * Provides a set of VPP nodes together with the relevant APIs and CLI + * commands in order to adjust and dispatch packets from the VPP data plane + * to the local TCP/IP stack + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +typedef enum +{ +#define punt_error(n,s) PUNT_ERROR_##n, +#include +#undef punt_error + PUNT_N_ERROR, +} punt_error_t; + +#define foreach_punt_next \ + _ (PUNT4, "ip4-punt") \ + _ (PUNT6, "ip6-punt") + +typedef enum +{ +#define _(s,n) PUNT_NEXT_##s, + foreach_punt_next +#undef _ + PUNT_N_NEXT, +} punt_next_t; + +enum punt_socket_rx_next_e +{ + PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT, + PUNT_SOCKET_RX_NEXT_IP4_LOOKUP, + PUNT_SOCKET_RX_NEXT_IP6_LOOKUP, + PUNT_SOCKET_RX_N_NEXT +}; + +#define punt_next_punt(is_ip4) (is_ip4 ? PUNT_NEXT_PUNT4 : PUNT_NEXT_PUNT6) + +/** @brief IPv4/IPv6 UDP punt node main loop. + + This is the main loop inline function for IPv4/IPv6 UDP punt + transition node. 
+ + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + @param is_ipv4 indicates if called for IPv4 or IPv6 node +*/ +always_inline uword +udp46_punt_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, *from, *to_next; + word advance; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + /* udp[46]_lookup hands us the data payload, not the IP header */ + if (is_ip4) + advance = -(sizeof (ip4_header_t) + sizeof (udp_header_t)); + else + advance = -(sizeof (ip6_header_t) + sizeof (udp_header_t)); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, punt_next_punt (is_ip4), to_next, + n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + vlib_buffer_advance (b0, advance); + b0->error = node->errors[PUNT_ERROR_UDP_PORT]; + } + + vlib_put_next_frame (vm, node, punt_next_punt (is_ip4), n_left_to_next); + } + + return from_frame->n_vectors; +} + +static char *punt_error_strings[] = { +#define punt_error(n,s) s, +#include "punt_error.def" +#undef punt_error +}; + +/** @brief IPv4 UDP punt node. + @node ip4-udp-punt + + This is the IPv4 UDP punt transition node. It is registered as a next + node for the "ip4-udp-lookup" handling UDP port(s) requested for punt. + The buffer's current data pointer is adjusted to the original packet + IPv4 header. All buffers are dispatched to "error-punt". 
+ + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + + @par Graph mechanics: next index usage + + @em Sets: + - vnet_buffer(b)->current_data + - vnet_buffer(b)->current_len + + Next Index: + - Dispatches the packet to the "error-punt" node +*/ +VLIB_NODE_FN (udp4_punt_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +/** @brief IPv6 UDP punt node. + @node ip6-udp-punt + + This is the IPv6 UDP punt transition node. It is registered as a next + node for the "ip6-udp-lookup" handling UDP port(s) requested for punt. + The buffer's current data pointer is adjusted to the original packet + IPv6 header. All buffers are dispatched to "error-punt". + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + + @par Graph mechanics: next index usage + + @em Sets: + - vnet_buffer(b)->current_data + - vnet_buffer(b)->current_len + + Next Index: + - Dispatches the packet to the "error-punt" node +*/ +VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp4_punt_node) = { + .name = "ip4-udp-punt", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, + + .n_next_nodes = PUNT_N_NEXT, + .next_nodes = { +#define _(s,n) [PUNT_NEXT_##s] = n, + foreach_punt_next +#undef _ + }, +}; + +VLIB_REGISTER_NODE (udp6_punt_node) = { + .name = "ip6-udp-punt", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, + + .n_next_nodes = PUNT_N_NEXT, + .next_nodes = { +#define _(s,n) [PUNT_NEXT_##s] = n, + foreach_punt_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +typedef struct +{ + punt_client_t client; + u8 is_midchain; +} udp_punt_trace_t; + +static u8 * +format_udp_punt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp_punt_trace_t *t = va_arg (*args, udp_punt_trace_t *); + u32 indent = format_get_indent (s); + s = format (s, "to: %s", t->client.caddr.sun_path); + if (t->is_midchain) + { + s = format (s, "\n%U(buffer is part of chain)", format_white_space, + indent); + } + return s; +} + +always_inline uword +punt_socket_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + punt_type_t pt, ip_address_family_t af) +{ + u32 *buffers = vlib_frame_vector_args (frame); + uword n_packets = frame->n_vectors; + struct iovec *iovecs = 0; + punt_main_t *pm = &punt_main; + int i; + + u32 node_index = AF_IP4 == af ? 
udp4_punt_socket_node.index : + udp6_punt_socket_node.index; + + for (i = 0; i < n_packets; i++) + { + struct iovec *iov; + vlib_buffer_t *b; + uword l; + punt_packetdesc_t packetdesc; + punt_client_t *c; + + b = vlib_get_buffer (vm, buffers[i]); + + if (PUNT_TYPE_L4 == pt) + { + /* Reverse UDP Punt advance */ + udp_header_t *udp; + if (AF_IP4 == af) + { + vlib_buffer_advance (b, -(sizeof (ip4_header_t) + + sizeof (udp_header_t))); + ip4_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + else + { + vlib_buffer_advance (b, -(sizeof (ip6_header_t) + + sizeof (udp_header_t))); + ip6_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + + u16 port = clib_net_to_host_u16 (udp->dst_port); + + /* + * Find registered client + * If no registered client, drop packet and count + */ + c = punt_client_l4_get (af, port); + } + else if (PUNT_TYPE_EXCEPTION == pt) + { + c = punt_client_exception_get (b->punt_reason); + } + else + c = NULL; + + if (PREDICT_FALSE (NULL == c)) + { /* NOTE(review): the goto below leaves the loop, so the packets after index i are freed at the error label without being sent or counted - confirm this is intended */ + vlib_node_increment_counter (vm, node_index, + PUNT_ERROR_SOCKET_TX_ERROR, 1); + goto error; + } + + struct sockaddr_un *caddr = &c->caddr; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_punt_trace_t *t; + t = vlib_add_trace (vm, node, b, sizeof (t[0])); + clib_memcpy_fast (&t->client, c, sizeof (t->client)); + } + + /* Re-set iovecs if present. */ + if (iovecs) + _vec_len (iovecs) = 0; + + /* Add packet descriptor */ + packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + packetdesc.action = 0; + vec_add2 (iovecs, iov, 1); + iov->iov_base = &packetdesc; + iov->iov_len = sizeof (packetdesc); + + /** VLIB buffer chain -> Unix iovec(s). 
*/ + vlib_buffer_advance (b, -(sizeof (ethernet_header_t))); + vec_add2 (iovecs, iov, 1); + iov->iov_base = b->data + b->current_data; + iov->iov_len = l = b->current_length; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + do + { + b = vlib_get_buffer (vm, b->next_buffer); + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_punt_trace_t *t; + t = vlib_add_trace (vm, node, b, sizeof (t[0])); + clib_memcpy_fast (&t->client, c, sizeof (t->client)); + t->is_midchain = 1; + } + + vec_add2 (iovecs, iov, 1); + + iov->iov_base = b->data + b->current_data; + iov->iov_len = b->current_length; + l += b->current_length; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } + + struct msghdr msg = { + .msg_name = caddr, + .msg_namelen = sizeof (*caddr), + .msg_iov = iovecs, + .msg_iovlen = vec_len (iovecs), + }; + + if (sendmsg (pm->socket_fd, &msg, 0) < (ssize_t) l) + vlib_node_increment_counter (vm, node_index, + PUNT_ERROR_SOCKET_TX_ERROR, 1); + else + vlib_node_increment_counter (vm, node_index, PUNT_ERROR_SOCKET_TX, 1); + } + +error: + vlib_buffer_free (vm, buffers, n_packets); + + return n_packets; +} + /* Node function of "ip4-udp-punt-socket": passes the frame to punt_socket_inline() as an L4 punt for AF_IP4. */ +static uword +udp4_punt_socket (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return punt_socket_inline (vm, node, from_frame, PUNT_TYPE_L4, AF_IP4); +} + /* Node function of "ip6-udp-punt-socket": passes the frame to punt_socket_inline() as an L4 punt for AF_IP6. */ +static uword +udp6_punt_socket (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return punt_socket_inline (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6); +} + /* Node function of "exception-punt-socket": passes the frame to punt_socket_inline() as an exception punt; the visible exception branch looks the client up by b->punt_reason. NOTE(review): AF_IP4 is passed here and node_index is derived above the visible part of punt_socket_inline() - confirm the counters land on the intended node. */ +static uword +exception_punt_socket (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return punt_socket_inline (vm, node, from_frame, + PUNT_TYPE_EXCEPTION, AF_IP4); +} + + /* Graph node registrations for the three socket punt paths. Each one sets VLIB_NODE_FLAG_IS_DROP, a vector element size of sizeof (u32), the shared punt error strings and format_udp_punt_trace as trace formatter. */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp4_punt_socket_node) = { + .function = udp4_punt_socket, + .name = "ip4-udp-punt-socket", + .format_trace = format_udp_punt_trace, + .flags = VLIB_NODE_FLAG_IS_DROP, + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; +VLIB_REGISTER_NODE (udp6_punt_socket_node) = { + .function = udp6_punt_socket, + .name = "ip6-udp-punt-socket", + .format_trace = format_udp_punt_trace, + .flags = VLIB_NODE_FLAG_IS_DROP, + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; +VLIB_REGISTER_NODE (exception_punt_socket_node) = { + .function = exception_punt_socket, + .name = "exception-punt-socket", + .format_trace = format_udp_punt_trace, + .flags = VLIB_NODE_FLAG_IS_DROP, + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; +/* *INDENT-ON* */ + /* Trace record filled in by punt_socket_rx_fd() from the action and sw_if_index of the received packet descriptor. */ +typedef struct +{ + enum punt_action_e action; + u32 sw_if_index; +} punt_trace_t; + /* Trace formatter for punt_trace_t: appends sw_if_index rendered through format_vnet_sw_if_index_name, followed by the numeric action. The vm and node arguments are consumed from the va_list but not used. */ +static u8 * +format_punt_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + vnet_main_t *vnm = vnet_get_main (); + punt_trace_t *t = va_arg (*va, punt_trace_t *); + s = format (s, "%U Action: %d", format_vnet_sw_if_index_name, + vnm, t->sw_if_index, t->action); + return s; +} + /* Read one packet from fd with readv(): a punt_packetdesc_t followed by the packet bytes, which land in a newly allocated buffer. The descriptor's action selects the next node. Returns 1 when the buffer was enqueued, 0 after incrementing the matching error counter. */ +static uword +punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd) +{ + const uword buffer_size = vlib_buffer_get_default_data_size (vm); + u32 n_trace = vlib_get_trace_count (vm, node); + u32 next = node->cached_next_index; + u32 n_left_to_next, next_index; + u32 *to_next; + u32 error = PUNT_ERROR_NONE; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + /* $$$$ Only dealing with one buffer at the time for now */ + + u32 bi; + vlib_buffer_t *b; + punt_packetdesc_t packetdesc; + ssize_t size; + struct iovec io[2]; + + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + { + error = PUNT_ERROR_NOBUFFER; + goto error; + } + + b = vlib_get_buffer (vm, bi); + io[0].iov_base = &packetdesc; + io[0].iov_len = sizeof (packetdesc); + io[1].iov_base = b->data; + 
io[1].iov_len = buffer_size; + + size = readv (fd, io, 2); + /* We need at least the packet descriptor plus a header */ + if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t))) + { + vlib_buffer_free (vm, &bi, 1); + error = PUNT_ERROR_READV; + goto error; + } + + b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->current_length = size - sizeof (packetdesc); + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); + + switch (packetdesc.action) + { + case PUNT_L2: + vnet_buffer (b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index; + next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT; + break; + + case PUNT_IP4_ROUTED: + vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; + next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP; + break; + + case PUNT_IP6_ROUTED: + vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; + next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP; + break; + + default: + error = PUNT_ERROR_ACTION; + vlib_buffer_free (vm, &bi, 1); + goto error; + } + + if (PREDICT_FALSE (n_trace > 0)) + { + punt_trace_t *t; + vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t = vlib_add_trace (vm, node, b, sizeof (*t)); + t->sw_if_index = packetdesc.sw_if_index; + t->action = packetdesc.action; + } + + to_next[0] = bi; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, + bi, next_index); + vlib_put_next_frame (vm, node, next, n_left_to_next); + return 1; + +error: + vlib_node_increment_counter (vm, punt_socket_rx_node.index, error, 1); + return 0; +} + +static uword +punt_socket_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + punt_main_t *pm = &punt_main; + u32 total_count = 0; + int i; + + for (i = 0; i < vec_len (pm->ready_fds); i++) + { + total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[i]); + vec_del1 
(pm->ready_fds, i); + } + return total_count; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (punt_socket_rx_node) = +{ + .function = punt_socket_rx, + .name = "punt-socket-rx", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = 1, + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, + .n_next_nodes = PUNT_SOCKET_RX_N_NEXT, + .next_nodes = { + [PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output", + [PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup", + [PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, + .format_trace = format_punt_trace, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ipsec/ipsec_punt.h b/src/vnet/ipsec/ipsec_punt.h index 19943273154..4400ec9b4d2 100644 --- a/src/vnet/ipsec/ipsec_punt.h +++ b/src/vnet/ipsec/ipsec_punt.h @@ -18,11 +18,11 @@ #include #define foreach_ipsec_punt_reason \ - _(IP4_SPI_0, "ip4-spi-0") \ - _(IP6_SPI_0, "ip6-spi-0") \ - _(IP4_SPI_UDP_0, "ip4-spi-o-udp-0") \ - _(IP4_NO_SUCH_TUNNEL, "ip4-no-such-tunnel") \ - _(IP6_NO_SUCH_TUNNEL, "ip6-no-such-tunnel") + _(IP4_SPI_0, "ipsec4-spi-0") \ + _(IP6_SPI_0, "ipsec6-spi-0") \ + _(IP4_SPI_UDP_0, "ipsec4-spi-o-udp-0") \ + _(IP4_NO_SUCH_TUNNEL, "ipsec4-no-such-tunnel") \ + _(IP6_NO_SUCH_TUNNEL, "ipsec6-no-such-tunnel") typedef enum ipsec_punt_reason_t_ { -- cgit 1.2.3-korg