diff options
44 files changed, 9825 insertions, 196 deletions
diff --git a/netlink/Makefile.am b/netlink/Makefile.am index 85023f8..f760b86 100644 --- a/netlink/Makefile.am +++ b/netlink/Makefile.am @@ -13,7 +13,21 @@ AUTOMAKE_OPTIONS = foreign subdir-objects -AM_CFLAGS = -Wall -I@TOOLKIT_INCLUDE@ +AM_CFLAGS = -Wall -fstack-protector -fPIC -Werror -g -DFORTIFY_SOURCE=2 +if DEBUG +AM_CFLAGS += -O0 -DCLIB_DEBUG +vpp_build = vpp_debug-native +else +AM_CFLAGS += -O2 +vpp_build = vpp-native +endif + +if VPP_DIR_SET +vpp_install = @VPP_DIR@/build-root/install-$(vpp_build) +AM_CFLAGS += -I$(vpp_install)/vpp/include/ +else +AM_CFLAGS += -Wall +endif lib_LTLIBRARIES = librtnl.la @@ -33,8 +47,9 @@ testrtnl_plugin_la_SOURCES = test/test.c testrtnl_plugin_la_LDFLAGS = -module testrtnl_plugin_la_LIBADD = librtnl.la -if WITH_PLUGIN_TOOLKIT -install-data-hook: - mkdir /usr/lib/vpp_plugins || true - cp $(prefix)/lib/testnl_plugin.so.*.*.* /usr/lib/vpp_plugins +if VPP_DIR_SET +install: install-am + mkdir -p $(vpp_install)/vpp/lib64/vpp_plugins/ + cp $(prefix)/lib/testrtnl_plugin.so.*.*.* $(vpp_install)/vpp/lib64/vpp_plugins/testrtnl_plugin.so + cp $(prefix)/lib/librtnl.so $(vpp_install)/vpp/lib64/librtnl.so endif diff --git a/netlink/README.md b/netlink/README.md index 02f4c18..f26a3c7 100644 --- a/netlink/README.md +++ b/netlink/README.md @@ -4,15 +4,34 @@ This VPP package provides a rtnetlink-based library to be used by [VPP](http://f ## HowTo -The library and test plugin can be compiled by copying netlink.mk into VPP's build tree build-data/packages/ directory and using VPP make target netlink-install. +The library and test plugin can be compiled by running the following commands from the plugin directory: +```bash +libtoolize +aclocal +autoconf +automake --add-missing +./configure +make +sudo make install +``` + +If VPP is not installed, but rather built in a separate directory, you can use the VPP_DIR 'configure' argument. + +```bash +./configure VPP_DIR=<path/to/vpp/directory> +make +make install +``` + +You can also enable debug with the 'configure' --enable-debug option. + -Note that VPP build system can be configured to look in other directories for additional build-data/packages/ directories by adding those to build-root/build-config.mk SOURCE_PATH variable. ## Administrativa ### Current status -This library is still under active development, which means the defined headers and functions are subject to changes without notice. +This library is currently looking for some maintainers. ### Objective diff --git a/netlink/configure.ac b/netlink/configure.ac index 5fdd345..7e8380a 100644 --- a/netlink/configure.ac +++ b/netlink/configure.ac @@ -6,13 +6,15 @@ AM_PROG_AS AC_PROG_CC AM_PROG_CC_C_O -AC_ARG_WITH(plugin-toolkit, - AC_HELP_STRING([--with-plugin-toolkit], - [build using the vpp toolkit]), - [with_plugin_toolkit=${prefix}/include], - [with_plugin_toolkit=.]) +AC_ARG_VAR(VPP_DIR,[ vpp build directory ]) +AM_CONDITIONAL([VPP_DIR_SET], [test ! -z "$VPP_DIR"]) -AC_SUBST(TOOLKIT_INCLUDE,[${with_plugin_toolkit}]) -AM_CONDITIONAL(WITH_PLUGIN_TOOLKIT, test "$with_plugin_toolkit" != ".") +AC_ARG_ENABLE([debug], +[ --enable-debug Turn on debugging], + [if test x$enableval = xyes; then + AC_DEFINE(DEBUG, 1, [Define this to enable debug.]) + debug=true + fi], [debug=false]) +AM_CONDITIONAL([DEBUG], [test x$debug = xtrue]) AC_OUTPUT([Makefile]) diff --git a/netlink/librtnl/mapper.c b/netlink/librtnl/mapper.c index b82fae8..65cc13a 100644 --- a/netlink/librtnl/mapper.c +++ b/netlink/librtnl/mapper.c @@ -96,7 +96,8 @@ int mapper_add_del_route(mapper_ns_t *ns, ns_route_t *route, int del) fib_table_entry_path_add (ns->v6fib_index, &prefix, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, prefix.fp_proto, &nh, map->sw_if_index, ns->v6fib_index, - 0 /* weight */, MPLS_LABEL_INVALID, + 0 /* weight */, + (mpls_label_t *) MPLS_LABEL_INVALID, FIB_ROUTE_PATH_FLAG_NONE); #endif /* FIB_VERSION == 1 */ } else { @@ -126,7 +127,8 @@ int mapper_add_del_route(mapper_ns_t *ns, ns_route_t *route, int del) fib_table_entry_path_add (ns->v4fib_index, &prefix, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, prefix.fp_proto, &nh, map->sw_if_index, ns->v4fib_index, - 0 /* weight */, MPLS_LABEL_INVALID, + 0 /* weight */, + (mpls_label_t *) MPLS_LABEL_INVALID, FIB_ROUTE_PATH_FLAG_NONE); #endif /* FIB_VERSION == 1 */ } diff --git a/netlink/netlink.mk b/netlink/netlink.mk index 677a1b5..610205f 100644 --- a/netlink/netlink.mk +++ b/netlink/netlink.mk @@ -1,31 +1,5 @@ -netlink_configure_depend = \ - vppinfra-install \ - dpdk-install \ - svm-install \ - vlib-api-install \ - vlib-install \ - vnet-install \ - vpp-install \ - vpp-api-test-install +netlink_configure_depend = vpp-install -netlink_CPPFLAGS = $(call installed_includes_fn, \ - vppinfra \ - dpdk \ - openssl \ - svm \ - vlib \ - vlib-api \ - vnet \ - vpp \ - vpp-api-test) +netlink_CPPFLAGS = $(call installed_includes_fn, vpp) -netlink_LDFLAGS = $(call installed_libs_fn, \ - vppinfra \ - dpdk \ - openssl \ - svm \ - vlib \ - vlib-api \ - vnet \ - vpp \ - vpp-api-test) +netlink_LDFLAGS = $(call installed_libs_fn, vpp) diff --git a/netlink/test/test.c b/netlink/test/test.c index 96f49f8..6aaf1a3 100644 --- a/netlink/test/test.c +++ b/netlink/test/test.c @@ -18,6 +18,9 @@ #include <vnet/plugin/plugin.h> #include <librtnl/mapper.h> #include <vnet/ip/ip.h> +#include <vnet/fib/fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> u32 handles[10]; @@ -121,17 +124,8 @@ mapper_ns_add_command_fn (vlib_main_t * vm, if (!strcmp(nsname, "default")) nsname[0] = 0; -#ifdef find_ip4_fib_by_table_index_or_id - u32 fib4 = find_ip4_fib_by_table_index_or_id(&ip4_main, table_id, 0) - ip4_main.fibs; -#else - u32 fib4 = fib_table_id_find_fib_index (FIB_PROTOCOL_IP4, table_id); -#endif - -#ifdef find_ip6_fib_by_table_index_or_id - u32 fib6 = find_ip6_fib_by_table_index_or_id(&ip6_main, table_id, 0) - ip6_main.fibs; -#else - u32 fib6 = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, table_id); -#endif + u32 fib4 = ip4_fib_index_from_table_id(table_id); + u32 fib6 = ip6_fib_index_from_table_id(table_id); if (mapper_add_ns(nsname, fib4, fib6, &mapper_indexes[index])) return clib_error_return(0, "Could not add ns %s", nsname); @@ -200,11 +194,10 @@ VLIB_CLI_COMMAND (mapper_iface_command, static) = { .function = mapper_iface_command_fn, }; -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, - int from_early_init) -{ - clib_warning("Loaded module"); - return 0; -} +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + //.version = VPP_BUILD_VER, FIXME + .description = "netlink", +}; +/* *INDENT-ON* */ diff --git a/router/README.md b/router/README.md index b4e8ce0..3401c76 100644 --- a/router/README.md +++ b/router/README.md @@ -34,7 +34,7 @@ Now build everything and create a link to the plugin in vpp's plugin path. ``` $ cd build-root $ ./bootstrap.sh -$ make V=0 PLATFORM=vpp TAG=vpp_debug router-install +$ make V=0 PLATFORM=vpp TAG=vpp_debug netlink-install router-install $ ln -sf /git/vpp/build-root/install-vpp_debug-native/router/lib64/router.so.0.0.0 \ /usr/lib/vpp_plugins/router.so ``` @@ -43,7 +43,7 @@ Once VPP is running and the plugin is loaded, data plane interfaces can be tapped. ``` -$ vppctl tap inject arp,icmp4 from TenGigabitEthernet2/0/0 as vpp0 +$ vppctl enable tap-inject ``` The host operating system should see a tap named 'vpp0' with the same hardware @@ -58,7 +58,7 @@ plane's default fib. Currently the router plugin handles ARP, locally destined ICMPv4 and OSPF traffic. It supports the classifier directing packets from an ip4-table to -the 'tap-inject-classified' node (for handling multicast OSPF and IGMP). +the 'tap-inject-neighbor' node (for handling multicast OSPF and IGMP). ### Objective diff --git a/router/router.mk b/router/router.mk index f9bb917..9ffbd3a 100644 --- a/router/router.mk +++ b/router/router.mk @@ -1,34 +1,11 @@ -router_configure_depend = \ - vppinfra-install \ - dpdk-install \ - svm-install \ - vlib-api-install \ - vlib-install \ - vnet-install \ - vpp-install \ - netlink-install \ - vpp-api-test-install ++router_configure_depend = \ ++ vpp-install \ ++ netlink-install router_CPPFLAGS = $(call installed_includes_fn, \ - vppinfra \ - dpdk \ - openssl \ - svm \ - vlib \ - vlib-api \ - vnet \ vpp \ - netlink \ - vpp-api-test) + netlink) router_LDFLAGS = $(call installed_libs_fn, \ - vppinfra \ - dpdk \ - openssl \ - svm \ - vlib \ - vlib-api \ - vnet \ vpp \ - netlink \ - vpp-api-test) + netlink) diff --git a/router/router/tap_inject.c b/router/router/tap_inject.c index 8d6f5af..75d6df2 100644 --- a/router/router/tap_inject.c +++ b/router/router/tap_inject.c @@ -16,7 +16,17 @@ #include "tap_inject.h" +#include <vnet/ip/ip.h> +#include <vnet/ip/lookup.h> +#ifdef ip6_add_del_route_next_hop +#define FIB_VERSION 1 +#else +#include <vnet/fib/fib.h> +#define FIB_VERSION 2 +#endif + static tap_inject_main_t tap_inject_main; +extern dpo_type_t tap_inject_dpo_type; tap_inject_main_t * tap_inject_get_main (void) @@ -84,12 +94,12 @@ tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index) return sw_if_index ? *(u32 *)sw_if_index : ~0; } - -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, int f) -{ - return 0; -} +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + // .version = VPP_BUILD_VER, FIXME + .description = "router", +}; +/* *INDENT-ON* */ static void @@ -135,6 +145,7 @@ tap_inject_enable (void) ip6_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index); ip6_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index); +#if FIB_VERSION == 1 /* Add IPv4 multicast route. */ { ip4_add_del_route_args_t a; @@ -163,6 +174,30 @@ tap_inject_enable (void) ip4_add_del_route (&ip4_main, &a); } +#else + { + dpo_proto_t proto = 0; + dpo_id_t dpo = DPO_INVALID; + fib_prefix_t pfx = {}; + + pfx.fp_addr.ip4.as_u32 = 0x000000E0; /* 224.0.0.0 */ + pfx.fp_len = 24; + pfx.fp_proto = FIB_PROTOCOL_IP4; + proto = DPO_PROTO_IP4; + + vlib_node_add_next (vm, ip4_lookup_node.index, im->tx_node_index); + + dpo_set(&dpo, tap_inject_dpo_type, proto, ~0); + + fib_table_entry_special_dpo_add(0, + &pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + dpo_reset(&dpo); + } +#endif /* FIB_VERSION == 1 */ im->flags |= TAP_INJECT_F_ENABLED; diff --git a/router/router/tap_inject_netlink.c b/router/router/tap_inject_netlink.c index a30e262..19d5d04 100644 --- a/router/router/tap_inject_netlink.c +++ b/router/router/tap_inject_netlink.c @@ -19,7 +19,16 @@ #include <librtnl/netns.h> #include <vlibmemory/api.h> #include <vnet/ethernet/arp_packet.h> +#include <vnet/ip/ip6_neighbor.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/lookup.h> +#ifdef ip6_add_del_route_next_hop +#define FIB_VERSION 1 +#else +#include <vnet/fib/fib.h> +#define FIB_VERSION 2 +#endif static void add_del_addr (ns_addr_t * a, int is_del) @@ -109,16 +118,41 @@ add_del_neigh (ns_neigh_t * n, int is_del) clib_memcpy (&a.ethernet, n->lladdr, ETHER_ADDR_LEN); clib_memcpy (&a.ip4, n->dst, sizeof (a.ip4)); + if (n->nd.ndm_state & NUD_REACHABLE) + { +#if FIB_VERSION == 1 vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a, 0); +#else + vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, + &a, 0 /* static */ , + 0 /* no fib entry */); + +#endif /* FIB_VERSION == 1 */ + } else if (n->nd.ndm_state & NUD_FAILED) + { +#if FIB_VERSION == 1 vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a); +#else + vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, &a); +#endif /* FIB_VERSION == 1 */ + } } else if (n->nd.ndm_family == AF_INET6) { if (n->nd.ndm_state & NUD_REACHABLE) + { +#if FIB_VERSION == 1 vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN, 0); +#else + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, + (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN, + 0 /* static */, + 0 /* no fib entry */); +#endif /* FIB_VERSION == 1 */ + } else vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN); @@ -140,17 +174,55 @@ add_del_route (ns_route_t * r, int is_del) if (r->rtm.rtm_family == AF_INET) { +#if FIB_VERSION == 1 ip4_add_del_route_next_hop (&ip4_main, is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD, (ip4_address_t *) r->dst, r->rtm.rtm_dst_len, (ip4_address_t *) r->gateway, sw_if_index, 0, ~0, 0); +#else + fib_prefix_t prefix; + ip46_address_t nh; + + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = r->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&prefix.fp_addr.ip4, r->dst, sizeof (prefix.fp_addr.ip4)); + + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip4, r->gateway, sizeof (nh.ip4)); + + fib_table_entry_path_add (0, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, sw_if_index, 0, + 0 /* weight */, NULL, + FIB_ROUTE_PATH_FLAG_NONE); +#endif /* FIB_VERSION == 1 */ } else if (r->rtm.rtm_family == AF_INET6) { +#if FIB_VERSION == 1 ip6_add_del_route_next_hop (&ip6_main, is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD, (ip6_address_t *) r->dst, r->rtm.rtm_dst_len, (ip6_address_t *) r->gateway, sw_if_index, 0, ~0, 0); +#else + fib_prefix_t prefix; + ip46_address_t nh; + + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = r->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&prefix.fp_addr.ip6, r->dst, sizeof (prefix.fp_addr.ip6)); + + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip6, r->gateway, sizeof (nh.ip6)); + + fib_table_entry_path_add (0, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, sw_if_index, 0, + 0 /* weight */, NULL, + FIB_ROUTE_PATH_FLAG_NONE); +#endif /* FIB_VERSION == 1 */ } } diff --git a/router/router/tap_inject_node.c b/router/router/tap_inject_node.c index fe108dc..32c1ab1 100644 --- a/router/router/tap_inject_node.c +++ b/router/router/tap_inject_node.c @@ -28,6 +28,10 @@ enum { NEXT_NEIGHBOR_ICMP6, }; +/** + * @brief Dynamically added tap_inject DPO type + */ +dpo_type_t tap_inject_dpo_type; static inline void tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b) @@ -233,7 +237,7 @@ tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd) b = vlib_get_buffer (vm, bi[0]); - vnet_buffer (b)->sw_if_index[VLIB_RX] = ~0; + vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index; vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; n_bytes_left = n_bytes - VLIB_BUFFER_DATA_SIZE; @@ -260,8 +264,6 @@ tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd) _vec_len (im->rx_buffers) -= i; - vlib_buffer_chain_validate (vm, vlib_get_buffer (vm, bi[0])); - /* Get the packet to the output node. */ { vnet_hw_interface_t * hw; @@ -312,6 +314,42 @@ VLIB_REGISTER_NODE (tap_inject_rx_node) = { .vector_size = sizeof (u32), }; +/** + * @brief no-op lock function. + */ +static void +tap_inject_dpo_lock (dpo_id_t * dpo) +{ +} + +/** + * @brief no-op unlock function. + */ +static void +tap_inject_dpo_unlock (dpo_id_t * dpo) +{ +} + +u8 * +format_tap_inject_dpo (u8 * s, va_list * args) +{ + return (format (s, "tap-inject:[%d]", 0)); +} + +const static dpo_vft_t tap_inject_vft = { + .dv_lock = tap_inject_dpo_lock, + .dv_unlock = tap_inject_dpo_unlock, + .dv_format = format_tap_inject_dpo, +}; + +const static char *const tap_inject_tx_nodes[] = { + "tap-inject-tx", + NULL, +}; + +const static char *const *const tap_inject_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = tap_inject_tx_nodes, +}; static clib_error_t * tap_inject_init (vlib_main_t * vm) @@ -322,6 +360,8 @@ tap_inject_init (vlib_main_t * vm) im->tx_node_index = tap_inject_tx_node.index; im->neighbor_node_index = tap_inject_neighbor_node.index; + tap_inject_dpo_type = dpo_register_new_type (&tap_inject_vft, tap_inject_nodes); + vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC); vec_reset_length (im->rx_buffers); diff --git a/vcl-ldpreload/LICENSE b/vcl-ldpreload/LICENSE new file mode 100644 index 0000000..7a4a3ea --- /dev/null +++ b/vcl-ldpreload/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
\ No newline at end of file diff --git a/vcl-ldpreload/MAINTAINERS b/vcl-ldpreload/MAINTAINERS new file mode 100644 index 0000000..68c6204 --- /dev/null +++ b/vcl-ldpreload/MAINTAINERS @@ -0,0 +1,30 @@ +Descriptions of section entries: + + M: Maintainer Full name and E-mail address: Full Name <address@domain> + One maintainer per line. Multiple M: lines acceptable. + F: Files and directories with wildcard patterns. + A trailing slash includes all files and subdirectory files. + F: drivers/net/ all files in and below drivers/net + F: drivers/net/* all files in drivers/net, but not below + One pattern per line. Multiple F: lines acceptable. + C: Single line comment related to current section. + + ----------------------------------- + +Build System +M: Shrinivasan Ganapathy <shganapa@cisco.com> +F: Makefile +F: src/*.ac +F: src/*.am +F: src/*.mk +F: src/m4/ + +VCL-LDPRELOAD Library +M: Shrinivasan Ganapathy <shganapa@cisco.com> +F: src/libvcl-ldpreload/ + + +THE REST +C: Contact vppsb-dev Mailing List <vppsb-dev@lists.fd.io> +F: * +F: */ diff --git a/vcl-ldpreload/README.md b/vcl-ldpreload/README.md new file mode 100644 index 0000000..fd413a3 --- /dev/null +++ b/vcl-ldpreload/README.md @@ -0,0 +1,88 @@ +# vcl-ldpreload a LD_PRELOAD library that uses the VPP Communications Library (VCL). + +User can LD_PRELOAD any application that uses POSIX socket API. +This library internally uses libvppcom.so library from VPP project. + + +## HowTo + +If VPP is not installed, but rather built in a separate directory, you can use the VPP_DIR 'configure' argument. +```bash +# 1. Edit vppsb/vcl-ldpreload/env.sh +## set base for directory above vpp and vppsb source + +source ./env.sh + +# 2. Change to VPP source directory and build + +cd $VPP_DIR + +# Modify uri.am to enable socket_test program + +perl -pi -e 's/noinst_PROGRAMS/bin_PROGRAMS/g' $VPP_DIR/src/uri.am + +# Build VPP release + +make install-dep wipe-release bootstrap dpdk-install-dev build-release + +# 2. Build LD_PRELOAD library against VPP build above +## This does not install the LD_PRELOAD library in your system. +## Instead it will be referenced from the build directory set in VCL_LDPRELOAD_LIB + +cd $LDP_DIR/vcl-ldpreload/src +autoreconf -i -f +./configure VPP_DIR=$VPP_DIR +make +```bash + + +# 3. Running the demo +## Run test script without parameters to see help menu: + +cd $VPP_DIR/test/scripts +./socket_test.sh + +# 4. Docker iPerf examples. +## These launch xterms. To quit, close xterms and run following docker kill cmd (WARNING: This will kill all docker containers!) 'docker kill $(docker ps -q)' + + +## Docker iPerf using default Linux Bridge + +./socket_test.sh -bi docker-kernel + +## Docker iPerf using VPP +./socket_test.sh -bi docker-preload + + + + +The library can be compiled by running the following commands from the vppsb/vcl-ldpreload/src directory: +If VPP is installed, then +```bash +libtoolize +aclocal +autoconf +automake --add-missing +./configure +make +sudo make install +``` + +## Administrative + +### Current status + +This library is currently under active enhancement. + +### Objective + +This effort intends to be a building block for a better integration of POSIX socket applications with VPP. +It will evolve depending on the needs of the VPP community while focusing on +LD_PRELOADing applications that use POSIX socket APIs. + +### Main contributors + +Shrinivasan Ganapathy - LF-ID:shganapa + + + diff --git a/vcl-ldpreload/env.sh b/vcl-ldpreload/env.sh new file mode 100644 index 0000000..a7d3110 --- /dev/null +++ b/vcl-ldpreload/env.sh @@ -0,0 +1,5 @@ +export VCL_BASE=/home/alagalah/git/work/fdio +export VPP_DIR=$VCL_BASE/vpp +export LDP_DIR=$VCL_BASE/vppsb +export WS_ROOT=$VCL_BASE/vpp +export VCL_LDPRELOAD_LIB_DIR=$VCL_BASE/vppsb/vcl-ldpreload/src/.libs diff --git a/vcl-ldpreload/src/.gitignore b/vcl-ldpreload/src/.gitignore new file mode 100644 index 0000000..9578123 --- /dev/null +++ b/vcl-ldpreload/src/.gitignore @@ -0,0 +1,17 @@ +*.m4 +autom4te.cache +compile +config.* +configure +depcomp +install-sh +ltmain.sh +missing +Makefile.in +Makefile +*.o +*.l[oa]* +*.Plo +*.dirstamp +*.so* +libtool diff --git a/vcl-ldpreload/src/Makefile.am b/vcl-ldpreload/src/Makefile.am new file mode 100644 index 0000000..a131769 --- /dev/null +++ b/vcl-ldpreload/src/Makefile.am @@ -0,0 +1,62 @@ +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +############################################################################### +# Global Defines +############################################################################### + +AUTOMAKE_OPTIONS = foreign subdir-objects +ACLOCAL_AMFLAGS = -I m4 +AM_LIBTOOLFLAGS = --quiet + +AM_CFLAGS = -Wall -fstack-protector -fPIC -Werror -g -DFORTIFY_SOURCE=2 +AM_LDFLAGS = -shared + +if VPP_DEBUG +AM_CFLAGS += -O0 -DCLIB_DEBUG +vpp_build = vpp_debug-native +else +AM_CFLAGS += -O2 +vpp_build = vpp-native +endif + +if VCL_LDPRELOAD_DEBUG +AM_CFLAGS += -ggdb '-DVCOM_DEBUG=1' +else +AM_CFLAGS += -Wall '-DVCOM_DEBUG=0' +endif + +if VPP_DIR_SET +vpp_install_dir = @VPP_DIR@/build-root/install-$(vpp_build)/vpp +AM_CFLAGS += -I$(vpp_install_dir)/include/ +AM_LDFLAGS += -L$(vpp_install_dir)/lib64/ +endif + + +AM_LDFLAGS += -lvppcom -lvppinfra + +SUBDIRS = . +noinst_HEADERS = +dist_bin_SCRIPTS = +lib_LTLIBRARIES = +BUILT_SOURCES = +CLEANFILES = + + +############################################################################### +# Components +############################################################################### + +include libvcl-ldpreload.am + + diff --git a/vcl-ldpreload/src/configure.ac b/vcl-ldpreload/src/configure.ac new file mode 100644 index 0000000..cee7c95 --- /dev/null +++ b/vcl-ldpreload/src/configure.ac @@ -0,0 +1,151 @@ +AC_INIT([vcl-ldpreload], [1.0], [vpp-dev@fd.io]) +LT_INIT +AC_CONFIG_AUX_DIR([.]) +AM_INIT_AUTOMAKE([subdir-objects]) +AM_SILENT_RULES([yes]) +AC_CONFIG_FILES([Makefile]) +AC_CONFIG_MACRO_DIR([m4]) + +AC_PROG_CC +AM_PROG_AS +AM_PROG_LIBTOOL +AC_PROG_YACC +AM_PATH_PYTHON + +AC_ARG_VAR(VPP_DIR,[ vpp build directory ]) +AM_CONDITIONAL([VPP_DIR_SET], [test ! -z "$VPP_DIR"]) + +############################################################################### +# Macros +############################################################################### + +AC_DEFUN([ENABLE_ARG], +[ + AC_ARG_ENABLE($1, + AC_HELP_STRING(patsubst([--enable-$1],[_],[-]), $2), + [enable_$1=yes n_enable_$1=1], + [enable_$1=no n_enable_$1=0]) + AM_CONDITIONAL(m4_toupper(ENABLE_$1), test "$enable_$1" = "yes") + m4_append([list_of_enabled], [$1], [, ]) +]) + +AC_DEFUN([DISABLE_ARG], +[ + AC_ARG_ENABLE($1, + AC_HELP_STRING(patsubst([--disable-$1],[_],[-]), $2), + [enable_$1=no n_enable_$1=0], + [enable_$1=yes n_enable_$1=1]) + AM_CONDITIONAL(m4_toupper(ENABLE_$1), test "$enable_$1" = "yes") + m4_append([list_of_enabled], [$1], [, ]) +]) + +AC_DEFUN([WITH_ARG], +[ + AC_ARG_WITH($1, + AC_HELP_STRING(patsubst([--with-$1],[_],[-]), $2), + [with_$1=yes n_with_$1=1], + [with_$1=no n_with_$1=0]) + AM_CONDITIONAL(m4_toupper(WITH_$1), test "$with_$1" = "yes") + m4_append([list_of_with], [$1], [, ]) +]) + +AC_DEFUN([WITHOUT_ARG], +[ + AC_ARG_WITH($1, + AC_HELP_STRING(patsubst([--without-$1],[_],[-]), $2), + [with_$1=no n_with_$1=0], + [with_$1=yes n_with_$1=1]) + AM_CONDITIONAL(m4_toupper(WITH_$1), test "$with_$1" = "yes") + m4_append([list_of_with], [$1], [, ]) +]) + + +AC_DEFUN([PRINT_VAL], [ AC_MSG_RESULT(AC_HELP_STRING($1,$2)) ]) + + +############################################################################### +# configure arguments +############################################################################### + +# --enable-X + +AC_ARG_ENABLE([vpp-debug], +[ --enable-vpp-debug Use vpp debug native build libraries], + [if test x$enableval = xyes; then + AC_DEFINE(VPP_DEBUG, 1, [Define this to use vpp debug native build libraries.]) + vpp_debug=true + fi], [vpp_debug=false]) +AM_CONDITIONAL([VPP_DEBUG], [test x$vpp_debug = xtrue]) + +AC_ARG_ENABLE([vcl-ldpreload-debug], +[ --enable-vcl-ldpreload-debug Turn on vcl-ldpreload debugging], + [if test x$enableval = xyes; then + AC_DEFINE(VCL_LDPRELOAD_DEBUG, 1, [Define this to enable vcl-ldpreload debug.]) + vcl_ldpreload_debug=true + fi], [vcl_ldpreload_debug=false]) +AM_CONDITIONAL([VCL_LDPRELOAD_DEBUG], [test x$vcl_ldpreload_debug = xtrue]) + +# --disable-X + +# --with-X + +# --without-X + +AC_ARG_WITH(unix, + AC_HELP_STRING([--with-unix],[Compile unix version of clib]), + [], + [case $host_os in + darwin* | linux*) with_unix=yes;; + *) with_unix=no;; + esac]) + +AM_CONDITIONAL(WITH_UNIX, test "$with_unix" = "yes") + + +############################################################################### +# Substitutions and defines +############################################################################### + + +# Silence following noise: +# ar: `u' modifier ignored since `D' is the default (see `U') +AR_FLAGS=cr +AC_SUBST(AR_FLAGS) + + +############################################################################### +# Dependency checks +############################################################################### + +############################################################################### +# Output +############################################################################### + +AC_OUTPUT + +AC_MSG_RESULT([==============================================================================]) +PRINT_VAL([version], $PACKAGE $VERSION) +PRINT_VAL([prefix], ${prefix}) +PRINT_VAL([exec_prefix], ${exec_prefix}) +PRINT_VAL([libdir], ${libdir}) +PRINT_VAL([includedir], ${includedir}) +PRINT_VAL([CFLAGS], ${CFLAGS}) +PRINT_VAL([CPPFLAGS], ${CPPFLAGS}) +PRINT_VAL([LDFLAGS], ${LDFLAGS}) + +AC_MSG_RESULT([]) +AC_MSG_RESULT([with:]) +m4_foreach([x], m4_dquote(list_of_with), [ + AC_MSG_RESULT(AC_HELP_STRING(x, m4_join([], [${with_], x, [}]))) +]) + +AC_MSG_RESULT([]) +AC_MSG_RESULT([enabled:]) +m4_foreach([x], m4_dquote(list_of_enabled), [ + AC_MSG_RESULT(AC_HELP_STRING(x, m4_join([], [${enable_], x, [}]))) +]) + +AC_MSG_RESULT([]) +AC_MSG_RESULT([==============================================================================]) + + diff --git a/vcl-ldpreload/src/libvcl-ldpreload.am b/vcl-ldpreload/src/libvcl-ldpreload.am new file mode 100644 index 0000000..0d2fcb5 --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload.am @@ -0,0 +1,31 @@ +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +lib_LTLIBRARIES += libvcl_ldpreload.la + +libvcl_ldpreload_la_SOURCES = +nobase_include_HEADERS = + +libvcl_ldpreload_la_LIBADD = -lvppinfra -lvppcom -lpthread + +libvcl_ldpreload_la_SOURCES += \ + libvcl-ldpreload/vcom_socket_wrapper.c \ + libvcl-ldpreload/vcom.c \ + libvcl-ldpreload/vcom_socket.c + +nobase_include_HEADERS += \ + libvcl-ldpreload/vcom_socket_wrapper.h \ + libvcl-ldpreload/vcom_glibc_socket.h \ + libvcl-ldpreload/vcom.h \ + libvcl-ldpreload/vcom_socket.h + diff --git a/vcl-ldpreload/src/libvcl-ldpreload/TODO b/vcl-ldpreload/src/libvcl-ldpreload/TODO new file mode 100644 index 0000000..63ec21e --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload/TODO @@ -0,0 +1,7 @@ +VCL-LDPRELOAD Library project plan. + +1. [shganapa] LD_PRELOAD iperf3 application. + + +NOTES: +1. Not all POSIX socket API's are LD_PRELOADED. diff --git a/vcl-ldpreload/src/libvcl-ldpreload/vcom.c b/vcl-ldpreload/src/libvcl-ldpreload/vcom.c new file mode 100644 index 0000000..3160d1d --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload/vcom.c @@ -0,0 +1,3002 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <unistd.h> +#include <stdio.h> +#include <signal.h> +#include <dlfcn.h> +#include <pthread.h> +#include <time.h> +#include <stdarg.h> + +#include <libvcl-ldpreload/vcom_socket_wrapper.h> +#include <libvcl-ldpreload/vcom.h> +#include <sys/time.h> + +#include <uri/vppcom.h> +#include <libvcl-ldpreload/vcom_socket.h> + +/* GCC have printf type attribute check. */ +#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT +#define PRINTF_ATTRIBUTE(a,b) \ + __attribute__ ((__format__ (__printf__, a, b))) +#else +#define PRINTF_ATTRIBUTE(a,b) +#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */ + +#define HAVE_CONSTRUCTOR_ATTRIBUTE +#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE +#define CONSTRUCTOR_ATTRIBUTE \ + __attribute__ ((constructor)) +#else +#define CONSTRUCTOR_ATTRIBUTE +#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */ + +#define HAVE_DESTRUCTOR_ATTRIBUTE +#ifdef HAVE_DESTRUCTOR_ATTRIBUTE +#define DESTRUCTOR_ATTRIBUTE \ + __attribute__ ((destructor)) +#else +#define DESTRUCTOR_ATTRIBUTE +#endif + +#define HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE \ + __attribute__((no_sanitize_address)) +#else +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE +#endif + +#define VCOM_SOCKET_FD_MAX 0x10000 + +static char vcom_app_name[MAX_VCOM_APP_NAME]; + +/* + * RETURN: 0 on success or -1 on error. + * */ +int +vcom_set_app_name (char *__app_name) +{ + return snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-%s-%d", + __app_name, getpid ()) < 0 ? -1 : 0; +} + +static char * +vcom_get_app_name () +{ + if (vcom_app_name[0] == '\0') + { + snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-app-%d", getpid ()); + } + return vcom_app_name; +} + +/* + * 1 if init, 0 otherwise + */ +static int is_vcom_init; + +/* + * TBD: Make it thread safe + */ + +/* + * constructor function called before main is called + * RETURN: 0 on success -1 on failure + * */ +static inline int +vcom_init () +{ + pid_t pid = getpid (); + + if (!is_vcom_init) + { + if (vppcom_app_create (vcom_get_app_name ()) != 0) + { + printf ("\n[%d] vcom_init...failed!\n", pid); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] vcom_init: vppcom_app_create failed!\n", pid); + return -1; + } + if (vcom_socket_main_init () != 0) + { + printf ("\n[%d] vcom_init...failed!\n", pid); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] vcom_init: vcom_socket_main_init failed!\n", pid); + return -1; + } + + is_vcom_init = 1; + printf ("\n[%d] vcom_init...done!\n", pid); + } + return 0; +} + +static inline void +vcom_destroy (void) +{ + pid_t pid = getpid (); + + if (is_vcom_init) + { + vcom_socket_main_destroy (); + vppcom_app_destroy (); + is_vcom_init = 0; + fprintf (stderr, "\n[%d] vcom_destroy...done!\n", pid); + } +} + +static inline int +is_vcom_socket_fd (int fd) +{ + return vcom_socket_is_vcom_fd (fd); +} + +static inline int +is_vcom_epfd (int epfd) +{ + return vcom_socket_is_vcom_epfd (epfd); +} + + +/* + * + * Generic glibc fd api + * + */ + +/* Close the file descriptor FD. + + This function is a cancellation point and therefore + not marked with __THROW. */ +/* + * PRE: is_vcom_socket_fd(__fd) == 1 + * RETURN: 0 on success and -1 for errors. + * */ +int +vcom_close (int __fd) +{ + if (vcom_init () != 0) + { + return -1; + } + + if (vcom_socket_close (__fd) != 0) + { + return -1; + } + + return 0; +} + +/* + * RETURN: 0 on success, or -1 on error + */ +int +close (int __fd) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd) || is_vcom_epfd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_close (__fd); + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] close: " "'%04d'='%04d'\n", pid, rv, __fd); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_close (__fd); +} + +/* Read NBYTES into BUF from FD. Return the + number read, -1 for errors or 0 for EOF. + + This function is a cancellation point and therefore + not marked with __THROW. */ +ssize_t +vcom_read (int __fd, void *__buf, size_t __nbytes) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_read (__fd, __buf, __nbytes); +} + +ssize_t +read (int __fd, void *__buf, size_t __nbytes) +{ + ssize_t size = 0; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] read:1 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __nbytes); + size = vcom_read (__fd, __buf, __nbytes); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] read:2 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __nbytes); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_read (__fd, __buf, __nbytes); +} + +ssize_t +vcom_readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_readv (__fd, __iov, __iovcnt); +} + +ssize_t +readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + ssize_t size = 0; + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_readv (__fd, __iov, __iovcnt); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + else + return libc_readv (__fd, __iov, __iovcnt); +} + +/* Write N bytes of BUF to FD. Return the number written, or -1. + + This function is a cancellation point and therefore + not marked with __THROW. */ +ssize_t +vcom_write (int __fd, const void *__buf, size_t __n) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_write (__fd, (void *) __buf, __n); +} + +ssize_t +write (int __fd, const void *__buf, size_t __n) +{ + ssize_t size = 0; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] write:1 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __n); + size = vcom_write (__fd, __buf, __n); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] write:2 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __n); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_write (__fd, __buf, __n); +} + +ssize_t +vcom_writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_writev (__fd, __iov, __iovcnt); +} + +ssize_t +writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + ssize_t size = 0; + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_writev (__fd, __iov, __iovcnt); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + else + return libc_writev (__fd, __iov, __iovcnt); +} + +/* Do the file control operation described by CMD on FD. + The remaining arguments are interpreted depending on CMD. + + This function is a cancellation point and therefore + not marked with __THROW. */ +int +vcom_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_fcntl_va (__fd, __cmd, __ap); +} + +int +vcom_fcntl (int __fd, int __cmd, ...) +{ + int rv = -1; + va_list ap; + + if (is_vcom_socket_fd (__fd)) + { + va_start (ap, __cmd); + rv = vcom_fcntl_va (__fd, __cmd, ap); + va_end (ap); + } + return rv; +} + +int +fcntl (int __fd, int __cmd, ...) +{ + int rv; + va_list ap; + pid_t pid = getpid (); + + va_start (ap, __cmd); + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_fcntl_va (__fd, __cmd, ap); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] fcntl: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __cmd); + if (rv < 0) + { + errno = -rv; + rv = -1; + } + goto out; + } + rv = libc_vfcntl (__fd, __cmd, ap); + +out: + va_end (ap); + return rv; +} + +/* + * Check the first NFDS descriptors each in READFDS (if not NULL) for + * read readiness, in WRITEFDS (if not NULL) for write readiness, + * and in EXCEPTFDS (if not NULL) for exceptional conditions. + * If TIMEOUT is not NULL, time out after waiting the interval + * specified therein. Returns the number of ready descriptors, + * or -1 for errors. + * + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ + +/* + * clear all vcom FDs from fd_sets __readfds, __writefds and + * __exceptfds and update the new nfds + * + * new nfds is the highest-numbered file descriptor + * in any of the three sets, plus 1 + * + * Return the number of file descriptors contained in the + * three descriptor sets. ie. the total number of the bits + * that are set in __readfds, __writefds and __exceptfds + */ +static inline int +vcom_fd_clear (int __nfds, + int *__new_nfds, + fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + + /* clear all vcom fd from the sets */ + for (fd = 0; fd < __nfds; fd++) + { + + /* clear vcom fd from set */ + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + FD_CLR (fd, (F)); \ + } \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + /* + * compute nfd and __new_nfds + */ + for (fd = 0; fd < __nfds; fd++) + { + + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + *__new_nfds = max_fd != -1 ? max_fd + 1 : 0; + return nfd; +} + +/* + * Return the number of file descriptors contained in the + * three descriptor sets. ie. the total number of the bits + * that are set in __readfds, __writefds and __exceptfds + */ +static inline int +vcom_fd_set (int __nfds, + /* dest */ + int *__new_nfds, + fd_set * __restrict __readfds, + fd_set * __restrict __writefds, fd_set * __restrict __exceptfds, + /* src */ + fd_set * __restrict __saved_readfds, + fd_set * __restrict __saved_writefds, + fd_set * __restrict __saved_exceptfds) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + for (fd = 0; fd < __nfds; fd++) + { + /* + * F fd set + * S saved fd set + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (S))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + FD_SET (fd, (F)); \ + } \ + } + + + _(__saved_readfds, __readfds); + _(__saved_writefds, __writefds); +#undef _ + } + + + /* + * compute nfd and __new_nfds + */ + for (fd = 0; fd < __nfds; fd++) + { + + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + *__new_nfds = max_fd != -1 ? max_fd + 1 : 0; + return nfd; +} + +/* + * split select sets(src) into + * vcom sets(dest1) and libc sets(dest2) + */ +static inline void +vcom_fd_set_split ( + /* src, select sets */ + int nfds, + fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, + /* dest1, vcom sets */ + int *vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, int *vcom_nfd, + /* dest2, libc sets */ + int *libc_nfds, + fd_set * __restrict libc_readfds, + fd_set * __restrict libc_writefds, + fd_set * __restrict libc_exceptfds, int *libc_nfd) +{ + int fd; + + /* vcom */ + /* invalid max_fd is -1 */ + int vcom_max_fd = -1; + int vcom_nfd2 = 0; + + /* libc */ + /* invalid max_fd is -1 */ + int libc_max_fd = -1; + int libc_nfd2 = 0; + + + for (fd = 0; fd < nfds; fd++) + { + /* + * S select fd set + * V vcom fd set + * L libc fd set + */ +#define _(S,V,L) \ + if ((S) && FD_ISSET (fd, (S))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + if ((V)) \ + { \ + FD_SET(fd, (V)); \ + if (fd > vcom_max_fd) \ + { \ + vcom_max_fd = fd; \ + } \ + ++vcom_nfd2; \ + } \ + } \ + else \ + { \ + if ((L)) \ + { \ + FD_SET(fd, (L)); \ + if (fd > libc_max_fd) \ + { \ + libc_max_fd = fd; \ + } \ + ++libc_nfd2; \ + } \ + } \ + } + + + _(readfds, vcom_readfds, libc_readfds); + _(writefds, vcom_writefds, libc_writefds); + _(exceptfds, vcom_exceptfds, libc_exceptfds); +#undef _ + } + + if (vcom_nfds) + *vcom_nfds = vcom_max_fd != -1 ? vcom_max_fd + 1 : 0; + if (vcom_nfd) + *vcom_nfd = vcom_nfd2; + if (libc_nfds) + *libc_nfds = libc_max_fd != -1 ? libc_max_fd + 1 : 0; + if (libc_nfd) + *libc_nfd = libc_nfd2; +} + +/* + * merge vcom sets(src1) and libc sets(src2) + * into select sets(dest) + */ +static inline void +vcom_fd_set_merge ( + /* dest, select sets */ + int *nfds, + fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, int *nfd, + /* src1, vcom sets */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, int vcom_nfd, + /* src2, libc sets */ + int libc_nfds, + fd_set * __restrict libc_readfds, + fd_set * __restrict libc_writefds, + fd_set * __restrict libc_exceptfds, int libc_nfd) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd2 = 0; + + + /* FD_BIT_OR + * + * dest |= src at current bit index + * update MAX and NFD of dest fd set + * + * + * FS source fd set + * FD dest fd set + * BI bit index + * MAX current max_fd of dest fd sets + * NFD current nfd of dest fd sets + * N nfds of source fd set + */ +#define FD_BIT_OR(FD,FS,BI, \ + MAX,NFD) \ + if ((FS) && (FD) && FD_ISSET ((BI), (FS))) \ + { \ + FD_SET ((BI), (FD)); \ + if ((BI) > (MAX)) \ + { \ + (MAX) = (BI); \ + } \ + ++(NFD); \ + } + + + /* FD_RWE_SET_OR */ + /* + * SR,SW,SE source RWE fd sets + * DR,DW,DE dest RWE fd sets + * BI bit index + * NFDS nfds of source fd sets + * MAX current max_fd of dest fd sets + * NFD current nfd of dest fd sets + */ +#define FD_RWE_SETS_OR(DR,DW,DE, \ + SR,SW,SE, \ + BI,NFDS, \ + MAX,NFD) \ + do \ + { \ + for ((BI) = 0; (BI) < (NFDS); (BI)++) \ + { \ + FD_BIT_OR((DR), (SR), (BI), (MAX), (NFD)); \ + FD_BIT_OR((DW), (SW), (BI), (MAX), (NFD)); \ + FD_BIT_OR((DE), (SE), (BI), (MAX), (NFD)); \ + } \ + } \ + while (0); + + + /* source(vcom) to dest(select) rwe fd sets */ + FD_RWE_SETS_OR (readfds, writefds, exceptfds, + vcom_readfds, vcom_writefds, vcom_exceptfds, + fd, vcom_nfds, max_fd, nfd2); + + /* source(libc) to dest(select) rwe fd sets */ + FD_RWE_SETS_OR (readfds, writefds, exceptfds, + libc_readfds, libc_writefds, libc_exceptfds, + fd, libc_nfds, max_fd, nfd2); + +#undef FD_RWE_SETS_OR +#undef FD_BIT_OR + + if (nfds) + *nfds = max_fd != -1 ? max_fd + 1 : 0; + if (nfd) + *nfd = nfd2; +} + +/* + * RETURN 1 if fds is NULL or empty. 0 otherwise + */ +static inline int +fd_set_iszero (fd_set * __restrict fds) +{ + int fd; + + /* NULL fds */ + if (!fds) + return 1; + + for (fd = 0; fd < FD_SETSIZE; fd++) + { + if (FD_ISSET (fd, fds)) + { + /* non-empty fds */ + return 0; + } + } + /* empty fds */ + return 1; +} + + +/* + * ################ + * kernel time64.h + * ################ + * */ +typedef long int s64; +typedef unsigned long int u64; + +typedef long long int __s64; +typedef unsigned long long int __u64; + +typedef __s64 time64_t; +typedef __u64 timeu64_t; + +/* Parameters used to convert the timespec values: */ +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + + +/* + * ################ + * kernel time.h + * ################ + * */ + + +#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) + +static inline int +timespec_equal (const struct timespec *a, const struct timespec *b) +{ + return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); +} + +/* + * lhs < rhs: return <0 + * lhs == rhs: return 0 + * lhs > rhs: return >0 + */ +static inline int +timespec_compare (const struct timespec *lhs, const struct timespec *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_nsec - rhs->tv_nsec; +} + +static inline int +timeval_compare (const struct timeval *lhs, const struct timeval *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_usec - rhs->tv_usec; +} + +extern void set_normalized_timespec (struct timespec *ts, time_t sec, + s64 nsec); + + +static inline struct timespec +timespec_add (struct timespec lhs, struct timespec rhs) +{ + struct timespec ts_delta; + set_normalized_timespec (&ts_delta, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + return ts_delta; +} + +/* + * sub = lhs - rhs, in normalized form + */ +static inline struct timespec +timespec_sub (struct timespec lhs, struct timespec rhs) +{ + struct timespec ts_delta; + set_normalized_timespec (&ts_delta, lhs.tv_sec - rhs.tv_sec, + lhs.tv_nsec - rhs.tv_nsec); + return ts_delta; +} + +/* + * ################ + * kernel time.c + * ################ + * */ + + +/** + * set_normalized_timespec - set timespec sec and nsec parts and normalize + * + * @ts: pointer to timespec variable to be set + * @sec: seconds to set + * @nsec: nanoseconds to set + * + * Set seconds and nanoseconds field of a timespec variable and + * normalize to the timespec storage format + * + * Note: The tv_nsec part is always in the range of + * 0 <= tv_nsec < NSEC_PER_SEC + * For negative values only the tv_sec field is negative ! + */ +void +set_normalized_timespec (struct timespec *ts, time_t sec, s64 nsec) +{ + while (nsec >= NSEC_PER_SEC) + { + /* + * The following asm() prevents the compiler from + * optimising this loop into a modulo operation. See + * also __iter_div_u64_rem() in include/linux/time.h + */ + asm ("":"+rm" (nsec)); + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) + { + asm ("":"+rm" (nsec)); + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +#define vcom_timerisvalid(tvp) (!((tvp)->tv_sec < 0 || (tvp)->tv_usec < 0)) + +/* Macros for converting between `struct timeval' and `struct timespec'. */ +#define VCOM_TIMEVAL_TO_TIMESPEC(tv, ts) { \ + (ts)->tv_sec = (tv)->tv_sec; \ + (ts)->tv_nsec = (tv)->tv_usec * 1000; \ +} +#define VCOM_TIMESPEC_TO_TIMEVAL(tv, ts) { \ + (tv)->tv_sec = (ts)->tv_sec; \ + (tv)->tv_usec = (ts)->tv_nsec / 1000; \ +} + +static inline int +vcom_select_impl (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout) +{ + return vcom_socket_select (vcom_nfds, vcom_readfds, + vcom_writefds, vcom_exceptfds, timeout); +} + +int +vcom_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + int rv; + int rv2 = 0; + pid_t pid = getpid (); + + int timedout = 0; + /* block indefinitely */ + int no_timeout = 0; + int first_clock_gettime_failed = 0; + /* timeout value in units of timespec */ + struct timespec timeout_ts; + struct timespec start_time, now, end_time; + + /* select sets attributes - after merge */ + int new_nfds = 0; + int new_nfd = -1; + + /* vcom */ + int vcom_nfds = 0; + fd_set vcom_readfds; + fd_set vcom_writefds; + fd_set vcom_exceptfds; + int vcom_nfd = -1; + + /* libc */ + int libc_nfds = 0; + fd_set libc_readfds; + fd_set libc_writefds; + fd_set libc_exceptfds; + int libc_nfd = -1; + + /* for polling */ + struct timeval tv = {.tv_sec = 0,.tv_usec = 0 }; + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (!vcom_timerisvalid (__timeout)) + { + rv = -EINVAL; + goto select_done; + } + + /* validate tv_usec */ + /* TBD: */ + /* init timeout_ts */ + VCOM_TIMEVAL_TO_TIMESPEC (__timeout, &timeout_ts); + set_normalized_timespec (&timeout_ts, + timeout_ts.tv_sec, timeout_ts.tv_nsec); + } + + rv = clock_gettime (CLOCK_MONOTONIC, &start_time); + if (rv == -1) + { + rv = -errno; + first_clock_gettime_failed = 1; + goto select_done; + } + + /* init end_time */ + if (__timeout) + { + if (timerisset (__timeout)) + { + end_time = timespec_add (start_time, timeout_ts); + } + else + { + /* + * if both fields of the timeout structure are zero, + * then select returns immediately + * */ + end_time = start_time; + } + } + else + { + /* block indefinitely */ + no_timeout = 1; + } + + + + if (vcom_init () != 0) + { + rv = -1; + goto select_done; + } + + /* validate __nfds */ + if (__nfds < 0 || __nfds > FD_SETSIZE) + { + rv = -EINVAL; + goto select_done; + } + + + /* + * usleep(3) emulation + * */ + + /* call libc_select() with a finite timeout and + * no file descriptors or empty fd sets and + * zero nfds */ + if (__nfds == 0 && + (!__readfds || fd_set_iszero (__readfds)) && + (!__writefds || fd_set_iszero (__writefds)) && + (!__exceptfds || fd_set_iszero (__exceptfds))) + { + if (__timeout) + { + rv = libc_select (__nfds, + __readfds, __writefds, __exceptfds, __timeout); + if (rv == -1) + rv = -errno; + } + else + { + /* TBD: block indefinitely or return -EINVAL */ + rv = -EINVAL; + } + goto select_done; + } + + /* init once before the polling loop */ + + /* zero vcom and libc fd sets */ + /* + * S select fd set + * V vcom fd set + * L libc fd set + */ +#define _(S,V,L) \ + if ((S)) \ + { \ + FD_ZERO ((V)); \ + FD_ZERO ((L)); \ + } + + + _(__readfds, &vcom_readfds, &libc_readfds); + _(__writefds, &vcom_writefds, &libc_writefds); + _(__exceptfds, &vcom_exceptfds, &libc_exceptfds); +#undef _ + new_nfds = 0; + new_nfd = -1; + + vcom_nfds = 0; + vcom_nfd = -1; + libc_nfds = 0; + libc_nfd = -1; + + vcom_fd_set_split ( + /* src, select sets */ + __nfds, __readfds, __writefds, __exceptfds, + /* dest1, vcom sets */ + __readfds || __writefds || __exceptfds ? + &vcom_nfds : NULL, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, + __readfds || __writefds || __exceptfds ? + &vcom_nfd : NULL, + /* dest2, libc sets */ + __readfds || __writefds || __exceptfds ? + &libc_nfds : NULL, + __readfds ? &libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, + __readfds || __writefds || __exceptfds ? + &libc_nfd : NULL); + + /* + * polling loop + * */ + do + { + new_nfd = -1; + vcom_nfd = -1; + libc_nfd = -1; + + /* + * if both fields of timeval structure are zero, + * vcom_select_impl and libc_select returns immediately. + * useful for polling and ensure fairness among + * file descriptors watched. + */ + + /* for polling */ + tv.tv_sec = 0; + tv.tv_usec = 0; + + /* select on vcom fds */ + if (vcom_nfds) + { + vcom_nfd = vcom_select_impl (vcom_nfds, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, + &tv); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] select vcom: " + "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds); + + if (vcom_nfd < 0) + { + rv = vcom_nfd; + goto select_done; + } + } + /* select on libc fds */ + if (libc_nfds) + { + libc_nfd = libc_select (libc_nfds, + __readfds ? &libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, &tv); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] select libc: " + "'%04d'='%04d'\n", pid, libc_nfd, libc_nfds); + + if (libc_nfd < 0) + { + /* tv becomes undefined */ + libc_nfd = errno; + rv = libc_nfd; + goto select_done; + } + } + + /* check if any file descriptors changed status */ + if ((vcom_nfds && vcom_nfd > 0) || (libc_nfds && libc_nfd > 0)) + { + /* zero the sets before merge and exit */ + + /* + * F fd set + */ +#define _(F) \ + if ((F)) \ + { \ + FD_ZERO ((F)); \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + new_nfds = 0; + new_nfd = -1; + + /* + * on exit, sets are modified in place to indicate which + * file descriptors actually changed status + * */ + vcom_fd_set_merge ( + /* dest, select sets */ + __readfds || __writefds || __exceptfds ? + &new_nfds : NULL, + __readfds, + __writefds, + __exceptfds, + __readfds || __writefds || __exceptfds ? + &new_nfd : NULL, + /* src1, vcom sets */ + vcom_nfds, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, vcom_nfd, + /* src2, libc sets */ + libc_nfds, + __readfds ? &libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, libc_nfd); + /* + * return the number of file descriptors contained in the + * three returned sets + * */ + rv = 0; + /* + * for documentation + * + * if(vcom_nfd > 0) + * rv += vcom_nfd; + * if(libc_nfd > 0) + * rv += libc_nfd; + */ + + rv = new_nfd == -1 ? 0 : new_nfd; + goto select_done; + } + + rv = clock_gettime (CLOCK_MONOTONIC, &now); + if (rv == -1) + { + rv = -errno; + goto select_done; + } + } + while (no_timeout || timespec_compare (&now, &end_time) < 0); + + /* timeout expired before anything interesting happened */ + timedout = 1; + rv = 0; + +select_done: + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] vselect1: " "'%04d'='%04d'\n", pid, rv, __nfds); + /* + * modify timeout parameter to reflect the amount of time not slept + * */ + if (__timeout) + { + if (vcom_timerisvalid (__timeout)) + { + /* timeout expired */ + if (timedout) + { + timerclear (__timeout); + } + else if (!first_clock_gettime_failed) + { + rv2 = clock_gettime (CLOCK_MONOTONIC, &now); + if (rv2 == -1) + { + rv = -errno; + } + else + { + struct timespec ts_delta; + ts_delta = timespec_sub (end_time, now); + VCOM_TIMESPEC_TO_TIMEVAL (__timeout, &ts_delta); + } + } + } + } + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] vselect2: " "'%04d',='%04d'\n", pid, rv, __nfds); + + return rv; +} + +int +vcom_select_internal (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + int rv; + int new_nfds = 0; + int nfd = 0; + pid_t pid = getpid (); + + fd_set saved_readfds; + fd_set saved_writefds; + fd_set saved_exceptfds; + + /* validate __nfds */ + if (__nfds < 0) + { + errno = EINVAL; + return -1; + } + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (__timeout->tv_sec < 0 || __timeout->tv_usec < 0) + { + errno = EINVAL; + return -1; + } + + /* validate tv_usec */ + /* TBD: */ + } + + /* init saved_x fds */ + if (__readfds) + { + saved_readfds = *__readfds; + /* + memcpy (&saved_readfds, __readfds, sizeof (*__readfds)); + */ + } + else + { + FD_ZERO (&saved_readfds); + } + + if (__writefds) + { + saved_writefds = *__writefds; + /* + memcpy (&saved_writefds, __writefds, sizeof (*__writefds)); + */ + + } + else + { + FD_ZERO (&saved_writefds); + } + + if (__exceptfds) + { + saved_exceptfds = *__exceptfds; + /* + memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds)); + */ + + } + else + { + FD_ZERO (&saved_exceptfds); + } + + /* clear vcom fds */ + nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds); + + /* set to an invalid value */ + rv = -2; + /* have kernel fds */ + if (new_nfds) + rv = libc_select (new_nfds, __readfds, + __writefds, __exceptfds, __timeout); + + if (new_nfds && rv == -1) + { + /* on error, the file descriptor sets are unmodified */ + if (__readfds) + *__readfds = saved_readfds; + if (__writefds) + *__writefds = saved_writefds; + if (__exceptfds) + *__exceptfds = saved_exceptfds; + return rv; + } + else if ((new_nfds && rv != -1) || (rv == -2)) + { + /* restore vcom fds */ + nfd = vcom_fd_set (__nfds, + &new_nfds, + __readfds, + __writefds, + __exceptfds, + &saved_readfds, &saved_writefds, &saved_exceptfds); + rv = nfd; + } + + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] select: " "'%04d'='%04d'\n", pid, rv, __nfds); + return rv; +} + +int +select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, struct timeval *__restrict __timeout) +{ + int rv = 0; + pid_t pid = getpid (); + + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] select1: " "'%04d'='%04d'\n", pid, rv, __nfds); + rv = vcom_select (__nfds, __readfds, __writefds, __exceptfds, __timeout); + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] select2: " "'%04d'='%04d'\n", pid, rv, __nfds); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +#ifdef __USE_XOPEN2K +/* + * Same as above only that the TIMEOUT value is given with higher + * resolution and a sigmask which is been set temporarily. This + * version should be used. + * + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + int fd; + int vcom_nfds = 0; + + for (fd = 0; fd < __nfds; fd++) + { + if (__readfds && FD_ISSET (fd, __readfds)) + { + if (is_vcom_socket_fd (fd)) + { + vcom_nfds++; + } + } + + if (__writefds && FD_ISSET (fd, __writefds)) + { + if (is_vcom_socket_fd (fd)) + { + vcom_nfds++; + } + } + if (__exceptfds && FD_ISSET (fd, __exceptfds)) + { + if (is_vcom_socket_fd (fd)) + { + FD_CLR (fd, __exceptfds); + } + } + } + return vcom_nfds; +} + +int +pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + int rv; + int new_nfds = 0; + int nfd = 0; + pid_t pid = getpid (); + + fd_set saved_readfds; + fd_set saved_writefds; + fd_set saved_exceptfds; + + /* validate __nfds */ + if (__nfds < 0) + { + errno = EINVAL; + return -1; + } + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (__timeout->tv_sec < 0 || __timeout->tv_nsec < 0) + { + errno = EINVAL; + return -1; + } + + /* validate tv_usec */ + /* TBD: */ + } + + /* init saved fds */ + if (__readfds) + { + saved_readfds = *__readfds; + /* + memcpy (&saved_readfds, __readfds, sizeof (*__readfds)); + */ + } + else + { + FD_ZERO (&saved_readfds); + } + + if (__writefds) + { + saved_writefds = *__writefds; + /* + memcpy (&saved_writefds, __writefds, sizeof (*__writefds)); + */ + + } + else + { + FD_ZERO (&saved_writefds); + } + + if (__exceptfds) + { + saved_exceptfds = *__exceptfds; + /* + memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds)); + */ + + } + else + { + FD_ZERO (&saved_exceptfds); + } + + /* clear vcom fds */ + nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds); + + /* set to an invalid value */ + rv = -2; + if (new_nfds) + rv = libc_pselect (new_nfds, + __readfds, + __writefds, __exceptfds, __timeout, __sigmask); + + if (new_nfds && rv == -1) + { + /* on error, the file descriptor sets are unmodified */ + if (__readfds) + *__readfds = saved_readfds; + if (__writefds) + *__writefds = saved_writefds; + if (__exceptfds) + *__exceptfds = saved_exceptfds; + return rv; + } + else if ((new_nfds && rv != -1) || (rv == -2)) + { + /* restore vcom fds */ + nfd = vcom_fd_set (__nfds, + &new_nfds, + __readfds, + __writefds, + __exceptfds, + &saved_readfds, &saved_writefds, &saved_exceptfds); + rv = nfd; + } + + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] pselect: " "'%04d'='%04d'\n", pid, rv, __nfds); + return rv; +} +#endif + +/* + * + * Socket specific glibc api + * + */ + +/* Create a new socket of type TYPE in domain DOMAIN, using + * protocol PROTOCOL. If PROTOCOL is zero, one is chosen + * automatically. Returns a file descriptor for the new socket, + * or -1 for errors. + * RETURN: a valid file descriptor for the new socket, + * or -1 for errors. + * */ + +int +vcom_socket (int __domain, int __type, int __protocol) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_socket (__domain, __type, __protocol); +} + +int +socket (int __domain, int __type, int __protocol) +{ + int rv; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + /* handle domains implemented by vpp */ + switch (__domain) + { + case AF_INET: + case AF_INET6: + /* handle types implemented by vpp */ + switch (__type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_DGRAM: + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_socket (__domain, __type, __protocol); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] socket: " + "'%04d'= D='%04d', T='%04d', P='%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __domain, __type, __protocol); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + +CALL_GLIBC_SOCKET_API: + return libc_socket (__domain, __type, __protocol); +} + +/* + * Create two new sockets, of type TYPE in domain DOMAIN and using + * protocol PROTOCOL, which are connected to each other, and put file + * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, + * one will be chosen automatically. + * Returns 0 on success, -1 for errors. + * */ +int +vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2]) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_socketpair (__domain, __type, __protocol, __fds); +} + +int +socketpair (int __domain, int __type, int __protocol, int __fds[2]) +{ + int rv; + pid_t pid = getpid (); + + /* handle domains implemented by vpp */ + switch (__domain) + { + case AF_INET: + case AF_INET6: + /* handle types implemented by vpp */ + switch (__type) + { + case SOCK_STREAM: + case SOCK_DGRAM: + rv = vcom_socketpair (__domain, __type, __protocol, __fds); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] socketpair: " + "'%04d'= D='%04d', T='%04d', P='%04d'\n", + pid, rv, __domain, __type, __protocol); + if (rv < 0) + { + errno = -rv; + return -1; + } + return 0; + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + +CALL_GLIBC_SOCKET_API: + return libc_socketpair (__domain, __type, __protocol, __fds); +} + +/* + * Give the socket FD the local address ADDR + * (which is LEN bytes long). + * */ +int +vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + + if (vcom_init () != 0) + { + return -1; + } + + /* validate __len */ + switch (__addr->sa_family) + { + case AF_INET: + if (__len != sizeof (struct sockaddr_in)) + return -EINVAL; + break; + case AF_INET6: + if (__len != sizeof (struct sockaddr_in6)) + return -EINVAL; + break; + + default: + return -1; + break; + } + + /* handle domains implemented by vpp */ + switch (__addr->sa_family) + { + case AF_INET: + case AF_INET6: + rv = vcom_socket_bind (__fd, __addr, __len); + return rv; + break; + + default: + return -1; + break; + } + + return -1; +} + +int +bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + + rv = vcom_bind (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] bind: " + "'%04d'='%04d', '%p', '%04d'\n", + pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_bind (__fd, __addr, __len); +} + +/* + * Put the local address of FD into *ADDR and its length in *LEN. + * */ +int +vcom_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getsockname (__fd, __addr, __len); +} + +int +getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getsockname (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] getsockname: " + "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getsockname (__fd, __addr, __len); +} + +/* + * Open a connection on socket FD to peer at ADDR + * (which LEN bytes long). For connectionless socket types, just set + * the default address to send to and the only address from which to + * accept transmissions. Return 0 on success, -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + + if (vcom_init () != 0) + { + return -1; + } + + /* validate __len */ + switch (__addr->sa_family) + { + case AF_INET: + if (__len != INET_ADDRSTRLEN) + return -1; + break; + case AF_INET6: + if (__len != INET6_ADDRSTRLEN) + return -1; + break; + + default: + return -1; + break; + } + + /* handle domains implemented by vpp */ + switch (__addr->sa_family) + { + case AF_INET: + case AF_INET6: + rv = vcom_socket_connect (__fd, __addr, __len); + break; + + default: + return -1; + break; + } + + return rv; +} + +int +connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_connect (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] connect: " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + + return libc_connect (__fd, __addr, __len); +} + +/* + * Put the address of the peer connected to socket FD into *ADDR + * (which is *LEN bytes long), and its actual length into *LEN. + * */ +int +vcom_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getpeername (__fd, __addr, __len); +} + +int +getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getpeername (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] getpeername: " + "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getpeername (__fd, __addr, __len); +} + +/* + * Send N bytes of BUF to socket FD. Returns the number sent or -1. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_send (int __fd, const void *__buf, size_t __n, int __flags) +{ + + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_send (__fd, (void *) __buf, (int) __n, __flags); +} + +ssize_t +send (int __fd, const void *__buf, size_t __n, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_send (__fd, __buf, __n, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] send: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_send (__fd, __buf, __n, __flags); +} + +/* + * Read N bytes into BUF from socket FD. + * Returns the number read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_recv (int __fd, void *__buf, size_t __n, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recv (__fd, __buf, __n, __flags); +} + +ssize_t +recv (int __fd, void *__buf, size_t __n, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recv (__fd, __buf, __n, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recv: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recv (__fd, __buf, __n, __flags); +} + +/* + * Send N bytes of BUF on socket FD to peer at address ADDR (which is + * ADDR_LEN bytes long). Returns the number sent, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_sendto (int __fd, const void *__buf, size_t __n, int __flags, + __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +ssize_t +sendto (int __fd, const void *__buf, size_t __n, int __flags, + __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendto: " + "'%04d'='%04d', '%p', '%04d', '%04x', " + "'%p', '%04d'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags, + __addr, __addr_len); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +/* + * Read N bytes into BUF through socket FD. + * If ADDR is not NULL, fill in *ADDR_LEN bytes of it with the + * address of the sender, and store the actual size of the address + * in *ADDR_LEN. + * Returns the number of bytes read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +ssize_t +recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvfrom: " + "'%04d'='%04d', '%p', '%04d', '%04x', " + "'%p', '%p'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags, + __addr, __addr_len); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +/* + * Send a message described MESSAGE on socket FD. + * Returns the number of bytes sent, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendmsg (__fd, __message, __flags); +} + +ssize_t +sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendmsg (__fd, __message, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendmsg: " + "'%04d'='%04d', '%p', '%04x'\n", + pid, (int) size, __fd, __message, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendmsg (__fd, __message, __flags); +} + +#ifdef __USE_GNU +/* + * Send a VLEN messages as described by VMESSAGES to socket FD. + * Returns the number of datagrams successfully written + * or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendmmsg (__fd, __message, __vlen, __flags); +} + +int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendmmsg (__fd, __message, __vlen, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendmmsg: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __vmessages, __vlen, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendmmsg (__fd, __message, __vlen, __flags); +} + +#endif + +/* + * Receive a message as described by MESSAGE from socket FD. + * Returns the number of bytes read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvmsg (__fd, __message, __flags); +} + +ssize_t +recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvmsg (__fd, __message, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvmsg: " + "'%04d'='%04d', '%p', '%04x'\n", + pid, (int) size, __fd, __message, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvmsg (__fd, __message, __flags); +} + +#ifdef __USE_GNU +/* + * Receive up to VLEN messages as described by VMESSAGES from socket FD. + * Returns the number of messages received or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvmmsg (__fd, __message, __vlen, __flags, __tmo); +} + +int +recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvmmsg (__fd, __message, __vlen, __flags, __tmo); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvmmsg: " + "'%04d'='%04d', '%p', " + "'%04d', '%04x', '%p'\n", + pid, (int) size, __fd, __vmessages, __vlen, __flags, __tmo); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvmmsg (__fd, __message, __vlen, __flags, __tmo); +} + +#endif + +/* + * Put the current value for socket FD's option OPTNAME + * at protocol level LEVEL into OPTVAL (which is *OPTLEN bytes long), + * and set *OPTLEN to the value's actual length. + * Returns 0 on success, -1 for errors. + * */ +int +vcom_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getsockopt (__fd, __level, __optname, + __optval, __optlen); +} + +int +getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getsockopt (__fd, __level, __optname, __optval, __optlen); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] getsockopt: " + "'%04d'='%04d', '%04d', '%04d', " + "'%p', '%p'\n", + pid, rv, __fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getsockopt (__fd, __level, __optname, __optval, __optlen); +} + +/* + * Set socket FD's option OPTNAME at protocol level LEVEL + * to *OPTVAL (which is OPTLEN bytes long). + * Returns 0 on success, -1 for errors. + * */ +int +vcom_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_setsockopt (__fd, __level, __optname, + __optval, __optlen); +} + +int +setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] setsockopt: " + "'%04d'='%04d', '%04d', '%04d', " + "'%p', '%04d'\n", + pid, rv, __fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_setsockopt (__fd, __level, __optname, __optval, __optlen); +} + +/* + * Prepare to accept connections on socket FD. + * N connection requests will be queued before further + * requests are refused. + * Returns 0 on success, -1 for errors. + * */ +int +vcom_listen (int __fd, int __n) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_listen (__fd, __n); +} + +int +listen (int __fd, int __n) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_listen (__fd, __n); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] listen: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __n); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_listen (__fd, __n); +} + +/* + * Await a connection on socket FD. + * When a connection arrives, open a new socket to communicate + * with it, set *ADDR (which is *ADDR_LEN bytes long) to the address + * of the connecting peer and *ADDR_LEN to the address's actual + * length, and return the new socket's descriptor, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + + if (vcom_init () != 0) + { + return -1; + } + return vcom_socket_accept (__fd, __addr, __addr_len); +} + +int +accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + int rv = -1; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] accept1: " + "'%04d'='%04d', '%p', '%p'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __addr_len); + rv = vcom_accept (__fd, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] accept2: " + "'%04d'='%04d', '%p', '%p'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __addr_len); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + return libc_accept (__fd, __addr, __addr_len); +} + +#ifdef __USE_GNU +/* + * Similar to 'accept' but takes an additional parameter to specify + * flags. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_accept4 (__fd, __addr, __addr_len, __flags); +} + +int +accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_accept4 (__fd, __addr, __addr_len, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] accept4: " + "'%04d'='%04d', '%p', '%p', '%04x'\n", + pid, rv, __fd, __addr, __addr_len, __flags); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + return libc_accept4 (__fd, __addr, __addr_len, __flags); +} + +#endif + +/* + * Shut down all or part of the connection open on socket FD. + * HOW determines what to shut down: + * SHUT_RD = No more receptions; + * SHUT_WR = No more transmissions; + * SHUT_RDWR = No more receptions or transmissions. + * Returns 0 on success, -1 for errors. + * */ +int +vcom_shutdown (int __fd, int __how) +{ + if (vcom_init () != 0) + { + return -1; + } + return vcom_socket_shutdown (__fd, __how); +} + +int +shutdown (int __fd, int __how) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_shutdown (__fd, __how); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] shutdown: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __how); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_shutdown (__fd, __how); +} + +int +vcom_epoll_create (int __size) +{ + + if (vcom_init () != 0) + { + return -1; + } + + if (__size <= 0) + { + return -EINVAL; + } + + /* __size argument is ignored "thereafter" */ + return vcom_epoll_create1(0); +} + +/* + * __size argument is ignored, but must be greater than zero + */ +int +epoll_create (int __size) +{ + int rv = 0; + pid_t pid = getpid (); + + rv = vcom_epoll_create(__size); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_create: " + "'%04d'='%04d'\n", + pid, rv, __size); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +int +vcom_epoll_create1 (int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + if (__flags < 0) + { + return -EINVAL; + } + if (__flags & ~EPOLL_CLOEXEC) + { + return -EINVAL; + } + /* __flags can be either zero or EPOLL_CLOEXEC */ + /* implementation */ + return vcom_socket_epoll_create1(__flags); +} + +/* + * __flags can be either zero or EPOLL_CLOEXEC + * */ +int +epoll_create1 (int __flags) +{ + int rv = 0; + pid_t pid = getpid (); + + rv = vcom_epoll_create1(__flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_create: " + "'%04d'='%08x'\n", + pid, rv, __flags); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +static inline int +ep_op_has_event (int op) +{ + return op != EPOLL_CTL_DEL; +} + +int +vcom_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + if (vcom_init () != 0) + { + return -1; + } + + /* + * the requested operation __op is not supported + * by this interface */ + if (!((__op == EPOLL_CTL_ADD) || + (__op == EPOLL_CTL_MOD) || + (__op == EPOLL_CTL_DEL))) + { + return -EINVAL; + } + + /* op is ADD or MOD but event parameter is NULL */ + if ((ep_op_has_event (__op) && !__event)) + { + return -EFAULT; + } + + /* fd is same as epfd */ + /* do not permit adding an epoll file descriptor inside itself */ + if (__epfd == __fd) + { + return -EINVAL; + } + + /* implementation */ + return vcom_socket_epoll_ctl (__epfd, __op, __fd, + __event); +} + +/* + * implement the controller interface for epoll + * that enables the insertion/removal/change of + * file descriptors inside the interest set. + */ +int +epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_epfd (__epfd)) + { + /* TBD: currently limiting epoll to support only vcom fds */ + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_epoll_ctl (__epfd, __op, __fd, + __event); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_ctl: " + "'%04d'='%04d', '%04d', '%04d'\n", + pid, rv, __epfd, __op, __fd); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + else + { + /* + * TBD: currently epoll does not support kernel fds + * or epoll fds */ + errno = EBADF; + return -1; + } + } + else + { + /* epfd is not an epoll file descriptor */ + errno = EINVAL; + return -1; + } + return 0; +} + +int +vcom_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_epoll_pwait(__epfd, __events, + __maxevents, __timeout, + NULL); +} + +int +epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + int rv; + pid_t pid = getpid (); + + if (__maxevents <=0 || __maxevents > EP_MAX_EVENTS) + { + errno = EINVAL; + return -1; + } + + if (is_vcom_epfd (__epfd)) + { + rv = vcom_epoll_wait (__epfd, __events, + __maxevents, __timeout); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_wait: " + "'%04d'='%04d', '%p', " + "'%04d', '%04d'\n", + pid, + rv, __epfd, __events, + __maxevents, __timeout); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + else + { + errno = EINVAL; + return -1; + } + return 0; +} + + +int +vcom_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss) +{ + if (vcom_init () != 0) + { + return -1; + } + + /* implementation */ + return vcom_socket_epoll_pwait (__epfd, __events, + __maxevents, __timeout, + __ss); +} + +int +epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss) +{ + int rv; + pid_t pid = getpid (); + + if (__maxevents <=0 || __maxevents > EP_MAX_EVENTS) + { + errno = EINVAL; + return -1; + } + + if (is_vcom_epfd (__epfd)) + { + rv = vcom_epoll_pwait (__epfd, __events, + __maxevents, __timeout, + __ss); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_pwait: " + "'%04d'='%04d', '%p', " + "'%04d', '%04d', " + "'%p'\n", + pid, + rv, __epfd, __events, + __maxevents, __timeout, + __ss); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + else + { + errno = EINVAL; + return -1; + } + + return 0; +} + +/* Poll the file descriptors described by the NFDS structures starting at + FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for + an event to occur; if TIMEOUT is -1, block until an event occurs. + Returns the number of file descriptors with events, zero if timed out, + or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +int +vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + if (vcom_init () != 0) + { + return -1; + } + + return -EOPNOTSUPP; +} + +int +poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + int rv = 0; + + errno = EOPNOTSUPP; + rv = -1; + return rv; +} + +#ifdef __USE_GNU +/* Like poll, but before waiting the threads signal mask is replaced + with that specified in the fourth parameter. For better usability, + the timeout value is specified using a TIMESPEC object. + + This function is a cancellation point and therefore not marked with + __THROW. */ +int vcom_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, + const __sigset_t *__ss) +{ + if (vcom_init () != 0) + { + return -1; + } + + return -EOPNOTSUPP; +} + +int ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, + const __sigset_t *__ss) +{ + int rv = 0; + + errno = EOPNOTSUPP; + rv = -1; + return rv; +} +#endif + +void CONSTRUCTOR_ATTRIBUTE vcom_constructor (void); + +void DESTRUCTOR_ATTRIBUTE vcom_destructor (void); + +void +vcom_constructor (void) +{ + pid_t pid = getpid (); + + swrap_constructor (); + if (vcom_init () != 0) + { + printf ("\n[%d] vcom_constructor...failed!\n", pid); + } + else + { + printf ("\n[%d] vcom_constructor...done!\n", pid); + } +} + +/* + * This function is called when the library is unloaded + */ +void +vcom_destructor (void) +{ + pid_t pid = getpid (); + + vcom_destroy (); + swrap_destructor (); + printf ("\n[%d] vcom_destructor...done!\n", pid); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vcl-ldpreload/src/libvcl-ldpreload/vcom.h b/vcl-ldpreload/src/libvcl-ldpreload/vcom.h new file mode 100644 index 0000000..0cb68a3 --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload/vcom.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vcom_h +#define included_vcom_h + +/* VCOM DEBUG flag. Setting this to 1 or 0 turns off + ASSERT & other debugging code. */ +#ifndef VCOM_DEBUG +#define VCOM_DEBUG 0 +#endif + +#include <libvcl-ldpreload/vcom_glibc_socket.h> + +#define MAX_VCOM_APP_NAME 256 + +/* Returns 0 on success or -1 on error. */ +extern int vcom_set_app_name (char *__app_name); + +/* + * + * File descriptor based APIs + * + */ + +/* + * vpp implementation of glibc APIs from <unistd.h> + */ +extern int vcom_close (int __fd); + +extern ssize_t __wur vcom_read (int __fd, void *__buf, size_t __nbytes); + +extern ssize_t __wur vcom_write (int __fd, const void *__buf, size_t __n); + +extern ssize_t __wur vcom_readv (int __fd, const struct iovec * __iov, int __iovcnt); + +extern ssize_t __wur vcom_writev (int __fd, const struct iovec * __iov, int __iovcnt); + +/* + * vpp implementation of glibc APIs from <fcntl.h> + */ +extern int vcom_fcntl (int __fd, int __cmd, ...); + +/* + * vpp implementation of glibc APIs from <sys/select.h> + */ +extern int +vcom_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +extern int +vcom_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + +/* + * vpp implementation of glibc APIs from <sys/socket.h> + */ +extern int __THROW vcom_socket (int __domain, int __type, int __protocol); + +/* On Linux, the only supported domain for this call is AF_UNIX +* (or synonymously, AF_LOCAL). Most implementations have the +* same restriction. +* vpp does not implement AF_UNIX domain in this release. +* */ +extern int __THROW +vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2]); + +extern int __THROW +vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +extern int __THROW +vcom_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +extern int +vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +extern int __THROW +vcom_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +extern ssize_t +vcom_send (int __fd, const void *__buf, size_t __n, int __flags); + +extern ssize_t vcom_recv (int __fd, void *__buf, size_t __n, int __flags); + +extern ssize_t +vcom_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len); + +extern ssize_t +vcom_recvfrom (int __fd, void *__restrict __buf, + size_t __n, int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +extern ssize_t +vcom_sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +extern int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +extern ssize_t vcom_recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +extern int +vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo); +#endif + +extern int __THROW +vcom_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen); + +extern int __THROW +vcom_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen); + +extern int __THROW vcom_listen (int __fd, int __n); + +extern int +vcom_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +/* + * Similar to 'accept' but takes an additional parameter to specify + * flags. + * */ +/* TBD: implemented later */ +extern int +vcom_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +extern int __THROW vcom_shutdown (int __fd, int __how); + +extern int __THROW +vcom_epoll_create (int __size); + +extern int __THROW +vcom_epoll_create1 (int __flags); + +extern int __THROW +vcom_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event); + +extern int +vcom_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +extern int +vcom_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss); + +extern int +vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +#ifdef __USE_GNU +extern int +vcom_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, + const __sigset_t *__ss); +#endif + + +#endif /* included_vcom_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vcl-ldpreload/src/libvcl-ldpreload/vcom_glibc_socket.h b/vcl-ldpreload/src/libvcl-ldpreload/vcom_glibc_socket.h new file mode 100644 index 0000000..ecad24a --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload/vcom_glibc_socket.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vcom_glibc_socket_h +#define included_vcom_glibc_socket_h + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <arpa/inet.h> +#include <fcntl.h> + +#include <sys/epoll.h> +#include <poll.h> + +/* + * + * Generic glibc fd api + * + */ +/* + * glibc APIs from <unistd.h> + */ + +/* Close the file descriptor FD. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int close (int __fd); + +/* Read NBYTES into BUF from FD. Return the + number read, -1 for errors or 0 for EOF. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t __wur read (int __fd, void *__buf, size_t __nbytes); + +/* Write N bytes of BUF to FD. Return the number written, or -1. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t __wur write (int __fd, const void *__buf, size_t __n); + + +/* + * glibc APIs from <fcntl.h> + */ + +/* Do the file control operation described by CMD on FD. + The remaining arguments are interpreted depending on CMD. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int fcntl (int __fd, int __cmd, ...); + + +/* + * glibc APIs from <sys/select.h> + */ + +/* Check the first NFDS descriptors each in READFDS (if not NULL) for read + readiness, in WRITEFDS (if not NULL) for write readiness, and in EXCEPTFDS + (if not NULL) for exceptional conditions. If TIMEOUT is not NULL, time out + after waiting the interval specified therein. Returns the number of ready + descriptors, or -1 for errors. + + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +/* Same as above only that the TIMEOUT value is given with higher + resolution and a sigmask which is been set temporarily. This version + should be used. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + + +/* + * + * Socket specific glibc api + * + */ + +/* + * glibc APIs from <sys/socket.h> + */ + +/* Create a new socket of type TYPE in domain DOMAIN, using + protocol PROTOCOL. If PROTOCOL is zero, one is chosen automatically. + Returns a file descriptor for the new socket, or -1 for errors. */ +extern int __THROW socket (int __domain, int __type, int __protocol); + +/* Create two new sockets, of type TYPE in domain DOMAIN and using + protocol PROTOCOL, which are connected to each other, and put file + descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, + one will be chosen automatically. Returns 0 on success, -1 for errors. */ +extern int __THROW +socketpair (int __domain, int __type, int __protocol, int __fds[2]); + +/* Give the socket FD the local address ADDR (which is LEN bytes long). */ +extern int __THROW +bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +/* Put the local address of FD into *ADDR and its length in *LEN. */ +extern int __THROW +getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len); + +/* Open a connection on socket FD to peer at ADDR (which LEN bytes long). + For connectionless socket types, just set the default address to send to + and the only address from which to accept transmissions. + Return 0 on success, -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +/* Put the address of the peer connected to socket FD into *ADDR + (which is *LEN bytes long), and its actual length into *LEN. */ +extern int __THROW +getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len); + +/* Send N bytes of BUF to socket FD. Returns the number sent or -1. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t send (int __fd, const void *__buf, size_t __n, int __flags); + +/* Read N bytes into BUF from socket FD. + Returns the number read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t recv (int __fd, void *__buf, size_t __n, int __flags); + +/* Send N bytes of BUF on socket FD to peer at address ADDR (which is + ADDR_LEN bytes long). Returns the number sent, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t +sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len); + +/* Read N bytes into BUF through socket FD. + If ADDR is not NULL, fill in *ADDR_LEN bytes of it with tha address of + the sender, and store the actual size of the address in *ADDR_LEN. + Returns the number of bytes read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t +recvfrom (int __fd, void *__restrict __buf, + size_t __n, int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +/* Send a message described MESSAGE on socket FD. + Returns the number of bytes sent, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t +sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +/* Send a VLEN messages as described by VMESSAGES to socket FD. + Returns the number of datagrams successfully written or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +/* Receive a message as described by MESSAGE from socket FD. + Returns the number of bytes read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +/* Receive up to VLEN messages as described by VMESSAGES from socket FD. + Returns the number of messages received or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo); +#endif + + +/* Put the current value for socket FD's option OPTNAME at protocol level LEVEL + into OPTVAL (which is *OPTLEN bytes long), and set *OPTLEN to the value's + actual length. Returns 0 on success, -1 for errors. */ +extern int __THROW +getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen); + +/* Set socket FD's option OPTNAME at protocol level LEVEL + to *OPTVAL (which is OPTLEN bytes long). + Returns 0 on success, -1 for errors. */ +extern int __THROW +setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen); + +/* Prepare to accept connections on socket FD. + N connection requests will be queued before further requests are refused. + Returns 0 on success, -1 for errors. */ +extern int __THROW listen (int __fd, int __n); + +/* Await a connection on socket FD. + When a connection arrives, open a new socket to communicate with it, + set *ADDR (which is *ADDR_LEN bytes long) to the address of the connecting + peer and *ADDR_LEN to the address's actual length, and return the + new socket's descriptor, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +/* Similar to 'accept' but takes an additional parameter to specify flags. + + This function is a cancellation point and therefore not marked with + __THROW. */ + /* TBD: implemented later */ +extern int +accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +/* Shut down all or part of the connection open on socket FD. + HOW determines what to shut down: + SHUT_RD = No more receptions; + SHUT_WR = No more transmissions; + SHUT_RDWR = No more receptions or transmissions. + Returns 0 on success, -1 for errors. */ +extern int __THROW shutdown (int __fd, int __how); + + +/* + * glibc APIs from <sys/epoll.h> + */ + +/* Creates an epoll instance. Returns an fd for the new instance. + The "size" parameter is a hint specifying the number of file + descriptors to be associated with the new instance. The fd + returned by epoll_create() should be closed with close(). */ +extern int __THROW +epoll_create (int __size); + +/* Same as epoll_create but with an FLAGS parameter. The unused SIZE + parameter has been dropped. */ +extern int __THROW +epoll_create1 (int __flags); + +/* Manipulate an epoll instance "epfd". Returns 0 in case of success, + -1 in case of error ( the "errno" variable will contain the + specific error code ) The "op" parameter is one of the EPOLL_CTL_* + constants defined above. The "fd" parameter is the target of the + operation. The "event" parameter describes which events the caller + is interested in and any associated user data. */ +extern int __THROW +epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event); + +#define EP_INT_MAX ((int)(~0U>>1)) +#define EP_MAX_EVENTS (EP_INT_MAX / sizeof(struct epoll_event)) + +/* Wait for events on an epoll instance "epfd". Returns the number of + triggered events returned in "events" buffer. Or -1 in case of + error with the "errno" variable set to the specific error code. The + "events" parameter is a buffer that will contain triggered + events. The "maxevents" is the maximum number of events to be + returned ( usually size of "events" ). The "timeout" parameter + specifies the maximum wait time in milliseconds (-1 == infinite). + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +/* Same as epoll_wait, but the thread's signal mask is temporarily + and atomically replaced with the one provided as parameter. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss); + +/* Poll the file descriptors described by the NFDS structures starting at + FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for + an event to occur; if TIMEOUT is -1, block until an event occurs. + Returns the number of file descriptors with events, zero if timed out, + or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +#ifdef __USE_GNU +/* Like poll, but before waiting the threads signal mask is replaced + with that specified in the fourth parameter. For better usability, + the timeout value is specified using a TIMESPEC object. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, + const __sigset_t *__ss); +#endif + + +#endif /* included_vcom_glibc_socket_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.c b/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.c new file mode 100644 index 0000000..89c28fe --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.c @@ -0,0 +1,2950 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <unistd.h> +#include <stdio.h> +#include <sys/uio.h> +#include <limits.h> +#define __need_IOV_MAX +#include <bits/stdio_lim.h> + +#include <vppinfra/types.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> + +#include <libvcl-ldpreload/vcom_socket.h> +#include <libvcl-ldpreload/vcom_socket_wrapper.h> +#include <libvcl-ldpreload/vcom.h> + +#include <uri/vppcom.h> + + +/* + * VCOM_SOCKET Private definitions and functions. + */ + +typedef struct vcom_socket_main_t_ +{ + u8 init; + + /* vcom_socket pool */ + vcom_socket_t *vsockets; + + /* Hash table for socketidx to fd mapping */ + uword *sockidx_by_fd; + + /* vcom_epoll pool */ + vcom_epoll_t *vepolls; + + /* Hash table for epollidx to epfd mapping */ + uword *epollidx_by_epfd; + + + /* common epitem poll for all epfd */ + /* TBD: epitem poll per epfd */ + /* vcom_epitem pool */ + vcom_epitem_t *vepitems; + + /* Hash table for epitemidx to epfdfd mapping */ + uword *epitemidx_by_epfdfd; + + /* Hash table - key:epfd, value:vec of epitemidx */ + uword *epitemidxs_by_epfd; + /* Hash table - key:fd, value:vec of epitemidx */ + uword *epitemidxs_by_fd; + +} vcom_socket_main_t; + +vcom_socket_main_t vcom_socket_main; + + +static int +vcom_socket_open_socket (int domain, int type, int protocol) +{ + int rv = -1; + + /* handle domains implemented by vpp */ + switch (domain) + { + case AF_INET: + case AF_INET6: + /* get socket type and + * handle the socket types supported by vpp */ + switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_DGRAM: + /* the type argument serves a second purpose, + * in addition to specifying a socket type, + * it may include the bitwise OR of any of + * SOCK_NONBLOCK and SOCK_CLOEXEC, to modify + * the behavior of socket. */ + rv = libc_socket (domain, type, protocol); + if (rv == -1) + rv = -errno; + break; + + default: + break; + } + + break; + + default: + break; + } + + return rv; +} + +static int +vcom_socket_open_epoll (int flags) +{ + int rv = -1; + + if (flags < 0) + { + return -EINVAL; + } + if (flags && (flags & ~EPOLL_CLOEXEC)) + { + return -EINVAL; + } + + /* flags can be either zero or EPOLL_CLOEXEC */ + rv = libc_epoll_create1 (flags); + if (rv == -1) + rv = -errno; + + return rv; +} + +static int +vcom_socket_close_socket (int fd) +{ + int rv; + + rv = libc_close (fd); + if (rv == -1) + rv = -errno; + + return rv; +} + +static int +vcom_socket_close_epoll (int epfd) +{ + int rv; + + rv = libc_close (epfd); + if (rv == -1) + rv = -errno; + + return rv; +} + +/* + * Public API functions + */ + + +int +vcom_socket_is_vcom_fd (int fd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + return 1; + } + return 0; +} + +int +vcom_socket_is_vcom_epfd (int epfd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + return 1; + } + return 0; +} + +static inline int +vcom_socket_get_sid (int fd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + return vsock->sid; + } + return INVALID_SESSION_ID; +} + +static inline int +vcom_socket_get_vep_idx (int epfd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + return vepoll->vep_idx; + } + return INVALID_VEP_IDX; +} + +static inline int +vcom_socket_get_sid_and_vsock (int fd, vcom_socket_t **vsockp) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + *vsockp = vsock; + return vsock->sid; + } + } + return INVALID_SESSION_ID; +} + +static inline int +vcom_socket_get_vep_idx_and_vepoll (int epfd, vcom_epoll_t **vepollp) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + { + *vepollp = vepoll; + return vepoll->vep_idx; + } + } + return INVALID_VEP_IDX; +} + + +static int +vcom_socket_close_vepoll (int epfd) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + if (!p) + return -EBADF; + + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (!vepoll) + return -EBADF; + + if (vepoll->type != EPOLL_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (vepoll->count) + { + if (!vepoll->close) + { + vepoll->close = 1; + return 0; + } + else + { + return -EBADF; + } + } + + /* count is zero */ + rv = vppcom_session_close (vepoll->vep_idx); + rv = vcom_socket_close_epoll (vepoll->epfd); + + vepoll_init (vepoll); + hash_unset (vsm->epollidx_by_epfd, epfd); + pool_put (vsm->vepolls, vepoll); + + return rv; +} + +static int +vcom_socket_close_vsock (int fd) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vcom_epitem_t *vepitem; + + i32 *vepitemidxs = 0; + i32 *vepitemidxs_var = 0; + + p = hash_get (vsm->sockidx_by_fd, fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + rv = vppcom_session_close (vsock->sid); + rv = vcom_socket_close_socket (vsock->fd); + + vsocket_init (vsock); + hash_unset (vsm->sockidx_by_fd, fd); + pool_put (vsm->vsockets, vsock); + + /* + * NOTE: + * Before calling close(), user should remove + * this fd from the epoll-set of all epoll instances, + * otherwise resource(epitems) leaks ensues. + */ + + /* + * 00. close all epoll instances that are marked as "close" + * of which this fd is the "last" remaining member. + * 01. epitems associated with this fd are intentionally + * not removed, see NOTE: above. + * */ + + /* does this fd participate in epoll */ + p = hash_get (vsm->epitemidxs_by_fd, fd); + if (p) + { + vepitemidxs = *(i32 **)p; + vec_foreach (vepitemidxs_var, vepitemidxs) + { + vepitem = pool_elt_at_index (vsm->vepitems, vepitemidxs_var[0]); + if (vepitem && vepitem->fd == fd && + vepitem->type == FD_TYPE_VCOM_SOCKET) + { + i32 vep_idx; + vcom_epoll_t *vepoll; + if ((vep_idx = + vcom_socket_get_vep_idx_and_vepoll (vepitem->epfd, &vepoll)) != INVALID_VEP_IDX) + { + if (vepoll->close) + { + if (vepoll->count == 1) + { + /* + * force count to zero and + * close this epoll instance + * */ + vepoll->count = 0; + vcom_socket_close_vepoll (vepoll->epfd); + } + else + { + vepoll->count -= 1; + } + } + } + } + + } + } + + return rv; +} + +int +vcom_socket_close (int __fd) +{ + int rv; + + if (vcom_socket_is_vcom_fd (__fd)) + { + rv = vcom_socket_close_vsock (__fd); + } + else if (vcom_socket_is_vcom_epfd (__fd)) + { + rv = vcom_socket_close_vepoll (__fd); + } + else + { + rv = -EBADF; + } + + return rv; +} + +ssize_t +vcom_socket_read (int __fd, void *__buf, size_t __nbytes) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf || __nbytes < 0) + { + return -EINVAL; + } + + rv = vcom_fcntl (__fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + do + { + rv = vppcom_session_read (vsock->sid, __buf, __nbytes); + } + while (rv == -EAGAIN || rv == -EWOULDBLOCK); + return rv; + } + /* The file descriptor refers to a socket and has been + * marked nonblocking(O_NONBLOCK) and the read would + * block. + * */ + /* is non blocking */ + rv = vppcom_session_read (vsock->sid, __buf, __nbytes); + return rv; +} + +ssize_t +vcom_socket_readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + int rv; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + ssize_t total = 0, len = 0; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX) + return -EINVAL; + + /* Sanity check */ + for (int i = 0; i < __iovcnt; ++i) + { + if (SSIZE_MAX - len < __iov[i].iov_len) + return -EINVAL; + len += __iov[i].iov_len; + } + + rv = vcom_fcntl (__fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + do + { + for (int i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_read (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + break; + else + { + total += rv; + if (rv < __iov[i].iov_len) + /* Read less than buffer provided, no point to continue */ + break; + } + } + } + while ((rv == -EAGAIN || rv == -EWOULDBLOCK) && total == 0); + return total; + } + + /* is non blocking */ + for (int i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_read (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + { + if (total > 0) + break; + else + { + errno = rv; + return rv; + } + } + else + { + total += rv; + if (rv < __iov[i].iov_len) + /* Read less than buffer provided, no point to continue */ + break; + } + } + return total; +} + +ssize_t +vcom_socket_write (int __fd, const void *__buf, size_t __n) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf || __n < 0) + { + return -EINVAL; + } + + rv = vppcom_session_write (vsock->sid, (void *) __buf, __n); + return rv; +} + +ssize_t +vcom_socket_writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + int rv = -1; + ssize_t total = 0; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX) + return -EINVAL; + + for (int i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_write (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + { + if (total > 0) + break; + else + return rv; + } + else + total += rv; + } + return total; +} + +/* + * RETURN: 0 - invalid cmd + * 1 - cmd not handled by vcom and vppcom + * 2 - cmd handled by vcom socket resource + * 3 - cmd handled by vppcom + * */ +/* TBD: incomplete list of cmd */ +static int +vcom_socket_check_fcntl_cmd (int __cmd) +{ + switch (__cmd) + { + /*cmd not handled by vcom and vppcom */ + /* Fallthrough */ + case F_DUPFD: + case F_DUPFD_CLOEXEC: + return 1; + + /* cmd handled by vcom socket resource */ + /* Fallthrough */ + case F_GETFD: + case F_SETFD: + case F_GETFL: + case F_SETFL: + case F_GETLK: + case F_SETLK: + case F_SETLKW: + case F_GETOWN: + case F_SETOWN: + return 2; + +#if 0 + /* cmd handled by vppcom */ + case F_XXXXX: + return 3; +#endif + /* invalid cmd */ + default: + return 0; + } + return 0; +} + +/* TBD: move it to vppcom */ +static int +vppcom_session_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + int rv; + + rv = -EINVAL; + + return rv; +} + +int +vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + int rv = -EBADF; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + switch (vcom_socket_check_fcntl_cmd (__cmd)) + { + /* invalid cmd */ + case 0: + rv = -EBADF; + break; + /*cmd not handled by vcom and vppcom */ + case 1: + rv = -EBADF; + break; + /* cmd handled by vcom socket resource */ + case 2: + rv = libc_vfcntl (vsock->fd, __cmd, __ap); + break; + /* cmd handled by vppcom */ + case 3: + rv = vppcom_session_fcntl_va (vsock->sid, __cmd, __ap); + break; + + default: + rv = -EINVAL; + break; + } + + return rv; +} + +static inline int +vcom_socket_fds_2_sid_fds ( + /* dest */ + int *vcom_nsid_fds, + fd_set * __restrict vcom_rd_sid_fds, + fd_set * __restrict vcom_wr_sid_fds, + fd_set * __restrict vcom_ex_sid_fds, + /* src */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds) +{ + int rv = 0; + int fd; + int sid; + /* invalid max_sid is -1 */ + int max_sid = -1; + int nsid = 0; + + /* + * set sid in sid sets corresponding to fd's in fd sets + * compute nsid and vcom_nsid_fds from sid sets + */ + + for (fd = 0; fd < vcom_nfds; fd++) + { + /* + * F fd set, src + * S sid set, dest + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (F))) \ + { \ + sid = vcom_socket_get_sid (fd); \ + if (sid != INVALID_SESSION_ID) \ + { \ + FD_SET (sid, (S)); \ + if (sid > max_sid) \ + { \ + max_sid = sid; \ + } \ + ++nsid; \ + } \ + else \ + { \ + rv = -EBADFD; \ + goto done; \ + } \ + } + + + _(vcom_rd_sid_fds, vcom_readfds); + _(vcom_wr_sid_fds, vcom_writefds); + _(vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + } + + *vcom_nsid_fds = max_sid != -1 ? max_sid + 1 : 0; + rv = nsid; + +done: + return rv; +} + +/* + * PRE: 00. sid sets were derived from fd sets + * 01. sid sets were updated with sids that actually changed + * status + * 02. fd sets still has watched fds + * + * This function will modify in place fd sets to indicate which fd's + * actually changed status(inferred from sid sets) + */ +static inline int +vcom_socket_sid_fds_2_fds ( + /* dest */ + int *new_vcom_nfds, + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + /* src */ + int vcom_nsid_fds, + fd_set * __restrict vcom_rd_sid_fds, + fd_set * __restrict vcom_wr_sid_fds, + fd_set * __restrict vcom_ex_sid_fds) +{ + int rv = 0; + int fd; + int sid; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + + /* + * modify in place fd sets to indicate which fd's + * actually changed status(inferred from sid sets) + */ + for (fd = 0; fd < vcom_nfds; fd++) + { + /* + * F fd set, dest + * S sid set, src + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (F))) \ + { \ + sid = vcom_socket_get_sid (fd); \ + if (sid != INVALID_SESSION_ID) \ + { \ + if (!FD_ISSET (sid, (S))) \ + { \ + FD_CLR(fd, (F)); \ + } \ + } \ + else \ + { \ + rv = -EBADFD; \ + goto done; \ + } \ + } + + + _(vcom_rd_sid_fds, vcom_readfds); + _(vcom_wr_sid_fds, vcom_writefds); + _(vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + } + + /* + * compute nfd and new_vcom_nfds from fd sets + */ + for (fd = 0; fd < vcom_nfds; fd++) + { + +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(vcom_readfds); + _(vcom_writefds); + _(vcom_exceptfds); +#undef _ + + } + + *new_vcom_nfds = max_fd != -1 ? max_fd + 1 : 0; + rv = nfd; + +done: + return rv; +} + +/* + * PRE: + * vom_socket_select is always called with + * timeout->tv_sec and timeout->tv_usec set to zero. + * hence vppcom_select return immediately. + */ +/* + * TBD: do{body;} while(timeout conditional); timeout loop + */ +int +vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout) +{ + int rv = -EBADF; + pid_t pid = getpid (); + + int new_vcom_nfds = 0; + int new_vcom_nfd = 0; + + /* vcom sid fds */ + fd_set vcom_rd_sid_fds; + fd_set vcom_wr_sid_fds; + fd_set vcom_ex_sid_fds; + unsigned long vcom_nsid_fds = 0; + int vcom_nsid = 0; + + /* in seconds eg. 3.123456789 seconds */ + double time_to_wait = (double) 0; + + /* validate inputs */ + if (vcom_nfds < 0) + { + return -EINVAL; + } + + /* convert timeval timeout to double time_to_wait */ + if (timeout) + { + if (timeout->tv_sec == 0 && timeout->tv_usec == 0) + { + /* polling: vppcom_select returns immediately */ + time_to_wait = (double) 0; + } + else + { + /*TBD: use timeval api */ + time_to_wait = (double) timeout->tv_sec + + (double) timeout->tv_usec / (double) 1000000 + + (double) (timeout->tv_usec % 1000000) / (double) 1000000; + } + } + else + { + /* + * no timeout: vppcom_select can block indefinitely + * waiting for a file descriptor to become ready + * */ + /* set to a phantom value */ + time_to_wait = ~0; + } + + /* zero the sid_sets */ + /* + * F fd set + * S sid set + */ +#define _(S,F) \ + if ((F)) \ + { \ + FD_ZERO ((S)); \ + } + + + _(&vcom_rd_sid_fds, vcom_readfds); + _(&vcom_wr_sid_fds, vcom_writefds); + _(&vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + + /* populate read, write and except sid_sets */ + vcom_nsid = vcom_socket_fds_2_sid_fds ( + /* dest */ + vcom_readfds || vcom_writefds + || vcom_exceptfds ? (int *) + &vcom_nsid_fds : NULL, + vcom_readfds ? &vcom_rd_sid_fds : + NULL, + vcom_writefds ? &vcom_wr_sid_fds : + NULL, + vcom_exceptfds ? &vcom_ex_sid_fds : + NULL, + /* src */ + vcom_nfds, + vcom_readfds, + vcom_writefds, vcom_exceptfds); + if (vcom_nsid < 0) + { + return vcom_nsid; + } + if (vcom_nsid_fds < 0) + { + return -EINVAL; + } + + rv = vppcom_select (vcom_nsid_fds, + vcom_readfds ? (unsigned long *) &vcom_rd_sid_fds : + NULL, + vcom_writefds ? (unsigned long *) &vcom_wr_sid_fds : + NULL, + vcom_exceptfds ? (unsigned long *) &vcom_ex_sid_fds : + NULL, time_to_wait); + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] vppcom_select: " + "'%04d'='%04d'\n", pid, rv, (int) vcom_nsid_fds); + + /* check if any file descriptors changed status */ + if (rv > 0) + { + /* + * on exit, sets are modified in place to indicate which + * file descriptors actually changed status + * */ + + /* + * comply with pre-condition + * do not clear vcom fd sets befor calling + * vcom_socket_sid_fds_2_fds + */ + new_vcom_nfd = vcom_socket_sid_fds_2_fds ( + /* dest */ + &new_vcom_nfds, + vcom_nfds, + vcom_readfds, + vcom_writefds, + vcom_exceptfds, + /* src */ + vcom_nsid_fds, + vcom_readfds ? + &vcom_rd_sid_fds : NULL, + vcom_writefds ? + &vcom_wr_sid_fds : NULL, + vcom_exceptfds ? + &vcom_ex_sid_fds : NULL); + if (new_vcom_nfd < 0) + { + return new_vcom_nfd; + } + if (new_vcom_nfds < 0) + { + return -EINVAL; + } + rv = new_vcom_nfd; + } + return rv; +} + + +int +vcom_socket_socket (int __domain, int __type, int __protocol) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + i32 fd; + i32 sid; + i32 sockidx; + u8 is_nonblocking = __type & SOCK_NONBLOCK ? 1 : 0; + int type = __type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC); + + fd = vcom_socket_open_socket (__domain, __type, __protocol); + if (fd < 0) + { + rv = fd; + goto out; + } + + sid = vppcom_session_create (VPPCOM_VRF_DEFAULT, + (type == SOCK_DGRAM) ? + VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP, + is_nonblocking); + if (sid < 0) + { + rv = sid; + goto out_close_socket; + } + + pool_get (vsm->vsockets, vsock); + vsocket_init (vsock); + + sockidx = vsock - vsm->vsockets; + hash_set (vsm->sockidx_by_fd, fd, sockidx); + + vsocket_set (vsock, fd, sid, SOCKET_TYPE_VPPCOM_BOUND); + return fd; + +out_close_socket: + vcom_socket_close_socket (fd); +out: + return rv; +} + +int +vcom_socket_socketpair (int __domain, int __type, int __protocol, + int __fds[2]) +{ +/* TBD: */ + return 0; +} + +int +vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vppcom_endpt_t ep; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr) + { + return -EINVAL; + } + + ep.vrf = VPPCOM_VRF_DEFAULT; + switch (__addr->sa_family) + { + case AF_INET: + if (__len != sizeof (struct sockaddr_in)) + { + return -EINVAL; + } + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + ep.port = (u16) ((const struct sockaddr_in *) __addr)->sin_port; + break; + + case AF_INET6: + if (__len != sizeof (struct sockaddr_in6)) + { + return -EINVAL; + } + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = (u8 *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr; + ep.port = (u16) ((const struct sockaddr_in6 *) __addr)->sin6_port; + break; + + default: + return -1; + break; + } + + rv = vppcom_session_bind (vsock->sid, &ep); + /* TBD: remove libc_bind code snippet + * once vppcom implements vppcom_session_getsockname */ + if (rv == 0) + { + rv = libc_bind (__fd, __addr, __len); + if (rv != 0) + { + rv = -errno; + } + } + return rv; +} + +int +vppcom_session_getsockname (int sid, vppcom_endpt_t * ep) +{ + /* TBD: move it to vppcom */ + return 0; +} + +int +vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr || !__len) + return -EFAULT; + + if (*__len < 0) + { + return -EINVAL; + } + + /* TBD: remove libc_getsockname code snippet + * once vppcom implements vppcom_session_getsockname */ + rv = libc_getsockname (__fd, __addr, __len); + if (rv != 0) + { + rv = -errno; + return rv; + } + + /* TBD: use the below code snippet when vppcom + * implements vppcom_session_getsockname */ +#if 0 + vppcom_endpt_t ep; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + rv = vppcom_session_getsockname (vsock->sid, &ep); + if (rv == 0) + { + if (ep.vrf == VPPCOM_VRF_DEFAULT) + { + __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6; + switch (__addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + *__len = sizeof (struct sockaddr_in); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + *__len = sizeof (struct sockaddr_in6); + break; + + default: + break; + } + } + } +#endif + + return rv; +} + +int +vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vppcom_endpt_t ep; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + ep.vrf = VPPCOM_VRF_DEFAULT; + switch (__addr->sa_family) + { + case AF_INET: + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = + (uint8_t *) & ((const struct sockaddr_in *) __addr)->sin_addr; + ep.port = + (uint16_t) ((const struct sockaddr_in *) __addr)->sin_port; + break; + + case AF_INET6: + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = + (uint8_t *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr; + ep.port = + (uint16_t) ((const struct sockaddr_in6 *) __addr)->sin6_port; + break; + + default: + return -1; + break; + } + + rv = vppcom_session_connect (vsock->sid, &ep); + } + return rv; +} + +int +vppcom_session_getpeername (int sid, vppcom_endpt_t * ep) +{ + /* TBD: move it to vppcom */ + return 0; +} + +int +vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr || !__len) + return -EFAULT; + + if (*__len < 0) + { + return -EINVAL; + } + + /* DAW: hack to allow iperf3 to be happy w/ getpeername output */ + { + uint8_t *a; + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = 0x1000; + a = (uint8_t *) & ((struct sockaddr_in *) __addr)->sin_addr; + a[0] = 0x7f; + a[1] = 0x00; + a[2] = 0x00; + a[3] = 0x01; + *__len = sizeof (struct sockaddr_in); + return 0; + } + + /* TBD: remove libc_getpeername code snippet + * once vppcom implements vppcom_session_getpeername */ + rv = libc_getpeername (__fd, __addr, __len); + if (rv != 0) + { + rv = -errno; + return rv; + } + + /* TBD: use the below code snippet when vppcom + * implements vppcom_session_getpeername */ +#if 0 + vppcom_endpt_t ep; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + rv = vppcom_session_getpeername (vsock->sid, &ep); + if (rv == 0) + { + if (ep.vrf == VPPCOM_VRF_DEFAULT) + { + __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6; + switch (__addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + *__len = sizeof (struct sockaddr_in); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + *__len = sizeof (struct sockaddr_in6); + break; + + default: + break; + } + } + } +#endif + + return rv; +} + +ssize_t +vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags) +{ + return vcom_socket_sendto (__fd, __buf, __n, __flags, NULL, 0); +} + +ssize_t +vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags) +{ + int rv = -1; + rv = vcom_socket_recvfrom (__fd, __buf, __n, __flags, NULL, 0); + return rv; +} + +/* + * RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET), + * 0 otherwise + * */ +int +vcom_socket_is_connection_mode_socket (int __fd) +{ + int rv = -1; + /* TBD define new vppcom api */ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + int type; + socklen_t optlen; + + p = hash_get (vsm->sockidx_by_fd, __fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + optlen = sizeof (type); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, &type, &optlen); + if (rv != 0) + { + return 0; + } + /* get socket type */ + switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_SEQPACKET: + return 1; + break; + + default: + return 0; + break; + } + } + } + return 0; +} + +ssize_t +vvppcom_session_sendto (int __sid, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len) +{ + int rv = -1; + /* TBD add new vpp api */ + /* TBD add flags parameter */ + rv = vppcom_session_write (__sid, (void *) __buf, (int) __n); + return rv; +} + +ssize_t +vcom_socket_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf || __n < 0) + { + return -EINVAL; + } + + if (vcom_socket_is_connection_mode_socket (__fd)) + { + /* ignore __addr and _addr_len */ + /* and EISCONN may be returned when they are not NULL and 0 */ + if ((__addr != NULL) || (__addr_len != 0)) + { + return -EISCONN; + } + } + else + { + if (!__addr || __addr_len < 0) + { + return -EDESTADDRREQ; + } + /* not a vppcom supported address family */ + if ((__addr->sa_family != AF_INET) || (__addr->sa_family != AF_INET6)) + { + return -EINVAL; + } + } + + rv = vvppcom_session_sendto (vsock->sid, (void *) __buf, (int) __n, + __flags, __addr, __addr_len); + return rv; +} + +/* TBD: move it to vppcom */ +static ssize_t +vppcom_session_recvfrom (int __sid, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + int rv = -1; + + /* TBD add flags parameter */ + rv = vppcom_session_read (__sid, __buf, __n); + return rv; +} + +ssize_t +vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf || __n < 0) + { + return -EINVAL; + } + + if (__addr || __addr_len < 0) + { + return -EINVAL; + } + + rv = vppcom_session_recvfrom (vsock->sid, __buf, __n, + __flags, __addr, __addr_len); + return rv; +} + +/* TBD: move it to vppcom */ +static ssize_t +vppcom_sendmsg (int __sid, const struct msghdr *__message, int __flags) +{ + int rv = -1; + /* rv = vppcom_session_write (__sid, (void *) __message->__buf, + (int)__n); */ + return rv; +} + +ssize_t +vcom_socket_sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vcom_socket_is_connection_mode_socket (__fd)) + { + /* ignore __addr and _addr_len */ + /* and EISCONN may be returned when they are not NULL and 0 */ + if ((__message->msg_name != NULL) || (__message->msg_namelen != 0)) + { + return -EISCONN; + } + } + else + { + /* TBD: validate __message->msg_name and __message->msg_namelen + * and return -EINVAL on validation error + * */ + ; + } + + rv = vppcom_sendmsg (vsock->sid, __message, __flags); + + return rv; +} + +#ifdef __USE_GNU +int +vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + + /* TBD: define a new vppcom api */ + return 0; +} +#endif + +/* TBD: move it to vppcom */ +static ssize_t +vppcom_recvmsg (int __sid, struct msghdr *__message, int __flags) +{ + int rv = -1; + /* rv = vppcom_session_read (__sid, (void *) __message->__buf, + (int)__n); */ + rv = -EOPNOTSUPP; + return rv; +} + +ssize_t +vcom_socket_recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__message) + { + return -EINVAL; + } + + /* validate __flags */ + + rv = vppcom_recvmsg (vsock->sid, __message, __flags); + return rv; +} + +#ifdef __USE_GNU +int +vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, + struct timespec *__tmo) +{ + /* TBD: define a new vppcom api */ + return 0; +} +#endif + +/* TBD: move it to vppcom */ +static int +vppcom_getsockopt (int __sid, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen) +{ + /* 1. for socket level options that are NOT socket attributes + * and that has corresponding vpp options get from vppcom */ +#if 0 + return 0; +#endif + + /* 2. unhandled options */ + return -ENOPROTOOPT; +} + +int +vcom_socket_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__optval && !__optlen) + return -EFAULT; + + if (*__optlen < 0) + { + return -EINVAL; + } + + switch (__level) + { + /* handle options at socket level */ + case SOL_SOCKET: + switch (__optname) + { +/* + * 1. for socket level options that are socket attributes, + * get from libc_getsockopt. + * 2. for socket level options that are NOT socket + * attributes and that has corresponding vpp options + * get from vppcom. + * 3. for socket level options unimplemented + * return -ENOPROTOOPT */ + case SO_DEBUG: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_KEEPALIVE: + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_TIMESTAMP: + case SO_TIMESTAMPNS: + case SO_TIMESTAMPING: + case SO_RCVTIMEO: + case SO_SNDTIMEO: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_PASSCRED: + case SO_PEERCRED: + case SO_PEERNAME: + case SO_ACCEPTCONN: + case SO_PASSSEC: + case SO_PEERSEC: + case SO_MARK: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + case SO_BINDTODEVICE: + case SO_GET_FILTER: + case SO_LOCK_FILTER: + case SO_BPF_EXTENSIONS: + case SO_SELECT_ERR_QUEUE: +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +#endif + case SO_MAX_PACING_RATE: + case SO_INCOMING_CPU: + rv = libc_getsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + return rv; + } + break; + + default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ + return -ENOPROTOOPT; + } + + break; + + default: + /* 1. handle options that are NOT socket level options, + * but have corresponding vpp otions. */ + rv = vppcom_getsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + + return rv; +#if 0 + /* 2. unhandled options */ + return -ENOPROTOOPT; +#endif + } + + return rv; +} + +/* TBD: move it to vppcom */ +int +vppcom_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + /* 1. for socket level options that are NOT socket attributes + * and that has corresponding vpp options set it from vppcom */ +#if 0 + return 0; +#endif + + /* 2. unhandled options */ + return -ENOPROTOOPT; +} + +int +vcom_socket_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + /* + * Options without arguments + */ + + if (__optname == SO_BINDTODEVICE) + { + rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + } + return rv; + } + + if (!__optval) + return -EFAULT; + + if ((__optlen < 0) || (__optlen < sizeof (int))) + return -EINVAL; + + switch (__level) + { + /* handle options at socket level */ + case SOL_SOCKET: + switch (__optname) + { + /* + * 1. for socket level options that are socket attributes, + * set it from libc_getsockopt + * 2. for socket level options that are NOT socket + * attributes and that has corresponding vpp options + * set it from vppcom + * 3. for socket level options unimplemented + * return -ENOPROTOOPT */ + case SO_DEBUG: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_KEEPALIVE: + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_TIMESTAMP: + case SO_TIMESTAMPNS: + case SO_TIMESTAMPING: + case SO_RCVTIMEO: + case SO_SNDTIMEO: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_PASSCRED: + case SO_PEERCRED: + case SO_PEERNAME: + case SO_ACCEPTCONN: + case SO_PASSSEC: + case SO_PEERSEC: + case SO_MARK: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + /* + * SO_BINDTODEVICE already handled as + * "Options without arguments" */ + /* case SO_BINDTODEVICE: */ + case SO_GET_FILTER: + case SO_LOCK_FILTER: + case SO_BPF_EXTENSIONS: + case SO_SELECT_ERR_QUEUE: +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +#endif + case SO_MAX_PACING_RATE: + case SO_INCOMING_CPU: + rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + return rv; + } + break; + + default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ + return -ENOPROTOOPT; + } + + break; + + default: + /* 1. handle options that are NOT socket level options, + * but have corresponding vpp otions. */ + rv = vppcom_setsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + return rv; +#if 0 + /* 2. unhandled options */ + return -ENOPROTOOPT; +#endif + } + + return rv; +} + +int +vcom_socket_listen (int __fd, int __n) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + /* TBD vppcom to accept __n parameter */ + rv = vppcom_session_listen (vsock->sid, __n); + } + + return rv; +} + +static int +vcom_socket_connected_socket (int __fd, int __sid, + int *__domain, + int *__type, int *__protocol, int flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + i32 fd; + i32 sockidx; + + socklen_t optlen; + + optlen = sizeof (*__domain); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_DOMAIN, __domain, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + optlen = sizeof (*__type); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, __type, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + optlen = sizeof (*__protocol); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_PROTOCOL, __protocol, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + fd = vcom_socket_open_socket (*__domain, *__type | flags, *__protocol); + if (fd < 0) + { + rv = fd; + goto out; + } + + pool_get (vsm->vsockets, vsock); + vsocket_init (vsock); + + sockidx = vsock - vsm->vsockets; + hash_set (vsm->sockidx_by_fd, fd, sockidx); + + vsocket_set (vsock, fd, __sid, SOCKET_TYPE_VPPCOM_BOUND); + return fd; + +out: + return rv; +} + +/* If flag is 0, then accept4() is the same as accept(). + * SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags + */ +static int +vcom_socket_accept_flags (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + int fd; + int sid; + int domain; + int type; + int protocol; + + uint8_t addr8[sizeof (struct in6_addr)]; + vppcom_endpt_t ep; + + ep.ip = addr8; + + /* validate flags */ + + /* + * for documentation + * switch (flags) + * { + * case 0: + * case SOCK_NONBLOCK: + * case SOCK_CLOEXEC: + * case SOCK_NONBLOCK | SOCK_CLOEXEC: + * break; + * + * default: + * return -1; + * } + */ + /* flags can be 0 or can be bitwise OR + * of any of SOCK_NONBLOCK and SOCK_CLOEXEC */ + + if (!(!flags || (flags & (SOCK_NONBLOCK | SOCK_CLOEXEC)))) + { + /* TBD: return proper error code */ + return -1; + } + + /* TBD: return proper error code */ + + if (!vcom_socket_is_connection_mode_socket (__fd)) + { + return -EOPNOTSUPP; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + + rv = vcom_fcntl (vsock->fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + /* socket is not marked as nonblocking + * and no pending connections are present + * on the queue, accept () blocks the caller + * until a connection is present. + */ + rv = vppcom_session_accept (vsock->sid, &ep, + -1.0 /* wait forever */ ); + } + else + { + /* The file descriptor refers to a socket and has been + * marked nonblocking(O_NONBLOCK) and the accept would + * block. + * */ + /* is non blocking */ + rv = vppcom_session_accept (vsock->sid, &ep, 0); + /* If the socket is marked nonblocking and + * no pending connections are present on the + * queue, accept fails with the error + * EAGAIN or EWOULDBLOCK + */ + if (rv == VPPCOM_ETIMEDOUT) + { + rv = VPPCOM_EAGAIN; + } + } + if (rv < 0) + { + return rv; + } + + sid = rv; + + /* create a new connected socket resource and set flags + * on the new file descriptor. + * update vsockets and sockidx_by_fd table + * */ + fd = vcom_socket_connected_socket (__fd, sid, + &domain, &type, &protocol, flags); + if (fd < 0) + { + return fd; + } + + rv = fd; + + /* TBD populate __addr and __addr_len */ + /* TBD: The returned address is truncated if the buffer + * provided is too small, in this case, __addr_len will + * return a value greater than was supplied to the call.*/ + if (__addr) + { + if (ep.is_cut_thru) + { + /* TBD populate __addr and __addr_len */ + switch (domain) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + memcpy (&((struct sockaddr_in *) __addr)->sin_addr, + addr8, sizeof (struct in_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in); + } + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr. + __in6_u.__u6_addr8, addr8, + sizeof (struct in6_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in6); + } + break; + + default: + return -EAFNOSUPPORT; + } + } + else + { + switch (ep.is_ip4) + { + case VPPCOM_IS_IP4: + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + memcpy (&((struct sockaddr_in *) __addr)->sin_addr, + addr8, sizeof (struct in_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in); + } + break; + + case VPPCOM_IS_IP6: + ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr. + __in6_u.__u6_addr8, addr8, + sizeof (struct in6_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in6); + } + break; + + default: + return -EAFNOSUPPORT; + } + } + } + else + { + /* when __addr is NULL, nothing is filled in, + * in this case, __addr_len is not used, + * and should also be null + * */ + if (__addr_len) + { + /* TBD: return proper error code */ + return -1; + } + } + } + + return rv; +} + +int +vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + /* set flags to 0 for accept() */ + return vcom_socket_accept_flags (__fd, __addr, __addr_len, 0); +} + +#ifdef __USE_GNU +int +vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + /* SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags */ + return vcom_socket_accept_flags (__fd, __addr, __addr_len, __flags); +} +#endif + +/* TBD: move it to vppcom */ +int +vppcom_session_shutdown (int __fd, int __how) +{ + return 0; +} + +int +vcom_socket_shutdown (int __fd, int __how) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + switch (__how) + { + case SHUT_RD: + case SHUT_WR: + case SHUT_RDWR: + rv = vppcom_session_shutdown (vsock->sid, __how); + return rv; + break; + + default: + return -EINVAL; + break; + } + } + + return rv; +} + +int +vcom_socket_epoll_create1 (int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_epoll_t *vepoll; + + i32 epfd; + i32 vep_idx; + i32 epollidx; + + epfd = vcom_socket_open_epoll (__flags); + if (epfd < 0) + { + rv = epfd; + goto out; + } + + vep_idx = vppcom_epoll_create ( ); + if (vep_idx < 0) + { + rv = vep_idx; + goto out_close_epoll; + } + + pool_get (vsm->vepolls, vepoll); + vepoll_init (vepoll); + + epollidx = vepoll - vsm->vepolls; + hash_set (vsm->epollidx_by_epfd, epfd, epollidx); + + vepoll_set (vepoll, epfd, vep_idx, + EPOLL_TYPE_VPPCOM_BOUND, __flags, 0, 0); + + return epfd; + +out_close_epoll: + vcom_socket_close_epoll (epfd); +out: + return rv; +} + +/* + * PRE: vppcom_epoll_ctl() is successful + * free_vepitem_on_del : 0 - no_pool_put, 1 - pool_put + */ +int +vcom_socket_ctl_vepitem (int __epfd, int __op, int __fd, + struct epoll_event *__event, + i32 vep_idx, vcom_epoll_t *vepoll, + i32 vfd_id, void *vfd, vcom_fd_type_t type, + int free_vepitem_on_del) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_epitem_t *vepitem; + + vcom_epitem_key_t epfdfd = {.epfd = __epfd, .fd = __fd}; + uword *p; + i32 vepitemidx; + + i32 *vepitemidxs = 0; + + struct epoll_event revent = {.events = 0, .data.fd = INVALID_FD}; + + i32 vec_idx ; + + /* perform control operations on the epoll instance */ + switch (__op) + { + case EPOLL_CTL_ADD: + /* + * supplied file descriptor is already + * registered with this epoll instance + * */ + /* vepitem exists */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (p) + { + rv = -EEXIST; + goto out; + } + + /* add a new vepitem */ + pool_get (vsm->vepitems, vepitem); + vepitem_init (vepitem); + + vepitemidx = vepitem - vsm->vepitems; + hash_set (vsm->epitemidx_by_epfdfd, epfdfd.key, vepitemidx); + vepitem_set (vepitem, + __epfd, + __fd, __fd, __fd, + type, + *__event, revent); + + /* update epitemidxs */ + /* by_epfd */ + p = hash_get (vsm->epitemidxs_by_epfd, __epfd); + if (!p) /* not exist */ + { + vepitemidxs = 0; + vec_add1 (vepitemidxs, vepitemidx); + hash_set (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs); + } + else /* exists */ + { + vepitemidxs = *(i32 **)p; + vec_add1 (vepitemidxs, vepitemidx); + hash_set3 (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs, 0); + } + /* update epitemidxs */ + /* by_fd */ + p = hash_get (vsm->epitemidxs_by_fd, __fd); + if (!p) /* not exist */ + { + vepitemidxs = 0; + vec_add1 (vepitemidxs, vepitemidx); + hash_set (vsm->epitemidxs_by_fd, __fd, vepitemidxs); + } + else /* exists */ + { + vepitemidxs = *(i32 **)p; + vec_add1 (vepitemidxs, vepitemidx); + hash_set3 (vsm->epitemidxs_by_fd, __fd, vepitemidxs, 0); + } + + /* increment vepoll fd count by 1 */ + vepoll->count +=1; + + rv = 0; + goto out; + break; + + case EPOLL_CTL_MOD: + /* + * supplied file descriptor is not + * registered with this epoll instance + * */ + /* vepitem not exist */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (!p) + { + rv = -ENOENT; + goto out; + } + vepitem = pool_elt_at_index (vsm->vepitems, p[0]); + if (vepitem) + { + vepitem->event = *__event; + vepitem->revent = revent; + } + + rv = 0; + goto out; + break; + + case EPOLL_CTL_DEL: + /* + * supplied file descriptor is not + * registered with this epoll instance + * */ + /* vepitem not exist */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (!p) + { + rv = -ENOENT; + goto out; + } + vepitemidx = *(i32 *)p; + hash_unset (vsm->epitemidx_by_epfdfd, epfdfd.key); + + /* update epitemidxs */ + /* by_epfd */ + p = hash_get (vsm->epitemidxs_by_epfd, __epfd); + if (!p) /* not exist */ + { + rv = -ENOENT; + goto out; + } + else /* exists */ + { + vepitemidxs = *(i32 **)p; + vec_idx = vec_search (vepitemidxs, vepitemidx); + if(vec_idx != ~0) + { + vec_del1 (vepitemidxs, vec_idx); + if(!vec_len (vepitemidxs)) + { + vec_free (vepitemidxs); + hash_unset (vsm->epitemidxs_by_epfd, __epfd); + } + } + } + + /* update epitemidxs */ + /* by_fd */ + p = hash_get (vsm->epitemidxs_by_fd, __fd); + if (!p) /* not exist */ + { + rv = -ENOENT; + goto out; + } + else /* exists */ + { + vepitemidxs = *(i32 **)p; + vec_idx = vec_search (vepitemidxs, vepitemidx); + if(vec_idx != ~0) + { + vec_del1 (vepitemidxs, vec_idx); + if(!vec_len (vepitemidxs)) + { + vec_free (vepitemidxs); + hash_unset (vsm->epitemidxs_by_fd, __fd); + } + } + } + + /* pool put vepitem */ + vepitem = pool_elt_at_index (vsm->vepitems, vepitemidx); + if (free_vepitem_on_del) + { + if(!vepitem) + { + rv = -ENOENT; + goto out; + } + vepitem_init(vepitem); + pool_put (vsm->vepitems, vepitem); + } + else + { + if(!vepitem) + { + vepitem_init(vepitem); + } + } + + /* decrement vepoll fd count by 1 */ + vepoll->count -=1; + + rv = 0; + goto out; + break; + + default: + rv = -EINVAL; + goto out; + break; + } + +out: + return rv; +} + +/* + * PRE: 00. null pointer check on __event + * 01. all other parameters are validated + */ + +static int +vcom_socket_epoll_ctl_internal (int __epfd, int __op, int __fd, + struct epoll_event *__event, + int free_vepitem_on_del) +{ + int rv = -1; + + /* vcom_socket_main_t *vsm = &vcom_socket_main; */ + vcom_epoll_t *vepoll; + + /*__fd could could be vcom socket or vcom epoll or kernel fd */ + void *vfd; + vcom_epoll_t *vfd_vepoll; + vcom_socket_t *vfd_vsock; + + i32 vep_idx; + i32 vfd_id; + + vcom_fd_type_t type = FD_TYPE_INVALID; + + /* validate __event */ + + /* get vep_idx and vepoll */ + vep_idx = vcom_socket_get_vep_idx_and_vepoll (__epfd, &vepoll); + if (vep_idx == INVALID_VEP_IDX) + { + return -EBADF; + } + + /* get vcom fd type, vfd_id and vfd */ + vfd_id = vcom_socket_get_sid_and_vsock (__fd, &vfd_vsock); + if (vfd_id != INVALID_SESSION_ID) + { + type = FD_TYPE_VCOM_SOCKET; + vfd = vfd_vsock; + } + else if ((vfd_id = vcom_socket_get_vep_idx_and_vepoll (__fd, &vfd_vepoll)) != INVALID_VEP_IDX) + { + type = FD_TYPE_EPOLL; + vfd = vfd_vepoll; + } + else + { + /* FD_TYPE_KERNEL not supported by epoll instance */ + type = FD_TYPE_INVALID; + return -EBADF; + } + + + /* vepoll and vsock are now valid */ + rv = vppcom_epoll_ctl ( vep_idx, __op, vfd_id, __event); + if (rv < 0) + { + return rv; + } + + rv = vcom_socket_ctl_vepitem (__epfd, __op, __fd, + __event, + vep_idx, vepoll, + vfd_id, vfd, type, + free_vepitem_on_del); + return rv; +} + +int +vcom_socket_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv = -1; + + rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, + __event, 1); + return rv; +} + +static int +vcom_socket_epoll_ctl1 (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv = -1; + + rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, + __event, 0); + return rv; +} + +int +vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss) +{ + int rv = -EBADF; + + /* in seconds eg. 3.123456789 seconds */ + double time_to_wait = (double) 0; + + i32 vep_idx; + + /* validate __event */ + if (!__events) + { + rv = -EFAULT; + goto out; + } + + /* validate __timeout */ + if (__timeout > 0) + { + time_to_wait = (double) __timeout / (double) 1000; + } + else if (__timeout == 0) + { + time_to_wait = (double) 0; + } + else if (__timeout == -1) + { + time_to_wait = ~0; + } + else + { + rv = -EBADF; + goto out; + } + + /* get vep_idx */ + vep_idx = vcom_socket_get_vep_idx (__epfd); + if (vep_idx != INVALID_VEP_IDX) + { + rv = vppcom_epoll_wait (vep_idx, __events, + __maxevents, time_to_wait); + } +out: + return rv; +} + +int +vcom_socket_main_init (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + + if (VCOM_DEBUG > 0) + printf ("vcom_socket_main_init\n"); + + if (!vsm->init) + { + /* TBD: define FD_MAXSIZE and use it here */ + pool_alloc (vsm->vsockets, FD_SETSIZE); + vsm->sockidx_by_fd = hash_create (0, sizeof (i32)); + + pool_alloc (vsm->vepolls, FD_SETSIZE); + vsm->epollidx_by_epfd = hash_create (0, sizeof (i32)); + + pool_alloc (vsm->vepitems, FD_SETSIZE); + vsm->epitemidx_by_epfdfd = hash_create (0, sizeof (i32)); + + vsm->epitemidxs_by_epfd = hash_create (0, sizeof (i32 *)); + vsm->epitemidxs_by_fd = hash_create (0, sizeof (i32 *)); + + vsm->init = 1; + } + + return 0; +} + + +void +vcom_socket_main_show (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + vcom_epoll_t *vepoll; + + vcom_epitem_t *vepitem; + + i32 epfd; + i32 fd; + i32 *vepitemidxs, *vepitemidxs_var; + + if (vsm->init) + { + /* from active list of vsockets show vsock */ + + /* *INDENT-OFF* */ + pool_foreach (vsock, vsm->vsockets, + ({ + printf( + "fd='%04d', sid='%08x',type='%-30s'\n", + vsock->fd, vsock->sid, + vcom_socket_type_str (vsock->type)); + })); + /* *INDENT-ON* */ + + /* from active list of vepolls, show vepoll */ + + /* *INDENT-OFF* */ + pool_foreach (vepoll, vsm->vepolls, + ({ + printf( + "epfd='%04d', vep_idx='%08x', " + "type='%-30s', " + "flags='%d', count='%d', close='%d'\n", + vepoll->epfd, vepoll->vep_idx, + vcom_socket_epoll_type_str (vepoll->type), + vepoll->flags, vepoll->count, vepoll->close); + })); + /* *INDENT-ON* */ + + /* from active list of vepitems, show vepitem */ + + /* *INDENT-OFF* */ + pool_foreach (vepitem, vsm->vepitems, + ({ + printf( + "epfd='%04d', fd='%04d', " + "next_fd='%04d', prev_fd='%04d', " + "type='%-30s', " + "events='%04x', revents='%04x'\n", + vepitem->epfd, vepitem->fd, + vepitem->next_fd, vepitem->prev_fd, + vcom_socket_vcom_fd_type_str (vepitem->type), + vepitem->event.events, vepitem->revent.events); + })); + + /* *INDENT-ON* */ + + /* show epitemidxs for epfd */ + /* *INDENT-OFF* */ + hash_foreach (epfd, vepitemidxs, + vsm->epitemidxs_by_epfd, + ({ + printf("\n[ '%04d': ", epfd); + vec_foreach (vepitemidxs_var,vepitemidxs) + { + printf("'%04d' ", (int)vepitemidxs_var[0]); + } + printf("]\n"); + })); + /* *INDENT-ON* */ + + /* show epitemidxs for fd */ + /* *INDENT-OFF* */ + hash_foreach (fd, vepitemidxs, + vsm->epitemidxs_by_fd, + ({ + printf("\n{ '%04d': ", fd); + vec_foreach (vepitemidxs_var,vepitemidxs) + { + printf("'%04d' ", (int)vepitemidxs_var[0]); + } + printf("}\n"); + })); + /* *INDENT-ON* */ + + } +} + +void +vcom_socket_main_destroy (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + vcom_epoll_t *vepoll; + + vcom_epitem_t *vepitem; + + i32 epfd; + i32 fd; + i32 *vepitemidxs; + + + if (VCOM_DEBUG > 0) + printf ("vcom_socket_main_destroy\n"); + + if (vsm->init) + { + + /* + * from active list of vepitems, + * remove all "vepitem" elements from the pool in a safe way + * */ + + /* *INDENT-OFF* */ + pool_flush (vepitem, vsm->vepitems, + ({ + if (vepitem->type == FD_TYPE_EPOLL || FD_TYPE_VCOM_SOCKET) + { + vcom_socket_epoll_ctl1 (vepitem->epfd, EPOLL_CTL_DEL, + vepitem->fd, NULL); + vepitem_init (vepitem); + } + })); + /* *INDENT-ON* */ + + pool_free (vsm->vepitems); + hash_free (vsm->epitemidx_by_epfdfd); + + /* free vepitemidxs for each epfd */ + /* *INDENT-OFF* */ + hash_foreach (epfd, vepitemidxs, + vsm->epitemidxs_by_epfd, + ({ + vec_free (vepitemidxs); + })); + /* *INDENT-ON* */ + hash_free (vsm->epitemidxs_by_epfd); + + /* free vepitemidxs for each fd */ + /* *INDENT-OFF* */ + hash_foreach (fd, vepitemidxs, + vsm->epitemidxs_by_fd, + ({ + vec_free (vepitemidxs); + })); + /* *INDENT-ON* */ + hash_free (vsm->epitemidxs_by_fd); + + + /* + * from active list of vsockets, + * close socket and vppcom session + * */ + + /* *INDENT-OFF* */ + pool_foreach (vsock, vsm->vsockets, + ({ + if (vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + vppcom_session_close (vsock->sid); + vcom_socket_close_socket (vsock->fd); + vsocket_init (vsock); + } + })); + /* *INDENT-ON* */ + + /* + * return vsocket element to the pool + * */ + + /* *INDENT-OFF* */ + pool_flush (vsock, vsm->vsockets, + ({ + // vsocket_init(vsock); + ; + })); + /* *INDENT-ON* */ + + pool_free (vsm->vsockets); + hash_free (vsm->sockidx_by_fd); + + /* + * from active list of vepolls, + * close epoll and vppcom_epoll + * */ + + /* *INDENT-OFF* */ + pool_foreach (vepoll, vsm->vepolls, + ({ + if (vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + { + vppcom_session_close (vepoll->vep_idx); + vcom_socket_close_epoll (vepoll->epfd); /* TBD: */ + vepoll_init (vepoll); + } + })); + /* *INDENT-ON* */ + + /* + * return vepoll element to the pool + * */ + + /* *INDENT-OFF* */ + pool_flush (vepoll, vsm->vepolls, + ({ + // vepoll_init(vepoll); + ; + })); + /* *INDENT-ON* */ + + pool_free (vsm->vepolls); + hash_free (vsm->epollidx_by_epfd); + + vsm->init = 0; + } +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.h b/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.h new file mode 100644 index 0000000..56539d1 --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.h @@ -0,0 +1,459 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vcom_socket_h +#define included_vcom_socket_h + +#include <string.h> + +#include <libvcl-ldpreload/vcom_glibc_socket.h> +#include <vppinfra/types.h> + +#define INVALID_SESSION_ID (~0) +#define INVALID_FD (~0) + +#define INVALID_VEP_IDX INVALID_SESSION_ID +#define INVALID_EPFD INVALID_FD + +typedef enum +{ + SOCKET_TYPE_UNBOUND = 0, + SOCKET_TYPE_KERNEL_BOUND, + SOCKET_TYPE_VPPCOM_BOUND +} vcom_socket_type_t; + +typedef enum +{ + EPOLL_TYPE_UNBOUND = 0, + EPOLL_TYPE_KERNEL_BOUND, + EPOLL_TYPE_VPPCOM_BOUND +} vcom_epoll_type_t; + +typedef enum +{ + FD_TYPE_INVALID = 0, + FD_TYPE_KERNEL, + FD_TYPE_EPOLL, + FD_TYPE_VCOM_SOCKET, + /* add new types here */ + /* FD_TYPE_MAX should be the last entry */ + FD_TYPE_MAX +} vcom_fd_type_t; + +typedef struct +{ + /* file descriptor - + * fd 0, 1, 2 have special meaning and are reserved, + * -1 denote invalid fd */ + i32 fd; + + /* session id - -1 denote invalid sid */ + i32 sid; + + /* socket type */ + vcom_socket_type_t type; + + /* vcom socket attributes here */ + +} vcom_socket_t; + +typedef struct +{ + /* epoll file descriptor - + * epfd 0, 1, 2 have special meaning and are reserved, + * -1 denote invalid epfd */ + i32 epfd; + + /* vep idx - -1 denote invalid vep_idx */ + i32 vep_idx; + + /* epoll type */ + vcom_epoll_type_t type; + + /* flags - 0 or EPOLL_CLOEXEC */ + i32 flags; + + /* vcom epoll attributes here */ + + /* + * 00. count of file descriptors currently registered + * on this epoll instance. + * 01. number of file descriptors in the epoll set. + * 02. EPOLL_CTL_ADD, EPOLL_CTL_MOD, EPOLL_CTL_DEL + * update the count. + * 03. cached for frequent access. + * */ + i32 count; + + /* close( ) called on this epoll instance */ + /* 0 - close ( ) not called, 1 - close( ) called. */ + u32 close; + +} vcom_epoll_t; + +typedef struct +{ + /* "container" of this item */ + i32 epfd; + + /* fd - file descriptor information this item refers to */ + i32 fd; + /* next and prev fd in the "epoll set" of epfd */ + i32 next_fd; + i32 prev_fd; + + /* vcom fd type */ + vcom_fd_type_t type; + + /* interested events and the source fd */ + struct epoll_event event; + + /* ready events and the source fd */ + struct epoll_event revent; + + /* epitem attributes here */ + +} vcom_epitem_t; + +typedef union vcom_epitem_key +{ + struct { + i32 fd; + i32 epfd; + }; + i64 key; +} __EPOLL_PACKED vcom_epitem_key_t; + +static inline char * +vcom_socket_type_str (vcom_socket_type_t t) +{ + switch (t) + { + case SOCKET_TYPE_UNBOUND: + return "SOCKET_TYPE_UNBOUND"; + + case SOCKET_TYPE_KERNEL_BOUND: + return "SOCKET_TYPE_KERNEL_BOUND"; + + case SOCKET_TYPE_VPPCOM_BOUND: + return "SOCKET_TYPE_VPPCOM_BOUND"; + + default: + return "SOCKET_TYPE_UNKNOWN"; + } +} + +static inline char * +vcom_socket_epoll_type_str (vcom_epoll_type_t t) +{ + switch (t) + { + case EPOLL_TYPE_UNBOUND: + return "EPOLL_TYPE_UNBOUND"; + + case EPOLL_TYPE_KERNEL_BOUND: + return "EPOLL_TYPE_KERNEL_BOUND"; + + case EPOLL_TYPE_VPPCOM_BOUND: + return "EPOLL_TYPE_VPPCOM_BOUND"; + + default: + return "EPOLL_TYPE_UNKNOWN"; + } +} + +static inline char * +vcom_socket_vcom_fd_type_str (vcom_fd_type_t t) +{ + switch (t) + { + case FD_TYPE_KERNEL: + return "FD_TYPE_KERNEL"; + + case FD_TYPE_EPOLL: + return "FD_TYPE_EPOLL"; + + case FD_TYPE_VCOM_SOCKET: + return "FD_TYPE_VCOM_SOCKET"; + + default: + return "FD_TYPE_UNKNOWN"; + } +} + +static inline int +vcom_socket_type_is_vppcom_bound (vcom_socket_type_t t) +{ + return t == SOCKET_TYPE_VPPCOM_BOUND; +} + +static inline int +vcom_socket_epoll_type_is_vppcom_bound (vcom_epoll_type_t t) +{ + return t == EPOLL_TYPE_VPPCOM_BOUND; +} + +static inline void +vsocket_init (vcom_socket_t * vsock) +{ + memset (vsock, 0, sizeof (*vsock)); + + vsock->fd = INVALID_FD; + vsock->sid = INVALID_SESSION_ID; + vsock->type = SOCKET_TYPE_UNBOUND; + /* vcom socket attributes init here */ +} + +static inline void +vepoll_init (vcom_epoll_t * vepoll) +{ + memset (vepoll, 0, sizeof (*vepoll)); + + vepoll->epfd = INVALID_EPFD; + vepoll->vep_idx = INVALID_VEP_IDX; + vepoll->type = EPOLL_TYPE_UNBOUND; + vepoll->flags = 0; + + vepoll->count = 0; + vepoll->close = 0; + /* vcom epoll attributes init here */ +} + +static inline void +vepitem_init (vcom_epitem_t * vepitem) +{ + struct epoll_event event = {.events = 0, .data.fd = INVALID_FD}; + + memset (vepitem, 0, sizeof (*vepitem)); + + vepitem->epfd = INVALID_EPFD; + + vepitem->fd = INVALID_FD; + vepitem->next_fd = INVALID_FD; + vepitem->prev_fd = INVALID_FD; + + vepitem->type = FD_TYPE_INVALID; + + vepitem->event = event; + vepitem->revent = event; + /* vepoll attributes init here */ +} + +static inline void +vepitemkey_init (vcom_epitem_key_t * epfdfd) +{ + memset (epfdfd, 0, sizeof (*epfdfd)); + + epfdfd->epfd = INVALID_EPFD; + epfdfd->fd = INVALID_FD; +} + +static inline void +vsocket_set (vcom_socket_t * vsock, + i32 fd, i32 sid, vcom_socket_type_t type) +{ + vsock->fd = fd; + vsock->sid = sid; + vsock->type = type; + /* vcom socket attributes set here */ +} + +static inline void +vepoll_set (vcom_epoll_t * vepoll, + i32 epfd, i32 vep_idx, + vcom_epoll_type_t type, i32 flags, i32 count, u32 close) +{ + vepoll->epfd = epfd; + vepoll->vep_idx = vep_idx; + vepoll->type = type; + vepoll->flags = flags; + + vepoll->count = count; + vepoll->close = close; + /* vcom epoll attributes set here */ +} + +static inline void +vepitem_set (vcom_epitem_t * vepitem, + i32 epfd, + i32 fd, i32 next_fd, i32 prev_fd, + vcom_fd_type_t type, + struct epoll_event event, struct epoll_event revent) +{ + vepitem->epfd = epfd; + + vepitem->fd = fd; + vepitem->next_fd = next_fd; + vepitem->prev_fd = prev_fd; + + vepitem->type = type; + + vepitem->event = event; + vepitem->revent = revent; + /* vcom epitem attributes set here */ +} + +static inline void +vepitemkey_set (vcom_epitem_key_t * epfdfd, + i32 epfd, i32 fd) +{ + epfdfd->epfd = epfd; + epfdfd->fd = fd; +} + +static inline int +vsocket_is_vppcom_bound (vcom_socket_t * vsock) +{ + return vcom_socket_type_is_vppcom_bound (vsock->type); +} + +static inline int +vepoll_is_vppcom_bound (vcom_epoll_t * vepoll) +{ + return vcom_socket_epoll_type_is_vppcom_bound (vepoll->type); +} + +int vcom_socket_main_init (void); + +void vcom_socket_main_destroy (void); + +void vcom_socket_main_show (void); + +int vcom_socket_is_vcom_fd (int fd); + +int +vcom_socket_is_vcom_epfd (int epfd); + +int vcom_socket_close (int __fd); + +ssize_t vcom_socket_read (int __fd, void *__buf, size_t __nbytes); + +ssize_t vcom_socket_readv (int __fd, const struct iovec * __iov, int __iovcnt); + +ssize_t vcom_socket_write (int __fd, const void *__buf, size_t __n); + +ssize_t vcom_socket_writev (int __fd, const struct iovec * __iov, int __iovcnt); + +int vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap); + +int +vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout); + + +int vcom_socket_socket (int __domain, int __type, int __protocol); + +int +vcom_socket_socketpair (int __domain, int __type, int __protocol, + int __fds[2]); + +int vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +int +vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +int +vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +int +vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +ssize_t +vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags); + +ssize_t vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags); + +/* + * RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET), + * 0 otherwise + * */ +int vcom_socket_is_connection_mode_socket (int __fd); + +ssize_t +vcom_socket_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len); + +ssize_t +vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +ssize_t +vcom_socket_sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +int +vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +ssize_t vcom_socket_recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +int +vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, + struct timespec *__tmo); +#endif + +int +vcom_socket_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen); + +int +vcom_socket_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen); + +int vcom_socket_listen (int __fd, int __n); + +int +vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +int +vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +int vcom_socket_shutdown (int __fd, int __how); + +int +vcom_socket_epoll_create1 (int __flags); + +int +vcom_socket_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event); + +int +vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss); + +#endif /* included_vcom_socket_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.c b/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.c new file mode 100644 index 0000000..2e2d794 --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.c @@ -0,0 +1,851 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2005-2008 Jelmer Vernooij <jelmer@samba.org> + * Copyright (C) 2006-2014 Stefan Metzmacher <metze@samba.org> + * Copyright (C) 2013-2014 Andreas Schneider <asn@samba.org> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + Socket wrapper library. Passes all socket communication over + unix domain sockets if the environment variable SOCKET_WRAPPER_DIR + is set. +*/ + +#include <signal.h> +#include <dlfcn.h> + +#include <stdio.h> +#include <stdarg.h> +#include <unistd.h> +#include <pthread.h> + +#include <libvcl-ldpreload/vcom_socket_wrapper.h> + + +enum swrap_dbglvl_e +{ + SWRAP_LOG_ERROR = 0, + SWRAP_LOG_WARN, + SWRAP_LOG_DEBUG, + SWRAP_LOG_TRACE +}; + + +/* Macros for accessing mutexes */ +#define SWRAP_LOCK(m) do { \ + pthread_mutex_lock(&(m ## _mutex)); \ +} while(0) + +#define SWRAP_UNLOCK(m) do { \ + pthread_mutex_unlock(&(m ## _mutex)); \ +} while(0) + +/* Add new global locks here please */ +#define SWRAP_LOCK_ALL \ + SWRAP_LOCK(libc_symbol_binding); \ + +#define SWRAP_UNLOCK_ALL \ + SWRAP_UNLOCK(libc_symbol_binding); \ + + + +/* The mutex for accessing the global libc.symbols */ +static pthread_mutex_t libc_symbol_binding_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Function prototypes */ + +#ifdef NDEBUG +#define SWRAP_LOG(...) +#else +static void +swrap_log (enum swrap_dbglvl_e dbglvl, const char *func, + const char *format, ...) +PRINTF_ATTRIBUTE (3, 4); +#define SWRAP_LOG(dbglvl, ...) swrap_log((dbglvl), __func__, __VA_ARGS__) + + static void + swrap_log (enum swrap_dbglvl_e dbglvl, + const char *func, const char *format, ...) +{ + char buffer[1024]; + va_list va; + unsigned int lvl = SWRAP_LOG_WARN; + + va_start (va, format); + vsnprintf (buffer, sizeof (buffer), format, va); + va_end (va); + + if (lvl >= dbglvl) + { + switch (dbglvl) + { + case SWRAP_LOG_ERROR: + fprintf (stderr, + "SWRAP_ERROR(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_WARN: + fprintf (stderr, + "SWRAP_WARN(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_DEBUG: + fprintf (stderr, + "SWRAP_DEBUG(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_TRACE: + fprintf (stderr, + "SWRAP_TRACE(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + } + } +} +#endif + + +/********************************************************* + * SWRAP LOADING LIBC FUNCTIONS + *********************************************************/ + +#ifdef HAVE_ACCEPT4 +typedef int (*__libc_accept4) (int sockfd, + struct sockaddr * addr, + socklen_t * addrlen, int flags); +#else +typedef int (*__libc_accept) (int sockfd, + struct sockaddr * addr, socklen_t * addrlen); +#endif +typedef int (*__libc_bind) (int sockfd, + const struct sockaddr * addr, socklen_t addrlen); +typedef int (*__libc_close) (int fd); +typedef int (*__libc_connect) (int sockfd, + const struct sockaddr * addr, + socklen_t addrlen); + +#if 0 +/* TBD: dup and dup2 to be implemented later */ +typedef int (*__libc_dup) (int fd); +typedef int (*__libc_dup2) (int oldfd, int newfd); +#endif + +typedef int (*__libc_fcntl) (int fd, int cmd, ...); +typedef FILE *(*__libc_fopen) (const char *name, const char *mode); +#ifdef HAVE_FOPEN64 +typedef FILE *(*__libc_fopen64) (const char *name, const char *mode); +#endif +#ifdef HAVE_EVENTFD +typedef int (*__libc_eventfd) (int count, int flags); +#endif +typedef int (*__libc_getpeername) (int sockfd, + struct sockaddr * addr, + socklen_t * addrlen); +typedef int (*__libc_getsockname) (int sockfd, + struct sockaddr * addr, + socklen_t * addrlen); +typedef int (*__libc_getsockopt) (int sockfd, + int level, + int optname, + void *optval, socklen_t * optlen); +typedef int (*__libc_ioctl) (int d, unsigned long int request, ...); +typedef int (*__libc_listen) (int sockfd, int backlog); +typedef int (*__libc_open) (const char *pathname, int flags, mode_t mode); +#ifdef HAVE_OPEN64 +typedef int (*__libc_open64) (const char *pathname, int flags, mode_t mode); +#endif /* HAVE_OPEN64 */ +typedef int (*__libc_openat) (int dirfd, const char *path, int flags, ...); +typedef int (*__libc_pipe) (int pipefd[2]); +typedef int (*__libc_read) (int fd, void *buf, size_t count); +typedef ssize_t (*__libc_readv) (int fd, const struct iovec * iov, + int iovcnt); +typedef int (*__libc_recv) (int sockfd, void *buf, size_t len, int flags); +typedef int (*__libc_recvfrom) (int sockfd, + void *buf, + size_t len, + int flags, + struct sockaddr * src_addr, + socklen_t * addrlen); +typedef int (*__libc_recvmsg) (int sockfd, const struct msghdr * msg, + int flags); +typedef int (*__libc_send) (int sockfd, const void *buf, size_t len, + int flags); +typedef int (*__libc_sendmsg) (int sockfd, const struct msghdr * msg, + int flags); +typedef int (*__libc_sendto) (int sockfd, const void *buf, size_t len, + int flags, const struct sockaddr * dst_addr, + socklen_t addrlen); +typedef int (*__libc_setsockopt) (int sockfd, int level, int optname, + const void *optval, socklen_t optlen); +#ifdef HAVE_SIGNALFD +typedef int (*__libc_signalfd) (int fd, const sigset_t * mask, int flags); +#endif +typedef int (*__libc_socket) (int domain, int type, int protocol); +typedef int (*__libc_socketpair) (int domain, int type, int protocol, + int sv[2]); +#ifdef HAVE_TIMERFD_CREATE +typedef int (*__libc_timerfd_create) (int clockid, int flags); +#endif +typedef ssize_t (*__libc_write) (int fd, const void *buf, size_t count); +typedef ssize_t (*__libc_writev) (int fd, const struct iovec * iov, + int iovcnt); + +typedef int (*__libc_shutdown) (int fd, int how); + +typedef int (*__libc_select) (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval * __restrict __timeout); + +#ifdef __USE_XOPEN2K +typedef int (*__libc_pselect) (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec * __restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + +typedef int (*__libc_epoll_create) (int __size); + +typedef int (*__libc_epoll_create1) (int __flags); + +typedef int (*__libc_epoll_ctl) (int __epfd, int __op, int __fd, + struct epoll_event *__event); + +typedef int (*__libc_epoll_wait) (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +typedef int (*__libc_epoll_pwait) (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss); + + +#define SWRAP_SYMBOL_ENTRY(i) \ + union { \ + __libc_##i f; \ + void *obj; \ + } _libc_##i + +struct swrap_libc_symbols +{ +#ifdef HAVE_ACCEPT4 + SWRAP_SYMBOL_ENTRY (accept4); +#else + SWRAP_SYMBOL_ENTRY (accept); +#endif + SWRAP_SYMBOL_ENTRY (bind); + SWRAP_SYMBOL_ENTRY (close); + SWRAP_SYMBOL_ENTRY (connect); +#if 0 + /* TBD: dup and dup2 to be implemented later */ + SWRAP_SYMBOL_ENTRY (dup); + SWRAP_SYMBOL_ENTRY (dup2); +#endif + SWRAP_SYMBOL_ENTRY (fcntl); + SWRAP_SYMBOL_ENTRY (fopen); +#ifdef HAVE_FOPEN64 + SWRAP_SYMBOL_ENTRY (fopen64); +#endif +#ifdef HAVE_EVENTFD + SWRAP_SYMBOL_ENTRY (eventfd); +#endif + SWRAP_SYMBOL_ENTRY (getpeername); + SWRAP_SYMBOL_ENTRY (getsockname); + SWRAP_SYMBOL_ENTRY (getsockopt); + SWRAP_SYMBOL_ENTRY (ioctl); + SWRAP_SYMBOL_ENTRY (listen); + SWRAP_SYMBOL_ENTRY (open); +#ifdef HAVE_OPEN64 + SWRAP_SYMBOL_ENTRY (open64); +#endif + SWRAP_SYMBOL_ENTRY (openat); + SWRAP_SYMBOL_ENTRY (pipe); + SWRAP_SYMBOL_ENTRY (read); + SWRAP_SYMBOL_ENTRY (readv); + SWRAP_SYMBOL_ENTRY (recv); + SWRAP_SYMBOL_ENTRY (recvfrom); + SWRAP_SYMBOL_ENTRY (recvmsg); + SWRAP_SYMBOL_ENTRY (send); + SWRAP_SYMBOL_ENTRY (sendmsg); + SWRAP_SYMBOL_ENTRY (sendto); + SWRAP_SYMBOL_ENTRY (setsockopt); +#ifdef HAVE_SIGNALFD + SWRAP_SYMBOL_ENTRY (signalfd); +#endif + SWRAP_SYMBOL_ENTRY (socket); + SWRAP_SYMBOL_ENTRY (socketpair); +#ifdef HAVE_TIMERFD_CREATE + SWRAP_SYMBOL_ENTRY (timerfd_create); +#endif + SWRAP_SYMBOL_ENTRY (write); + SWRAP_SYMBOL_ENTRY (writev); + + SWRAP_SYMBOL_ENTRY (shutdown); + SWRAP_SYMBOL_ENTRY (select); +#ifdef __USE_XOPEN2K + SWRAP_SYMBOL_ENTRY (pselect); +#endif + SWRAP_SYMBOL_ENTRY (epoll_create); + SWRAP_SYMBOL_ENTRY (epoll_create1); + SWRAP_SYMBOL_ENTRY (epoll_ctl); + SWRAP_SYMBOL_ENTRY (epoll_wait); + SWRAP_SYMBOL_ENTRY (epoll_pwait); +}; + +struct swrap +{ + struct + { + void *handle; + void *socket_handle; + struct swrap_libc_symbols symbols; + } libc; +}; + +static struct swrap swrap; + +#define LIBC_NAME "libc.so" + +enum swrap_lib +{ + SWRAP_LIBC, +}; + +#ifndef NDEBUG +static const char * +swrap_str_lib (enum swrap_lib lib) +{ + switch (lib) + { + case SWRAP_LIBC: + return "libc"; + } + + /* Compiler would warn us about unhandled enum value if we get here */ + return "unknown"; +} +#endif + +static void * +swrap_load_lib_handle (enum swrap_lib lib) +{ + int flags = RTLD_LAZY; + void *handle = NULL; + int i; + +#ifdef RTLD_DEEPBIND + flags |= RTLD_DEEPBIND; +#endif + + switch (lib) + { + case SWRAP_LIBC: + handle = swrap.libc.handle; +#ifdef LIBC_SO + if (handle == NULL) + { + handle = dlopen (LIBC_SO, flags); + + swrap.libc.handle = handle; + } +#endif + if (handle == NULL) + { + for (i = 10; i >= 0; i--) + { + char soname[256] = { 0 }; + + snprintf (soname, sizeof (soname), "libc.so.%d", i); + handle = dlopen (soname, flags); + if (handle != NULL) + { + break; + } + } + + swrap.libc.handle = handle; + } + break; + } + + if (handle == NULL) + { + SWRAP_LOG (SWRAP_LOG_ERROR, + "Failed to dlopen library: %s\n", dlerror ()); + exit (-1); + } + + return handle; +} + +static void * +_swrap_bind_symbol (enum swrap_lib lib, const char *fn_name) +{ + void *handle; + void *func; + + handle = swrap_load_lib_handle (lib); + + func = dlsym (handle, fn_name); + if (func == NULL) + { + SWRAP_LOG (SWRAP_LOG_ERROR, + "Failed to find %s: %s\n", fn_name, dlerror ()); + exit (-1); + } + + SWRAP_LOG (SWRAP_LOG_TRACE, + "Loaded %s from %s", fn_name, swrap_str_lib (lib)); + + return func; +} + +#define swrap_bind_symbol_libc(sym_name) \ + SWRAP_LOCK(libc_symbol_binding); \ + if (swrap.libc.symbols._libc_##sym_name.obj == NULL) { \ + swrap.libc.symbols._libc_##sym_name.obj = \ + _swrap_bind_symbol(SWRAP_LIBC, #sym_name); \ + } \ + SWRAP_UNLOCK(libc_symbol_binding) + +/* + * IMPORTANT + * + * Functions especially from libc need to be loaded individually, you can't load + * all at once or gdb will segfault at startup. The same applies to valgrind and + * has probably something todo with with the linker. + * So we need load each function at the point it is called the first time. + */ +#ifdef HAVE_ACCEPT4 +int +libc_accept4 (int sockfd, + struct sockaddr *addr, socklen_t * addrlen, int flags) +{ + swrap_bind_symbol_libc (accept4); + + return swrap.libc.symbols._libc_accept4.f (sockfd, addr, addrlen, flags); +} + +#else /* HAVE_ACCEPT4 */ + +int +libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (accept); + + return swrap.libc.symbols._libc_accept.f (sockfd, addr, addrlen); +} +#endif /* HAVE_ACCEPT4 */ + +int +libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (bind); + + return swrap.libc.symbols._libc_bind.f (sockfd, addr, addrlen); +} + +int +libc_close (int fd) +{ + swrap_bind_symbol_libc (close); + + return swrap.libc.symbols._libc_close.f (fd); +} + +int +libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (connect); + + return swrap.libc.symbols._libc_connect.f (sockfd, addr, addrlen); +} + +#if 0 +/* TBD: dup and dup2 to be implemented later */ +int +libc_dup (int fd) +{ + swrap_bind_symbol_libc (dup); + + return swrap.libc.symbols._libc_dup.f (fd); +} + +int +libc_dup2 (int oldfd, int newfd) +{ + swrap_bind_symbol_libc (dup2); + + return swrap.libc.symbols._libc_dup2.f (oldfd, newfd); +} +#endif + +#ifdef HAVE_EVENTFD +int +libc_eventfd (int count, int flags) +{ + swrap_bind_symbol_libc (eventfd); + + return swrap.libc.symbols._libc_eventfd.f (count, flags); +} +#endif + +DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int +libc_vfcntl (int fd, int cmd, va_list ap) +{ + long int args[4]; + int rc; + int i; + + swrap_bind_symbol_libc (fcntl); + + for (i = 0; i < 4; i++) + { + args[i] = va_arg (ap, long int); + } + + rc = swrap.libc.symbols._libc_fcntl.f (fd, + cmd, + args[0], args[1], args[2], args[3]); + + return rc; +} + +int +libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (getpeername); + + return swrap.libc.symbols._libc_getpeername.f (sockfd, addr, addrlen); +} + +int +libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (getsockname); + + return swrap.libc.symbols._libc_getsockname.f (sockfd, addr, addrlen); +} + +int +libc_getsockopt (int sockfd, + int level, int optname, void *optval, socklen_t * optlen) +{ + swrap_bind_symbol_libc (getsockopt); + + return swrap.libc.symbols._libc_getsockopt.f (sockfd, + level, + optname, optval, optlen); +} + +int +libc_listen (int sockfd, int backlog) +{ + swrap_bind_symbol_libc (listen); + + return swrap.libc.symbols._libc_listen.f (sockfd, backlog); +} + +int +libc_read (int fd, void *buf, size_t count) +{ + swrap_bind_symbol_libc (read); + + return swrap.libc.symbols._libc_read.f (fd, buf, count); +} + +ssize_t +libc_readv (int fd, const struct iovec *iov, int iovcnt) +{ + swrap_bind_symbol_libc (readv); + + return swrap.libc.symbols._libc_readv.f (fd, iov, iovcnt); +} + +int +libc_recv (int sockfd, void *buf, size_t len, int flags) +{ + swrap_bind_symbol_libc (recv); + + return swrap.libc.symbols._libc_recv.f (sockfd, buf, len, flags); +} + +int +libc_recvfrom (int sockfd, + void *buf, + size_t len, + int flags, struct sockaddr *src_addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (recvfrom); + + return swrap.libc.symbols._libc_recvfrom.f (sockfd, + buf, + len, flags, src_addr, addrlen); +} + +int +libc_recvmsg (int sockfd, struct msghdr *msg, int flags) +{ + swrap_bind_symbol_libc (recvmsg); + + return swrap.libc.symbols._libc_recvmsg.f (sockfd, msg, flags); +} + +int +libc_send (int sockfd, const void *buf, size_t len, int flags) +{ + swrap_bind_symbol_libc (send); + + return swrap.libc.symbols._libc_send.f (sockfd, buf, len, flags); +} + +int +libc_sendmsg (int sockfd, const struct msghdr *msg, int flags) +{ + swrap_bind_symbol_libc (sendmsg); + + return swrap.libc.symbols._libc_sendmsg.f (sockfd, msg, flags); +} + +int +libc_sendto (int sockfd, + const void *buf, + size_t len, + int flags, const struct sockaddr *dst_addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (sendto); + + return swrap.libc.symbols._libc_sendto.f (sockfd, + buf, + len, flags, dst_addr, addrlen); +} + +int +libc_setsockopt (int sockfd, + int level, int optname, const void *optval, socklen_t optlen) +{ + swrap_bind_symbol_libc (setsockopt); + + return swrap.libc.symbols._libc_setsockopt.f (sockfd, + level, + optname, optval, optlen); +} + +int +libc_socket (int domain, int type, int protocol) +{ + swrap_bind_symbol_libc (socket); + + return swrap.libc.symbols._libc_socket.f (domain, type, protocol); +} + +int +libc_socketpair (int domain, int type, int protocol, int sv[2]) +{ + swrap_bind_symbol_libc (socketpair); + + return swrap.libc.symbols._libc_socketpair.f (domain, type, protocol, sv); +} + +ssize_t +libc_write (int fd, const void *buf, size_t count) +{ + swrap_bind_symbol_libc (write); + + return swrap.libc.symbols._libc_write.f (fd, buf, count); +} + +ssize_t +libc_writev (int fd, const struct iovec *iov, int iovcnt) +{ + swrap_bind_symbol_libc (writev); + + return swrap.libc.symbols._libc_writev.f (fd, iov, iovcnt); +} + +int +libc_shutdown (int fd, int how) +{ + swrap_bind_symbol_libc (shutdown); + + return swrap.libc.symbols._libc_shutdown.f (fd, how); +} + +int +libc_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + swrap_bind_symbol_libc (select); + + return swrap.libc.symbols._libc_select.f (__nfds, __readfds, + __writefds, + __exceptfds, __timeout); +} + +#ifdef __USE_XOPEN2K +int +libc_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + swrap_bind_symbol_libc (pselect); + + return swrap.libc.symbols._libc_pselect.f (__nfds, __readfds, + __writefds, + __exceptfds, + __timeout, __sigmask); +} +#endif + +int libc_epoll_create (int __size) +{ + swrap_bind_symbol_libc (epoll_create); + + return swrap.libc.symbols._libc_epoll_create.f (__size); +} + +int libc_epoll_create1 (int __flags) +{ + swrap_bind_symbol_libc (epoll_create1); + + return swrap.libc.symbols._libc_epoll_create1.f (__flags); +} + +int libc_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + swrap_bind_symbol_libc (epoll_ctl); + + return swrap.libc.symbols._libc_epoll_ctl.f (__epfd, __op, __fd, + __event); +} + +int libc_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + swrap_bind_symbol_libc (epoll_wait); + + return swrap.libc.symbols._libc_epoll_wait.f (__epfd, __events, + __maxevents, __timeout); +} + +int libc_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss) +{ + swrap_bind_symbol_libc (epoll_pwait); + + return swrap.libc.symbols._libc_epoll_pwait.f (__epfd, __events, + __maxevents, __timeout, + __ss); +} + +static void +swrap_thread_prepare (void) +{ + SWRAP_LOCK_ALL; +} + +static void +swrap_thread_parent (void) +{ + SWRAP_UNLOCK_ALL; +} + +static void +swrap_thread_child (void) +{ + SWRAP_UNLOCK_ALL; +} + +/**************************** + * CONSTRUCTOR + ***************************/ +void +swrap_constructor (void) +{ + /* + * If we hold a lock and the application forks, then the child + * is not able to unlock the mutex and we are in a deadlock. + * This should prevent such deadlocks. + */ + pthread_atfork (&swrap_thread_prepare, + &swrap_thread_parent, &swrap_thread_child); +} + +/**************************** + * DESTRUCTOR + ***************************/ + +/* + * This function is called when the library is unloaded and makes sure that + * sockets get closed and the unix file for the socket are unlinked. + */ +void +swrap_destructor (void) +{ + if (swrap.libc.handle != NULL) + { + dlclose (swrap.libc.handle); + } + if (swrap.libc.socket_handle) + { + dlclose (swrap.libc.socket_handle); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.h b/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.h new file mode 100644 index 0000000..66a7bc1 --- /dev/null +++ b/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2005-2008 Jelmer Vernooij <jelmer@samba.org> + * Copyright (C) 2006-2014 Stefan Metzmacher <metze@samba.org> + * Copyright (C) 2013-2014 Andreas Schneider <asn@samba.org> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + Socket wrapper library. Passes all socket communication over + unix domain sockets if the environment variable SOCKET_WRAPPER_DIR + is set. +*/ + +#ifndef included_vcom_socket_wrapper_h +#define included_vcom_socket_wrapper_h + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/select.h> +#include <sys/epoll.h> +#include <sys/uio.h> +#include <stdlib.h> + + +/* GCC have printf type attribute check. */ +#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT +#define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b))) +#else +#define PRINTF_ATTRIBUTE(a,b) +#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */ + +#define HAVE_CONSTRUCTOR_ATTRIBUTE +#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE +#define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor)) +#else +#define CONSTRUCTOR_ATTRIBUTE +#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */ + +#define HAVE_DESTRUCTOR_ATTRIBUTE +#ifdef HAVE_DESTRUCTOR_ATTRIBUTE +#define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor)) +#else +#define DESTRUCTOR_ATTRIBUTE +#endif + +#define HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address)) +#else +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE +#endif + +/* + * IMPORTANT + * + * Functions especially from libc need to be loaded individually, you can't load + * all at once or gdb will segfault at startup. The same applies to valgrind and + * has probably something todo with with the linker. + * So we need load each function at the point it is called the first time. + */ +#ifdef HAVE_ACCEPT4 +int +libc_accept4 (int sockfd, + struct sockaddr *addr, socklen_t * addrlen, int flags); +#else /* HAVE_ACCEPT4 */ +int libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen); +#endif /* HAVE_ACCEPT4 */ + +int libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen); + +int libc_close (int fd); + +int libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen); + +#if 0 +/* TBD: dup and dup2 to be implemented later */ +int libc_dup (int fd); + +int libc_dup2 (int oldfd, int newfd); +#endif + +#ifdef HAVE_EVENTFD +int libc_eventfd (int count, int flags); +#endif + +DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int +libc_vfcntl (int fd, int cmd, va_list ap); + +int libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen); + +int libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen); + +int +libc_getsockopt (int sockfd, + int level, int optname, void *optval, socklen_t * optlen); + +int libc_listen (int sockfd, int backlog); + +int libc_read (int fd, void *buf, size_t count); + +ssize_t libc_readv (int fd, const struct iovec *iov, int iovcnt); + +int libc_recv (int sockfd, void *buf, size_t len, int flags); + +int +libc_recvfrom (int sockfd, + void *buf, + size_t len, + int flags, struct sockaddr *src_addr, socklen_t * addrlen); + +int libc_recvmsg (int sockfd, struct msghdr *msg, int flags); + +int libc_send (int sockfd, const void *buf, size_t len, int flags); + +int libc_sendmsg (int sockfd, const struct msghdr *msg, int flags); + +int +libc_sendto (int sockfd, + const void *buf, + size_t len, + int flags, const struct sockaddr *dst_addr, socklen_t addrlen); + +int +libc_setsockopt (int sockfd, + int level, int optname, const void *optval, + socklen_t optlen); + +int libc_socket (int domain, int type, int protocol); + +int libc_socketpair (int domain, int type, int protocol, int sv[2]); + +ssize_t libc_write (int fd, const void *buf, size_t count); + +ssize_t libc_writev (int fd, const struct iovec *iov, int iovcnt); + +int libc_shutdown (int fd, int how); + +int +libc_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +int +libc_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + +int libc_epoll_create (int __size); + +int libc_epoll_create1 (int __flags); + +int libc_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event); + +int libc_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +int libc_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t *__ss); + +void swrap_constructor (void); + +void swrap_destructor (void); + +#endif /* included_vcom_socket_wrapper_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vcl-ldpreload/src/vcl-ldpreload.mk b/vcl-ldpreload/src/vcl-ldpreload.mk new file mode 100644 index 0000000..001308a --- /dev/null +++ b/vcl-ldpreload/src/vcl-ldpreload.mk @@ -0,0 +1,9 @@ +vcl_ldpreload_configure_depend = vpp-install + +vcl_ldpreload_CPPFLAGS = $(call installed_includes_fn, \ + vppinfra \ + uri) + +vcl_ldpreload_LDFLAGS = $(call installed_libs_fn, \ + vppinfra \ + uri) diff --git a/vhost-test/.gitignore b/vhost-test/.gitignore new file mode 100644 index 0000000..2272b59 --- /dev/null +++ b/vhost-test/.gitignore @@ -0,0 +1,3 @@ +tmp +vmroot +conf.sh diff --git a/vhost-test/README.md b/vhost-test/README.md index ee5f56f..5564b18 100644 --- a/vhost-test/README.md +++ b/vhost-test/README.md @@ -52,11 +52,30 @@ Finally, when you are done, you can stop the VMs. $ ./vhost.sh stop -## Traffic Generation - -Traffic generation is, for now, out of the scope of this script. -You are supposed to update ./conf.sh by setting up the right parameters. -Use the traffic generator you like to test the perfs. +## Traffic Generation with MoonGen + +mg.lua is intended to be used with the MoonGen packet generator. +This script measures packet loss and forwarding with a user-defined +granularity. The script keeps running while consecutive measures are +too far on both Tx axis (--maxRateInterval option) or on the drop rate +axis (--targetDropRateRatio). The latter is a ratio instead of an interval, +as the packet drop is mostly a logarithmic value. + +The script is used as follows: +sudo /path/to/MoonGen ./mg.lua <options> + +Mandatory options are: +--rxport <id> MoonGen's rx port index +--txport <id> MoonGen's tx port index +--dst <hwaddr> Frame's destination L2 address + +Other options are: +--duration <seconds> Each measurement duration +--frameSize <bytes> Each frame size +--maxRateInterval <%> Max Tx interval between measure +--targetDropRateRatio <ratio> Max ratio between two drop rate measures +--minRateInterval <%> Min Tx interval (Will override drop ratio in case of non-continuous) +--out <file> Output file ## Administrativa diff --git a/vhost-test/conf.sh.default b/vhost-test/conf.sh.default index b3870d8..fd880d1 100644 --- a/vhost-test/conf.sh.default +++ b/vhost-test/conf.sh.default @@ -56,3 +56,7 @@ VM_VMLINUZ="/boot/vmlinuz-$(uname -r)" # to the running VM VM_USERNAME="$USER" +# Set custom queue size (default is 256). +# Setting a different value (512 or 1024) requires a patched qemu. +VIRTIO_QSZ="256" + diff --git a/vhost-test/mg.lua b/vhost-test/mg.lua new file mode 100644 index 0000000..21c4afa --- /dev/null +++ b/vhost-test/mg.lua @@ -0,0 +1,164 @@ +local mg = require "moongen" +local memory = require "memory" +local device = require "device" +local stats = require "stats" +local hist = require "histogram" +local timer = require "timer" +local barrier = require "barrier" +local table = require "table" +local math = require "math" + +function configure(parser) + parser:description("Packet loss graph.") + parser:option("--txport", "Device to transmit/receive from."):convert(tonumber) + parser:option("--rxport", "Device to transmit/receive from."):convert(tonumber) + parser:option("--dst", "Frame's destination hardware address") + parser:option("--duration", "Device to transmit/receive from."):default(20):convert(tonumber) + parser:option("--frameSize", "Ethernet frame size."):default(64):convert(tonumber) + parser:option("--maxRateInterval", "Max % rate interval between measures"):default(5):convert(tonumber) + parser:option("--minRateInterval", "Min % rate interval between measures"):default(0.1):convert(tonumber) + parser:option("--targetDropRateRatio", "Target drop rate ratio between measures"):default(2):convert(tonumber) + parser:option("--out", "Output file"):default("vhost_mg_"..os.date("%F_%H-%M")..".txt") +end + +function getHeaderString(file) + return string.format("%10s\t%10s\t%12s\t%12s\t%12s\t%12s\t%12s\t%12s\t%12s\t%12s", + "size(B)", "Time(s)", "Rate(Mbps)", "Tx(pkt)", "Rx(pkt)", "DropRate(%)", "Tx(Mpps)", "Rx(Mpps)", "Tx(Gbps)", "Rx(Gbps)") +end + +function getResultString(result, file) + return string.format("%10d\t%10d\t%12f\t%12d\t%12d\t%12f\t%12f\t%12f\t%12f\t%12f", + result.frameSize, result.duration, result.rate, result.tx, result.rx, result.dropRate, result.txMpps, result.rxMpps, result.txRate, result.rxRate) +end + +function runMeasure(txDev, rxDev, frameSize, duration, rate) + local bar = barrier.new(2) + local result = {} + txDev:getTxQueue(0):setRate(rate * frameSize / (frameSize+20)) + local stask = mg.startTask("loadSlave", txDev:getTxQueue(0), frameSize, duration, bar) + local rtask = mg.startTask("counterSlave", rxDev:getRxQueue(0), frameSize, duration, bar) + result.frameSize = frameSize + result.duration = duration + result.tx = stask:wait() + result.rx = rtask:wait() + result.rate = rate + result.dropRate = (result.tx - result.rx)/result.tx * 100 + result.txMpps = (result.tx) / duration / 1000000 + result.rxMpps = (result.rx) / duration / 1000000 + result.txRate = (result.tx * frameSize * 8) / duration / 1000000000 + result.rxRate = (result.rx * frameSize * 8) / duration / 1000000000 + + print(getResultString(result)) + return result +end + +function master(args) + local txDev = device.config({port = args.txport, rxQueues = 1, txQueues = 1}) + local rxDev = device.config({port = args.rxport, rxQueues = 1, txQueues = 1}) + local frameSize = args.frameSize + local duration = args.duration + device.waitForLinks() + + local maxLinkRate = txDev:getLinkStatus().speed + local results = {} + + -- Warming up + print ("Output file is: "..args.out) + print ("Start Warm-Up") + runMeasure(txDev, rxDev, frameSize, 1, maxLinkRate) + runMeasure(txDev, rxDev, frameSize, 1, 1000) + print ("Stop Warm-Up") + + local sortFunction = function(a,b) + return a.rate < b.rate + end + + local file = io.open(args.out, "w") + io.output(file) + io.write(getHeaderString().."\n") + io.close(file) + + print ("Start Measures") + print(getHeaderString()) + table.insert(results, runMeasure(txDev, rxDev, frameSize, duration, maxLinkRate)) + table.insert(results, runMeasure(txDev, rxDev, frameSize, duration, maxLinkRate/2)) + table.insert(results, runMeasure(txDev, rxDev, frameSize, duration, 100)) + table.sort(results, sortFunction) + + while mg.running() do + local chosen = nil + local chosenMeaning = 0 + for i, r1 in ipairs(results) do + local r2 = results[i+1] + if r2 == nil then + break + end + local rateDiff = r2.rate - r1.rate + local dropRateRatio = (r2.dropRate + 0.00000001)/(r1.dropRate + 0.00000001) + if r2.dropRate < r1.dropRate then + dropRateRatio = (r1.dropRate+ 0.00000001)/(r2.dropRate + 0.00000001) + end + -- Meaning rates next measures + -- The idea is to compute interesting results first and get more picky later. + local meaning = rateDiff / ((args.maxRateInterval * maxLinkRate) / 100 ) + math.log(dropRateRatio / args.targetDropRateRatio)/math.log(2) + if rateDiff > (args.maxRateInterval * maxLinkRate) / 100 + or (dropRateRatio > args.targetDropRateRatio and (rateDiff) > (args.minRateInterval * maxLinkRate) / 100) then + if meaning > chosenMeaning then + chosenMeaning = meaning + chosen = i + end + end + end + if (chosen == nil) then + break + end + local nextRate = (results[chosen+1].rate + results[chosen].rate)/2 + table.insert(results, runMeasure(txDev, rxDev, frameSize, duration, nextRate)) + table.sort(results, sortFunction) + + local file = io.open(args.out, "w") + io.output(file) + io.write(getHeaderString().."\n") + for i, r1 in ipairs(results) do + io.write(getResultString(r1).."\n") + end + io.close(file) + end + + +end + +function loadSlave(queue, frameSize, duration, bar) + bar:wait() + + local mem = memory.createMemPool(function(buf) + buf:getEthernetPacket():fill{ + ethSrc = queue, + ethDst = ETH_DST, + ethType = 0x1234 + } + end) + + local bufs = mem:bufArray() + local timer = timer:new(duration) + local total = 0; + while timer:running() do + bufs:alloc(frameSize) + total = total + queue:send(bufs) + end + return total +end + +function counterSlave(queue, frameSize, duration, bar) + local bufs = memory.bufArray() + local total = 0; + bar:wait() + + local timer = timer:new(duration + 1) + while timer:running() do + total = total + queue:tryRecv(bufs, 1000) + bufs:freeAll() + end + return total +end + diff --git a/vhost-test/vhost.sh b/vhost-test/vhost.sh index 552bdc5..fc3dce5 100755 --- a/vhost-test/vhost.sh +++ b/vhost-test/vhost.sh @@ -21,6 +21,7 @@ VPP_BUILD="xxx" VPP_INSTALL="xxx" VPP="xxx" DPDK_BIND="xxx" +VPPCTL="xxx" CORES_VM_LIST="xxx" CORES_VM_N="xxx" @@ -102,7 +103,7 @@ function load_config() { #Validate config validate_parameter VPP_DIR VPP_IF0_PCI VPP_IF0_MAC VPP_IF1_PCI VPP_IF1_MAC VPP_IF0_NAME VPP_IF1_NAME \ - QEMU VM_ROOT VM_INITRD VM_VMLINUZ VM_VNCPORT VM_USERNAME + QEMU VM_ROOT VM_INITRD VM_VMLINUZ VM_VNCPORT VM_USERNAME VPP_VHOST_MODE VIRTIO_QSZ validate_directory VPP_DIR VM_ROOT validate_file VM_INITRD VM_VMLINUZ validate_exec QEMU @@ -121,6 +122,10 @@ function load_config() { echo "Invalid VPP_BUILD parameter '$VPP_BUILD'" && exit 1 fi + if [ "$VPP_VHOST_MODE" != "client" -a "$VPP_VHOST_MODE" != "server" ]; then + echo "Invalid VPP_VHOST_MODE (must be client or server)" && exit 1 + fi + if [ "$QUEUES" != "1" -a "$QUEUES" != "2" ]; then echo "QUEUES can only be 1 or 2" exit 7 @@ -128,6 +133,7 @@ function load_config() { VPP="$VPP_INSTALL/vpp/bin/vpp" DPDK_BIND="$(ls $VPP_BUILD/dpdk/dpdk-*/tools/dpdk-devbind.py | head -n 1)" + VPPCTL="sudo env PATH=$PATH:${VPP_INSTALL}/../install-vpp_debug-native/vpp-api-test/bin/ vppctl" validate_exec VPP DPDK_BIND @@ -215,7 +221,7 @@ EOF cat > "$VMDIR/etc/rc.local" << EOF #!/bin/sh mkdir -p /var/log/startup/ -for exe in \`ls /etc/startup.d\`; do +for exe in \$(ls /etc/startup.d); do echo -n "Startup script \$exe " ( (nohup /etc/startup.d/\$exe > /var/log/startup/\$exe 2>&1 &) && echo "[OK]") || echo "[Failed]" done @@ -267,6 +273,16 @@ EOF fi LAST_VM_CPU="$(expr $CORES_VM_N - 1)" + VM_VH_SERV_PARAM="" + if [ "$VPP_VHOST_MODE" = "client" ]; then + VM_VH_SERV_PARAM=",server" + fi + + QSZ="" + if [ "$VIRTIO_QSZ" != "256" ]; then + QSZ=",rx_virtqueue_sz=$VIRTIO_QSZ,tx_virtqueue_sz=$VIRTIO_QSZ" + fi + cat << EOF > "$TMP_DIR/vm.conf" -enable-kvm -machine pc -initrd $VM_INITRD -kernel $VM_VMLINUZ -vnc 127.0.0.1:1 -m 4G -append 'root=ro ro rootfstype=9p rootflags=trans=virtio nohz_full=1-$LAST_VM_CPU isolcpus=1-$LAST_VM_CPU rcu_nocbs=1-$LAST_VM_CPU selinux=0 audit=0 net.ifnames=0 biosdevname=0' @@ -277,15 +293,135 @@ EOF -device virtio-9p-pci,id=dev_fs,fsdev=fsdev_id,mount_tag=ro -daemonize -pidfile $TMP_DIR/qemu.pid --chardev socket,id=chr0,path=$TMP_DIR/sock0,server +-chardev socket,id=chr0,path=$TMP_DIR/sock0${VM_VH_SERV_PARAM} -netdev type=vhost-user,id=thrnet0,chardev=chr0,queues=$QUEUES --device virtio-net-pci,netdev=thrnet0,mac=de:ad:be:ef:01:00,bus=pci.0,addr=7.0${MQ} +-device virtio-net-pci,netdev=thrnet0,mac=de:ad:be:ef:01:00,bus=pci.0,addr=7.0,mrg_rxbuf=on,indirect_desc=on${MQ}${QSZ} -object memory-backend-file,id=mem,size=4096M,mem-path=/mnt/huge,share=on -numa node,memdev=mem --chardev socket,id=chr1,path=$TMP_DIR/sock1,server +-chardev socket,id=chr1,path=$TMP_DIR/sock1${VM_VH_SERV_PARAM} -netdev type=vhost-user,id=thrnet1,chardev=chr1,queues=$QUEUES --device virtio-net-pci,netdev=thrnet1,mac=de:ad:be:ef:01:01,bus=pci.0,addr=8.0${MQ} +-device virtio-net-pci,netdev=thrnet1,mac=de:ad:be:ef:01:01,bus=pci.0,addr=8.0,mrg_rxbuf=on,indirect_desc=on${MQ}${QSZ} +EOF +} + +function get_vhost_thread_config() { + VH_IF0="$1" + VH_IF1="$2" + + if [ "$VH_IF0" = "" -o "$VH_IF1" = "" ]; then + echo "Missing get_vhost_thread_config interface argument" + exit 1 + fi + + DEL="" + if [ "$3" = "del" ]; then + DEL="del" + elif [ "$3" != "" ]; then + echo "Invalid get_vhost_thread_config argument" + exit 1 + fi + + #VHOST queue pining + if [ "$QUEUES" = "1" ]; then + if [ "$CORES_VPP_N" = "0" ]; then + echo -n "" + elif [ "$CORES_VPP_N" = "1" ]; then + echo "vhost thread $VH_IF1 1 ${DEL}" + echo "vhost thread $VH_IF0 1 ${DEL}" + elif [ "$CORES_VPP_N" -lt "4" ]; then + echo "vhost thread $VH_IF1 2 ${DEL}" + echo "vhost thread $VH_IF0 1 ${DEL}" + else + echo "vhost thread $VH_IF1 3 ${DEL}" + echo "vhost thread $VH_IF0 4 ${DEL}" + fi + elif [ "$QUEUES" = "2" ]; then + if [ "$CORES_VPP_N" = "0" ]; then + echo -n "" + elif [ "$CORES_VPP_N" = "1" ]; then + echo "vhost thread $VH_IF1 1 ${DEL}" + echo "vhost thread $VH_IF1 1 ${DEL}" + echo "vhost thread $VH_IF0 1 ${DEL}" + echo "vhost thread $VH_IF0 1 ${DEL}" + elif [ "$CORES_VPP_N" -lt "4" ]; then + echo "vhost thread $VH_IF1 2 ${DEL}" + echo "vhost thread $VH_IF1 2 ${DEL}" + echo "vhost thread $VH_IF0 1 ${DEL}" + echo "vhost thread $VH_IF0 1 ${DEL}" + else + echo "vhost thread $VH_IF1 3 ${DEL}" + echo "vhost thread $VH_IF1 4 ${DEL}" + echo "vhost thread $VH_IF0 1 ${DEL}" + echo "vhost thread $VH_IF0 2 ${DEL}" + fi + fi +} + +function disconnect_vhost() { + VH_INST=$1 + + if [ ! -f "$TMP_DIR/vpp-vhost-disconnect$VH_INST.conf" ]; then + echo "No configured vhost devices $VH_INST" + exit 1 + fi + + echo "------- Disconnect vhost --------" + cat "$TMP_DIR/vpp-vhost-disconnect$VH_INST.conf" + echo "-------------------------------" + + $VPPCTL exec "$TMP_DIR/vpp-vhost-disconnect$VH_INST.conf" + rm "$TMP_DIR/vpp-vhost-disconnect$VH_INST.conf" +} + +function connect_vhost() { + VH_INST=$1 + + if [ -f "$TMP_DIR/vpp-vhost-disconnect$VH_INST.conf" ]; then + echo "Vhost devices $VH_INST already configured" + exit 1 + fi + + VH_SERV_PARAM="" + if [ "$VPP_VHOST_MODE" = "server" ]; then + VH_SERV_PARAM="server" + fi + echo "create vhost-user socket $TMP_DIR/sock0$VH_INST $VH_SERV_PARAM hwaddr aa:aa:aa:aa:bb:b1" + VH_IF0=$($VPPCTL "create vhost-user socket $TMP_DIR/sock0$VH_INST $VH_SERV_PARAM hwaddr aa:aa:aa:aa:bb:b1") + VH_IF1=$($VPPCTL "create vhost-user socket $TMP_DIR/sock1$VH_INST $VH_SERV_PARAM hwaddr aa:aa:aa:aa:bb:b2") + + if [ "$VH_INST" = "" ]; then + cat << EOF > "$TMP_DIR/vpp-vhost-connect$VH_INST.conf" +set interface l2 xconnect $VH_IF0 $VPP_IF0_NAME +set interface l2 xconnect $VH_IF1 $VPP_IF1_NAME +set interface l2 xconnect $VPP_IF0_NAME $VH_IF0 +set interface l2 xconnect $VPP_IF1_NAME $VH_IF1 +set interface state $VH_IF0 up +set interface state $VH_IF1 up +EOF + else + cat << EOF > "$TMP_DIR/vpp-vhost-connect$VH_INST.conf" +set interface state $VH_IF0 up +set interface state $VH_IF1 up EOF + fi + + cat << EOF > "$TMP_DIR/vpp-vhost-disconnect$VH_INST.conf" +delete vhost-user $VH_IF0 +delete vhost-user $VH_IF1 +EOF + + if [ "$USE_DEFAULT_VHOST_PLACEMENT" != "1" ]; then + get_vhost_thread_config $VH_IF0 $VH_IF1 >> "$TMP_DIR/vpp-vhost-connect$VH_INST.conf" + #get_vhost_thread_config $VH_IF0 $VH_IF1 del >> "$TMP_DIR/vpp-vhost-disconnect$VH_INST.conf" + fi + + echo "------- Connect vhost --------" + echo "create vhost-user socket $TMP_DIR/sock0$VH_INST $VH_SERV_PARAM hwaddr aa:aa:aa:aa:bb:b1" + echo "create vhost-user socket $TMP_DIR/sock1$VH_INST $VH_SERV_PARAM hwaddr aa:aa:aa:aa:bb:b2" + cat "$TMP_DIR/vpp-vhost-connect$VH_INST.conf" + echo "-------------------------------" + + $VPPCTL exec "$TMP_DIR/vpp-vhost-connect$VH_INST.conf" } function prepare_vpp() { @@ -320,65 +456,9 @@ dpdk { dev $VPP_IF0_PCI { $VPP_DEV0 } dev $VPP_IF1_PCI { $VPP_DEV1 } } EOF cat << EOF > "$TMP_DIR/vpp.conf" -create vhost-user socket $TMP_DIR/sock0 hwaddr aa:aa:aa:aa:bb:b1 -create vhost-user socket $TMP_DIR/sock1 hwaddr aa:aa:aa:aa:bb:b2 -set interface l2 xconnect VirtualEthernet0/0/0 $VPP_IF0_NAME -set interface l2 xconnect VirtualEthernet0/0/1 $VPP_IF1_NAME -set interface l2 xconnect $VPP_IF0_NAME VirtualEthernet0/0/0 -set interface l2 xconnect $VPP_IF1_NAME VirtualEthernet0/0/1 -set interface state VirtualEthernet0/0/0 up -set interface state VirtualEthernet0/0/1 up set interface state $VPP_IF1_NAME up set interface state $VPP_IF0_NAME up EOF - - #VHOST queue pining - if [ "$QUEUES" = "1" -a "$USE_DEFAULT_VHOST_PLACEMENT" != "1" ]; then - if [ "$CORES_VPP_N" = "0" ]; then - echo -n "" - elif [ "$CORES_VPP_N" = "1" ]; then - cat << EOF >> "$TMP_DIR/vpp.conf" -vhost thread VirtualEthernet0/0/1 1 -vhost thread VirtualEthernet0/0/0 1 -EOF - elif [ "$CORES_VPP_N" -lt "4" ]; then - cat << EOF >> "$TMP_DIR/vpp.conf" -vhost thread VirtualEthernet0/0/1 2 -vhost thread VirtualEthernet0/0/0 1 -EOF - else - cat << EOF >> "$TMP_DIR/vpp.conf" -vhost thread VirtualEthernet0/0/1 3 -vhost thread VirtualEthernet0/0/0 4 -EOF - fi - elif [ "$QUEUES" = "2" -a "$USE_DEFAULT_VHOST_PLACEMENT" != "1" ]; then - if [ "$CORES_VPP_N" = "0" ]; then - echo -n "" - elif [ "$CORES_VPP_N" = "1" ]; then - cat << EOF >> "$TMP_DIR/vpp.conf" -vhost thread VirtualEthernet0/0/1 1 -vhost thread VirtualEthernet0/0/1 1 -vhost thread VirtualEthernet0/0/0 1 -vhost thread VirtualEthernet0/0/0 1 -EOF - elif [ "$CORES_VPP_N" -lt "4" ]; then - cat << EOF >> "$TMP_DIR/vpp.conf" -vhost thread VirtualEthernet0/0/1 2 -vhost thread VirtualEthernet0/0/1 2 -vhost thread VirtualEthernet0/0/0 1 -vhost thread VirtualEthernet0/0/0 1 -EOF - else - cat << EOF >> "$TMP_DIR/vpp.conf" -vhost thread VirtualEthernet0/0/1 3 -vhost thread VirtualEthernet0/0/1 4 -vhost thread VirtualEthernet0/0/0 1 -vhost thread VirtualEthernet0/0/0 2 -EOF - fi - fi - } function prepare() { @@ -389,6 +469,11 @@ function prepare() { } function start_vpp() { + if [ -f "$TMP_DIR/vpp-running" ]; then + echo "VPP is already running" + return 1 + fi + GDB="" if [ "$VPP_GDB" = "1" ]; then [ -e "$TMP_DIR/vpp.sh.gdbinit" ] && sudo rm "$TMP_DIR/vpp.sh.gdbinit" @@ -400,7 +485,7 @@ run EOF GDB="gdb -x $TMP_DIR/vpp.sh.gdbinit --args " fi - + echo "------- Starting VPP --------" echo " Screen $VPPSCREEN (sudo screen -r $VPPSCREEN)" echo " Command-line Conf:" @@ -408,16 +493,30 @@ EOF echo " CLI Conf:" cat $TMP_DIR/vpp.conf echo "-----------------------------" - + sudo screen -d -m -S "$VPPSCREEN" $GDB $VPP_DIR/build-root/install-vpp-native/vpp/bin/vpp -c $TMP_DIR/vpp.cmdline + touch "$TMP_DIR/vpp-running" +} + +function stop_vpp() { + set +e + sudo screen -S "$VPPSCREEN" -X quit && echo "Stopping VPP" + [ -f "$TMP_DIR/vpp-running" ] && rm "$TMP_DIR/vpp-running" } function start_vm() { + if [ -f "$TMP_DIR/qemu.pid" ]; then + echo "VM already running" + return 1 + fi + echo "------- Starting VM --------" echo " VM conf:" cat $TMP_DIR/vm.conf echo "----------------------------" + vmdir_mount + #Eval is used so that ' characters are not ignored eval sudo chrt -rr 1 taskset -c $CORES_VM $QEMU $(cat $TMP_DIR/vm.conf) @@ -430,6 +529,17 @@ function start_vm() { sudo ip link set $VMTAP up } +function stop_vm() { + set +e + + [ -f "$TMP_DIR/qemu.pid" ] && echo "Stopping VM ($(sudo cat $TMP_DIR/qemu.pid))" && sudo kill "$(sudo cat $TMP_DIR/qemu.pid)" && sudo rm $TMP_DIR/qemu.pid + + sudo ip link set $BRNAME down + sudo brctl delbr $BRNAME + + vmdir_umount +} + function start() { if [ -f "$TMP_DIR/.started" ]; then echo "$TMP_DIR/.started exists" @@ -439,17 +549,14 @@ function start() { fi banner - prepare touch "$TMP_DIR/.started" - echo "0" | sudo tee /proc/sys/kernel/watchdog_cpumask start_vpp - - vmdir_mount - + sleep 10 + connect_vhost start_vm } @@ -465,25 +572,40 @@ function pin_vm() { sudo taskset -pc ${CORES_VM_ARRAY[$idx]} $p && sudo chrt -r -p 1 $p idx=$(expr $idx + 1) done + + #pin non-running processes to other cores + if [ "${CORES_VM_ARRAY[$idx]}" != "" ]; then + PIDS=$(ps -eLf | grep qemu-system-x86_64 | awk '$5 < 20 { print $4; }') + for p in $PIDS; do + echo "VM lazy process $p on core ${CORES_VM_ARRAY[$idx]}" + sudo taskset -pc ${CORES_VM_ARRAY[$idx]} $p || echo err + done + fi } function pin_vpp() { - + for i in $(ls /proc/irq/ | grep [0-9]); do echo 1 | sudo tee /proc/irq/$i/smp_affinity > /dev/null || true ; done PIDS=$(ps -eLf | grep /bin/vpp | awk '$5 > 50 { print $4; }') + skip_first=1 idx=0 for p in $PIDS; do if [ "${CORES_VPP_ARRAY[$idx]}" = "" ]; then echo "Too many working threads in VPP" return 1 fi - echo "VPP PID $p on core ${CORES_VPP_ARRAY[$idx]}" - sudo taskset -pc ${CORES_VPP_ARRAY[$idx]} $p && sudo chrt -r -p 1 $p - idx=$(expr $idx + 1) + if [ "$skip_first" = "1" ]; then + echo "Skipping $p" + skip_first=0 + else + echo "VPP PID $p on core ${CORES_VPP_ARRAY[$idx]}" + sudo taskset -pc ${CORES_VPP_ARRAY[$idx]} $p && sudo chrt -r -p 1 $p + idx=$(expr $idx + 1) + fi done } @@ -494,19 +616,21 @@ function pin() { function stop() { set +e - - [ -f "$TMP_DIR/qemu.pid" ] && echo "Stopping VM ($(sudo cat $TMP_DIR/qemu.pid))" && sudo kill "$(sudo cat $TMP_DIR/qemu.pid)" && sudo rm $TMP_DIR/qemu.pid - - vmdir_umount - - sudo ip link set $BRNAME down - sudo brctl delbr $BRNAME - - sudo screen -S "$VPPSCREEN" -X quit && echo "Stopping VPP" - + stop_vm + stop_vpp [ -f "$TMP_DIR/.started" ] && rm "$TMP_DIR/.started" } +function cmd_stop_vm() { + load_config + stop_vm +} + +function cmd_start_vm() { + load_config + start_vm +} + function cmd_openvnc() { load_config echo Please VNC to 5900 to connect to this VM console @@ -560,6 +684,16 @@ function cmd_config() { load_config } +function cmd_disconnect() { + load_config + disconnect_vhost $@ +} + +function cmd_connect() { + load_config + connect_vhost $@ +} + [ "$1" = "" ] && echo "Missing arguments" && usage && exit 1 CMD="$1" shift diff --git a/vpp-bootstrap/.gitignore b/vpp-bootstrap/.gitignore new file mode 100644 index 0000000..ddbe19d --- /dev/null +++ b/vpp-bootstrap/.gitignore @@ -0,0 +1,10 @@ +.vagrant/ +*~ +*# +*.cmd + +node_modules/ +frontend/ +dist.dev/ +node-info.log +node-error.log
\ No newline at end of file diff --git a/vpp-bootstrap/README.md b/vpp-bootstrap/README.md new file mode 100644 index 0000000..a8204e3 --- /dev/null +++ b/vpp-bootstrap/README.md @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright (c) 2016 Intel Corporation + */ + +# GOAL + +The aim of the project is provide a lightweight development environment +for those looking to quickly start VPP development. Including but not +limited to training events and workshops. + +# COMMITTERS + +Ray Kinsella <ray.kinsella@intel.com> + +# INTRO +vpp-bootstrap is a vagrant image to create an environment to rapidly +bootstrap vpp development. All required sources, dependencies and test +tools are included in the environment. + +To build the image, simple do + + vagrant up + +# CURRENT STATE + +Supports VPP 17.01 on VirtualBox and AWS + +# Evolution + +Support for VPP 17.04. diff --git a/vpp-bootstrap/Vagrantfile b/vpp-bootstrap/Vagrantfile new file mode 100644 index 0000000..e607c44 --- /dev/null +++ b/vpp-bootstrap/Vagrantfile @@ -0,0 +1,108 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : +# Copyright (c) 2016 Intel Corporation + +unless Vagrant.has_plugin?("vagrant-reload") + raise 'vagrant-reload (plugin) is not installed!' +end + +Vagrant.configure(2) do |config| + + # Pick the right distro and bootstrap, default is ubuntu1604 + config.vm.box = "puppetlabs/ubuntu-16.04-64-nocm" + vmcpu=(ENV['VPP_VAGRANT_VMCPU'] || 2) + vmram=(ENV['VPP_VAGRANT_VMRAM'] || 1024) + + # Define some physical ports for your VMs to be used by DPDK + config.vm.network "private_network", type: "dhcp" + + config.vm.provision :shell, :path => File.join(File.dirname(__FILE__),"provision.sh") , privileged: false + config.vm.provision :reload + + # vagrant-cachier caches apt/yum etc to speed subsequent + # vagrant up + # to enable, run + # vagrant plugin install vagrant-cachier + # + if Vagrant.has_plugin?("vagrant-cachier") + config.cache.scope = :box + end + + # use http proxy if avaiable + if ENV['http_proxy'] && Vagrant.has_plugin?("vagrant-proxyconf") + config.proxy.http = ENV['http_proxy'] + config.proxy.https = ENV['https_proxy'] + config.proxy.no_proxy = "localhost,127.0.0.1" + end + + config.vm.provider :aws do |aws, override| + #disable any corporate proxies in the AWS cloud + if Vagrant.has_plugin?("vagrant-proxyconf") + override.proxy.enabled = false + end + + #Use rsync instead of nfs to sync folders. + override.vm.synced_folder ".", "/vagrant", type: "rsync", + rsync__exclude: ".git/" + + #We don't need a local box, use the vagrant-aws dummy instead + override.vm.box = "dummy" + + #These are the credentials required to access AWS Instructure. + #vagrant-aws requires these to create the new instance. + #These can either be your AWS root account access key (not recommended) + #or an IAM user with sufficent rights to create EC2 instances. + aws.access_key_id = "abcdefg" + aws.secret_access_key = "abcdefg" + + #Your preferred region, Ireland is always a good choice. + aws.region = "eu-west-1" + + #The EC2 keypair used to provision remote access creds in the + #newly created EC2 instance. These creds permit remote access via ssh. + aws.keypair_name = "ec2" + + #Security groups (ACLs) to provision new EC2 instance with. + #At least one of the security groups should allow SSH. + #to enable `vagrant ssh` to work. + aws.security_groups = [ "permit-ssh", "default" ] + + #Amazon Machine Instance (AMI) to use, default is Ubuntu Xenial (HVM). + aws.ami = "ami-405f7226" + + #EC2 instance type (how much cpu/mem resources to give the instance). + aws.instance_type = "t2.micro" + + #Any proxy command required for ssh to workaround corporate firewalls + #override.ssh.proxy_command = "nc -x proxy.com:1080 %h %p" + + #Ubuntu AMIs use ubuntu as the default username, not vagrant. + override.ssh.username = "ubuntu" + + #Private key to access new EC2 instance via SSH, should be the private + #key from the keypair_name created above. + override.ssh.private_key_path = "/root/private_key.pem" + end + config.vm.provider "virtualbox" do |vb| + vb.name = "vpp-bootstrap" + + vb.customize ["modifyvm", :id, "--ioapic", "on"] + vb.memory = "#{vmram}" + vb.cpus = "#{vmcpu}" + + vb.customize ["setextradata", :id, "VBoxInternal/CPUM/SSE4.1", "1"] + vb.customize ["setextradata", :id, "VBoxInternal/CPUM/SSE4.2", "1"] + end + config.vm.provider "vmware_fusion" do |fusion,override| + fusion.vmx["memsize"] = "#{vmram}" + fusion.vmx["numvcpus"] = "#{vmcpu}" + end + config.vm.provider "libvirt" do |lv| + lv.memory = "#{vmram}" + lv.cpus = "#{vmcpu}" + end + config.vm.provider "vmware_workstation" do |vws,override| + vws.vmx["memsize"] = "#{vmram}" + vws.vmx["numvcpus"] = "#{vmcpu}" + end +end diff --git a/vpp-bootstrap/containers/cone.cntr b/vpp-bootstrap/containers/cone.cntr new file mode 100644 index 0000000..9c00ec0 --- /dev/null +++ b/vpp-bootstrap/containers/cone.cntr @@ -0,0 +1,5 @@ +DESC: This container is used for vpp testing with scapy. +DIST: ubuntu +VER: trusty +PACKAGES: python-pip +PIP: scapy diff --git a/vpp-bootstrap/containers/ctwo.cntr b/vpp-bootstrap/containers/ctwo.cntr new file mode 100644 index 0000000..2a764e2 --- /dev/null +++ b/vpp-bootstrap/containers/ctwo.cntr @@ -0,0 +1,4 @@ +DESC: This is the vpp build/test container. +DIST: ubuntu +VER: trusty +PACKAGES: make gcc autotools-dev autoconf linux-headers-kernver gdb diff --git a/vpp-bootstrap/containers/ctwo.provision.sh b/vpp-bootstrap/containers/ctwo.provision.sh new file mode 100755 index 0000000..f914f2d --- /dev/null +++ b/vpp-bootstrap/containers/ctwo.provision.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright (c) 2016 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +VPP_VERSION=v17.04 +VPP_DIR=~/vpp +VPP_GIT="https://git.fd.io/vpp" + +echo Cloning $VPP_GIT +git clone $VPP_GIT $VPP_DIR + +# Install dependencies +echo Building $VPP_DIR +cd $VPP_DIR +git checkout -b $VPP_VERSION $VPP_VERSION +make UNATTENDED=yes install-dep + +make wipe +(cd build-root/; make distclean) +rm -f build-root/.bootstrap.ok + +# Build and install packaging +make bootstrap +make build diff --git a/vpp-bootstrap/provision.sh b/vpp-bootstrap/provision.sh new file mode 100755 index 0000000..5c01939 --- /dev/null +++ b/vpp-bootstrap/provision.sh @@ -0,0 +1,257 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2016 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PACKAGE_REPO="https://nexus.fd.io/content/repositories/fd.io.stable.1704.ubuntu.xenial.main/" +HOME_DIR="/home/$USER" +RC_LOCAL="/etc/rc.local" +SSH_OPTIONS="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" +APT_PROXY_CONF="/etc/apt/apt.conf.d/01proxy" +ENV_FILE="/etc/environment" +UNAMER=$(uname -r) + +# LXC gives backend interfaces horrible names, give them a better name. +function rename_veth_interface() { + + local cntr="$1" + local nifname="$2" + + ifr_index=`sudo lxc-attach -n $cntr -- ip -o link | tail -n 1 | awk -F : '{print $1}'` + ifr_index=$((ifr_index+1)) + + for dir in /sys/class/net/*/ + do + ifindex=`cat $dir/ifindex` + if [ $ifindex == $ifr_index ] + then ifname=`basename $dir` + fi + done + + sudo ip link set $ifname down + sudo ip link set $ifname name $nifname + sudo ip link set $nifname up +} + +function add_to_rc_local() +{ + local str="$1" + + echo -e "$str" | sudo tee -a $RC_LOCAL +} + +function sudo_exec() { + + CMD="$1" + add_to_rc_local="${2:-0}" + + if [ "$add_to_rc_local" == "1" ]; then + add_to_rc_local "$CMD" + fi + + CMD="sudo $CMD" + + eval "${CMD}" +} + +function lxc_exec() { + + cntr="$1" + rCMD="$2" + add_to_rc_local="${3:-0}" + + CMD="lxc-attach -n $cntr -- $rCMD" + + echo "$CMD" + sudo_exec "$CMD" $add_to_rc_local +} + +function get_field() { + file="$1" + field="$2" + + value=$(grep $field $file | awk -F : '{print $2}' | sed -e 's/^[ ]*//' | sed -e 's/kernver/"$UNAMER"/') + echo $value +} + +echo "deb $PACKAGE_REPO ./" | sudo tee -a /etc/apt/sources.list.d/99fd.io.list +sudo apt-get -qq update +sudo apt-get -qq install -y --force-yes linux-image-extra-$(uname -r) lxc bridge-utils tmux +sudo apt-get -qq install -y --force-yes vpp vpp vpp-dpdk-dkms vpp-plugins + +#Disable DPDK to make memory requirements more modest +sudo sed -i_dpdk '47,52d' /etc/vpp/startup.conf +echo -e "plugins {\n\tplugin dpdk_plugin.so { disable }\n}" | sudo tee -a /etc/vpp/startup.conf + +#Fix VPP on the host to use 32 hugepages +echo -e "heapsize 64M" | sudo tee -a /etc/vpp/startup.conf +sudo sed -i 's/vm.nr_hugepages=1024/vm.nr_hugepages=32/' /etc/sysctl.d/80-vpp.conf +sudo sed -i 's/kernel.shmmax=2147483648/kernel.shmmax=67018864/' /etc/sysctl.d/80-vpp.conf + +#Provision containers with two network connections, second connection is unconnected +echo -e "lxc.network.name=veth0" | sudo tee -a /etc/lxc/default.conf +echo -e "lxc.network.type = veth" | sudo tee -a /etc/lxc/default.conf +echo -e "lxc.network.hwaddr = 00:17:3e:xx:xx:xx\n" | sudo tee -a /etc/lxc/default.conf +echo -e "lxc.network.name=veth_link1" | sudo tee -a /etc/lxc/default.conf + +sudo lxc-checkconfig + +# update rc.local to be interpreted with bash +sudo sed -i '1 s/^.*$/#!\/bin\/bash/g' $RC_LOCAL +# remove the exit 0 from rc.local. +sudo sed -i 's/exit 0//' $RC_LOCAL + +# add rename_veth_interface to /etc/rc.local +read -r -d '' TMP_RVI <<'EOF' +function rename_veth_interface() { + + local cntr="$1" + local nifname="$2" + + ifr_index=`sudo lxc-attach -n $cntr -- ip -o link | tail -n 1 | awk -F : '{print $1}'` + ifr_index=$((ifr_index+1)) + + for dir in /sys/class/net/*/ + do + ifindex=`cat $dir/ifindex` + if [ $ifindex == $ifr_index ] + then ifname=`basename $dir` + fi + done + + sudo ip link set $ifname down + sudo ip link set $ifname name $nifname + sudo ip link set $nifname up +} +EOF +add_to_rc_local "$TMP_RVI" + +# For the moment just cross connect the host, will more clever later. +read -r -d '' TMP_CCI <<'EOF' +function cross_connect_interfaces() { + + sudo vppctl create host-interface name veth-cone + sudo vppctl create host-interface name veth-ctwo + sudo vppctl set int l2 xconnect host-veth-cone host-veth-ctwo + sudo vppctl set int l2 xconnect host-veth-ctwo host-veth-cone + sudo vppctl set int state host-veth-cone up + sudo vppctl set int state host-veth-ctwo up +} +EOF +add_to_rc_local "$TMP_CCI" + +ssh-keygen -t rsa -b 1024 -N "" -f ~/.ssh/id_rsa +openssh_pubkey=`cat ~/.ssh/id_rsa.pub` + +#Ensure that virtual bridge comes up after boot +add_to_rc_local "#autostart vpp on the host" +sudo_exec "service vpp start" 1 + +for f in $(ls /vagrant/containers/*.cntr) +do + i=$(basename $f | sed s/.cntr//) + dist=$(get_field $f DIST) + ver=$(get_field $f VER) + packages=$(get_field $f PACKAGES) + pip=$(get_field $f PIP) + provision_file="/vagrant/containers/"$i".provision.sh" + + sudo lxc-create -t download -n $i -- --dist $dist --release $ver --arch amd64 + + #autostart container after a reboot (standard lxc way doesn't work). + add_to_rc_local "#autostart container $i" + + sudo_exec "lxc-start -n $i -d" 1 + + lxc_exec $i "resolvconf -d veth0" + + #dhcp after boot + lxc_exec $i "dhclient veth0" 1 + + #insert delay to allow completion before starting ssh service + add_to_rc_local "sleep 1" + + lxc_exec $i "apt-get -qq install -y git openssh-server" + lxc_exec $i "apt-get -qq update" + + lxc_exec $i "adduser --disabled-password --gecos \"\" $USER" + + lxc_exec $i "mkdir -p /root/.ssh/" + lxc_exec $i "mkdir -p $HOME_DIR/.ssh/" + + lxc_exec $i "sh -c 'echo $openssh_pubkey >> /root/.ssh/authorized_keys'" + lxc_exec $i "sh -c 'echo $openssh_pubkey >> $HOME_DIR/.ssh/authorized_keys'" + + lxc_exec $i "chmod 0600 /root/.ssh/authorized_keys" + lxc_exec $i "chmod 0600 $HOME_DIR/.ssh/authorized_keys" + + lxc_exec $i "chown -R $USER.$USER $HOME_DIR/.ssh/" + + lxc_exec $i "sh -c 'echo \"%$USER ALL=(ALL) NOPASSWD: ALL\" > /etc/sudoers.d/10_$USER'" + + lxc_exec $i "update-alternatives --install /bin/sh sh /bin/bash 100" + + lxc_exec $i "apt-get -qq install $packages" + + lxc_exec $i "service ssh restart" 1 + + ip_address=$(sudo lxc-ls -f | grep $i | awk '{print $5}') + echo $ip_address $i | sudo tee -a /etc/hosts + + if [ -s $APT_PROXY_CONF ] + then + scp $SSH_OPTIONS $APT_PROXY_CONF root@$i:$APT_PROXY_CONF + fi + + if [ -s $ENV_FILE ] + then + scp $SSH_OPTIONS $ENV_FILE root@$i:$ENV_FILE + fi + + #rename the backend interface to something sensible + rename_veth_interface $i "veth-$i" + add_to_rc_local "rename_veth_interface $i 'veth-$i'" + + if [ -s $provision_file ] + then + tmpname=$(mktemp)".sh" + scp $SSH_OPTIONS $provision_file $USER@$i:$tmpname + ssh $SSH_OPTIONS $USER@$i "sh -c $tmpname" + fi + + #install any pip packages + if [ ! -z "$pip" ] + then + ssh -t $SSH_OPTIONS $USER@$i "sudo -E pip install $pip" + fi + +done + +#cross connect the containers +add_to_rc_local "sleep 1" +add_to_rc_local "cross_connect_interfaces" + +add_to_rc_local "exit 0" + +#setting password to username +echo "$USER:$USER" | sudo chpasswd + +echo -e "List of containers deployed in the dev environment:" | sudo tee -a /etc/motd +for f in $(ls /vagrant/containers/*.cntr) +do + i=$(basename $f | sed s/.cntr//) + desc=$(get_field $f DESC) + echo -e $i":\t"$desc | sudo tee -a /etc/motd +done + +echo "To access the environment, type 'vagrant ssh'" diff --git a/vpp-bootstrap/update.sh b/vpp-bootstrap/update.sh new file mode 100755 index 0000000..d3e0094 --- /dev/null +++ b/vpp-bootstrap/update.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Make sure that we get the hugepages we need on provision boot +# Note: The package install should take care of this at the end +# But sometimes after all the work of provisioning, we can't +# get the requested number of hugepages without rebooting. +# So do it here just in case +sysctl -w vm.nr_hugepages=1024 +HUGEPAGES=`sysctl -n vm.nr_hugepages` +if [ $HUGEPAGES != 1024 ]; then + echo "ERROR: Unable to get 1024 hugepages, only got $HUGEPAGES. Cannot finish." + exit +fi + +exit 0 + +# Figure out what system we are running on +if [ -f /etc/lsb-release ];then + . /etc/lsb-release +elif [ -f /etc/redhat-release ];then + yum install -y redhat-lsb + DISTRIB_ID=`lsb_release -si` + DISTRIB_RELEASE=`lsb_release -sr` + DISTRIB_CODENAME=`lsb_release -sc` + DISTRIB_DESCRIPTION=`lsb_release -sd` +fi + +# Do initial setup for the system +if [ $DISTRIB_ID == "Ubuntu" ]; then + # Fix grub-pc on Virtualbox with Ubuntu + export DEBIAN_FRONTEND=noninteractive + + # Standard update + upgrade dance + apt-get update + apt-get upgrade -y + + # Fix the silly notion that /bin/sh should point to dash by pointing it to bash + + update-alternatives --install /bin/sh sh /bin/bash 100 + + # Install useful but non-mandatory tools + apt-get install -y emacs git-review gdb gdbserver brctl +elif [ $DISTRIB_ID == "CentOS" ]; then + # Standard update + upgrade dance + yum check-update + yum update -y +fi diff --git a/vpp-userdemo/CHANGELOG b/vpp-userdemo/CHANGELOG new file mode 100644 index 0000000..8adeb5a --- /dev/null +++ b/vpp-userdemo/CHANGELOG @@ -0,0 +1,6 @@ +2017-03-28 Ray Kinsella <ray.kinsella@intel.com> + + * Updated Userdemo to support VPP 17.01 + * Bug fixes: + * npm: setting npm proxy correctly + * run: setting directories correctly diff --git a/vpp-userdemo/install.sh b/vpp-userdemo/install.sh index 72669f1..d038de7 100755 --- a/vpp-userdemo/install.sh +++ b/vpp-userdemo/install.sh @@ -20,9 +20,9 @@ if [ $HUGEPAGES != 1024 ]; then exit fi -echo "deb https://nexus.fd.io/content/repositories/fd.io.master.ubuntu.trusty.main/ ./" | sudo tee -a /etc/apt/sources.list.d/99fd.io.list +echo "deb https://nexus.fd.io/content/repositories/fd.io.stable.1701.ubuntu.trusty.main/ ./" | sudo tee -a /etc/apt/sources.list.d/99fd.io.list apt-get -qq update -apt-get -qq install -y --force-yes vpp vpp-dpdk-dkms bridge-utils lxc +apt-get -qq install -y --force-yes vpp vpp-lib vpp-dpdk-dkms bridge-utils lxc service vpp start #Configure LXC network to create an inteface for Linux bridge and a unconsumed second inteface @@ -74,7 +74,19 @@ done #UI dependencies curl -sL https://deb.nodesource.com/setup_4.x | sudo -E bash - sudo apt-get install -y nodejs + +#setup the npm proxy config correctly +if [[ -v http_proxy ]]; then + sudo npm config set proxy $http_proxy +fi + +if [[ -v https_proxy ]]; then + npm_https_proxy=$(sed 's/https/http/' <<< $https_proxy) + sudo npm config set https-proxy $npm_https_proxy +fi + +#install nodeJS backend sudo npm install /vagrant/ui/backend/ sudo mv node_modules/vppsb/node_modules/ /vagrant/ui/backend/ sudo npm install -g forever -sudo forever start /vagrant/ui/backend/server.js
\ No newline at end of file +sudo forever start /vagrant/ui/backend/server.js diff --git a/vpp-userdemo/run b/vpp-userdemo/run index b3582de..293c942 100755 --- a/vpp-userdemo/run +++ b/vpp-userdemo/run @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -vagrant ssh -c "/vagrant/vmrun /vagrant/${1}" +vagrant ssh -c "/vagrant/vmrun /vagrant/tutorials/${1}" |