From 59b2565cd91a67ced650739f36129650830211ac Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Sun, 10 Sep 2017 15:04:27 -0400 Subject: Repair vlib API socket server - Teach vpp_api_test to send/receive API messages over sockets - Add memfd-based shared memory - Add api messages to create memfd-based shared memory segments - vpp_api_test supports both socket and shared memory segment connections - vpp_api_test pivot from socket to shared memory API messaging - add socket client support to libvlibclient.so - dead client reaper sends ping messages, container-friendly - dead client reaper falls back to kill (, 0) live checking if e.g. a python app goes silent for tens of seconds - handle ping messages in python client support code - teach show api ring about pairwise shared-memory segments - fix ip probing of already resolved destinations (VPP-998) We'll need this work to implement proper host-stack client isolation Change-Id: Ic23b65f75c854d0393d9a2e9d6b122a9551be769 Signed-off-by: Dave Barach Signed-off-by: Dave Wallace Signed-off-by: Florin Coras --- extras/emacs/plugin-main-skel.el | 1 - extras/emacs/plugin-test-skel.el | 1 - src/examples/sample-plugin/sample/sample.c | 1 - src/examples/sample-plugin/sample/sample_test.c | 1 - .../srv6-sample-localsid/srv6_localsid_sample.c | 1 - src/plugins/acl/acl.c | 1 - src/plugins/acl/acl_test.c | 1 - src/plugins/acl/hash_lookup.c | 1 - src/plugins/dpdk/api/dpdk_test.c | 1 - src/plugins/flowprobe/flowprobe.c | 1 - src/plugins/flowprobe/flowprobe_test.c | 1 - src/plugins/gtpu/gtpu_api.c | 1 - src/plugins/gtpu/gtpu_test.c | 1 - .../ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c | 1 - .../export-vxlan-gpe/vxlan_gpe_ioam_export_test.c | 1 - src/plugins/ioam/export/ioam_export.c | 1 - src/plugins/ioam/export/ioam_export_test.c | 1 - src/plugins/ioam/ip6/ioam_cache.c | 1 - src/plugins/ioam/lib-pot/pot_api.c | 1 - src/plugins/ioam/lib-pot/pot_test.c | 2 +- src/plugins/ioam/lib-trace/trace_api.c | 2 +- src/plugins/ioam/lib-trace/trace_test.c | 2 +- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c | 2 +- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c | 2 +- src/plugins/ioam/udp-ping/udp_ping_api.c | 2 +- src/plugins/ioam/udp-ping/udp_ping_node.c | 2 +- src/plugins/ioam/udp-ping/udp_ping_test.c | 2 +- src/plugins/lb/api.c | 2 +- src/plugins/lb/lb_test.c | 2 +- src/plugins/memif/memif_api.c | 2 +- src/plugins/memif/memif_test.c | 2 +- src/plugins/nat/nat_api.c | 2 +- src/plugins/nat/nat_test.c | 2 +- src/plugins/pppoe/pppoe_api.c | 2 +- src/plugins/pppoe/pppoe_test.c | 2 +- src/svm.am | 18 +- src/svm/memfd.c | 137 ++++ src/svm/memfd.h | 194 ++++++ src/svm/svm.c | 193 +++--- src/svm/svm_common.h | 2 + src/svm/svmdb.c | 4 +- src/uri/vppcom.c | 2 +- src/vat/api_format.c | 370 +++++++---- src/vat/main.c | 38 +- src/vat/vat.h | 14 +- src/vlib-api.am | 28 +- src/vlib/unix/cli.c | 2 +- src/vlib/unix/input.c | 2 +- src/vlibapi/api_common.h | 9 + src/vlibapi/api_helper_macros.h | 141 ++-- src/vlibapi/vat_helper_macros.h | 115 +++- src/vlibmemory/api.h | 1 + src/vlibmemory/api_common.h | 121 +++- src/vlibmemory/memclnt.api | 57 ++ src/vlibmemory/memory_client.c | 65 +- src/vlibmemory/memory_shared.c | 175 +++-- src/vlibmemory/memory_vlib.c | 717 ++++++++++++++------ src/vlibmemory/socket_client.c | 240 +++++++ src/vlibmemory/socksvr_vlib.c | 719 +++++++++++++++++++++ src/vlibsocket/sock_test.c | 6 +- src/vlibsocket/sockclnt.api | 50 -- src/vlibsocket/sockclnt_vlib.c | 10 +- src/vlibsocket/socksvr_vlib.c | 706 -------------------- src/vlibsocket/vl_socket_api_h.h | 33 - src/vlibsocket/vl_socket_msg_enum.h | 42 -- src/vnet/ethernet/arp.c | 3 +- src/vnet/fib/fib_table.c | 3 +- src/vnet/interface_api.c | 18 +- src/vnet/ip/ip4_forward.c | 24 +- src/vnet/ip/ip6_forward.c | 8 + src/vnet/ip/ip6_neighbor.c | 3 +- src/vnet/l2/l2_fib.c | 12 +- src/vnet/lisp-cp/control.c | 2 + src/vnet/lisp-cp/lisp_api.c | 9 +- src/vnet/tcp/builtin_client.c | 1 - src/vpp-api/client/client.c | 37 +- src/vpp-api/vapi/vapi.c | 2 + src/vpp/api/api.c | 11 - src/vpp/api/api_main.c | 46 ++ src/vpp/api/custom_dump.c | 11 + src/vppinfra/elog.h | 8 +- src/vppinfra/file.h | 4 +- src/vppinfra/format.h | 2 +- src/vppinfra/serialize.c | 28 +- src/vppinfra/serialize.h | 8 +- src/vppinfra/test_serialize.c | 4 +- src/vppinfra/time.c | 4 +- src/vppinfra/unformat.c | 6 +- src/vppinfra/unix-misc.c | 10 +- src/vppinfra/unix.h | 6 +- 90 files changed, 2968 insertions(+), 1564 deletions(-) create mode 100644 src/svm/memfd.c create mode 100644 src/svm/memfd.h create mode 100644 src/vlibmemory/socket_client.c create mode 100644 src/vlibmemory/socksvr_vlib.c delete mode 100644 src/vlibsocket/sockclnt.api delete mode 100644 src/vlibsocket/socksvr_vlib.c delete mode 100644 src/vlibsocket/vl_socket_api_h.h delete mode 100644 src/vlibsocket/vl_socket_msg_enum.h diff --git a/extras/emacs/plugin-main-skel.el b/extras/emacs/plugin-main-skel.el index 46615c311c7..db1147e75e5 100644 --- a/extras/emacs/plugin-main-skel.el +++ b/extras/emacs/plugin-main-skel.el @@ -45,7 +45,6 @@ nil #include #include -#include #include /* define message IDs */ diff --git a/extras/emacs/plugin-test-skel.el b/extras/emacs/plugin-test-skel.el index 837cbdaa1b1..5928c974c9f 100644 --- a/extras/emacs/plugin-test-skel.el +++ b/extras/emacs/plugin-test-skel.el @@ -41,7 +41,6 @@ nil #include #include #include -#include #include uword unformat_sw_if_index (unformat_input_t * input, va_list * args); diff --git a/src/examples/sample-plugin/sample/sample.c b/src/examples/sample-plugin/sample/sample.c index 3929ac23701..703e604201f 100644 --- a/src/examples/sample-plugin/sample/sample.c +++ b/src/examples/sample-plugin/sample/sample.c @@ -23,7 +23,6 @@ #include #include -#include /* define message IDs */ #include diff --git a/src/examples/sample-plugin/sample/sample_test.c b/src/examples/sample-plugin/sample/sample_test.c index 2298675be74..51c2c87cb8d 100644 --- a/src/examples/sample-plugin/sample/sample_test.c +++ b/src/examples/sample-plugin/sample/sample_test.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #define __plugin_msg_base sample_test_main.msg_id_base diff --git a/src/examples/srv6-sample-localsid/srv6_localsid_sample.c b/src/examples/srv6-sample-localsid/srv6_localsid_sample.c index ec16547eebf..115ee94866d 100755 --- a/src/examples/srv6-sample-localsid/srv6_localsid_sample.c +++ b/src/examples/srv6-sample-localsid/srv6_localsid_sample.c @@ -24,7 +24,6 @@ #include #include -#include unsigned char srv6_localsid_name[32] = "Sample-SRv6-LocalSID-plugin"; unsigned char keyword_str[32] = "new_srv6_localsid"; diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index efd506de805..b8d8c71151d 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -25,7 +25,6 @@ #include #include -#include /* define message IDs */ #include diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c index abb9643ef1c..90fd49980de 100644 --- a/src/plugins/acl/acl_test.c +++ b/src/plugins/acl/acl_test.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c index 7869027b301..2262402d52f 100644 --- a/src/plugins/acl/hash_lookup.c +++ b/src/plugins/acl/hash_lookup.c @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git a/src/plugins/dpdk/api/dpdk_test.c b/src/plugins/dpdk/api/dpdk_test.c index ea17e5d0e7a..8204edfe66a 100644 --- a/src/plugins/dpdk/api/dpdk_test.c +++ b/src/plugins/dpdk/api/dpdk_test.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/src/plugins/flowprobe/flowprobe.c b/src/plugins/flowprobe/flowprobe.c index 884b5a2e0a5..35999d2edb2 100644 --- a/src/plugins/flowprobe/flowprobe.c +++ b/src/plugins/flowprobe/flowprobe.c @@ -30,7 +30,6 @@ #include #include -#include /* define message IDs */ #include diff --git a/src/plugins/flowprobe/flowprobe_test.c b/src/plugins/flowprobe/flowprobe_test.c index 91793f552c9..19b7a733795 100644 --- a/src/plugins/flowprobe/flowprobe_test.c +++ b/src/plugins/flowprobe/flowprobe_test.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/src/plugins/gtpu/gtpu_api.c b/src/plugins/gtpu/gtpu_api.c index 49a5053d9f4..5a77ab7f851 100644 --- a/src/plugins/gtpu/gtpu_api.c +++ b/src/plugins/gtpu/gtpu_api.c @@ -24,7 +24,6 @@ #include #include -#include #include diff --git a/src/plugins/gtpu/gtpu_test.c b/src/plugins/gtpu/gtpu_test.c index e7fd0d54ac1..1eb54d6a6b8 100644 --- a/src/plugins/gtpu/gtpu_test.c +++ b/src/plugins/gtpu/gtpu_test.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c index ec43e4849b4..976c831fcce 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c @@ -25,7 +25,6 @@ #include #include -#include #include diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c index 17d31c95bcb..7e66329a998 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #define __plugin_msg_base export_test_main.msg_id_base diff --git a/src/plugins/ioam/export/ioam_export.c b/src/plugins/ioam/export/ioam_export.c index 46ac3d4a582..98e5f116c8a 100644 --- a/src/plugins/ioam/export/ioam_export.c +++ b/src/plugins/ioam/export/ioam_export.c @@ -24,7 +24,6 @@ #include #include -#include #include diff --git a/src/plugins/ioam/export/ioam_export_test.c b/src/plugins/ioam/export/ioam_export_test.c index 5023afd7d63..70efc7e3e4e 100644 --- a/src/plugins/ioam/export/ioam_export_test.c +++ b/src/plugins/ioam/export/ioam_export_test.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #define __plugin_msg_base export_test_main.msg_id_base diff --git a/src/plugins/ioam/ip6/ioam_cache.c b/src/plugins/ioam/ip6/ioam_cache.c index 4c9997f4923..4e147da34dc 100644 --- a/src/plugins/ioam/ip6/ioam_cache.c +++ b/src/plugins/ioam/ip6/ioam_cache.c @@ -24,7 +24,6 @@ #include #include -#include #include #include "ioam_cache.h" diff --git a/src/plugins/ioam/lib-pot/pot_api.c b/src/plugins/ioam/lib-pot/pot_api.c index cc1b7b76a78..52ea68e083d 100644 --- a/src/plugins/ioam/lib-pot/pot_api.c +++ b/src/plugins/ioam/lib-pot/pot_api.c @@ -25,7 +25,6 @@ #include #include -#include /* define message IDs */ #include diff --git a/src/plugins/ioam/lib-pot/pot_test.c b/src/plugins/ioam/lib-pot/pot_test.c index 1c6dd02dae1..2b528ece54e 100644 --- a/src/plugins/ioam/lib-pot/pot_test.c +++ b/src/plugins/ioam/lib-pot/pot_test.c @@ -21,7 +21,7 @@ #include #include #include -#include + #include #define __plugin_msg_base pot_test_main.msg_id_base diff --git a/src/plugins/ioam/lib-trace/trace_api.c b/src/plugins/ioam/lib-trace/trace_api.c index 6889859baa0..cd00c591fda 100644 --- a/src/plugins/ioam/lib-trace/trace_api.c +++ b/src/plugins/ioam/lib-trace/trace_api.c @@ -26,7 +26,7 @@ #include #include -#include + /* define message IDs */ #include diff --git a/src/plugins/ioam/lib-trace/trace_test.c b/src/plugins/ioam/lib-trace/trace_test.c index 1e287deede4..f598b7ee936 100644 --- a/src/plugins/ioam/lib-trace/trace_test.c +++ b/src/plugins/ioam/lib-trace/trace_test.c @@ -21,7 +21,7 @@ #include #include #include -#include + #include #define __plugin_msg_base trace_test_main.msg_id_base diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c index 634133a4f7f..3bc682bfbfb 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c @@ -25,7 +25,7 @@ #include #include -#include + /* define message IDs */ #include diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c index 80e656448c6..fdb4cf63b8b 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c @@ -21,7 +21,7 @@ #include #include #include -#include + #include #define __plugin_msg_base vxlan_gpe_test_main.msg_id_base diff --git a/src/plugins/ioam/udp-ping/udp_ping_api.c b/src/plugins/ioam/udp-ping/udp_ping_api.c index 7593873103d..52190097f1a 100644 --- a/src/plugins/ioam/udp-ping/udp_ping_api.c +++ b/src/plugins/ioam/udp-ping/udp_ping_api.c @@ -25,7 +25,7 @@ #include #include -#include + /* define message IDs */ #include diff --git a/src/plugins/ioam/udp-ping/udp_ping_node.c b/src/plugins/ioam/udp-ping/udp_ping_node.c index e1a57955fbc..7a725258d65 100644 --- a/src/plugins/ioam/udp-ping/udp_ping_node.c +++ b/src/plugins/ioam/udp-ping/udp_ping_node.c @@ -17,7 +17,7 @@ #include #include #include -#include + #include #include #include diff --git a/src/plugins/ioam/udp-ping/udp_ping_test.c b/src/plugins/ioam/udp-ping/udp_ping_test.c index 4ec11351221..a5cc4f3f7e3 100644 --- a/src/plugins/ioam/udp-ping/udp_ping_test.c +++ b/src/plugins/ioam/udp-ping/udp_ping_test.c @@ -21,7 +21,7 @@ #include #include #include -#include + #include #include diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c index 9e3bcd651a0..7eb49ff6c26 100644 --- a/src/plugins/lb/api.c +++ b/src/plugins/lb/api.c @@ -19,7 +19,7 @@ #include #include #include -#include + #define vl_msg_id(n,h) n, typedef enum { diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c index 9b30c18d459..35bda262fee 100644 --- a/src/plugins/lb/lb_test.c +++ b/src/plugins/lb/lb_test.c @@ -16,7 +16,7 @@ #include #include #include -#include + #include #include diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c index 07347bc05a4..684f53c5193 100644 --- a/src/plugins/memif/memif_api.c +++ b/src/plugins/memif/memif_api.c @@ -25,7 +25,7 @@ #include #include -#include + /* define message IDs */ #include diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c index 4ca7526df13..559d0f7d19f 100644 --- a/src/plugins/memif/memif_test.c +++ b/src/plugins/memif/memif_test.c @@ -20,7 +20,7 @@ #include #include #include -#include + #include #include #include diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index b56b4436adc..3bacdfed051 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -23,7 +23,7 @@ #include #include #include -#include + #include #include diff --git a/src/plugins/nat/nat_test.c b/src/plugins/nat/nat_test.c index e0b0494048b..7cc85f5ccfa 100644 --- a/src/plugins/nat/nat_test.c +++ b/src/plugins/nat/nat_test.c @@ -18,7 +18,7 @@ #include #include #include -#include + #include #include #include diff --git a/src/plugins/pppoe/pppoe_api.c b/src/plugins/pppoe/pppoe_api.c index 9b7584606a2..04326572ce7 100644 --- a/src/plugins/pppoe/pppoe_api.c +++ b/src/plugins/pppoe/pppoe_api.c @@ -24,7 +24,7 @@ #include #include -#include + #include diff --git a/src/plugins/pppoe/pppoe_test.c b/src/plugins/pppoe/pppoe_test.c index 2b67d989418..a32f1b76fe0 100644 --- a/src/plugins/pppoe/pppoe_test.c +++ b/src/plugins/pppoe/pppoe_test.c @@ -16,7 +16,7 @@ #include #include #include -#include + #include #include diff --git a/src/svm.am b/src/svm.am index 7116eb37c8e..002c06cbef2 100644 --- a/src/svm.am +++ b/src/svm.am @@ -13,12 +13,24 @@ bin_PROGRAMS += svmtool svmdbtool -nobase_include_HEADERS += svm/svm.h svm/svm_common.h svm/ssvm.h svm/svmdb.h \ - svm/svm_fifo.h svm/svm_fifo_segment.h +nobase_include_HEADERS += \ + svm/memfd.h \ + svm/ssvm.h \ + svm/svm_common.h \ + svm/svmdb.h \ + svm/svm_fifo.h \ + svm/svm_fifo_segment.h \ + svm/svm.h lib_LTLIBRARIES += libsvm.la libsvmdb.la -libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svm/svm_fifo.c svm/svm_fifo_segment.c +libsvm_la_SOURCES = \ + svm/svm.c \ + svm/ssvm.c \ + svm/svm_fifo.c \ + svm/svm_fifo_segment.c \ + svm/memfd.c + libsvm_la_LIBADD = libvppinfra.la -lrt -lpthread libsvm_la_DEPENDENCIES = libvppinfra.la diff --git a/src/svm/memfd.c b/src/svm/memfd.c new file mode 100644 index 00000000000..9fe487db788 --- /dev/null +++ b/src/svm/memfd.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "memfd.h" + +int +memfd_master_init (memfd_private_t * memfd, u32 master_index) +{ + int flags; + memfd_shared_header_t *sh; + u64 ticks = clib_cpu_time_now (); + u64 randomize_baseva; + void *oldheap; + + if (memfd->memfd_size == 0) + return MEMFD_API_ERROR_NO_SIZE; + + ASSERT (vec_c_string_is_terminated (memfd->name)); + memfd->name = format (0, "memfd svm region %d", master_index); + + memfd->fd = memfd_create ((char *) memfd->name, MFD_ALLOW_SEALING); + if (memfd->fd < 0) + { + clib_unix_warning ("create segment '%s'", memfd->name); + return MEMFD_API_ERROR_CREATE_FAILURE; + } + + if ((ftruncate (memfd->fd, memfd->memfd_size)) == -1) + { + clib_unix_warning ("set memfd size"); + return MEMFD_API_ERROR_SET_SIZE; + } + + if ((fcntl (memfd->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + clib_unix_warning ("fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); + + flags = MAP_SHARED; + if (memfd->requested_va) + flags |= MAP_FIXED; + + randomize_baseva = (ticks & 15) * MMAP_PAGESIZE; + + if (memfd->requested_va) + memfd->requested_va += randomize_baseva; + + sh = memfd->sh = + (memfd_shared_header_t *) mmap ((void *) memfd->requested_va, + memfd->memfd_size, PROT_READ | PROT_WRITE, + flags, memfd->fd, 0); + + if (memfd->sh == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (memfd->fd); + return MEMFD_API_ERROR_MMAP; + } + + memfd->my_pid = getpid (); + sh->master_pid = memfd->my_pid; + sh->memfd_size = memfd->memfd_size; + sh->heap = mheap_alloc_with_flags + (((u8 *) sh) + MMAP_PAGESIZE, memfd->memfd_size - MMAP_PAGESIZE, + MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE); + + sh->memfd_va = pointer_to_uword (sh); + sh->master_index = master_index; + + oldheap = memfd_push_heap (sh); + sh->name = format (0, "%s%c", memfd->name, 0); + memfd_pop_heap (oldheap); + + memfd->i_am_master = 1; + + /* The application has to set set sh->ready... */ + return 0; +} + +/* + * Subtly different than svm_slave_init. The caller + * needs to acquire a usable file descriptor for the memfd segment + * e.g. via vppinfra/socket.c:default_socket_recvmsg + */ + +int +memfd_slave_init (memfd_private_t * memfd) +{ + memfd_shared_header_t *sh; + + memfd->i_am_master = 0; + + /* Map the segment once, to look at the shared header */ + sh = (void *) mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + memfd->fd, 0); + if (sh == MAP_FAILED) + { + clib_unix_warning ("slave research mmap"); + close (memfd->fd); + return MEMFD_API_ERROR_MMAP; + } + + memfd->requested_va = (u64) sh->memfd_va; + memfd->memfd_size = sh->memfd_size; + munmap (sh, MMAP_PAGESIZE); + + sh = memfd->sh = + (void *) mmap ((void *) memfd->requested_va, memfd->memfd_size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, memfd->fd, 0); + + if (sh == MAP_FAILED) + { + clib_unix_warning ("slave final mmap"); + close (memfd->fd); + return MEMFD_API_ERROR_MMAP; + } + sh->slave_pid = getpid (); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/memfd.h b/src/svm/memfd.h new file mode 100644 index 00000000000..3ed4a9ab067 --- /dev/null +++ b/src/svm/memfd.h @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_memfd_h__ +#define __included_memfd_h__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* DGMS, memfd syscall not in glibc... */ +#include + +#ifndef MMAP_PAGESIZE +#define MMAP_PAGESIZE (clib_mem_get_page_size()) +#endif + +#define MEMFD_N_OPAQUE 7 + +typedef struct +{ + /* Spin-lock */ + volatile u32 lock; + volatile u32 owner_pid; + int recursion_count; + u32 tag; /* for debugging */ + + /* The allocation arena */ + void *heap; + + /* Segment must be mapped at this address, or no supper */ + u64 memfd_va; + /* The actual mmap size */ + u64 memfd_size; + u32 master_pid; + u32 slave_pid; + u8 *name; + void *opaque[MEMFD_N_OPAQUE]; + + /* Set when the master application thinks it's time to make the donuts */ + volatile u32 ready; + + /* Needed to make unique MAC addresses, etc. */ + u32 master_index; +} memfd_shared_header_t; + +typedef struct +{ + memfd_shared_header_t *sh; + int fd; + u64 memfd_size; + u32 my_pid; + u32 vlib_hw_if_index; + uword requested_va; + int i_am_master; + u32 per_interface_next_index; + u32 *rx_queue; + u8 *name; +} memfd_private_t; + +always_inline void +memfd_lock (memfd_shared_header_t * h, u32 my_pid, u32 tag) +{ + if (h->owner_pid == my_pid) + { + h->recursion_count++; + return; + } + + while (__sync_lock_test_and_set (&h->lock, 1)) + ; + + h->owner_pid = my_pid; + h->recursion_count = 1; + h->tag = tag; +} + +always_inline void +memfd_lock_non_recursive (memfd_shared_header_t * h, u32 tag) +{ + while (__sync_lock_test_and_set (&h->lock, 1)) + ; + + h->tag = tag; +} + +always_inline void +memfd_unlock (memfd_shared_header_t * h) +{ + if (--h->recursion_count == 0) + { + h->owner_pid = 0; + h->tag = 0; + CLIB_MEMORY_BARRIER (); + h->lock = 0; + } +} + +always_inline void +memfd_unlock_non_recursive (memfd_shared_header_t * h) +{ + h->tag = 0; + CLIB_MEMORY_BARRIER (); + h->lock = 0; +} + +static inline void * +memfd_push_heap (memfd_shared_header_t * sh) +{ + u8 *oldheap; + oldheap = clib_mem_set_heap (sh->heap); + return ((void *) oldheap); +} + +static inline void +memfd_pop_heap (void *oldheap) +{ + clib_mem_set_heap (oldheap); +} + +#define foreach_memfd_api_error \ +_(NO_NAME, "No shared segment name", -100) \ +_(NO_SIZE, "Size not set (master)", -101) \ +_(CREATE_FAILURE, "Create failed", -102) \ +_(SET_SIZE, "Set size failed", -103) \ +_(MMAP, "mmap failed", -104) \ +_(SLAVE_TIMEOUT, "Slave map timeout", -105) + +typedef enum +{ +#define _(n,s,c) MEMFD_API_ERROR_##n = c, + foreach_memfd_api_error +#undef _ +} memfd_api_error_enum_t; + +#define MEMFD_API_ERROR_NO_NAME (-10) + +int memfd_master_init (memfd_private_t * memfd, u32 master_index); +int memfd_slave_init (memfd_private_t * memfd); +void memfd_delete (memfd_private_t * memfd); + +/* These do not belong here, but the original keeps running around... */ +/* $$$$ work w/ Damjan to fix properly */ + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif +#define MFD_ALLOW_SEALING 0x0002U +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ + +#endif /* __included_memfd_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm.c b/src/svm/svm.c index f97803cd7af..c54f9730094 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -434,6 +434,107 @@ shm_name_from_svm_map_region_args (svm_map_region_args_t * a) return (shm_name); } +void +svm_region_init_mapped_region (svm_map_region_args_t * a, svm_region_t * rp) +{ + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + int nbits, words, bit; + int overhead_space; + void *oldheap; + uword data_base; + ASSERT (rp); + int rv; + + memset (rp, 0, sizeof (*rp)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("mutexattr_setpshared"); + + if (pthread_mutex_init (&rp->mutex, &attr)) + clib_unix_warning ("mutex_init"); + + if (pthread_mutexattr_destroy (&attr)) + clib_unix_warning ("mutexattr_destroy"); + + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + + if (pthread_cond_init (&rp->condvar, &cattr)) + clib_unix_warning ("cond_init"); + + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("condattr_destroy"); + + region_lock (rp, 1); + + rp->virtual_base = a->baseva; + rp->virtual_size = a->size; + + rp->region_heap = + mheap_alloc_with_flags (uword_to_pointer + (a->baseva + MMAP_PAGESIZE, void *), + (a->pvt_heap_size != + 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, + MHEAP_FLAG_DISABLE_VM); + oldheap = svm_push_pvt_heap (rp); + + rp->region_name = (char *) format (0, "%s%c", a->name, 0); + vec_add1 (rp->client_pids, getpid ()); + + nbits = rp->virtual_size / MMAP_PAGESIZE; + + ASSERT (nbits > 0); + rp->bitmap_size = nbits; + words = (nbits + BITS (uword) - 1) / BITS (uword); + vec_validate (rp->bitmap, words - 1); + + overhead_space = MMAP_PAGESIZE /* header */ + + ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); + + bit = 0; + data_base = (uword) rp->virtual_base; + + if (a->flags & SVM_FLAGS_NODATA) + rp->flags |= SVM_FLAGS_NEED_DATA_INIT; + + do + { + clib_bitmap_set_no_check (rp->bitmap, bit, 1); + bit++; + overhead_space -= MMAP_PAGESIZE; + data_base += MMAP_PAGESIZE; + } + while (overhead_space > 0); + + rp->data_base = (void *) data_base; + + /* + * Note: although the POSIX spec guarantees that only one + * process enters this block, we have to play games + * to hold off clients until e.g. the mutex is ready + */ + rp->version = SVM_VERSION; + + /* setup the data portion of the region */ + + rv = svm_data_region_create (a, rp); + if (rv) + { + clib_warning ("data_region_create: %d", rv); + } + + region_unlock (rp); + + svm_pop_heap (oldheap); +} + /* * svm_map_region */ @@ -442,15 +543,10 @@ svm_map_region (svm_map_region_args_t * a) { int svm_fd; svm_region_t *rp; - pthread_mutexattr_t attr; - pthread_condattr_t cattr; int deadman = 0; u8 junk = 0; void *oldheap; - int overhead_space; int rv; - uword data_base; - int nbits, words, bit; int pid_holding_region_lock; u8 *shm_name; int dead_region_recovery = 0; @@ -502,93 +598,8 @@ svm_map_region (svm_map_region_args_t * a) return (0); } close (svm_fd); - memset (rp, 0, sizeof (*rp)); - - if (pthread_mutexattr_init (&attr)) - clib_unix_warning ("mutexattr_init"); - - if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) - clib_unix_warning ("mutexattr_setpshared"); - - if (pthread_mutex_init (&rp->mutex, &attr)) - clib_unix_warning ("mutex_init"); - if (pthread_mutexattr_destroy (&attr)) - clib_unix_warning ("mutexattr_destroy"); - - if (pthread_condattr_init (&cattr)) - clib_unix_warning ("condattr_init"); - - if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) - clib_unix_warning ("condattr_setpshared"); - - if (pthread_cond_init (&rp->condvar, &cattr)) - clib_unix_warning ("cond_init"); - - if (pthread_condattr_destroy (&cattr)) - clib_unix_warning ("condattr_destroy"); - - region_lock (rp, 1); - - rp->virtual_base = a->baseva; - rp->virtual_size = a->size; - - rp->region_heap = - mheap_alloc_with_flags (uword_to_pointer - (a->baseva + MMAP_PAGESIZE, void *), - (a->pvt_heap_size != - 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, - MHEAP_FLAG_DISABLE_VM); - oldheap = svm_push_pvt_heap (rp); - - rp->region_name = (char *) format (0, "%s%c", a->name, 0); - vec_add1 (rp->client_pids, getpid ()); - - nbits = rp->virtual_size / MMAP_PAGESIZE; - - ASSERT (nbits > 0); - rp->bitmap_size = nbits; - words = (nbits + BITS (uword) - 1) / BITS (uword); - vec_validate (rp->bitmap, words - 1); - - overhead_space = MMAP_PAGESIZE /* header */ + - ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); - - bit = 0; - data_base = (uword) rp->virtual_base; - - if (a->flags & SVM_FLAGS_NODATA) - rp->flags |= SVM_FLAGS_NEED_DATA_INIT; - - do - { - clib_bitmap_set_no_check (rp->bitmap, bit, 1); - bit++; - overhead_space -= MMAP_PAGESIZE; - data_base += MMAP_PAGESIZE; - } - while (overhead_space > 0); - - rp->data_base = (void *) data_base; - - /* - * Note: although the POSIX spec guarantees that only one - * process enters this block, we have to play games - * to hold off clients until e.g. the mutex is ready - */ - rp->version = SVM_VERSION; - - /* setup the data portion of the region */ - - rv = svm_data_region_create (a, rp); - if (rv) - { - clib_warning ("data_region_create: %d", rv); - } - - region_unlock (rp); - - svm_pop_heap (oldheap); + svm_region_init_mapped_region (a, rp); return ((void *) rp); } diff --git a/src/svm/svm_common.h b/src/svm/svm_common.h index ea3ec87a212..a7160286a44 100644 --- a/src/svm/svm_common.h +++ b/src/svm/svm_common.h @@ -112,6 +112,8 @@ typedef struct void *svm_region_find_or_create (svm_map_region_args_t * a); void svm_region_init (void); +void svm_region_init_mapped_region (svm_map_region_args_t * a, + svm_region_t * rp); int svm_region_init_chroot (const char *root_path); void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid); void svm_region_init_args (svm_map_region_args_t * a); diff --git a/src/svm/svmdb.c b/src/svm/svmdb.c index 043b0924d84..03aa1f17494 100644 --- a/src/svm/svmdb.c +++ b/src/svm/svmdb.c @@ -456,7 +456,7 @@ svmdb_local_serialize_strings (svmdb_client_t * client, char *filename) goto out; } - serialize_open_unix_file_descriptor (sm, fd); + serialize_open_clib_file_descriptor (sm, fd); region_lock (client->db_rp, 20); @@ -512,7 +512,7 @@ svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename) goto out; } - unserialize_open_unix_file_descriptor (sm, fd); + unserialize_open_clib_file_descriptor (sm, fd); region_lock (client->db_rp, 21); oldheap = svm_push_data_heap (client->db_rp); diff --git a/src/uri/vppcom.c b/src/uri/vppcom.c index c7ae0ea5eff..8aeb9dafa6d 100644 --- a/src/uri/vppcom.c +++ b/src/uri/vppcom.c @@ -1474,7 +1474,7 @@ vppcom_cfg_read (char *conf_fname) goto file_done; } - unformat_init_unix_file (input, fd); + unformat_init_clib_file (input, fd); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 02300216e4a..1010a0570b7 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -18,9 +18,10 @@ */ #include +#include +#include #include #include -#include #include #include #include @@ -73,6 +74,36 @@ #define __plugin_msg_base 0 #include +#if VPP_API_TEST_BUILTIN == 0 +#include + +u32 +vl (void *p) +{ + return vec_len (p); +} + +int +vat_socket_connect (vat_main_t * vam) +{ + return vl_socket_client_connect + (&vam->socket_client_main, (char *) vam->socket_name, + "vpp_api_test(s)", 0 /* default socket rx, tx buffer */ ); +} +#else /* vpp built-in case, we don't do sockets... */ +int +vat_socket_connect (vat_main_t * vam) +{ + return 0; +} + +void +vl_socket_client_read_reply (socket_client_main_t * scm) +{ +}; +#endif + + f64 vat_time_now (vat_main_t * vam) { @@ -1036,9 +1067,17 @@ vl_api_cli_inband_reply_t_handler (vl_api_cli_inband_reply_t * mp) { vat_main_t *vam = &vat_main; i32 retval = ntohl (mp->retval); + u32 length = ntohl (mp->length); + + vec_reset_length (vam->cmd_reply); vam->retval = retval; - vam->cmd_reply = mp->reply; + if (retval == 0) + { + vec_validate (vam->cmd_reply, length); + clib_memcpy ((char *) (vam->cmd_reply), mp->reply, length); + vam->cmd_reply[length] = 0; + } vam->result_ready = 1; } @@ -1048,6 +1087,8 @@ vl_api_cli_inband_reply_t_handler_json (vl_api_cli_inband_reply_t * mp) vat_main_t *vam = &vat_main; vat_json_node_t node; + vec_reset_length (vam->cmd_reply); + vat_json_init_object (&node); vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); vat_json_object_add_string_copy (&node, "reply", mp->reply); @@ -1421,6 +1462,7 @@ static void vl_api_control_ping_reply_t_handler vam->retval = retval; vam->result_ready = 1; } + vam->socket_client_main.control_pings_outstanding--; } static void vl_api_control_ping_reply_t_handler_json @@ -1979,6 +2021,130 @@ static void vl_api_create_vhost_user_if_reply_t_handler_json vam->result_ready = 1; } +static clib_error_t * +receive_fd_msg (int socket_fd, int *my_fd) +{ + char msgbuf[16]; + char ctl[CMSG_SPACE (sizeof (int)) + CMSG_SPACE (sizeof (struct ucred))]; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + ssize_t size; + struct ucred *cr = 0; + struct cmsghdr *cmsg; + pid_t pid __attribute__ ((unused)); + uid_t uid __attribute__ ((unused)); + gid_t gid __attribute__ ((unused)); + + iov[0].iov_base = msgbuf; + iov[0].iov_len = 5; + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + + memset (ctl, 0, sizeof (ctl)); + + /* receive the incoming message */ + size = recvmsg (socket_fd, &mh, 0); + if (size != 5) + { + return (size == 0) ? clib_error_return (0, "disconnected") : + clib_error_return_unix (0, "recvmsg: malformed message (fd %d)", + socket_fd); + } + + cmsg = CMSG_FIRSTHDR (&mh); + while (cmsg) + { + if (cmsg->cmsg_level == SOL_SOCKET) + { + if (cmsg->cmsg_type == SCM_CREDENTIALS) + { + cr = (struct ucred *) CMSG_DATA (cmsg); + uid = cr->uid; + gid = cr->gid; + pid = cr->pid; + } + else if (cmsg->cmsg_type == SCM_RIGHTS) + { + clib_memcpy (my_fd, CMSG_DATA (cmsg), sizeof (int)); + } + } + cmsg = CMSG_NXTHDR (&mh, cmsg); + } + return 0; +} + +static void vl_api_memfd_segment_create_reply_t_handler + (vl_api_memfd_segment_create_reply_t * mp) +{ + /* Dont bother in the builtin version */ +#if VPP_API_TEST_BUILTIN == 0 + vat_main_t *vam = &vat_main; + api_main_t *am = &api_main; + socket_client_main_t *scm = &vam->socket_client_main; + int my_fd = -1; + clib_error_t *error; + memfd_private_t memfd; + i32 retval = ntohl (mp->retval); + + if (retval == 0) + { + error = receive_fd_msg (scm->socket_fd, &my_fd); + if (error) + { + retval = -99; + goto out; + } + + memset (&memfd, 0, sizeof (memfd)); + memfd.fd = my_fd; + + vam->client_index_invalid = 1; + + retval = memfd_slave_init (&memfd); + if (retval) + clib_warning ("WARNING: segment map returned %d", retval); + + /* Pivot to the memory client segment that vpp just created */ + + am->vlib_rp = (void *) (memfd.requested_va + MMAP_PAGESIZE); + + am->shmem_hdr = (void *) am->vlib_rp->user_ctx; + + vl_client_install_client_message_handlers (); + + vl_client_connect_to_vlib_no_map ("pvt", + "vpp_api_test(p)", + 32 /* input_queue_length */ ); + if (close (my_fd) < 0) + clib_unix_warning ("close memfd fd pivot"); + vam->vl_input_queue = am->shmem_hdr->vl_input_queue; + + vl_socket_client_enable_disable (&vam->socket_client_main, + 0 /* disable socket */ ); + } + +out: + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +#endif +} + +static void vl_api_memfd_segment_create_reply_t_handler_json + (vl_api_memfd_segment_create_reply_t * mp) +{ + clib_warning ("no"); +} + + static void vl_api_ip_address_details_t_handler (vl_api_ip_address_details_t * mp) { @@ -5223,7 +5389,8 @@ _(VNET_INTERFACE_COMBINED_COUNTERS, vnet_interface_combined_counters) \ _(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ _(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ _(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ -_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) +_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) \ +_(MEMFD_SEGMENT_CREATE_REPLY, memfd_segment_create_reply) typedef struct { @@ -5597,76 +5764,9 @@ dump_stats_table (vat_main_t * vam) return 0; } -int -exec (vat_main_t * vam) -{ - api_main_t *am = &api_main; - vl_api_cli_t *mp; - f64 timeout; - void *oldheap; - u8 *cmd = 0; - unformat_input_t *i = vam->input; - - if (vec_len (i->buffer) == 0) - return -1; - - if (vam->exec_mode == 0 && unformat (i, "mode")) - { - vam->exec_mode = 1; - return 0; - } - if (vam->exec_mode == 1 && (unformat (i, "exit") || unformat (i, "quit"))) - { - vam->exec_mode = 0; - return 0; - } - - - M (CLI, mp); - - /* - * Copy cmd into shared memory. - * In order for the CLI command to work, it - * must be a vector ending in \n, not a C-string ending - * in \n\0. - */ - pthread_mutex_lock (&am->vlib_rp->mutex); - oldheap = svm_push_data_heap (am->vlib_rp); - - vec_validate (cmd, vec_len (vam->input->buffer) - 1); - clib_memcpy (cmd, vam->input->buffer, vec_len (vam->input->buffer)); - - svm_pop_heap (oldheap); - pthread_mutex_unlock (&am->vlib_rp->mutex); - - mp->cmd_in_shmem = pointer_to_uword (cmd); - S (mp); - timeout = vat_time_now (vam) + 10.0; - - while (vat_time_now (vam) < timeout) - { - if (vam->result_ready == 1) - { - u8 *free_me; - if (vam->shmem_result != NULL) - print (vam->ofp, "%s", vam->shmem_result); - pthread_mutex_lock (&am->vlib_rp->mutex); - oldheap = svm_push_data_heap (am->vlib_rp); - - free_me = (u8 *) vam->shmem_result; - vec_free (free_me); - - svm_pop_heap (oldheap); - pthread_mutex_unlock (&am->vlib_rp->mutex); - return 0; - } - } - return -99; -} - /* - * Future replacement of exec() that passes CLI buffers directly in - * the API messages instead of an additional shared memory area. + * Pass CLI buffers directly in the CLI_INBAND API message, + * instead of an additional shared memory area. */ static int exec_inband (vat_main_t * vam) @@ -5700,10 +5800,19 @@ exec_inband (vat_main_t * vam) mp->length = htonl (len); S (mp); - W2 (ret, print (vam->ofp, "%s", vam->cmd_reply)); + W (ret); + /* json responses may or may not include a useful reply... */ + if (vec_len (vam->cmd_reply)) + print (vam->ofp, (char *) (vam->cmd_reply)); return ret; } +int +exec (vat_main_t * vam) +{ + return exec_inband (vam); +} + static int api_create_loopback (vat_main_t * vam) { @@ -5949,7 +6058,7 @@ api_sw_interface_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -6526,7 +6635,7 @@ api_bridge_domain_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -6793,7 +6902,7 @@ api_l2fib_add_del (vat_main_t * vam) /* Shut off async mode */ vam->async_mode = 0; - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); timeout = vat_time_now (vam) + 1.0; @@ -7565,7 +7674,7 @@ api_ip_add_del_route (vat_main_t * vam) /* Shut off async mode */ vam->async_mode = 0; - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); timeout = vat_time_now (vam) + 1.0; @@ -7954,7 +8063,7 @@ api_mpls_route_add_del (vat_main_t * vam) /* Shut off async mode */ vam->async_mode = 0; - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); timeout = vat_time_now (vam) + 1.0; @@ -8864,7 +8973,7 @@ api_dhcp_proxy_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -9216,7 +9325,7 @@ api_ip6nd_proxy_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -11552,7 +11661,7 @@ api_sw_if_l2tpv3_tunnel_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -11600,7 +11709,7 @@ api_sw_interface_tap_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -11875,7 +11984,7 @@ api_vxlan_tunnel_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -12066,7 +12175,7 @@ api_gre_tunnel_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -12422,7 +12531,7 @@ api_sw_interface_vhost_user_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -12697,7 +12806,7 @@ api_vxlan_gpe_tunnel_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -12782,7 +12891,7 @@ api_l2_fib_table_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -13050,7 +13159,7 @@ api_ip_address_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -13106,7 +13215,7 @@ api_ip_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -14485,7 +14594,7 @@ api_map_domain_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -14524,7 +14633,7 @@ api_map_rule_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -16920,7 +17029,7 @@ api_one_locator_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -16970,7 +17079,7 @@ api_one_locator_set_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17028,7 +17137,7 @@ api_one_eid_table_map_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17056,7 +17165,7 @@ api_one_eid_table_vni_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17164,7 +17273,7 @@ api_one_eid_table_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17431,7 +17540,7 @@ api_one_map_server_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17458,7 +17567,7 @@ api_one_map_resolver_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17492,7 +17601,7 @@ api_one_stats_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17554,7 +17663,7 @@ api_lisp_gpe_fwd_entry_path_dump (vat_main_t * vam) /* send it... */ S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17803,7 +17912,7 @@ api_policer_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -17892,7 +18001,7 @@ api_policer_classify_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -18118,7 +18227,7 @@ api_mpls_tunnel_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18188,7 +18297,7 @@ api_mpls_fib_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18289,7 +18398,7 @@ api_ip_fib_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18307,7 +18416,7 @@ api_ip_mfib_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18398,7 +18507,7 @@ api_ip_neighbor_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18499,7 +18608,7 @@ api_ip6_fib_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18517,7 +18626,7 @@ api_ip6_mfib_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18632,7 +18741,7 @@ api_classify_session_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18766,7 +18875,7 @@ api_ipfix_classify_table_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -18957,7 +19066,7 @@ api_sw_interface_span_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -19458,7 +19567,7 @@ api_ipsec_gre_tunnel_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -19665,7 +19774,7 @@ api_flow_classify_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); /* Wait for a reply... */ @@ -19836,7 +19945,7 @@ api_l2_xconnect_dump (vat_main_t * vam) S (mp); /* Use a control ping for synchronization */ - M (CONTROL_PING, mp_ping); + MPING (CONTROL_PING, mp_ping); S (mp_ping); W (ret); @@ -20140,6 +20249,34 @@ api_tcp_configure_src_addresses (vat_main_t * vam) return ret; } +static int +api_memfd_segment_create (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_memfd_segment_create_t *mp; + u64 size = 64 << 20; + int ret; + +#if VPP_API_TEST_BUILTIN == 1 + errmsg ("memfd_segment_create (builtin) not supported"); + return -99; +#endif + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "size %U", unformat_memory_size, &size)) + ; + else + break; + } + + M (MEMFD_SEGMENT_CREATE, mp); + mp->requested_size = size; + S (mp); + W (ret); + return ret; +} + static int q_or_quit (vat_main_t * vam) { @@ -20919,7 +21056,8 @@ _(p2p_ethernet_add, " | sw_if_index remote_mac sub_id _(p2p_ethernet_del, " | sw_if_index remote_mac ") \ _(lldp_config, "system-name tx-hold tx-interval ") \ _(sw_interface_set_lldp, " | sw_if_index [port-desc ] [disable]") \ -_(tcp_configure_src_addresses, "first-last [vrf ]") +_(tcp_configure_src_addresses, "first-last [vrf ]") \ +_(memfd_segment_create,"size ") /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ diff --git a/src/vat/main.c b/src/vat/main.c index 1bad2ebb14d..e2c9b70889a 100644 --- a/src/vat/main.c +++ b/src/vat/main.c @@ -190,6 +190,17 @@ do_one_file (vat_main_t * vam) vam->regenerate_interface_table = 0; api_sw_interface_dump (vam); } + + /* Hack to pick up new client index after memfd_segment_create pivot */ + if (vam->client_index_invalid) + { + vat_main_t *vam = &vat_main; + api_main_t *am = &api_main; + + vam->vl_input_queue = am->shmem_hdr->vl_input_queue; + vam->my_client_index = am->my_client_index; + vam->client_index_invalid = 0; + } } } @@ -313,6 +324,8 @@ main (int argc, char **argv) eval_current_line); init_error_string_table (vam); + vec_validate (vam->cmd_reply, 0); + vec_reset_length (vam->cmd_reply); unformat_init_command_line (a, argv); @@ -326,6 +339,12 @@ main (int argc, char **argv) interactive = 0; else if (unformat (a, "json")) json_output = 1; + else if (unformat (a, "socket-name %s", &vam->socket_name)) + ; + else if (unformat (a, "default-socket")) + { + vam->socket_name = format (0, "%s%c", API_SOCKET_FILE, 0); + } else if (unformat (a, "plugin_path %s", (u8 *) & vat_plugin_path)) vec_add1 (vat_plugin_path, 0); else if (unformat (a, "plugin_name_filter %s", @@ -337,9 +356,12 @@ main (int argc, char **argv) } else { - fformat (stderr, - "%s: usage [in ... in ] [out ] [script] [json]\n", - argv[0]); + fformat + (stderr, + "%s: usage [in ... in ] [out ] [script] [json]\n" + "[plugin_path ][default-socket][socket-name ]\n" + "[plugin_name_filter ][chroot prefix ]\n", + argv[0]); exit (1); } } @@ -363,7 +385,11 @@ main (int argc, char **argv) setup_signal_handlers (); - if (connect_to_vpe ("vpp_api_test") < 0) + if (vam->socket_name && vat_socket_connect (vam)) + fformat (stderr, "WARNING: socket connection failed"); + + if (vam->socket_client_main.socket_fd == 0 + && connect_to_vpe ("vpp_api_test") < 0) { svm_region_exit (); fformat (stderr, "Couldn't connect to vpe, exiting...\n"); @@ -373,9 +399,7 @@ main (int argc, char **argv) vam->json_output = json_output; if (!json_output) - { - api_sw_interface_dump (vam); - } + api_sw_interface_dump (vam); vec_validate (vam->inbuf, 4096); diff --git a/src/vat/vat.h b/src/vat/vat.h index 233a1c41820..1ae46f30a7e 100644 --- a/src/vat/vat.h +++ b/src/vat/vat.h @@ -15,13 +15,18 @@ #ifndef __included_vat_h__ #define __included_vat_h__ +#define _GNU_SOURCE #include #include +#include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -147,7 +152,6 @@ typedef struct /* Errors by number */ uword *error_string_by_error_number; - /* Main thread can spin (w/ timeout) here if needed */ u32 async_mode; u32 async_errors; @@ -155,10 +159,11 @@ typedef struct volatile i32 retval; volatile u32 sw_if_index; volatile u8 *shmem_result; - volatile u8 *cmd_reply; + u8 *cmd_reply; /* our client index */ u32 my_client_index; + int client_index_invalid; /* Time is of the essence... */ clib_time_t clib_time; @@ -204,6 +209,9 @@ typedef struct ip4_nbr_counter_t **ip4_nbr_counters; ip6_nbr_counter_t **ip6_nbr_counters; + socket_client_main_t socket_client_main; + u8 *socket_name; + /* Convenience */ vlib_main_t *vlib_main; } vat_main_t; @@ -233,6 +241,8 @@ u8 *format_ip6_address (u8 * s, va_list * args); u8 *format_ip46_address (u8 * s, va_list * args); u8 *format_ethernet_address (u8 * s, va_list * args); +int vat_socket_connect (vat_main_t * vam); + #if VPP_API_TEST_BUILTIN #define print api_cli_output void api_cli_output (void *, const char *fmt, ...); diff --git a/src/vlib-api.am b/src/vlib-api.am index 677811bc895..5bbd4599729 100644 --- a/src/vlib-api.am +++ b/src/vlib-api.am @@ -11,8 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -lib_LTLIBRARIES += libvlibmemory.la libvlibmemoryclient.la \ - libvlibsocket.la +lib_LTLIBRARIES += libvlibmemory.la libvlibmemoryclient.la libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlib.la libvlibmemory_la_LIBADD = $(libvlibmemory_la_DEPENDENCIES) -lpthread @@ -25,6 +24,7 @@ libvlibmemory_la_SOURCES = \ vlibmemory/memclnt.api \ vlibmemory/memory_shared.c \ vlibmemory/memory_vlib.c \ + vlibmemory/socksvr_vlib.c \ vlibmemory/unix_shared_memory_queue.c \ vlibmemory/unix_shared_memory_queue.h \ vlibmemory/vl_memory_api_h.h \ @@ -45,6 +45,7 @@ libvlibmemoryclient_la_SOURCES = \ vlibmemory/memclnt.api \ vlibmemory/memory_client.c \ vlibmemory/memory_shared.c \ + vlibmemory/socket_client.c \ vlibmemory/unix_shared_memory_queue.c \ vlibmemory/unix_shared_memory_queue.h \ vlibmemory/vl_memory_api_h.h \ @@ -58,27 +59,14 @@ nobase_include_HEADERS += \ vlibmemory/unix_shared_memory_queue.h \ vlibmemory/memclnt.api.h -libvlibsocket_la_DEPENDENCIES = libvppinfra.la libvlib.la libvlibmemory.la -libvlibsocket_la_LIBADD = $(libvlibsocket_la_DEPENDENCIES) -libvlibsocket_la_SOURCES = \ - vlibsocket/api.h \ - vlibsocket/sockclnt.api \ - vlibsocket/sockclnt_vlib.c \ - vlibsocket/socksvr_vlib.c \ - vlibsocket/vl_socket_api_h.h \ - vlibsocket/vl_socket_msg_enum.h - -nobase_include_HEADERS += \ - vlibsocket/api.h \ - vlibsocket/vl_socket_api_h.h \ - vlibsocket/vl_socket_msg_enum.h \ - vlibsocket/sockclnt.api.h - BUILT_SOURCES += \ - vlibsocket/sockclnt.api.h \ vlibmemory/memclnt.api.h \ vlibmemory/memclnt.api.json -API_FILES += vlibmemory/memclnt.api +noinst_PROGRAMS += sock_test + +sock_test_SOURCES = vlibsocket/sock_test.c + +API_FILES += vlibmemory/memclnt.api # vi:syntax=automake diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index be3c813ae33..3eb1d9ee86c 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -2991,7 +2991,7 @@ unix_cli_exec (vlib_main_t * vm, } } - unformat_init_unix_file (&sub_input, fd); + unformat_init_clib_file (&sub_input, fd); vlib_cli_input (vm, &sub_input, 0, 0); unformat_free (&sub_input); diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index ecd317913e9..ecf659bc824 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -62,7 +62,7 @@ typedef struct static linux_epoll_main_t linux_epoll_main; static void -linux_epoll_file_update (clib_file_t * f, unix_file_update_type_t update_type) +linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) { clib_file_main_t *fm = &file_main; linux_epoll_main_t *em = &linux_epoll_main; diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index 8e6ab0ff094..324b260ecc6 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h @@ -50,14 +50,22 @@ typedef struct vl_api_registration_ u8 *name; /**< Client name */ + /* Zombie apocalypse checking */ + f64 last_heard; + int last_queue_head; + int unanswered_pings; + /** shared memory only: pointer to client input queue */ unix_shared_memory_queue_t *vl_input_queue; + svm_region_t *vlib_rp; + void *shmem_hdr; /* socket server and client */ u32 clib_file_index; /**< Socket only: file index */ i8 *unprocessed_input; /**< Socket only: pending input */ u32 unprocessed_msg_length; /**< Socket only: unprocssed length */ u8 *output_vector; /**< Socket only: output vecto */ + int *additional_fds_to_close; /* socket client only */ u32 server_handle; /**< Socket client only: server handle */ @@ -235,6 +243,7 @@ typedef struct svm_region_t *vlib_rp; /** Vector of all mapped shared-VM segments */ + svm_region_t **vlib_private_rps; svm_region_t **mapped_shmem_regions; /** Binary API shared-memory segment header pointer */ diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index 052cc6e78b5..fc9374f8a26 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -27,82 +27,83 @@ #define REPLY_MSG_ID_BASE 0 #endif -#define REPLY_MACRO(t) \ -do { \ - unix_shared_memory_queue_t * q; \ - rv = vl_msg_api_pd_handler (mp, rv); \ - q = vl_api_client_index_to_input_queue (mp->client_index); \ - if (!q) \ - return; \ - \ - rmp = vl_msg_api_alloc (sizeof (*rmp)); \ - rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \ - rmp->context = mp->context; \ - rmp->retval = ntohl(rv); \ - \ - vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +#define REPLY_MACRO(t) \ +do { \ + vl_api_registration_t *rp; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + rp = vl_api_client_index_to_registration (mp->client_index); \ + if (rp == 0) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send (rp, (u8 *)rmp); \ } while(0); -#define REPLY_MACRO2(t, body) \ -do { \ - unix_shared_memory_queue_t * q; \ - rv = vl_msg_api_pd_handler (mp, rv); \ - q = vl_api_client_index_to_input_queue (mp->client_index); \ - if (!q) \ - return; \ - \ - rmp = vl_msg_api_alloc (sizeof (*rmp)); \ - rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \ - rmp->context = mp->context; \ - rmp->retval = ntohl(rv); \ - do {body;} while (0); \ - vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +#define REPLY_MACRO2(t, body) \ +do { \ + vl_api_registration_t *rp; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + rp = vl_api_client_index_to_registration (mp->client_index); \ + if (rp == 0) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send (rp, (u8 *)rmp); \ } while(0); -#define REPLY_MACRO3(t, n, body) \ -do { \ - unix_shared_memory_queue_t * q; \ - rv = vl_msg_api_pd_handler (mp, rv); \ - q = vl_api_client_index_to_input_queue (mp->client_index); \ - if (!q) \ - return; \ - \ - rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \ - rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \ - rmp->context = mp->context; \ - rmp->retval = ntohl(rv); \ - do {body;} while (0); \ - vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +#define REPLY_MACRO3(t, n, body) \ +do { \ + vl_api_registration_t *rp; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + rp = vl_api_client_index_to_registration (mp->client_index); \ + if (rp == 0) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \ + rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send (rp, (u8 *)rmp); \ } while(0); -#define REPLY_MACRO4(t, n, body) \ -do { \ - unix_shared_memory_queue_t * q; \ - u8 is_error = 0; \ - rv = vl_msg_api_pd_handler (mp, rv); \ - q = vl_api_client_index_to_input_queue (mp->client_index); \ - if (!q) \ - return; \ - \ - rmp = vl_msg_api_alloc_or_null (sizeof (*rmp) + n); \ - if (!rmp) \ - { \ - /* if there isn't enough memory, try to allocate */ \ - /* some at least for returning an error */ \ - rmp = vl_msg_api_alloc (sizeof (*rmp)); \ - if (!rmp) \ - return; \ - \ - memset (rmp, 0, sizeof (*rmp)); \ - rv = VNET_API_ERROR_TABLE_TOO_BIG; \ - is_error = 1; \ - } \ - rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \ - rmp->context = mp->context; \ - rmp->retval = ntohl(rv); \ - if (!is_error) \ - do {body;} while (0); \ - vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +#define REPLY_MACRO4(t, n, body) \ +do { \ + vl_api_registration_t *rp; \ + u8 is_error = 0; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + \ + rp = vl_api_client_index_to_registration (mp->client_index); \ + if (rp == 0) \ + return; \ + \ + rmp = vl_msg_api_alloc_or_null (sizeof (*rmp) + n); \ + if (!rmp) \ + { \ + /* if there isn't enough memory, try to allocate */ \ + /* some at least for returning an error */ \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + if (!rmp) \ + return; \ + \ + memset (rmp, 0, sizeof (*rmp)); \ + rv = VNET_API_ERROR_TABLE_TOO_BIG; \ + is_error = 1; \ + } \ + rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + if (!is_error) \ + do {body;} while (0); \ + vl_msg_api_send (rp, (u8 *)rmp); \ } while(0); /* "trust, but verify" */ diff --git a/src/vlibapi/vat_helper_macros.h b/src/vlibapi/vat_helper_macros.h index 57ad520be1a..5e7f1083947 100644 --- a/src/vlibapi/vat_helper_macros.h +++ b/src/vlibapi/vat_helper_macros.h @@ -22,54 +22,113 @@ /* M: construct, but don't yet send a message */ #define M(T, mp) \ do { \ + socket_client_main_t *scm = &vam->socket_client_main; \ vam->result_ready = 0; \ - mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)); \ + if (scm->socket_enable) \ + { \ + mp = (void *)scm->socket_tx_buffer; \ + scm->socket_tx_nbytes = sizeof (*mp); \ + } \ + else \ + mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)); \ memset (mp, 0, sizeof (*mp)); \ mp->_vl_msg_id = ntohs (VL_API_##T+__plugin_msg_base); \ mp->client_index = vam->my_client_index; \ } while(0); +/* MPING: construct a control-ping message, don't send it yet */ +#define MPING(T, mp) \ +do { \ + socket_client_main_t *scm = &vam->socket_client_main; \ + vam->result_ready = 0; \ + if (scm->socket_enable) \ + { \ + mp = (void *)scm->socket_tx_buffer; \ + scm->socket_tx_nbytes = sizeof (*mp); \ + } \ + else \ + mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T+__plugin_msg_base); \ + mp->client_index = vam->my_client_index; \ + scm->control_pings_outstanding++; \ +} while(0); + #define M2(T, mp, n) \ do { \ + socket_client_main_t *scm = &vam->socket_client_main; \ vam->result_ready = 0; \ - mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)+(n)); \ + if (scm->socket_enable) \ + { \ + mp = (void *)scm->socket_tx_buffer; \ + scm->socket_tx_nbytes = sizeof (*mp) + n; \ + } \ + else \ + mp = vl_msg_api_alloc_as_if_client(sizeof(*mp) + n); \ memset (mp, 0, sizeof (*mp)); \ mp->_vl_msg_id = ntohs (VL_API_##T+__plugin_msg_base); \ mp->client_index = vam->my_client_index; \ } while(0); /* S: send a message */ -#define S(mp) (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) +#define S(mp) \ +do { \ + int n; \ + socket_client_main_t *scm = &vam->socket_client_main; \ + if (scm->socket_enable) \ + { \ + msgbuf_t msgbuf; \ + \ + msgbuf.q = 0; \ + msgbuf.gc_mark_timestamp = 0; \ + msgbuf.data_len = ntohl(scm->socket_tx_nbytes); \ + \ + n = write (scm->socket_fd, &msgbuf, sizeof (msgbuf)); \ + if (n < sizeof (msgbuf)) \ + clib_unix_warning ("socket write (msgbuf)"); \ + \ + n = write (scm->socket_fd, scm->socket_tx_buffer, \ + scm->socket_tx_nbytes); \ + if (n < scm->socket_tx_nbytes) \ + clib_unix_warning ("socket write (msg)"); \ + } \ + else \ + vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp); \ + } while (0); /* W: wait for results, with timeout */ -#define W(ret) \ -do { \ - f64 timeout = vat_time_now (vam) + 1.0; \ - ret = -99; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - ret = vam->retval; \ - break; \ - } \ - vat_suspend (vam->vlib_main, 1e-5); \ - } \ +#define W(ret) \ +do { \ + f64 timeout = vat_time_now (vam) + 1.0; \ + socket_client_main_t *scm = &vam->socket_client_main; \ + ret = -99; \ + \ + vl_socket_client_read_reply (scm); \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + ret = vam->retval; \ + break; \ + } \ + vat_suspend (vam->vlib_main, 1e-5); \ + } \ } while(0); /* W2: wait for results, with timeout */ -#define W2(ret, body) \ -do { \ - f64 timeout = vat_time_now (vam) + 1.0; \ - ret = -99; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - (body); \ - ret = vam->retval; \ - break; \ - } \ - vat_suspend (vam->vlib_main, 1e-5); \ - } \ +#define W2(ret, body) \ +do { \ + f64 timeout = vat_time_now (vam) + 1.0; \ + socket_client_main_t *scm = &vam->socket_client_main; \ + ret = -99; \ + \ + vl_socket_client_read_reply (scm); \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + (body); \ + ret = vam->retval; \ + break; \ + } \ + vat_suspend (vam->vlib_main, 1e-5); \ + } \ } while(0); diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h index 2a1438fde3c..5c32f5308de 100644 --- a/src/vlibmemory/api.h +++ b/src/vlibmemory/api.h @@ -50,6 +50,7 @@ vl_msg_api_handle_from_index_and_epoch (u32 index, u32 epoch) } void vl_enable_disable_memory_api (vlib_main_t * vm, int yesno); + #endif /* included_vlibmemory_api_h */ /* diff --git a/src/vlibmemory/api_common.h b/src/vlibmemory/api_common.h index 19daecdfb6a..63a7e5e4188 100644 --- a/src/vlibmemory/api_common.h +++ b/src/vlibmemory/api_common.h @@ -19,6 +19,7 @@ #define included_vlibmemory_api_common_h #include +#include #include #include @@ -44,18 +45,17 @@ typedef struct ring_alloc_ /* * Initializers for the (shared-memory) rings - * _(size, n). Note: each msg has an 8 byte header. - * Might want to change that to an index sometime. + * _(size, n). Note: each msg has space for a header. */ #define foreach_vl_aring_size \ -_(64+8, 1024) \ -_(256+8, 128) \ -_(1024+8, 64) +_(64+sizeof(ring_alloc_t), 1024) \ +_(256+sizeof(ring_alloc_t), 128) \ +_(1024+sizeof(ring_alloc_t), 64) #define foreach_clnt_aring_size \ -_(1024+8, 1024) \ -_(2048+8, 128) \ -_(4096+8, 8) + _(1024+sizeof(ring_alloc_t), 1024) \ + _(2048+sizeof(ring_alloc_t), 128) \ + _(4096+sizeof(ring_alloc_t), 8) typedef struct vl_shmem_hdr_ { @@ -83,7 +83,6 @@ typedef struct vl_shmem_hdr_ /* Number of garbage-collected messages */ u32 garbage_collects; - } vl_shmem_hdr_t; #define VL_SHM_VERSION 2 @@ -123,10 +122,114 @@ int vl_client_connect_to_vlib (const char *svm_name, const char *client_name, int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, const char *client_name, int rx_queue_size); +int vl_client_connect_to_vlib_no_map (const char *svm_name, + const char *client_name, + int rx_queue_size); u16 vl_client_get_first_plugin_msg_id (const char *plugin_name); void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); u32 vl_api_memclnt_create_internal (char *, unix_shared_memory_queue_t *); +void vl_init_shmem (svm_region_t * vlib_rp, int is_vlib, + int is_private_region); +void vl_client_install_client_message_handlers (void); + +/* API messages over sockets */ + +extern vlib_node_registration_t memclnt_node; +extern volatile int **vl_api_queue_cursizes; + +/* Events sent to the memclnt process */ +#define QUEUE_SIGNAL_EVENT 1 +#define SOCKET_READ_EVENT 2 + +#define API_SOCKET_FILE "/run/vpp-api.sock" + +typedef struct +{ + clib_file_t *clib_file; + vl_api_registration_t *regp; + u8 *data; +} vl_socket_args_for_process_t; + +typedef struct +{ + /* Server port number */ + u8 *socket_name; + + /* By default, localhost... */ + u32 bind_address; + + /* + * (listen, server, client) registrations. Shared memory + * registrations are in shared memory + */ + vl_api_registration_t *registration_pool; + /* + * Chain-drag variables, so message API handlers + * (generally) don't know whether they're talking to a socket + * or to a shared-memory connection. + */ + vl_api_registration_t *current_rp; + clib_file_t *current_uf; + /* One input buffer, shared across all sockets */ + i8 *input_buffer; + + /* pool of process args for socket clients */ + vl_socket_args_for_process_t *process_args; + + /* Listen for API connections here */ + clib_socket_t socksvr_listen_socket; +} socket_main_t; + +extern socket_main_t socket_main; + +typedef struct +{ + int socket_fd; + /* Temporarily disable the connection, so we can keep it around... */ + int socket_enable; + + clib_socket_t client_socket; + + u32 socket_buffer_size; + u8 *socket_tx_buffer; + u8 *socket_rx_buffer; + u32 socket_tx_nbytes; + int control_pings_outstanding; +} socket_client_main_t; + +extern socket_client_main_t socket_client_main; + +#define SOCKET_CLIENT_DEFAULT_BUFFER_SIZE 4096 + +void socksvr_add_pending_output (struct clib_file *uf, + struct vl_api_registration_ *cf, + u8 * buffer, uword buffer_bytes); + +void vl_free_socket_registration_index (u32 pool_index); +void vl_socket_process_msg (struct clib_file *uf, + struct vl_api_registration_ *rp, i8 * input_v); +clib_error_t *vl_socket_read_ready (struct clib_file *uf); +void vl_socket_add_pending_output (struct clib_file *uf, + struct vl_api_registration_ *rp, + u8 * buffer, uword buffer_bytes); +void vl_socket_add_pending_output_no_flush (struct clib_file *uf, + struct vl_api_registration_ *rp, + u8 * buffer, uword buffer_bytes); +clib_error_t *vl_socket_write_ready (struct clib_file *uf); +void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem); +u32 sockclnt_open_index (char *client_name, char *hostname, int port); +void sockclnt_close_index (u32 index); +void vl_client_msg_api_send (vl_api_registration_t * cm, u8 * elem); +vl_api_registration_t *sockclnt_get_registration (u32 index); +void vl_api_socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, + i8 * input_v); + +int +vl_socket_client_connect (socket_client_main_t * scm, char *socket_path, + char *client_name, u32 socket_buffer_size); +void vl_socket_client_read_reply (socket_client_main_t * scm); +void vl_socket_client_enable_disable (socket_client_main_t * scm, int enable); #endif /* included_vlibmemory_api_common_h */ diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api index 32e51407949..94c99ad5d45 100644 --- a/src/vlibmemory/memclnt.api +++ b/src/vlibmemory/memclnt.api @@ -112,3 +112,60 @@ manual_print define trace_plugin_msg_ids u16 first_msg_id; u16 last_msg_id; }; + +/* + * Create a socket client registration. + */ +define sockclnt_create { + u8 name[64]; /* for show, find by name, whatever */ + u32 context; /* opaque value to be returned in the reply */ +}; + +define sockclnt_create_reply { + i32 response; /* Non-negative = success */ + u64 handle; /* handle by which vlib knows this client */ + u32 index; /* index, used e.g. by API trace replay */ + u32 context; /* opaque value from the create request */ +}; + +/* + * Delete a client registration + */ +define sockclnt_delete { + u32 index; /* index, used e.g. by API trace replay */ + u64 handle; /* handle by which vlib knows this client */ +}; + +define sockclnt_delete_reply { + i32 response; /* Non-negative = success */ + u64 handle; /* in case the client wonders */ +}; + +/* + * Ask vpp for a memfd shared segment + */ +define memfd_segment_create { + u32 client_index; + u32 context; + u64 requested_size; +}; + +/* + * Reply + */ +define memfd_segment_create_reply +{ + u32 context; + i32 retval; + u32 master_fd; +}; + +/* + * Memory client ping / response + * Only sent on inactive connections + */ +autoreply define memclnt_keepalive +{ + u32 client_index; + u32 context; +}; diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c index a162d6bb27c..3f8b799f41f 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -319,21 +319,37 @@ vl_client_disconnect (void) } } +/** + * Stave off the binary API dead client reaper + * Only sent to inactive clients + */ +static void +vl_api_memclnt_keepalive_t_handler (vl_api_memclnt_keepalive_t * mp) +{ + vl_api_memclnt_keepalive_reply_t *rmp; + api_main_t *am; + vl_shmem_hdr_t *shmem_hdr; + + am = &api_main; + shmem_hdr = am->shmem_hdr; + + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_MEMCLNT_KEEPALIVE_REPLY); + rmp->context = mp->context; + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & rmp); +} + #define foreach_api_msg \ _(RX_THREAD_EXIT, rx_thread_exit) \ _(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \ -_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) +_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) \ +_(MEMCLNT_KEEPALIVE, memclnt_keepalive) -int -vl_client_api_map (const char *region_name) +void +vl_client_install_client_message_handlers (void) { - int rv; - - if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0) - { - return rv; - } #define _(N,n) \ vl_msg_api_set_handlers(VL_API_##N, #n, \ @@ -344,6 +360,18 @@ vl_client_api_map (const char *region_name) sizeof(vl_api_##n##_t), 1); foreach_api_msg; #undef _ +} + + +int +vl_client_api_map (const char *region_name) +{ + int rv; + + if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0) + return rv; + + vl_client_install_client_message_handlers (); return 0; } @@ -356,12 +384,12 @@ vl_client_api_unmap (void) static int connect_to_vlib_internal (const char *svm_name, const char *client_name, - int rx_queue_size, int want_pthread) + int rx_queue_size, int want_pthread, int do_map) { int rv = 0; memory_client_main_t *mm = &memory_client_main; - if ((rv = vl_client_api_map (svm_name))) + if (do_map && (rv = vl_client_api_map (svm_name))) { clib_warning ("vl_client_api map rv %d", rv); return rv; @@ -393,7 +421,8 @@ vl_client_connect_to_vlib (const char *svm_name, const char *client_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, - 1 /* want pthread */ ); + 1 /* want pthread */ , + 1 /* do map */ ); } int @@ -402,7 +431,17 @@ vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, - 0 /* want pthread */ ); + 0 /* want pthread */ , + 1 /* do map */ ); +} + +int +vl_client_connect_to_vlib_no_map (const char *svm_name, + const char *client_name, int rx_queue_size) +{ + return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, + 1 /* want pthread */ , + 0 /* dont map */ ); } void diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index 8c6469080d7..021c54ef953 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -39,6 +39,10 @@ #include #undef vl_typedefs +socket_main_t socket_main; + +#define DEBUG_MESSAGE_BUFFER_OVERRUN 0 + static inline void * vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) { @@ -52,6 +56,10 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) shmem_hdr = am->shmem_hdr; +#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0 + nbytes += 4; +#endif + if (shmem_hdr == 0) { clib_warning ("shared memory header NULL"); @@ -172,7 +180,16 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) pthread_mutex_unlock (&am->vlib_rp->mutex); out: +#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0 + { + nbytes -= 4; + u32 *overrun; + overrun = (u32 *) (rv->data + nbytes - sizeof (msgbuf_t)); + *overrun = 0x1badbabe; + } +#endif rv->data_len = htonl (nbytes - sizeof (msgbuf_t)); + return (rv->data); } @@ -231,11 +248,27 @@ vl_msg_api_free (void *a) { rv->q = 0; rv->gc_mark_timestamp = 0; +#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0 + { + u32 *overrun; + overrun = (u32 *) (rv->data + ntohl (rv->data_len)); + ASSERT (*overrun == 0x1badbabe); + } +#endif return; } pthread_mutex_lock (&am->vlib_rp->mutex); oldheap = svm_push_data_heap (am->vlib_rp); + +#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0 + { + u32 *overrun; + overrun = (u32 *) (rv->data + ntohl (rv->data_len)); + ASSERT (*overrun == 0x1badbabe); + } +#endif + clib_mem_free (rv); svm_pop_heap (oldheap); pthread_mutex_unlock (&am->vlib_rp->mutex); @@ -329,17 +362,91 @@ vl_set_api_pvt_heap_size (u64 size) am->api_pvt_heap_size = size; } +void +vl_init_shmem (svm_region_t * vlib_rp, int is_vlib, int is_private_region) +{ + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr = 0; + u32 vlib_input_queue_length; + void *oldheap; + ASSERT (vlib_rp); + + /* $$$$ need private region config parameters */ + + oldheap = svm_push_data_heap (vlib_rp); + + vec_validate (shmem_hdr, 0); + shmem_hdr->version = VL_SHM_VERSION; + + /* vlib main input queue */ + vlib_input_queue_length = 1024; + if (am->vlib_input_queue_length) + vlib_input_queue_length = am->vlib_input_queue_length; + + shmem_hdr->vl_input_queue = + unix_shared_memory_queue_init (vlib_input_queue_length, sizeof (uword), + getpid (), am->vlib_signal); + + /* Set up the msg ring allocator */ +#define _(sz,n) \ + do { \ + ring_alloc_t _rp; \ + _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ + _rp.size = (sz); \ + _rp.nitems = n; \ + _rp.hits = 0; \ + _rp.misses = 0; \ + vec_add1(shmem_hdr->vl_rings, _rp); \ + } while (0); + + foreach_vl_aring_size; +#undef _ + +#define _(sz,n) \ + do { \ + ring_alloc_t _rp; \ + _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ + _rp.size = (sz); \ + _rp.nitems = n; \ + _rp.hits = 0; \ + _rp.misses = 0; \ + vec_add1(shmem_hdr->client_rings, _rp); \ + } while (0); + + foreach_clnt_aring_size; +#undef _ + + if (is_private_region == 0) + { + am->shmem_hdr = shmem_hdr; + am->vlib_rp = vlib_rp; + am->our_pid = getpid (); + if (is_vlib) + am->shmem_hdr->vl_pid = am->our_pid; + } + else + shmem_hdr->vl_pid = am->our_pid; + + svm_pop_heap (oldheap); + + /* + * After absolutely everything that a client might see is set up, + * declare the shmem region valid + */ + vlib_rp->user_ctx = shmem_hdr; + + pthread_mutex_unlock (&vlib_rp->mutex); +} + + int vl_map_shmem (const char *region_name, int is_vlib) { svm_map_region_args_t _a, *a = &_a; svm_region_t *vlib_rp, *root_rp; - void *oldheap; - vl_shmem_hdr_t *shmem_hdr = 0; api_main_t *am = &api_main; int i, rv; struct timespec ts, tsrem; - u32 vlib_input_queue_length; char *vpe_api_region_suffix = "-vpe-api"; memset (a, 0, sizeof (*a)); @@ -472,65 +579,8 @@ vl_map_shmem (const char *region_name, int is_vlib) } /* Nope, it's our problem... */ + vl_init_shmem (vlib_rp, 1 /* is vlib */ , 0 /* is_private_region */ ); - oldheap = svm_push_data_heap (vlib_rp); - - vec_validate (shmem_hdr, 0); - shmem_hdr->version = VL_SHM_VERSION; - - /* vlib main input queue */ - vlib_input_queue_length = 1024; - if (am->vlib_input_queue_length) - vlib_input_queue_length = am->vlib_input_queue_length; - - shmem_hdr->vl_input_queue = - unix_shared_memory_queue_init (vlib_input_queue_length, sizeof (uword), - getpid (), am->vlib_signal); - - /* Set up the msg ring allocator */ -#define _(sz,n) \ - do { \ - ring_alloc_t _rp; \ - _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ - _rp.size = (sz); \ - _rp.nitems = n; \ - _rp.hits = 0; \ - _rp.misses = 0; \ - vec_add1(shmem_hdr->vl_rings, _rp); \ - } while (0); - - foreach_vl_aring_size; -#undef _ - -#define _(sz,n) \ - do { \ - ring_alloc_t _rp; \ - _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ - _rp.size = (sz); \ - _rp.nitems = n; \ - _rp.hits = 0; \ - _rp.misses = 0; \ - vec_add1(shmem_hdr->client_rings, _rp); \ - } while (0); - - foreach_clnt_aring_size; -#undef _ - - am->shmem_hdr = shmem_hdr; - am->vlib_rp = vlib_rp; - am->our_pid = getpid (); - if (is_vlib) - am->shmem_hdr->vl_pid = am->our_pid; - - svm_pop_heap (oldheap); - - /* - * After absolutely everything that a client might see is set up, - * declare the shmem region valid - */ - vlib_rp->user_ctx = shmem_hdr; - - pthread_mutex_unlock (&vlib_rp->mutex); vec_add1 (am->mapped_shmem_regions, vlib_rp); return 0; } @@ -638,6 +688,9 @@ vl_api_client_index_to_registration_internal (u32 handle) vl_api_registration_t * vl_api_client_index_to_registration (u32 index) { + if (PREDICT_FALSE (socket_main.current_rp != 0)) + return socket_main.current_rp; + return (vl_api_client_index_to_registration_internal (index)); } diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index d305ea619aa..c9b3183f592 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -96,17 +96,7 @@ vl_api_trace_plugin_msg_ids_t_print (vl_api_trace_plugin_msg_ids_t * a, #include #undef vl_endianfun -void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) - __attribute__ ((weak)); - -void -vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) -{ - static int count; - - if (count++ < 5) - clib_warning ("need to link against -lvlibsocket, msg not sent!"); -} +extern void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem); void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem) @@ -117,7 +107,7 @@ vl_msg_api_send (vl_api_registration_t * rp, u8 * elem) } else { - vl_msg_api_send_shmem (rp->vl_input_queue, elem); + vl_msg_api_send_shmem (rp->vl_input_queue, (u8 *) & elem); } } @@ -196,6 +186,7 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) int rv = 0; void *oldheap; api_main_t *am = &api_main; + u8 *serialized_message_table_in_shmem; /* * This is tortured. Maintain a vlib-address-space private @@ -235,6 +226,8 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) memset (regp, 0, sizeof (*regp)); regp->registration_type = REGISTRATION_TYPE_SHMEM; regp->vl_api_registration_pool_index = regpp - am->vl_clients; + regp->vlib_rp = svm; + regp->shmem_hdr = am->shmem_hdr; q = regp->vl_input_queue = (unix_shared_memory_queue_t *) (uword) mp->input_queue; @@ -242,11 +235,11 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) regp->name = format (0, "%s", mp->name); vec_add1 (regp->name, 0); + serialized_message_table_in_shmem = vl_api_serialize_message_table (am, 0); + pthread_mutex_unlock (&svm->mutex); svm_pop_heap (oldheap); - ASSERT (am->serialized_message_table_in_shmem); - rp = vl_msg_api_alloc (sizeof (*rp)); rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_REPLY); rp->handle = (uword) regp; @@ -255,8 +248,7 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) am->shmem_hdr->application_restarts); rp->context = mp->context; rp->response = ntohl (rv); - rp->message_table = - pointer_to_uword (am->serialized_message_table_in_shmem); + rp->message_table = pointer_to_uword (serialized_message_table_in_shmem); vl_msg_api_send_shmem (q, (u8 *) & rp); } @@ -313,11 +305,15 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) if (!pool_is_free (am->vl_clients, regpp)) { + int i; regp = *regpp; svm = am->vlib_rp; + int private_registration = 0; - /* $$$ check the input queue for e.g. punted sf's */ - + /* + * Note: the API message handling path will set am->vlib_rp + * as appropriate for pairwise / private memory segments + */ rp = vl_msg_api_alloc (sizeof (*rp)); rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE_REPLY); rp->handle = mp->handle; @@ -333,18 +329,56 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) return; } + /* For horizontal scaling, add a hash table... */ + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + { + /* Is this a pairwise / private API segment? */ + if (am->vlib_private_rps[i] == svm) + { + /* Note: account for the memfd header page */ + u64 virtual_base = svm->virtual_base - MMAP_PAGESIZE; + u64 virtual_size = svm->virtual_size + MMAP_PAGESIZE; + + /* + * Kill the registration pool element before we make + * the index vanish forever + */ + pool_put_index (am->vl_clients, + regp->vl_api_registration_pool_index); + + vec_delete (am->vlib_private_rps, 1, i); + /* Kill it, accounting for the memfd header page */ + if (munmap ((void *) virtual_base, virtual_size) < 0) + clib_unix_warning ("munmap"); + /* Reset the queue-length-address cache */ + vec_reset_length (vl_api_queue_cursizes); + private_registration = 1; + break; + } + } + /* No dangling references, please */ *regpp = 0; - pool_put_index (am->vl_clients, regp->vl_api_registration_pool_index); - - pthread_mutex_lock (&svm->mutex); - oldheap = svm_push_data_heap (svm); - /* Poison the old registration */ - memset (regp, 0xF1, sizeof (*regp)); - clib_mem_free (regp); - pthread_mutex_unlock (&svm->mutex); - svm_pop_heap (oldheap); + if (private_registration == 0) + { + pool_put_index (am->vl_clients, + regp->vl_api_registration_pool_index); + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + /* Poison the old registration */ + memset (regp, 0xF1, sizeof (*regp)); + clib_mem_free (regp); + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + /* + * These messages must be freed manually, since they're set up + * as "bounce" messages. In the private_registration == 1 case, + * we kill the shared-memory segment which contains the message + * with munmap. + */ + vl_msg_api_free (mp); + } } else { @@ -392,10 +426,54 @@ out: vl_msg_api_send_shmem (q, (u8 *) & rmp); } -#define foreach_vlib_api_msg \ -_(MEMCLNT_CREATE, memclnt_create) \ -_(MEMCLNT_DELETE, memclnt_delete) \ -_(GET_FIRST_MSG_ID, get_first_msg_id) +/** + * client answered a ping, stave off the grim reaper... + */ + +void + vl_api_memclnt_keepalive_reply_t_handler + (vl_api_memclnt_keepalive_reply_t * mp) +{ + vl_api_registration_t *regp; + vlib_main_t *vm = vlib_get_main (); + + regp = vl_api_client_index_to_registration (mp->context); + if (regp) + { + regp->last_heard = vlib_time_now (vm); + regp->unanswered_pings = 0; + } + else + clib_warning ("BUG: anonymous memclnt_keepalive_reply"); +} + +/** + * We can send ourselves these messages if someone uses the + * builtin binary api test tool... + */ +static void +vl_api_memclnt_keepalive_t_handler (vl_api_memclnt_keepalive_t * mp) +{ + vl_api_memclnt_keepalive_reply_t *rmp; + api_main_t *am; + vl_shmem_hdr_t *shmem_hdr; + + am = &api_main; + shmem_hdr = am->shmem_hdr; + + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_MEMCLNT_KEEPALIVE_REPLY); + rmp->context = mp->context; + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & rmp); +} + +#define foreach_vlib_api_msg \ +_(MEMCLNT_CREATE, memclnt_create) \ +_(MEMCLNT_DELETE, memclnt_delete) \ +_(GET_FIRST_MSG_ID, get_first_msg_id) \ +_(MEMCLNT_KEEPALIVE, memclnt_keepalive) \ +_(MEMCLNT_KEEPALIVE_REPLY, memclnt_keepalive_reply) /* * vl_api_init @@ -404,6 +482,7 @@ static int memory_api_init (const char *region_name) { int rv; + api_main_t *am = &api_main; vl_msg_api_msg_config_t cfg; vl_msg_api_msg_config_t *c = &cfg; @@ -428,6 +507,13 @@ memory_api_init (const char *region_name) foreach_vlib_api_msg; #undef _ + /* + * special-case freeing of memclnt_delete messages, so we can + * simply munmap pairwise / private API segments... + */ + am->message_bounce[VL_API_MEMCLNT_DELETE] = 1; + am->is_mp_safe[VL_API_MEMCLNT_KEEPALIVE_REPLY] = 1; + return 0; } @@ -474,6 +560,203 @@ send_one_plugin_msg_ids_msg (u8 * name, u16 first_msg_id, u16 last_msg_id) vl_msg_api_send_shmem (q, (u8 *) & mp); } +static void +send_memclnt_keepalive (vl_api_registration_t * regp, f64 now) +{ + vl_api_memclnt_keepalive_t *mp; + unix_shared_memory_queue_t *q; + api_main_t *am = &api_main; + svm_region_t *save_vlib_rp = am->vlib_rp; + vl_shmem_hdr_t *save_shmem_hdr = am->shmem_hdr; + + q = regp->vl_input_queue; + + /* + * If the queue head is moving, assume that the client is processing + * messages and skip the ping. This heuristic may fail if the queue + * is in the same position as last time, net of wrapping; in which + * case, the client will receive a keepalive. + */ + if (regp->last_queue_head != q->head) + { + regp->last_heard = now; + regp->unanswered_pings = 0; + regp->last_queue_head = q->head; + return; + } + + /* + * push/pop shared memory segment, so this routine + * will work with "normal" as well as "private segment" + * memory clients.. + */ + + am->vlib_rp = regp->vlib_rp; + am->shmem_hdr = regp->shmem_hdr; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MEMCLNT_KEEPALIVE); + mp->context = mp->client_index = + vl_msg_api_handle_from_index_and_epoch + (regp->vl_api_registration_pool_index, + am->shmem_hdr->application_restarts); + + regp->unanswered_pings++; + + /* Failure-to-send due to a stuffed queue is absolutely expected */ + if (unix_shared_memory_queue_add (q, (u8 *) & mp, 1 /* nowait */ )) + vl_msg_api_free (mp); + + am->vlib_rp = save_vlib_rp; + am->shmem_hdr = save_shmem_hdr; +} + +static void +dead_client_scan (api_main_t * am, vl_shmem_hdr_t * shm, f64 now) +{ + + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + static u32 *dead_indices; + static u32 *confused_indices; + + vec_reset_length (dead_indices); + vec_reset_length (confused_indices); + + /* *INDENT-OFF* */ + pool_foreach (regpp, am->vl_clients, + ({ + regp = *regpp; + if (regp) + { + /* If we haven't heard from this client recently... */ + if (regp->last_heard < (now - 10.0)) + { + if (regp->unanswered_pings == 2) + { + unix_shared_memory_queue_t *q; + q = regp->vl_input_queue; + if (kill (q->consumer_pid, 0) >=0) + { + clib_warning ("REAPER: lazy binary API client '%s'", + regp->name); + regp->unanswered_pings = 0; + regp->last_heard = now; + } + else + { + clib_warning ("REAPER: binary API client '%s' died", + regp->name); + vec_add1(dead_indices, regpp - am->vl_clients); + } + } + else + send_memclnt_keepalive (regp, now); + } + else + regp->unanswered_pings = 0; + } + else + { + clib_warning ("NULL client registration index %d", + regpp - am->vl_clients); + vec_add1 (confused_indices, regpp - am->vl_clients); + } + })); + /* *INDENT-ON* */ + /* This should "never happen," but if it does, fix it... */ + if (PREDICT_FALSE (vec_len (confused_indices) > 0)) + { + int i; + for (i = 0; i < vec_len (confused_indices); i++) + { + pool_put_index (am->vl_clients, confused_indices[i]); + } + } + + if (PREDICT_FALSE (vec_len (dead_indices) > 0)) + { + int i; + svm_region_t *svm; + void *oldheap; + + /* Allow the application to clean up its registrations */ + for (i = 0; i < vec_len (dead_indices); i++) + { + regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); + if (regpp) + { + u32 handle; + + handle = vl_msg_api_handle_from_index_and_epoch + (dead_indices[i], shm->application_restarts); + (void) call_reaper_functions (handle); + } + } + + svm = am->vlib_rp; + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + + for (i = 0; i < vec_len (dead_indices); i++) + { + regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); + if (regpp) + { + /* Is this a pairwise SVM segment? */ + if ((*regpp)->vlib_rp != svm) + { + int i; + svm_region_t *dead_rp = (*regpp)->vlib_rp; + /* Note: account for the memfd header page */ + u64 virtual_base = dead_rp->virtual_base - MMAP_PAGESIZE; + u64 virtual_size = dead_rp->virtual_size + MMAP_PAGESIZE; + + /* For horizontal scaling, add a hash table... */ + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + if (am->vlib_private_rps[i] == dead_rp) + { + vec_delete (am->vlib_private_rps, 1, i); + goto found; + } + clib_warning ("private rp %llx AWOL", dead_rp); + + found: + /* Kill it, accounting for the memfd header page */ + if (munmap ((void *) virtual_base, virtual_size) < 0) + clib_unix_warning ("munmap"); + /* Reset the queue-length-address cache */ + vec_reset_length (vl_api_queue_cursizes); + } + else + { + /* Poison the old registration */ + memset (*regpp, 0xF3, sizeof (**regpp)); + clib_mem_free (*regpp); + } + /* no dangling references, please */ + *regpp = 0; + } + else + { + svm_pop_heap (oldheap); + clib_warning ("Duplicate free, client index %d", + regpp - am->vl_clients); + oldheap = svm_push_data_heap (svm); + } + } + + svm_client_scan_this_region_nolock (am->vlib_rp); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + for (i = 0; i < vec_len (dead_indices); i++) + pool_put_index (am->vl_clients, dead_indices[i]); + } +} + + static uword memclnt_process (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) @@ -487,17 +770,29 @@ memclnt_process (vlib_main_t * vm, f64 dead_client_scan_time; f64 sleep_time, start_time; f64 vector_rate; + clib_error_t *socksvr_api_init (vlib_main_t * vm); + clib_error_t *error; int i; - u8 *serialized_message_table = 0; - svm_region_t *svm; - void *oldheap; + vl_socket_args_for_process_t *a; + uword event_type; + uword *event_data = 0; + int private_segment_rotor = 0; + svm_region_t *vlib_rp; + f64 now; vlib_set_queue_signal_callback (vm, memclnt_queue_callback); if ((rv = memory_api_init (am->region_name)) < 0) { - clib_warning ("memory_api_init returned %d, wait for godot...", rv); - vlib_process_suspend (vm, 1e70); + clib_warning ("memory_api_init returned %d, quitting...", rv); + return 0; + } + + if ((error = socksvr_api_init (vm))) + { + clib_error_report (error); + clib_warning ("socksvr_api_init failed, quitting..."); + return 0; } shm = am->shmem_hdr; @@ -510,8 +805,8 @@ memclnt_process (vlib_main_t * vm, if (e) clib_error_report (e); - sleep_time = 20.0; - dead_client_scan_time = vlib_time_now (vm) + 20.0; + sleep_time = 10.0; + dead_client_scan_time = vlib_time_now (vm) + 10.0; /* * Send plugin message range messages for each plugin we loaded @@ -523,20 +818,6 @@ memclnt_process (vlib_main_t * vm, rp->last_msg_id); } - /* - * Snapshoot the api message table. - */ - serialized_message_table = vl_api_serialize_message_table (am, 0); - - svm = am->vlib_rp; - pthread_mutex_lock (&svm->mutex); - oldheap = svm_push_data_heap (svm); - - am->serialized_message_table_in_shmem = vec_dup (serialized_message_table); - - pthread_mutex_unlock (&svm->mutex); - svm_pop_heap (oldheap); - /* * Save the api message table snapshot, if configured */ @@ -544,6 +825,11 @@ memclnt_process (vlib_main_t * vm, { int fd, rv; u8 *chroot_file; + u8 *serialized_message_table; + + /* + * Snapshoot the api message table. + */ if (strstr ((char *) am->save_msg_table_filename, "..") || index ((char *) am->save_msg_table_filename, '/')) { @@ -561,6 +847,9 @@ memclnt_process (vlib_main_t * vm, clib_unix_warning ("creat"); goto skip_save; } + + serialized_message_table = vl_api_serialize_message_table (am, 0); + rv = write (fd, serialized_message_table, vec_len (serialized_message_table)); @@ -572,15 +861,14 @@ memclnt_process (vlib_main_t * vm, clib_unix_warning ("close"); vec_free (chroot_file); + vec_free (serialized_message_table); } skip_save: - vec_free (serialized_message_table); /* $$$ pay attention to frame size, control CPU usage */ while (1) { - uword event_type __attribute__ ((unused)); i8 *headp; int need_broadcast; @@ -665,104 +953,89 @@ skip_save: } } - event_type = vlib_process_wait_for_event_or_clock (vm, sleep_time); - vm->queue_signal_pending = 0; - vlib_process_get_events (vm, 0 /* event_data */ ); - - if (vlib_time_now (vm) > dead_client_scan_time) + /* + * see if we have any private api shared-memory segments + * If so, push required context variables, and process + * a message. + */ + if (PREDICT_FALSE (vec_len (am->vlib_private_rps))) { - vl_api_registration_t **regpp; - vl_api_registration_t *regp; - unix_shared_memory_queue_t *q; - static u32 *dead_indices; - static u32 *confused_indices; + unix_shared_memory_queue_t *save_vlib_input_queue = q; + vl_shmem_hdr_t *save_shmem_hdr = am->shmem_hdr; + svm_region_t *save_vlib_rp = am->vlib_rp; - vec_reset_length (dead_indices); - vec_reset_length (confused_indices); + vlib_rp = am->vlib_rp = am->vlib_private_rps[private_segment_rotor]; - /* *INDENT-OFF* */ - pool_foreach (regpp, am->vl_clients, - ({ - regp = *regpp; - if (regp) - { - q = regp->vl_input_queue; - if (kill (q->consumer_pid, 0) < 0) - { - vec_add1(dead_indices, regpp - am->vl_clients); - } - } - else - { - clib_warning ("NULL client registration index %d", - regpp - am->vl_clients); - vec_add1 (confused_indices, regpp - am->vl_clients); - } - })); - /* *INDENT-ON* */ - /* This should "never happen," but if it does, fix it... */ - if (PREDICT_FALSE (vec_len (confused_indices) > 0)) - { - int i; - for (i = 0; i < vec_len (confused_indices); i++) - { - pool_put_index (am->vl_clients, confused_indices[i]); - } - } + am->shmem_hdr = (void *) vlib_rp->user_ctx; + q = am->shmem_hdr->vl_input_queue; - if (PREDICT_FALSE (vec_len (dead_indices) > 0)) + pthread_mutex_lock (&q->mutex); + if (q->cursize > 0) { - int i; - svm_region_t *svm; - void *oldheap; + headp = (i8 *) (q->data + sizeof (uword) * q->head); + clib_memcpy (&mp, headp, sizeof (uword)); - /* Allow the application to clean up its registrations */ - for (i = 0; i < vec_len (dead_indices); i++) - { - regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); - if (regpp) - { - u32 handle; - - handle = vl_msg_api_handle_from_index_and_epoch - (dead_indices[i], shm->application_restarts); - (void) call_reaper_functions (handle); - } - } + q->head++; + need_broadcast = (q->cursize == q->maxsize / 2); + q->cursize--; - svm = am->vlib_rp; - pthread_mutex_lock (&svm->mutex); - oldheap = svm_push_data_heap (svm); + if (PREDICT_FALSE (q->head == q->maxsize)) + q->head = 0; + pthread_mutex_unlock (&q->mutex); + if (need_broadcast) + (void) pthread_cond_broadcast (&q->condvar); - for (i = 0; i < vec_len (dead_indices); i++) - { - regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); - if (regpp) - { - /* Poison the old registration */ - memset (*regpp, 0xF3, sizeof (**regpp)); - clib_mem_free (*regpp); - /* no dangling references, please */ - *regpp = 0; - } - else - { - svm_pop_heap (oldheap); - clib_warning ("Duplicate free, client index %d", - regpp - am->vl_clients); - oldheap = svm_push_data_heap (svm); - } - } + pthread_mutex_unlock (&q->mutex); - svm_client_scan_this_region_nolock (am->vlib_rp); + vl_msg_api_handler_with_vm_node (am, (void *) mp, vm, node); + } + else + pthread_mutex_unlock (&q->mutex); - pthread_mutex_unlock (&svm->mutex); - svm_pop_heap (oldheap); - for (i = 0; i < vec_len (dead_indices); i++) - pool_put_index (am->vl_clients, dead_indices[i]); + q = save_vlib_input_queue; + am->shmem_hdr = save_shmem_hdr; + am->vlib_rp = save_vlib_rp; + + private_segment_rotor++; + if (private_segment_rotor >= vec_len (am->vlib_private_rps)) + private_segment_rotor = 0; + } + + vlib_process_wait_for_event_or_clock (vm, sleep_time); + vec_reset_length (event_data); + event_type = vlib_process_get_events (vm, &event_data); + now = vlib_time_now (vm); + + switch (event_type) + { + case QUEUE_SIGNAL_EVENT: + vm->queue_signal_pending = 0; + break; + + case SOCKET_READ_EVENT: + for (i = 0; i < vec_len (event_data); i++) + { + a = pool_elt_at_index (socket_main.process_args, event_data[i]); + vl_api_socket_process_msg (a->clib_file, a->regp, + (i8 *) a->data); + vec_free (a->data); + pool_put (socket_main.process_args, a); } + break; - dead_client_scan_time = vlib_time_now (vm) + 20.0; + /* Timeout... */ + case -1: + break; + + default: + clib_warning ("unknown event type %d", event_type); + break; + } + + if (now > dead_client_scan_time) + { + dead_client_scan (am, shm, now); + dead_client_scan_time = vlib_time_now (vm) + 10.0; } if (TRACE_VLIB_MEMORY_QUEUE) @@ -785,11 +1058,12 @@ skip_save: return 0; } /* *INDENT-OFF* */ -VLIB_REGISTER_NODE (memclnt_node,static) = { - .function = memclnt_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "api-rx-from-ring", - .state = VLIB_NODE_STATE_DISABLED, +VLIB_REGISTER_NODE (memclnt_node) = +{ + .function = memclnt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "api-rx-from-ring", + .state = VLIB_NODE_STATE_DISABLED, }; /* *INDENT-ON* */ @@ -865,14 +1139,17 @@ VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) = }; /* *INDENT-ON* */ +volatile int **vl_api_queue_cursizes; + static void memclnt_queue_callback (vlib_main_t * vm) { - static volatile int *cursizep; + int i; + api_main_t *am = &api_main; - if (PREDICT_FALSE (cursizep == 0)) + if (PREDICT_FALSE (vec_len (vl_api_queue_cursizes) != + 1 + vec_len (am->vlib_private_rps))) { - api_main_t *am = &api_main; vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; unix_shared_memory_queue_t *q; @@ -882,15 +1159,30 @@ memclnt_queue_callback (vlib_main_t * vm) q = shmem_hdr->vl_input_queue; if (q == 0) return; - cursizep = &q->cursize; + + vec_add1 (vl_api_queue_cursizes, &q->cursize); + + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + { + svm_region_t *vlib_rp = am->vlib_private_rps[i]; + + shmem_hdr = (void *) vlib_rp->user_ctx; + q = shmem_hdr->vl_input_queue; + vec_add1 (vl_api_queue_cursizes, &q->cursize); + } } - if (*cursizep >= 1) + for (i = 0; i < vec_len (vl_api_queue_cursizes); i++) { - vm->queue_signal_pending = 1; - vm->api_queue_nonempty = 1; - vlib_process_signal_event (vm, memclnt_node.index, - /* event_type */ 0, /* event_data */ 0); + if (*vl_api_queue_cursizes[i]) + { + vm->queue_signal_pending = 1; + vm->api_queue_nonempty = 1; + vlib_process_signal_event (vm, memclnt_node.index, + /* event_type */ QUEUE_SIGNAL_EVENT, + /* event_data */ 0); + break; + } } } @@ -971,13 +1263,55 @@ setup_memclnt_exit (vlib_main_t * vm) VLIB_INIT_FUNCTION (setup_memclnt_exit); +u8 * +format_api_message_rings (u8 * s, va_list * args) +{ + api_main_t *am = va_arg (*args, api_main_t *); + vl_shmem_hdr_t *shmem_hdr = va_arg (*args, vl_shmem_hdr_t *); + int main_segment = va_arg (*args, int); + ring_alloc_t *ap; + int i; + + if (shmem_hdr == 0) + return format (s, "%8s %8s %8s %8s %8s\n", + "Owner", "Size", "Nitems", "Hits", "Misses"); + + ap = shmem_hdr->vl_rings; + + for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++) + { + s = format (s, "%8s %8d %8d %8d %8d\n", + "vlib", ap->size, ap->nitems, ap->hits, ap->misses); + ap++; + } + + ap = shmem_hdr->client_rings; + + for (i = 0; i < vec_len (shmem_hdr->client_rings); i++) + { + s = format (s, "%8s %8d %8d %8d %8d\n", + "clnt", ap->size, ap->nitems, ap->hits, ap->misses); + ap++; + } + + if (main_segment) + { + s = format (s, "%d ring miss fallback allocations\n", am->ring_misses); + s = format + (s, + "%d application restarts, %d reclaimed msgs, %d garbage collects\n", + shmem_hdr->application_restarts, shmem_hdr->restart_reclaims, + shmem_hdr->garbage_collects); + } + return s; +} + static clib_error_t * vl_api_ring_command (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cli_cmd) { int i; - ring_alloc_t *ap; vl_shmem_hdr_t *shmem_hdr; api_main_t *am = &api_main; @@ -989,34 +1323,38 @@ vl_api_ring_command (vlib_main_t * vm, return 0; } - vlib_cli_output (vm, "%8s %8s %8s %8s %8s\n", - "Owner", "Size", "Nitems", "Hits", "Misses"); - - ap = shmem_hdr->vl_rings; + vlib_cli_output (vm, "Main API segment rings:"); - for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++) - { - vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n", - "vlib", ap->size, ap->nitems, ap->hits, ap->misses); - ap++; - } + vlib_cli_output (vm, "%U", format_api_message_rings, am, + 0 /* print header */ , 0 /* notused */ ); - ap = shmem_hdr->client_rings; + vlib_cli_output (vm, "%U", format_api_message_rings, am, + shmem_hdr, 1 /* main segment */ ); - for (i = 0; i < vec_len (shmem_hdr->client_rings); i++) + for (i = 0; i < vec_len (am->vlib_private_rps); i++) { - vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n", - "clnt", ap->size, ap->nitems, ap->hits, ap->misses); - ap++; + svm_region_t *vlib_rp = am->vlib_private_rps[i]; + shmem_hdr = (void *) vlib_rp->user_ctx; + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + + /* For horizontal scaling, add a hash table... */ + /* *INDENT-OFF* */ + pool_foreach (regpp, am->vl_clients, + ({ + regp = *regpp; + if (regp && regp->vlib_rp == vlib_rp) + { + vlib_cli_output (vm, "%s segment rings:", regp->name); + goto found; + } + })); + /* *INDENT-ON* */ + found: + vlib_cli_output (vm, "%U", format_api_message_rings, am, + shmem_hdr, 0 /* main segment */ ); } - vlib_cli_output (vm, "%d ring miss fallback allocations\n", - am->ring_misses); - - vlib_cli_output - (vm, "%d application restarts, %d reclaimed msgs, %d garbage collects\n", - shmem_hdr->application_restarts, - shmem_hdr->restart_reclaims, shmem_hdr->garbage_collects); return 0; } @@ -1051,15 +1389,13 @@ vl_api_client_command (vlib_main_t * vm, if (regp) { - q = regp->vl_input_queue; - if (kill (q->consumer_pid, 0) < 0) - { - health = "DEAD"; - } + if (regp->unanswered_pings > 0) + health = "questionable"; else - { - health = "alive"; - } + health = "OK"; + + q = regp->vl_input_queue; + vlib_cli_output (vm, "%16s %8d %14d 0x%016llx %s\n", regp->name, q->consumer_pid, q->cursize, q, health); @@ -1306,6 +1642,7 @@ vlibmemory_init (vlib_main_t * vm) { api_main_t *am = &api_main; svm_map_region_args_t _a, *a = &_a; + clib_error_t *error; memset (a, 0, sizeof (*a)); a->root_path = am->root_path; @@ -1321,7 +1658,10 @@ vlibmemory_init (vlib_main_t * vm) 0) ? am->global_pvt_heap_size : SVM_PVT_MHEAP_SIZE; svm_region_init_args (a); - return 0; + + error = vlib_call_init_function (vm, vlibsocket_init); + + return error; } VLIB_INIT_FUNCTION (vlibmemory_init); @@ -2227,7 +2567,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm, /* Load the serialized message table from the table dump */ - error = unserialize_open_unix_file (sm, (char *) filename); + error = unserialize_open_clib_file (sm, (char *) filename); if (error) return error; @@ -2251,7 +2591,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm, if (compare_current) { /* Append the current message table */ - u8 *tblv = vec_dup (am->serialized_message_table_in_shmem); + u8 *tblv = vl_api_serialize_message_table (am, 0); serialize_open_vector (sm, tblv); unserialize_integer (sm, &nmsgs, sizeof (u32)); @@ -2268,6 +2608,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm, item->crc = extract_crc (name_and_crc); item->which = 1; /* current_image */ } + vec_free (tblv); } /* Sort the table. */ diff --git a/src/vlibmemory/socket_client.c b/src/vlibmemory/socket_client.c new file mode 100644 index 00000000000..8519e7f5f7c --- /dev/null +++ b/src/vlibmemory/socket_client.c @@ -0,0 +1,240 @@ +/* + *------------------------------------------------------------------ + * socket_client.c - API message handling over sockets, client code. + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) clib_warning (__VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +socket_client_main_t socket_client_main; + +/* Debug aid */ +u32 vl (void *p) __attribute__ ((weak)); +u32 +vl (void *p) +{ + return vec_len (p); +} + +void +vl_socket_client_read_reply (socket_client_main_t * scm) +{ + int n, current_rx_index; + msgbuf_t *mbp; + + if (scm->socket_fd == 0 || scm->socket_enable == 0) + return; + + mbp = 0; + + while (1) + { + current_rx_index = vec_len (scm->socket_rx_buffer); + while (vec_len (scm->socket_rx_buffer) < + sizeof (*mbp) + 2 /* msg id */ ) + { + vec_validate (scm->socket_rx_buffer, current_rx_index + + scm->socket_buffer_size - 1); + _vec_len (scm->socket_rx_buffer) = current_rx_index; + n = read (scm->socket_fd, scm->socket_rx_buffer + current_rx_index, + scm->socket_buffer_size); + if (n < 0) + { + clib_unix_warning ("socket_read"); + return; + } + _vec_len (scm->socket_rx_buffer) += n; + } + +#if CLIB_DEBUG > 1 + if (n > 0) + clib_warning ("read %d bytes", n); +#endif + + if (mbp == 0) + mbp = (msgbuf_t *) (scm->socket_rx_buffer); + + if (vec_len (scm->socket_rx_buffer) >= ntohl (mbp->data_len) + + sizeof (*mbp)) + { + vl_msg_api_socket_handler ((void *) (mbp->data)); + + if (vec_len (scm->socket_rx_buffer) == ntohl (mbp->data_len) + + sizeof (*mbp)) + _vec_len (scm->socket_rx_buffer) = 0; + else + vec_delete (scm->socket_rx_buffer, ntohl (mbp->data_len) + + sizeof (*mbp), 0); + mbp = 0; + + /* Quit if we're out of data, and not expecting a ping reply */ + if (vec_len (scm->socket_rx_buffer) == 0 + && scm->control_pings_outstanding == 0) + break; + } + } +} + +int +vl_socket_client_connect (socket_client_main_t * scm, char *socket_path, + char *client_name, u32 socket_buffer_size) +{ + char buffer[256]; + char *rdptr; + int n, total_bytes; + vl_api_sockclnt_create_reply_t *rp; + vl_api_sockclnt_create_t *mp; + clib_socket_t *sock = &scm->client_socket; + msgbuf_t *mbp; + clib_error_t *error; + + /* Already connected? */ + if (scm->socket_fd) + return (-2); + + /* bogus call? */ + if (socket_path == 0 || client_name == 0) + return (-3); + + sock->config = socket_path; + sock->flags = CLIB_SOCKET_F_IS_CLIENT | CLIB_SOCKET_F_SEQPACKET; + + error = clib_socket_init (sock); + + if (error) + { + clib_error_report (error); + return (-1); + } + + scm->socket_fd = sock->fd; + + mbp = (msgbuf_t *) buffer; + mbp->q = 0; + mbp->data_len = ntohl (sizeof (*mp)); + mbp->gc_mark_timestamp = 0; + + mp = (vl_api_sockclnt_create_t *) mbp->data; + mp->_vl_msg_id = ntohs (VL_API_SOCKCLNT_CREATE); + strncpy ((char *) mp->name, client_name, sizeof (mp->name) - 1); + mp->name[sizeof (mp->name) - 1] = 0; + mp->context = 0xfeedface; + + n = write (scm->socket_fd, mbp, sizeof (*mbp) + ntohl (mbp->data_len)); + if (n < 0) + { + clib_unix_warning ("socket write (msg)"); + return (-1); + } + + memset (buffer, 0, sizeof (buffer)); + + total_bytes = 0; + rdptr = buffer; + do + { + n = read (scm->socket_fd, rdptr, sizeof (buffer) - (rdptr - buffer)); + if (n < 0) + { + clib_unix_warning ("socket read"); + } + total_bytes += n; + rdptr += n; + } + while (total_bytes < sizeof (vl_api_sockclnt_create_reply_t) + + sizeof (msgbuf_t)); + + rp = (vl_api_sockclnt_create_reply_t *) (buffer + sizeof (msgbuf_t)); + if (ntohs (rp->_vl_msg_id) != VL_API_SOCKCLNT_CREATE_REPLY) + { + clib_warning ("connect reply got msg id %d\n", ntohs (rp->_vl_msg_id)); + return (-1); + } + + /* allocate tx, rx buffers */ + scm->socket_buffer_size = socket_buffer_size ? socket_buffer_size : + SOCKET_CLIENT_DEFAULT_BUFFER_SIZE; + vec_validate (scm->socket_tx_buffer, scm->socket_buffer_size - 1); + vec_validate (scm->socket_rx_buffer, scm->socket_buffer_size - 1); + _vec_len (scm->socket_rx_buffer) = 0; + scm->socket_enable = 1; + + return (0); +} + +void +vl_socket_client_disconnect (socket_client_main_t * scm) +{ + if (scm->socket_fd && (close (scm->socket_fd) < 0)) + clib_unix_warning ("close"); + scm->socket_fd = 0; +} + +void +vl_socket_client_enable_disable (socket_client_main_t * scm, int enable) +{ + scm->socket_enable = enable; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/socksvr_vlib.c b/src/vlibmemory/socksvr_vlib.c new file mode 100644 index 00000000000..1a263e7bf37 --- /dev/null +++ b/src/vlibmemory/socksvr_vlib.c @@ -0,0 +1,719 @@ +/* + *------------------------------------------------------------------ + * socksvr_vlib.c + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +/* instantiate all the endian swap functions we know about */ +#define vl_endianfun +#include +#undef vl_endianfun + +void +dump_socket_clients (vlib_main_t * vm, api_main_t * am) +{ + vl_api_registration_t *reg; + socket_main_t *sm = &socket_main; + clib_file_main_t *fm = &file_main; + clib_file_t *f; + + /* + * Must have at least one active client, not counting the + * REGISTRATION_TYPE_SOCKET_LISTEN bind/accept socket + */ + if (pool_elts (sm->registration_pool) < 2) + return; + + vlib_cli_output (vm, "Socket clients"); + vlib_cli_output (vm, "%16s %8s", "Name", "Fildesc"); + /* *INDENT-OFF* */ + pool_foreach (reg, sm->registration_pool, + ({ + if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) { + f = pool_elt_at_index (fm->file_pool, reg->clib_file_index); + vlib_cli_output (vm, "%16s %8d", + reg->name, f->file_descriptor); + } + })); +/* *INDENT-ON* */ +} + +void +vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) +{ + u16 msg_id = ntohs (*(u16 *) elem); + api_main_t *am = &api_main; + msgbuf_t *mb = (msgbuf_t *) (elem - offsetof (msgbuf_t, data)); +#if CLIB_DEBUG > 1 + u32 output_length; +#endif + clib_file_t *cf = rp->clib_file_index + file_main.file_pool; + + ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM); + + if (msg_id >= vec_len (am->api_trace_cfg)) + { + clib_warning ("id out of range: %d", msg_id); + vl_msg_api_free ((void *) elem); + return; + } + + /* Add the msgbuf_t to the output vector */ + vl_socket_add_pending_output_no_flush (cf, + rp->vl_api_registration_pool_index + + socket_main.registration_pool, + (u8 *) mb, sizeof (*mb)); + /* Send the message */ + vl_socket_add_pending_output (cf, + rp->vl_api_registration_pool_index + + socket_main.registration_pool, + elem, ntohl (mb->data_len)); + +#if CLIB_DEBUG > 1 + output_length = sizeof (*mb) + ntohl (mb->data_len); + clib_warning ("wrote %u bytes to fd %d", output_length, + cf->file_descriptor); +#endif + + vl_msg_api_free ((void *) elem); +} + +void +vl_free_socket_registration_index (u32 pool_index) +{ + int i; + vl_api_registration_t *rp; + if (pool_is_free_index (socket_main.registration_pool, pool_index)) + { + clib_warning ("main pool index %d already free", pool_index); + return; + } + rp = pool_elt_at_index (socket_main.registration_pool, pool_index); + + ASSERT (rp->registration_type != REGISTRATION_TYPE_FREE); + for (i = 0; i < vec_len (rp->additional_fds_to_close); i++) + if (close (rp->additional_fds_to_close[i]) < 0) + clib_unix_warning ("close"); + vec_free (rp->additional_fds_to_close); + vec_free (rp->name); + vec_free (rp->unprocessed_input); + vec_free (rp->output_vector); + rp->registration_type = REGISTRATION_TYPE_FREE; + pool_put (socket_main.registration_pool, rp); +} + +void +vl_api_socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, + i8 * input_v) +{ + msgbuf_t *mbp = (msgbuf_t *) input_v; + + u8 *the_msg = (u8 *) (mbp->data); + socket_main.current_uf = uf; + socket_main.current_rp = rp; + vl_msg_api_socket_handler (the_msg); + socket_main.current_uf = 0; + socket_main.current_rp = 0; +} + +clib_error_t * +vl_socket_read_ready (clib_file_t * uf) +{ + clib_file_main_t *fm = &file_main; + vlib_main_t *vm = vlib_get_main (); + vl_api_registration_t *rp; + int n; + i8 *msg_buffer = 0; + u8 *data_for_process; + u32 msg_len; + u32 save_input_buffer_length = vec_len (socket_main.input_buffer); + vl_socket_args_for_process_t *a; + msgbuf_t *mbp; + int mbp_set = 0; + + rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); + + n = read (uf->file_descriptor, socket_main.input_buffer, + vec_len (socket_main.input_buffer)); + + if (n <= 0 && errno != EAGAIN) + { + clib_file_del (fm, uf); + + if (!pool_is_free (socket_main.registration_pool, rp)) + { + u32 index = rp - socket_main.registration_pool; + vl_free_socket_registration_index (index); + } + else + { + clib_warning ("client index %d already free?", + rp->vl_api_registration_pool_index); + } + return 0; + } + + _vec_len (socket_main.input_buffer) = n; + + /* + * Look for bugs here. This code is tricky because + * data read from a stream socket does not honor message + * boundaries. In the case of a long message (>4K bytes) + * we have to do (at least) 2 reads, etc. + */ + do + { + if (vec_len (rp->unprocessed_input)) + { + vec_append (rp->unprocessed_input, socket_main.input_buffer); + msg_buffer = rp->unprocessed_input; + } + else + { + msg_buffer = socket_main.input_buffer; + mbp_set = 0; + } + + if (mbp_set == 0) + { + /* Any chance that we have a complete message? */ + if (vec_len (msg_buffer) <= sizeof (msgbuf_t)) + goto save_and_split; + + mbp = (msgbuf_t *) msg_buffer; + msg_len = ntohl (mbp->data_len); + mbp_set = 1; + } + + /* We don't have the entire message yet. */ + if (mbp_set == 0 + || (msg_len + sizeof (msgbuf_t)) > vec_len (msg_buffer)) + { + save_and_split: + /* if we were using the input buffer save the fragment */ + if (msg_buffer == socket_main.input_buffer) + { + ASSERT (vec_len (rp->unprocessed_input) == 0); + vec_validate (rp->unprocessed_input, vec_len (msg_buffer) - 1); + clib_memcpy (rp->unprocessed_input, msg_buffer, + vec_len (msg_buffer)); + _vec_len (rp->unprocessed_input) = vec_len (msg_buffer); + } + _vec_len (socket_main.input_buffer) = save_input_buffer_length; + return 0; + } + + data_for_process = (u8 *) vec_dup (msg_buffer); + _vec_len (data_for_process) = (msg_len + sizeof (msgbuf_t)); + pool_get (socket_main.process_args, a); + a->clib_file = uf; + a->regp = rp; + a->data = data_for_process; + + vlib_process_signal_event (vm, memclnt_node.index, + SOCKET_READ_EVENT, + a - socket_main.process_args); + if (n > (msg_len + sizeof (*mbp))) + vec_delete (msg_buffer, msg_len + sizeof (*mbp), 0); + else + _vec_len (msg_buffer) = 0; + n -= msg_len + sizeof (msgbuf_t); + msg_len = 0; + mbp_set = 0; + } + while (n > 0); + + _vec_len (socket_main.input_buffer) = save_input_buffer_length; + + return 0; +} + +void +vl_socket_add_pending_output (clib_file_t * uf, + vl_api_registration_t * rp, + u8 * buffer, uword buffer_bytes) +{ + clib_file_main_t *fm = &file_main; + + vec_add (rp->output_vector, buffer, buffer_bytes); + if (vec_len (rp->output_vector) > 0) + { + int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + if (!skip_update) + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } +} + +void +vl_socket_add_pending_output_no_flush (clib_file_t * uf, + vl_api_registration_t * rp, + u8 * buffer, uword buffer_bytes) +{ + vec_add (rp->output_vector, buffer, buffer_bytes); +} + +static void +socket_del_pending_output (clib_file_t * uf, + vl_api_registration_t * rp, uword n_bytes) +{ + clib_file_main_t *fm = &file_main; + + vec_delete (rp->output_vector, n_bytes, 0); + if (vec_len (rp->output_vector) <= 0) + { + int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + if (!skip_update) + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } +} + +clib_error_t * +vl_socket_write_ready (clib_file_t * uf) +{ + clib_file_main_t *fm = &file_main; + vl_api_registration_t *rp; + int n; + + rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); + + /* Flush output vector. */ + n = write (uf->file_descriptor, + rp->output_vector, vec_len (rp->output_vector)); + + if (n < 0) + { +#if DEBUG > 2 + clib_warning ("write error, close the file...\n"); +#endif + clib_file_del (fm, uf); + + vl_free_socket_registration_index (rp - socket_main.registration_pool); + return 0; + } + + else if (n > 0) + socket_del_pending_output (uf, rp, n); + + return 0; +} + +clib_error_t * +vl_socket_error_ready (clib_file_t * uf) +{ + vl_api_registration_t *rp; + clib_file_main_t *fm = &file_main; + + rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); + clib_file_del (fm, uf); + vl_free_socket_registration_index (rp - socket_main.registration_pool); + + return 0; +} + +void +socksvr_file_add (clib_file_main_t * fm, int fd) +{ + vl_api_registration_t *rp; + clib_file_t template = { 0 }; + + pool_get (socket_main.registration_pool, rp); + memset (rp, 0, sizeof (*rp)); + + template.read_function = vl_socket_read_ready; + template.write_function = vl_socket_write_ready; + template.error_function = vl_socket_error_ready; + template.file_descriptor = fd; + template.private_data = rp - socket_main.registration_pool; + + rp->registration_type = REGISTRATION_TYPE_SOCKET_SERVER; + rp->vl_api_registration_pool_index = rp - socket_main.registration_pool; + rp->clib_file_index = clib_file_add (fm, &template); +} + +static clib_error_t * +socksvr_accept_ready (clib_file_t * uf) +{ + clib_file_main_t *fm = &file_main; + socket_main_t *sm = &socket_main; + clib_socket_t *sock = &sm->socksvr_listen_socket; + clib_socket_t client; + clib_error_t *error; + + error = clib_socket_accept (sock, &client); + + if (error) + return error; + + socksvr_file_add (fm, client.fd); + return 0; +} + +static clib_error_t * +socksvr_bogus_write (clib_file_t * uf) +{ + clib_warning ("why am I here?"); + return 0; +} + +/* + * vl_api_sockclnt_create_t_handler + */ +void +vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp) +{ + vl_api_registration_t *regp; + vl_api_sockclnt_create_reply_t *rp; + int rv = 1; + + regp = socket_main.current_rp; + + ASSERT (regp->registration_type == REGISTRATION_TYPE_SOCKET_SERVER); + + regp->name = format (0, "%s%c", mp->name, 0); + + rp = vl_msg_api_alloc (sizeof (*rp)); + rp->_vl_msg_id = htons (VL_API_SOCKCLNT_CREATE_REPLY); + rp->handle = (uword) regp; + rp->index = (uword) regp->vl_api_registration_pool_index; + rp->context = mp->context; + rp->response = htonl (rv); + + vl_msg_api_send (regp, (u8 *) rp); +} + +/* + * vl_api_sockclnt_delete_t_handler + */ +void +vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp) +{ + vl_api_registration_t *regp; + vl_api_sockclnt_delete_reply_t *rp; + + if (!pool_is_free_index (socket_main.registration_pool, mp->index)) + { + regp = pool_elt_at_index (socket_main.registration_pool, mp->index); + + rp = vl_msg_api_alloc (sizeof (*rp)); + rp->_vl_msg_id = htons (VL_API_SOCKCLNT_DELETE_REPLY); + rp->handle = mp->handle; + rp->response = htonl (1); + + vl_msg_api_send (regp, (u8 *) rp); + + clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index); + + vl_free_socket_registration_index (mp->index); + } + else + { + clib_warning ("unknown client ID %d", mp->index); + } +} + +static clib_error_t * +send_fd_msg (int socket_fd, int fd_to_share) +{ + struct msghdr mh = { 0 }; + struct iovec iov[1]; + char ctl[CMSG_SPACE (sizeof (int))]; + char *msg = "memfd"; + int rv; + + iov[0].iov_base = msg; + iov[0].iov_len = strlen (msg); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + struct cmsghdr *cmsg; + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), &fd_to_share, sizeof (int)); + + rv = sendmsg (socket_fd, &mh, 0); + if (rv < 0) + return clib_error_return_unix (0, "sendmsg"); + return 0; +} + +/* + * Create a memory-fd segment. + */ +void +vl_api_memfd_segment_create_t_handler (vl_api_memfd_segment_create_t * mp) +{ + vl_api_memfd_segment_create_reply_t *rmp; + api_main_t *am = &api_main; + clib_file_t *cf; + memfd_private_t _memfd_private, *memfd = &_memfd_private; + vl_api_registration_t *regp; + vlib_main_t *vm = vlib_get_main (); + svm_map_region_args_t _args, *a = &_args; + svm_region_t *vlib_rp; + int rv; + + regp = vl_api_client_index_to_registration (mp->client_index); + + if (regp == 0) + { + clib_warning ("API client disconnected"); + return; + } + + if (regp->registration_type != REGISTRATION_TYPE_SOCKET_SERVER) + { + rv = -31; /* VNET_API_ERROR_INVALID_REGISTRATION */ + goto reply; + } + + memset (memfd, 0, sizeof (*memfd)); + + /* Embed in api_main_t */ + memfd->memfd_size = mp->requested_size; + memfd->requested_va = 0ULL; + memfd->i_am_master = 1; + memfd->name = format (0, "%s%c", regp->name, 0); + + /* Set up a memfd segment of the requested size */ + rv = memfd_master_init (memfd, mp->client_index); + + if (rv) + goto reply; + + /* Remember to close this fd when the socket connection goes away */ + vec_add1 (regp->additional_fds_to_close, memfd->fd); + + /* And create a plausible svm_region in it */ + memset (a, 0, sizeof (*a)); + a->baseva = memfd->sh->memfd_va + MMAP_PAGESIZE; + a->size = memfd->memfd_size - MMAP_PAGESIZE; + /* $$$$ might want a different config parameter */ + a->pvt_heap_size = am->api_pvt_heap_size; + a->flags = SVM_FLAGS_MHEAP; + svm_region_init_mapped_region (a, (svm_region_t *) a->baseva); + + vlib_rp = (svm_region_t *) a->baseva; + + /* + * Part deux, initialize the svm_region_t shared-memory header + * api allocation rings, and so on. + */ + vl_init_shmem (vlib_rp, 1 /* is_vlib (dont-care) */ , 1 /* is_private */ ); + + vec_add1 (am->vlib_private_rps, vlib_rp); + + memfd->sh->ready = 1; + + /* Recompute the set of input queues to poll in memclnt_process */ + vec_reset_length (vl_api_queue_cursizes); + +reply: + + /* send the reply message */ + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_MEMFD_SEGMENT_CREATE_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (rv); + + vl_msg_api_send (regp, (u8 *) rmp); + + if (rv != 0) + return; + + /* + * We need the reply message to make it out the back door + * before we send the magic fd message. + */ + vlib_process_suspend (vm, 11e-6); + + cf = file_main.file_pool + regp->clib_file_index; + + /* send the magic "here's your sign (aka fd)" socket message */ + send_fd_msg (cf->file_descriptor, memfd->fd); +} + +#define foreach_vlib_api_msg \ +_(SOCKCLNT_CREATE, sockclnt_create) \ +_(SOCKCLNT_DELETE, sockclnt_delete) \ +_(MEMFD_SEGMENT_CREATE, memfd_segment_create) + +clib_error_t * +socksvr_api_init (vlib_main_t * vm) +{ + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; + vl_api_registration_t *rp; + vl_msg_api_msg_config_t cfg; + vl_msg_api_msg_config_t *c = &cfg; + socket_main_t *sm = &socket_main; + clib_socket_t *sock = &sm->socksvr_listen_socket; + clib_error_t *error; + + /* If not explicitly configured, do not bind/enable, etc. */ + if (sm->socket_name == 0) + return 0; + +#define _(N,n) do { \ + c->id = VL_API_##N; \ + c->name = #n; \ + c->handler = vl_api_##n##_t_handler; \ + c->cleanup = vl_noop_handler; \ + c->endian = vl_api_##n##_t_endian; \ + c->print = vl_api_##n##_t_print; \ + c->size = sizeof(vl_api_##n##_t); \ + c->traced = 1; /* trace, so these msgs print */ \ + c->replay = 0; /* don't replay client create/delete msgs */ \ + c->message_bounce = 0; /* don't bounce this message */ \ + vl_msg_api_config(c);} while (0); + + foreach_vlib_api_msg; +#undef _ + + vec_resize (sm->input_buffer, 4096); + + sock->config = (char *) sm->socket_name; + + /* mkdir of file socket, only under /run */ + if (strncmp (sock->config, "/run", 4) == 0) + { + u8 *tmp = format (0, "%s", sock->config); + int i = vec_len (tmp); + while (i && tmp[--i] != '/') + ; + + tmp[i] = 0; + + if (i) + vlib_unix_recursive_mkdir ((char *) tmp); + vec_free (tmp); + } + + sock->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET | + CLIB_SOCKET_F_ALLOW_GROUP_WRITE; + error = clib_socket_init (sock); + if (error) + return error; + + pool_get (sm->registration_pool, rp); + memset (rp, 0, sizeof (*rp)); + + rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN; + + template.read_function = socksvr_accept_ready; + template.write_function = socksvr_bogus_write; + template.file_descriptor = sock->fd; + template.private_data = rp - sm->registration_pool; + + rp->clib_file_index = clib_file_add (fm, &template); + return 0; +} + +static clib_error_t * +socket_exit (vlib_main_t * vm) +{ + clib_file_main_t *fm = &file_main; + socket_main_t *sm = &socket_main; + vl_api_registration_t *rp; + + /* Defensive driving in case something wipes out early */ + if (sm->registration_pool) + { + u32 index; + /* *INDENT-OFF* */ + pool_foreach (rp, sm->registration_pool, ({ + clib_file_del (fm, fm->file_pool + rp->clib_file_index); + index = rp->vl_api_registration_pool_index; + vl_free_socket_registration_index (index); + })); +/* *INDENT-ON* */ + } + + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (socket_exit); + +static clib_error_t * +socksvr_config (vlib_main_t * vm, unformat_input_t * input) +{ + socket_main_t *sm = &socket_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "socket-name %s", &sm->socket_name)) + ; + else if (unformat (input, "default")) + { + sm->socket_name = format (0, "%s%c", API_SOCKET_FILE, 0); + } + else + { + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + } + return 0; +} + +VLIB_CONFIG_FUNCTION (socksvr_config, "socksvr"); + +clib_error_t * +vlibsocket_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (vlibsocket_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibsocket/sock_test.c b/src/vlibsocket/sock_test.c index 3bded08fd99..e563b9a8d61 100644 --- a/src/vlibsocket/sock_test.c +++ b/src/vlibsocket/sock_test.c @@ -22,7 +22,7 @@ #include #include -#define SOCKCLNT_SERVER_PORT 32741 /* whatever */ +#define SOCKCLNT_SERVER_PORT 1234 /* whatever */ typedef signed char i8; typedef signed short i16; @@ -103,7 +103,7 @@ main (int argc, char *argv[]) memset (buffer, 0, sizeof (buffer)); mp = (vl_api_sockclnt_create_t *) buffer; - mp->_vl_msg_id = ntohs (8); /* VL_API_SOCKCLNT_CREATE */ + mp->_vl_msg_id = ntohs (13); /* VL_API_SOCKCLNT_CREATE */ strncpy ((char *) mp->name, "socket-test", sizeof (mp->name) - 1); mp->name[sizeof (mp->name) - 1] = 0; mp->context = 0xfeedface; @@ -134,7 +134,7 @@ main (int argc, char *argv[]) rp = (vl_api_sockclnt_create_reply_t *) (buffer + 4); /* VL_API_SOCKCLNT_CREATE_REPLY */ - if (ntohs (rp->_vl_msg_id) != 9) + if (ntohs (rp->_vl_msg_id) != 14) { printf ("WARNING: msg id %d\n", ntohs (rp->_vl_msg_id)); } diff --git a/src/vlibsocket/sockclnt.api b/src/vlibsocket/sockclnt.api deleted file mode 100644 index e0a7505fe87..00000000000 --- a/src/vlibsocket/sockclnt.api +++ /dev/null @@ -1,50 +0,0 @@ -/* Hey Emacs use -*- mode: C -*- */ - -/* - *------------------------------------------------------------------ - * sockclnt.api - API message(s) to hook up clients, pass traffic - * to client processes via TCP sockets - * - * Copyright (c) 2009 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -/* - * Create a socket client registration - */ -define sockclnt_create { - u8 name[64]; /* for show, find by name, whatever */ - u32 context; /* opaque value to be returned in the reply */ -}; - -define sockclnt_create_reply { - i32 response; /* Non-negative = success */ - u64 handle; /* handle by which vlib knows this client */ - u32 index; /* index, used e.g. by API trace replay */ - u32 context; /* opaque value from the create request */ -}; - -/* - * Delete a client registration - */ -define sockclnt_delete { - u32 index; /* index, used e.g. by API trace replay */ - u64 handle; /* handle by which vlib knows this client */ -}; - -define sockclnt_delete_reply { - i32 response; /* Non-negative = success */ - u64 handle; /* in case the client wonders */ -}; - diff --git a/src/vlibsocket/sockclnt_vlib.c b/src/vlibsocket/sockclnt_vlib.c index 760ad9445ca..0df32d527ed 100644 --- a/src/vlibsocket/sockclnt_vlib.c +++ b/src/vlibsocket/sockclnt_vlib.c @@ -29,23 +29,23 @@ #include #include -#include -#include + +#include #define vl_typedefs /* define message structures */ -#include +#include #undef vl_typedefs /* instantiate all the print functions we know about */ #define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) #define vl_printfun -#include +#include #undef vl_printfun /* instantiate all the endian swap functions we know about */ #define vl_endianfun -#include +#include #undef vl_endianfun static void diff --git a/src/vlibsocket/socksvr_vlib.c b/src/vlibsocket/socksvr_vlib.c deleted file mode 100644 index 31b33df5393..00000000000 --- a/src/vlibsocket/socksvr_vlib.c +++ /dev/null @@ -1,706 +0,0 @@ -/* - *------------------------------------------------------------------ - * socksvr_vlib.c - * - * Copyright (c) 2009 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include /* enumerate all vlib messages */ - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -/* instantiate all the endian swap functions we know about */ -#define vl_endianfun -#include -#undef vl_endianfun - -socket_main_t socket_main; - -void -dump_socket_clients (vlib_main_t * vm, api_main_t * am) -{ - vl_api_registration_t *reg; - socket_main_t *sm = &socket_main; - clib_file_main_t *fm = &file_main; - clib_file_t *f; - - /* - * Must have at least one active client, not counting the - * REGISTRATION_TYPE_SOCKET_LISTEN bind/accept socket - */ - if (pool_elts (sm->registration_pool) < 2) - return; - - vlib_cli_output (vm, "TCP socket clients"); - vlib_cli_output (vm, "%16s %8s", "Name", "Fildesc"); - /* *INDENT-OFF* */ - pool_foreach (reg, sm->registration_pool, - ({ - if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) { - f = pool_elt_at_index (fm->file_pool, reg->clib_file_index); - vlib_cli_output (vm, "%16s %8d", - reg->name, f->file_descriptor); - } - })); -/* *INDENT-ON* */ -} - -void -vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) -{ - u32 nbytes = 4; /* for the length... */ - u16 msg_id = ntohs (*(u16 *) elem); - u32 msg_length; - u32 tmp; - api_main_t *am = &api_main; - - ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM); - - if (msg_id >= vec_len (am->api_trace_cfg)) - { - clib_warning ("id out of range: %d", msg_id); - vl_msg_api_free ((void *) elem); - return; - } - - msg_length = am->api_trace_cfg[msg_id].size; - nbytes += msg_length; - tmp = clib_host_to_net_u32 (nbytes); - - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - elem, msg_length); - vl_msg_api_free ((void *) elem); -} - -void -vl_socket_api_send_with_data (vl_api_registration_t * rp, - u8 * elem, u8 * data_vector) -{ - u32 nbytes = 4; /* for the length... */ - u16 msg_id = ntohs (*(u16 *) elem); - u32 msg_length; - u32 tmp; - api_main_t *am = &api_main; - - ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM); - - if (msg_id >= vec_len (am->api_trace_cfg)) - { - clib_warning ("id out of range: %d", msg_id); - vec_free (data_vector); - vl_msg_api_free ((void *) elem); - return; - } - - msg_length = am->api_trace_cfg[msg_id].size; - nbytes += msg_length; - nbytes += vec_len (data_vector); - - /* Length in network byte order */ - tmp = clib_host_to_net_u32 (nbytes); - - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - elem, msg_length); - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - data_vector, vec_len (data_vector)); - vl_msg_api_free ((void *) elem); -} - -static inline void -vl_socket_api_send_with_length_internal (vl_api_registration_t * rp, - u8 * elem, u32 msg_length, int free) -{ - u32 nbytes = 4; /* for the length... */ - u16 msg_id = ntohs (*(u16 *) elem); - u32 tmp; - api_main_t *am = &api_main; - - ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM); - - if (msg_id >= vec_len (am->api_trace_cfg)) - { - clib_warning ("id out of range: %d", msg_id); - if (free) - vl_msg_api_free ((void *) elem); - return; - } - - nbytes += msg_length; - - /* Length in network byte order */ - tmp = clib_host_to_net_u32 (nbytes); - - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - elem, msg_length); - if (free) - vl_msg_api_free ((void *) elem); -} - -void -vl_socket_api_send_with_length (vl_api_registration_t * rp, - u8 * elem, u32 msg_length) -{ - vl_socket_api_send_with_length_internal (rp, elem, msg_length, - 1 /* free */ ); -} - -void -vl_socket_api_send_with_length_no_free (vl_api_registration_t * rp, - u8 * elem, u32 msg_length) -{ - vl_socket_api_send_with_length_internal (rp, elem, msg_length, - 0 /* free */ ); -} - -void -vl_free_socket_registration_index (u32 pool_index) -{ - vl_api_registration_t *rp; - if (pool_is_free_index (socket_main.registration_pool, pool_index)) - { - clib_warning ("main pool index %d already free", pool_index); - return; - } - rp = pool_elt_at_index (socket_main.registration_pool, pool_index); - - ASSERT (rp->registration_type != REGISTRATION_TYPE_FREE); - vec_free (rp->name); - vec_free (rp->unprocessed_input); - vec_free (rp->output_vector); - rp->registration_type = REGISTRATION_TYPE_FREE; - pool_put (socket_main.registration_pool, rp); -} - -static inline void -socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, - i8 * input_v) -{ - u8 *the_msg = (u8 *) (input_v + sizeof (u32)); - socket_main.current_uf = uf; - socket_main.current_rp = rp; - vl_msg_api_socket_handler (the_msg); - socket_main.current_uf = 0; - socket_main.current_rp = 0; -} - -clib_error_t * -vl_socket_read_ready (clib_file_t * uf) -{ - clib_file_main_t *fm = &file_main; - vl_api_registration_t *rp; - int n; - i8 *msg_buffer = 0; - u32 msg_len; - u32 save_input_buffer_length = vec_len (socket_main.input_buffer); - - rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); - - n = read (uf->file_descriptor, socket_main.input_buffer, - vec_len (socket_main.input_buffer)); - - if (n <= 0 && errno != EAGAIN) - { - clib_file_del (fm, uf); - - if (!pool_is_free (socket_main.registration_pool, rp)) - { - u32 index = rp - socket_main.registration_pool; - vl_free_socket_registration_index (index); - } - else - { - clib_warning ("client index %d already free?", - rp->vl_api_registration_pool_index); - } - return 0; - } - - _vec_len (socket_main.input_buffer) = n; - - /* - * Look for bugs here. This code is tricky because - * data read from a stream socket does honor message - * boundaries. In the case of a long message (>4K bytes) - * we have to do (at least) 2 reads, etc. - */ - do - { - if (vec_len (rp->unprocessed_input)) - { - vec_append (rp->unprocessed_input, socket_main.input_buffer); - msg_buffer = rp->unprocessed_input; - msg_len = rp->unprocessed_msg_length; - } - else - { - msg_buffer = socket_main.input_buffer; - msg_len = 0; - } - - if (msg_len == 0) - { - /* Length may be split across two reads */ - if (vec_len (msg_buffer) < sizeof (u32)) - goto save_and_split; - - /* total length, including msg_len itself, in network byte order */ - msg_len = clib_net_to_host_u32 (*((u32 *) msg_buffer)); - } - - /* Happens if the client sent msg_len == 0 */ - if (msg_len == 0) - { - clib_warning ("msg_len == 0"); - goto turf_it; - } - - /* We don't have the entire message yet. */ - if (msg_len > vec_len (msg_buffer)) - { - save_and_split: - /* - * if we were using the shared input buffer, - * save the fragment. - */ - if (msg_buffer == socket_main.input_buffer) - { - ASSERT (vec_len (rp->unprocessed_input) == 0); - vec_validate (rp->unprocessed_input, vec_len (msg_buffer) - 1); - clib_memcpy (rp->unprocessed_input, msg_buffer, - vec_len (msg_buffer)); - _vec_len (rp->unprocessed_input) = vec_len (msg_buffer); - } - _vec_len (socket_main.input_buffer) = save_input_buffer_length; - rp->unprocessed_msg_length = msg_len; - return 0; - } - - socket_process_msg (uf, rp, msg_buffer); - if (n > msg_len) - vec_delete (msg_buffer, msg_len, 0); - else - _vec_len (msg_buffer) = 0; - n -= msg_len; - msg_len = 0; - rp->unprocessed_msg_length = 0; - } - while (n > 0); - -turf_it: - _vec_len (socket_main.input_buffer) = save_input_buffer_length; - - return 0; -} - -void -vl_socket_add_pending_output (clib_file_t * uf, - vl_api_registration_t * rp, - u8 * buffer, uword buffer_bytes) -{ - clib_file_main_t *fm = &file_main; - - vec_add (rp->output_vector, buffer, buffer_bytes); - if (vec_len (rp->output_vector) > 0) - { - int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); - uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; - if (!skip_update) - fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); - } -} - -static void -socket_del_pending_output (clib_file_t * uf, - vl_api_registration_t * rp, uword n_bytes) -{ - clib_file_main_t *fm = &file_main; - - vec_delete (rp->output_vector, n_bytes, 0); - if (vec_len (rp->output_vector) <= 0) - { - int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); - uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; - if (!skip_update) - fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); - } -} - -clib_error_t * -vl_socket_write_ready (clib_file_t * uf) -{ - clib_file_main_t *fm = &file_main; - vl_api_registration_t *rp; - int n; - - rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); - - /* Flush output vector. */ - n = write (uf->file_descriptor, - rp->output_vector, vec_len (rp->output_vector)); - - if (n < 0) - { -#if DEBUG > 2 - clib_warning ("write error, close the file...\n"); -#endif - clib_file_del (fm, uf); - - vl_free_socket_registration_index (rp - socket_main.registration_pool); - return 0; - } - - else if (n > 0) - socket_del_pending_output (uf, rp, n); - - return 0; -} - -clib_error_t * -vl_socket_error_ready (clib_file_t * uf) -{ - vl_api_registration_t *rp; - clib_file_main_t *fm = &file_main; - - rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); - clib_file_del (fm, uf); - vl_free_socket_registration_index (rp - socket_main.registration_pool); - - return 0; -} - -void -socksvr_file_add (clib_file_main_t * fm, int fd) -{ - vl_api_registration_t *rp; - clib_file_t template = { 0 }; - - pool_get (socket_main.registration_pool, rp); - memset (rp, 0, sizeof (*rp)); - - template.read_function = vl_socket_read_ready; - template.write_function = vl_socket_write_ready; - template.error_function = vl_socket_error_ready; - template.file_descriptor = fd; - template.private_data = rp - socket_main.registration_pool; - - rp->registration_type = REGISTRATION_TYPE_SOCKET_SERVER; - rp->vl_api_registration_pool_index = rp - socket_main.registration_pool; - rp->clib_file_index = clib_file_add (fm, &template); -} - -static clib_error_t * -socksvr_accept_ready (clib_file_t * uf) -{ - clib_file_main_t *fm = &file_main; - struct sockaddr_in client_addr; - int client_fd; - int client_len; - - client_len = sizeof (client_addr); - - /* - * Supposedly acquires the non-blocking attrib from the - * server socket. - */ - client_fd = accept (uf->file_descriptor, - (struct sockaddr *) &client_addr, - (socklen_t *) & client_len); - - if (client_fd < 0) - return clib_error_return_unix (0, "socksvr_accept_ready: accept"); - - socksvr_file_add (fm, client_fd); - return 0; -} - -static clib_error_t * -socksvr_bogus_write (clib_file_t * uf) -{ - clib_warning ("why am I here?"); - return 0; -} - -/* - * vl_api_sockclnt_create_t_handler - */ -void -vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp) -{ - vl_api_registration_t *regp; - vl_api_sockclnt_create_reply_t *rp; - int rv = 1; - - regp = socket_main.current_rp; - - ASSERT (regp->registration_type == REGISTRATION_TYPE_SOCKET_SERVER); - - regp->name = format (0, "%s%c", mp->name, 0); - - rp = vl_msg_api_alloc (sizeof (*rp)); - rp->_vl_msg_id = htons (VL_API_SOCKCLNT_CREATE_REPLY); - rp->handle = (uword) regp; - rp->index = (uword) regp->vl_api_registration_pool_index; - rp->context = mp->context; - rp->response = htonl (rv); - - vl_msg_api_send (regp, (u8 *) rp); -} - -/* - * vl_api_sockclnt_delete_t_handler - */ -void -vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp) -{ - vl_api_registration_t *regp; - vl_api_sockclnt_delete_reply_t *rp; - - if (!pool_is_free_index (socket_main.registration_pool, mp->index)) - { - regp = pool_elt_at_index (socket_main.registration_pool, mp->index); - - rp = vl_msg_api_alloc (sizeof (*rp)); - rp->_vl_msg_id = htons (VL_API_SOCKCLNT_DELETE_REPLY); - rp->handle = mp->handle; - rp->response = htonl (1); - - vl_msg_api_send (regp, (u8 *) rp); - - clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index); - - vl_free_socket_registration_index (mp->index); - } - else - { - clib_warning ("unknown client ID %d", mp->index); - } -} - -#define foreach_vlib_api_msg \ -_(SOCKCLNT_CREATE, sockclnt_create) \ -_(SOCKCLNT_DELETE, sockclnt_delete) - -static clib_error_t * -socksvr_api_init (vlib_main_t * vm) -{ - clib_file_main_t *fm = &file_main; - clib_file_t template = { 0 }; - int sockfd; - int one = 1; - int rv; - struct sockaddr_in serv_addr; - vl_api_registration_t *rp; - u16 portno; - u32 bind_address; - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vlib_api_msg; -#undef _ - - vec_resize (socket_main.input_buffer, 4096); - - /* Set up non-blocking server socket on CLIENT_API_SERVER_PORT */ - sockfd = socket (AF_INET, SOCK_STREAM, 0); - - if (sockfd < 0) - { - return clib_error_return_unix (0, "socket"); - } - - rv = ioctl (sockfd, FIONBIO, &one); - if (rv < 0) - { - close (sockfd); - return clib_error_return_unix (0, "FIONBIO"); - } - - rv = setsockopt (sockfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one)); - if (rv < 0) - { - close (sockfd); - return clib_error_return_unix (0, "SO_REUSEADDR"); - } - - bzero ((char *) &serv_addr, sizeof (serv_addr)); - serv_addr.sin_family = AF_INET; - - if (socket_main.bind_address) - bind_address = socket_main.bind_address; - else - bind_address = INADDR_LOOPBACK; - - if (socket_main.portno) - portno = socket_main.portno; - else - portno = SOCKSVR_DEFAULT_PORT; - - serv_addr.sin_port = clib_host_to_net_u16 (portno); - serv_addr.sin_addr.s_addr = clib_host_to_net_u32 (bind_address); - - if (bind (sockfd, (struct sockaddr *) &serv_addr, sizeof (serv_addr)) < 0) - { - close (sockfd); - return clib_error_return_unix (0, "bind"); - } - - rv = listen (sockfd, 5); - if (rv < 0) - { - close (sockfd); - return clib_error_return_unix (0, "listen"); - } - - pool_get (socket_main.registration_pool, rp); - memset (rp, 0, sizeof (*rp)); - - rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN; - - template.read_function = socksvr_accept_ready; - template.write_function = socksvr_bogus_write; - template.file_descriptor = sockfd; - template.private_data = rp - socket_main.registration_pool; - - rp->clib_file_index = clib_file_add (fm, &template); - return 0; -} - -static clib_error_t * -socket_exit (vlib_main_t * vm) -{ - clib_file_main_t *fm = &file_main; - vl_api_registration_t *rp; - - /* Defensive driving in case something wipes out early */ - if (socket_main.registration_pool) - { - u32 index; - /* *INDENT-OFF* */ - pool_foreach (rp, socket_main.registration_pool, ({ - clib_file_del (fm, fm->file_pool + rp->clib_file_index); - index = rp->vl_api_registration_pool_index; - vl_free_socket_registration_index (index); - })); -/* *INDENT-ON* */ - } - - return 0; -} - -VLIB_MAIN_LOOP_EXIT_FUNCTION (socket_exit); - -static clib_error_t * -socksvr_config (vlib_main_t * vm, unformat_input_t * input) -{ - int portno; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "port %d", &portno)) - { - socket_main.portno = portno; - } - else - { - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); - } - } - return socksvr_api_init (vm); -} - -VLIB_CONFIG_FUNCTION (socksvr_config, "socksvr"); - -/* argument in host byte order */ -void -socksvr_set_port (u16 port) -{ - socket_main.portno = port; -} - -/* argument in host byte order */ -void -socksvr_set_bind_address (u32 bind_address) -{ - socket_main.bind_address = bind_address; -} - -clib_error_t * -vlibsocket_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (vlibsocket_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlibsocket/vl_socket_api_h.h b/src/vlibsocket/vl_socket_api_h.h deleted file mode 100644 index 7fc53cefc03..00000000000 --- a/src/vlibsocket/vl_socket_api_h.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - *------------------------------------------------------------------ - * vl_socket_api_h.h - all API headers, in a specific order. - * - * Copyright (c) 2009 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -/* - * Add to the bottom of the #include list, or elves will steal your - * keyboard in the middle of the night! - */ -#include -#include - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlibsocket/vl_socket_msg_enum.h b/src/vlibsocket/vl_socket_msg_enum.h deleted file mode 100644 index cdc61a5bcf5..00000000000 --- a/src/vlibsocket/vl_socket_msg_enum.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - *------------------------------------------------------------------ - * vl_msg_enum.h - Our view of how to number API messages - * Clients have their own view, which has to agree with ours. - * - * Copyright (c) 2009 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#ifndef __VL_MSG_ENUM_H__ -#define __VL_MSG_ENUM_H__ - -#include - -#define vl_msg_id(n,h) n, -typedef enum -{ - VL_ILLEGAL_MESSAGE_ID = 0, -#include -} vl_msg_id_t; -#undef vl_msg_id - -#endif /* __VL_MSG_ENUM_H__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 120a276cc0f..52b13e0401b 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -615,7 +615,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, */ if (0 == memcmp (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address))) - return -1; + goto check_customers; /* Update time stamp and ethernet address. */ clib_memcpy (e->ethernet_address, a->ethernet, @@ -630,6 +630,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e); +check_customers: /* Customer(s) waiting for this address to be resolved? */ p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32); if (p) diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index ba1e27209c0..627e5cba9e1 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -846,7 +846,8 @@ fib_table_entry_delete (u32 fib_index, * removing an etry that does not exist. * i'll allow it, but i won't like it. */ - clib_warning("%U not in FIB", format_fib_prefix, prefix); + if (0) + clib_warning("%U not in FIB", format_fib_prefix, prefix); } else { diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 93551e476eb..45b472e8701 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -138,7 +138,7 @@ vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp) static void send_sw_interface_details (vpe_api_main_t * am, - unix_shared_memory_queue_t * q, + vl_api_registration_t * rp, vnet_sw_interface_t * swif, u8 * interface_name, u32 context) { @@ -233,7 +233,7 @@ send_sw_interface_details (vpe_api_main_t * am, if (tag) strncpy ((char *) mp->tag, (char *) tag, ARRAY_LEN (mp->tag) - 1); - vl_msg_api_send_shmem (q, (u8 *) & mp); + vl_msg_api_send (rp, (u8 *) mp); } static void @@ -242,11 +242,15 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp) vpe_api_main_t *am = &vpe_api_main; vnet_sw_interface_t *swif; vnet_interface_main_t *im = &am->vnet_main->interface_main; + vl_api_registration_t *rp; - unix_shared_memory_queue_t *q = - vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - return; + rp = vl_api_client_index_to_registration (mp->client_index); + + if (rp == 0) + { + clib_warning ("Client %d AWOL", mp->client_index); + return; + } u8 *filter = 0, *name = 0; if (mp->name_filter_valid) @@ -268,7 +272,7 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp) if (filter && !strcasestr((char *) name, (char *) filter)) continue; - send_sw_interface_details (am, q, swif, name, mp->context); + send_sw_interface_details (am, rp, swif, name, mp->context); })); /* *INDENT-ON* */ diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 0a34497da7a..58cfd1816d8 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -2257,14 +2257,6 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) sw_if_index); } - ip46_address_t nh = { - .ip4 = *dst, - }; - - ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, - VNET_LINK_IP4, &nh, sw_if_index); - adj = adj_get (ai); - h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi); @@ -2288,6 +2280,22 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + ip46_address_t nh = { + .ip4 = *dst, + }; + + ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, + VNET_LINK_IP4, &nh, sw_if_index); + adj = adj_get (ai); + + /* Peer has been previously resolved, retrieve glean adj instead */ + if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) + { + adj_unlock (ai); + ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh); + adj = adj_get (ai); + } + /* Add encapsulation string for software interface (e.g. ethernet header). */ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index bb4893ad19f..54582d38364 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -2041,6 +2041,14 @@ ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) VNET_LINK_IP6, &nh, sw_if_index); adj = adj_get (ai); + /* Peer has been previously resolved, retrieve glean adj instead */ + if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) + { + adj_unlock (ai); + ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6, sw_if_index, &nh); + adj = adj_get (ai); + } + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 1908a679e2e..d549ac37625 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -723,7 +723,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, */ if (0 == memcmp (n->link_layer_address, link_layer_address, n_bytes_link_layer_address)) - return -1; + goto check_customers; clib_memcpy (n->link_layer_address, link_layer_address, n_bytes_link_layer_address); @@ -739,6 +739,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, adj_nbr_walk_nh6 (sw_if_index, &n->key.ip6_address, ip6_nd_mk_complete_walk, n); +check_customers: /* Customer(s) waiting for this address to be resolved? */ p = mhash_get (&nm->pending_resolutions_by_address, a); if (p) diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 64b3275b0b6..0ad56f38fe0 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -1036,9 +1036,10 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) } else { - clib_warning ("MAC event to pid %d queue stuffed!" - " %d MAC entries lost", client, - evt_idx); + if (q) + clib_warning ("MAC event to pid %d queue stuffed!" + " %d MAC entries lost", client, + evt_idx); } evt_idx = 0; } @@ -1119,8 +1120,9 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) } else { - clib_warning ("MAC event to pid %d queue stuffed!" - " %d MAC entries lost", client, evt_idx); + if (q) + clib_warning ("MAC event to pid %d queue stuffed!" + " %d MAC entries lost", client, evt_idx); vl_msg_api_free (mp); } } diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c index 0d6d453de00..d09be405011 100644 --- a/src/vnet/lisp-cp/control.c +++ b/src/vnet/lisp-cp/control.c @@ -1380,6 +1380,7 @@ vnet_lisp_del_mapping (gid_address_t * eid, u32 * res_map_index) mapping_t *old_map; u32 mi; + memset (ls_args, 0, sizeof (ls_args[0])); memset (m_args, 0, sizeof (m_args[0])); if (res_map_index) res_map_index[0] = ~0; @@ -1784,6 +1785,7 @@ get_locator_set_index (vnet_lisp_add_del_locator_set_args_t * a, uword * p) /* find locator-set */ if (a->local) { + ASSERT (a->name); p = hash_get_mem (lcm->locator_set_index_by_name, a->name); } else diff --git a/src/vnet/lisp-cp/lisp_api.c b/src/vnet/lisp-cp/lisp_api.c index f7c41971061..3053611c611 100644 --- a/src/vnet/lisp-cp/lisp_api.c +++ b/src/vnet/lisp-cp/lisp_api.c @@ -128,6 +128,7 @@ vl_api_lisp_add_del_locator_set_t_handler (vl_api_lisp_add_del_locator_set_t * memset (a, 0, sizeof (a[0])); + mp->locator_set_name[63] = 0; locator_name = format (0, "%s", mp->locator_set_name); a->name = locator_name; @@ -182,6 +183,7 @@ vl_api_lisp_add_del_locator_t_handler (vl_api_lisp_add_del_locator_t * mp) locator.local = 1; vec_add1 (locators, locator); + mp->locator_set_name[63] = 0; locator_name = format (0, "%s", mp->locator_set_name); a->name = locator_name; @@ -248,6 +250,7 @@ vl_api_lisp_add_del_local_eid_t_handler (vl_api_lisp_add_del_local_eid_t * mp) if (rv) goto out; + mp->locator_set_name[63] = 0; name = format (0, "%s", mp->locator_set_name); p = hash_get_mem (lcm->locator_set_index_by_name, name); if (!p) @@ -476,6 +479,7 @@ static void u8 *locator_set_name = NULL; vnet_lisp_add_del_mreq_itr_rloc_args_t _a, *a = &_a; + mp->locator_set_name[63] = 0; locator_set_name = format (0, "%s", mp->locator_set_name); a->is_add = mp->is_add; @@ -510,6 +514,7 @@ static void if (!mp->is_add) { vnet_lisp_add_del_adjacency_args_t _a, *a = &_a; + memset (a, 0, sizeof (*a)); gid_address_copy (&a->reid, eid); a->is_add = 0; rv = vnet_lisp_add_del_adjacency (a); @@ -533,7 +538,9 @@ static void rv = vnet_lisp_add_mapping (m_args, rlocs, NULL, NULL); } else - rv = vnet_lisp_del_mapping (eid, NULL); + { + rv = vnet_lisp_del_mapping (eid, NULL); + } if (mp->del_all) vnet_lisp_clear_all_remote_adjacencies (); diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index 527b3289924..b48422c0b30 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -21,7 +21,6 @@ #include #include -#include #include #define TCP_BUILTIN_CLIENT_DBG (0) diff --git a/src/vpp-api/client/client.c b/src/vpp-api/client/client.c index 8bdcda01c4d..668461aad6c 100644 --- a/src/vpp-api/client/client.c +++ b/src/vpp-api/client/client.c @@ -133,8 +133,11 @@ static void * vac_rx_thread_fn (void *arg) { unix_shared_memory_queue_t *q; + vl_api_memclnt_keepalive_t *mp; + vl_api_memclnt_keepalive_reply_t *rmp; vac_main_t *pm = &vac_main; api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr; uword msg; q = am->vl_input_queue; @@ -169,6 +172,17 @@ vac_rx_thread_fn (void *arg) vl_msg_api_free((void *) msg); break; + case VL_API_MEMCLNT_KEEPALIVE: + mp = (void *)msg; + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs(VL_API_MEMCLNT_KEEPALIVE_REPLY); + rmp->context = mp->context; + shmem_hdr = am->shmem_hdr; + vl_msg_api_send_shmem(shmem_hdr->vl_input_queue, (u8 *)&rmp); + vl_msg_api_free((void *) msg); + break; + default: vac_api_handler((void *)msg); } @@ -370,8 +384,12 @@ vac_read (char **p, int *l, u16 timeout) unix_shared_memory_queue_t *q; api_main_t *am = &api_main; vac_main_t *pm = &vac_main; + vl_api_memclnt_keepalive_t *mp; + vl_api_memclnt_keepalive_reply_t *rmp; uword msg; msgbuf_t *msgbuf; + int rv; + vl_shmem_hdr_t *shmem_hdr; if (!pm->connected_to_vlib) return -1; @@ -384,7 +402,9 @@ vac_read (char **p, int *l, u16 timeout) set_timeout(timeout); q = am->vl_input_queue; - int rv = unix_shared_memory_queue_sub(q, (u8 *)&msg, 0); + + again: + rv = unix_shared_memory_queue_sub(q, (u8 *)&msg, 0); if (rv == 0) { u16 msg_id = ntohs(*((u16 *)msg)); switch (msg_id) { @@ -397,6 +417,21 @@ vac_read (char **p, int *l, u16 timeout) case VL_API_MEMCLNT_READ_TIMEOUT: printf("Received read timeout %ds\n", timeout); goto error; + case VL_API_MEMCLNT_KEEPALIVE: + /* Handle an alive-check ping from vpp. */ + mp = (void *)msg; + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs(VL_API_MEMCLNT_KEEPALIVE_REPLY); + rmp->context = mp->context; + shmem_hdr = am->shmem_hdr; + vl_msg_api_send_shmem(shmem_hdr->vl_input_queue, (u8 *)&rmp); + vl_msg_api_free((void *) msg); + /* + * Python code is blissfully unaware of these pings, so + * act as if it never happened... + */ + goto again; default: msgbuf = (msgbuf_t *)(((u8 *)msg) - offsetof(msgbuf_t, data)); diff --git a/src/vpp-api/vapi/vapi.c b/src/vpp-api/vapi/vapi.c index 3150d2b427e..25c0b3be1d3 100644 --- a/src/vpp-api/vapi/vapi.c +++ b/src/vpp-api/vapi/vapi.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index d020314bfb1..5d4a63b9061 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -1031,15 +1031,10 @@ vl_api_cli_inband_t_handler (vl_api_cli_inband_t * mp) { vl_api_cli_inband_reply_t *rmp; int rv = 0; - unix_shared_memory_queue_t *q; vlib_main_t *vm = vlib_get_main (); unformat_input_t input; u8 *out_vec = 0; - q = vl_api_client_index_to_input_queue (mp->client_index); - if (!q) - return; - unformat_init_string (&input, (char *) mp->cmd, ntohl (mp->length)); vlib_cli_input (vm, &input, inband_cli_output, (uword) & out_vec); @@ -1174,12 +1169,6 @@ vl_api_show_version_t_handler (vl_api_show_version_t * mp) char *vpe_api_get_version (void); char *vpe_api_get_build_date (void); - unix_shared_memory_queue_t *q = - vl_api_client_index_to_input_queue (mp->client_index); - - if (!q) - return; - /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_SHOW_VERSION_REPLY, ({ diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index c355a5fdff0..129334830d7 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -49,10 +49,13 @@ api_main_init (vlib_main_t * vm) vam->my_client_index = (u32) ~ 0; /* Ensure that vam->inbuf is never NULL */ vec_validate (vam->inbuf, 0); + vec_validate (vam->cmd_reply, 0); + vec_reset_length (vam->cmd_reply); init_error_string_table (vam); rv = vat_plugin_init (vam); if (rv) clib_warning ("vat_plugin_init returned %d", rv); + return 0; } @@ -68,6 +71,47 @@ vat_plugin_hash_create (void) vam->help_by_name = hash_create_string (0, sizeof (uword)); } +static void +maybe_register_api_client (vat_main_t * vam) +{ + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + svm_region_t *svm; + void *oldheap; + api_main_t *am = &api_main; + + if (vam->my_client_index != ~0) + return; + + pool_get (am->vl_clients, regpp); + + svm = am->vlib_rp; + + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + *regpp = clib_mem_alloc (sizeof (vl_api_registration_t)); + + regp = *regpp; + memset (regp, 0, sizeof (*regp)); + regp->registration_type = REGISTRATION_TYPE_SHMEM; + regp->vl_api_registration_pool_index = regpp - am->vl_clients; + regp->vlib_rp = svm; + regp->shmem_hdr = am->shmem_hdr; + + /* Loopback connection */ + regp->vl_input_queue = am->shmem_hdr->vl_input_queue; + + regp->name = format (0, "%s", "vpp-internal"); + vec_add1 (regp->name, 0); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + + vam->my_client_index = vl_msg_api_handle_from_index_and_epoch + (regp->vl_api_registration_pool_index, + am->shmem_hdr->application_restarts); +} + static clib_error_t * api_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -82,6 +126,8 @@ api_command_fn (vlib_main_t * vm, int (*fp) (vat_main_t *); api_main_t *am = &api_main; + maybe_register_api_client (vam); + vam->vl_input_queue = am->shmem_hdr->vl_input_queue; /* vec_validated in the init routine */ diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 2e1f980e633..8063d680d6e 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -1710,6 +1710,16 @@ static void *vl_api_memclnt_create_t_print FINISH; } +static void *vl_api_sockclnt_create_t_print + (vl_api_sockclnt_create_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sockclnt_create name %s ", mp->name); + + FINISH; +} + static void *vl_api_show_version_t_print (vl_api_show_version_t * mp, void *handle) { @@ -3122,6 +3132,7 @@ _(WANT_INTERFACE_EVENTS, want_interface_events) \ _(CLI, cli) \ _(CLI_INBAND, cli_inband) \ _(MEMCLNT_CREATE, memclnt_create) \ +_(SOCKCLNT_CREATE, sockclnt_create) \ _(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) \ _(SHOW_VERSION, show_version) \ _(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ diff --git a/src/vppinfra/elog.h b/src/vppinfra/elog.h index 05085b264df..d50c9a6500d 100644 --- a/src/vppinfra/elog.h +++ b/src/vppinfra/elog.h @@ -525,12 +525,12 @@ void elog_alloc (elog_main_t * em, u32 n_events); #ifdef CLIB_UNIX always_inline clib_error_t * -elog_write_file (elog_main_t * em, char *unix_file, int flush_ring) +elog_write_file (elog_main_t * em, char *clib_file, int flush_ring) { serialize_main_t m; clib_error_t *error; - error = serialize_open_unix_file (&m, unix_file); + error = serialize_open_clib_file (&m, clib_file); if (error) return error; error = serialize (&m, serialize_elog_main, em, flush_ring); @@ -540,12 +540,12 @@ elog_write_file (elog_main_t * em, char *unix_file, int flush_ring) } always_inline clib_error_t * -elog_read_file (elog_main_t * em, char *unix_file) +elog_read_file (elog_main_t * em, char *clib_file) { serialize_main_t m; clib_error_t *error; - error = unserialize_open_unix_file (&m, unix_file); + error = unserialize_open_clib_file (&m, clib_file); if (error) return error; error = unserialize (&m, unserialize_elog_main, em); diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h index 69facea9993..6ebf5122567 100644 --- a/src/vppinfra/file.h +++ b/src/vppinfra/file.h @@ -68,7 +68,7 @@ typedef enum UNIX_FILE_UPDATE_ADD, UNIX_FILE_UPDATE_MODIFY, UNIX_FILE_UPDATE_DELETE, -} unix_file_update_type_t; +} clib_file_update_type_t; typedef struct { @@ -76,7 +76,7 @@ typedef struct clib_file_t *file_pool; void (*file_update) (clib_file_t * file, - unix_file_update_type_t update_type); + clib_file_update_type_t update_type); } clib_file_main_t; diff --git a/src/vppinfra/format.h b/src/vppinfra/format.h index 5b7023a3023..3c4aec99c2b 100644 --- a/src/vppinfra/format.h +++ b/src/vppinfra/format.h @@ -305,7 +305,7 @@ u8 *format_hexdump (u8 * s, va_list * va); /* Unix specific formats. */ #ifdef CLIB_UNIX /* Setup input from Unix file. */ -void unformat_init_unix_file (unformat_input_t * input, int file_descriptor); +void unformat_init_clib_file (unformat_input_t * input, int file_descriptor); /* Take input from Unix environment variable; returns 1 if variable exists zero otherwise. */ diff --git a/src/vppinfra/serialize.c b/src/vppinfra/serialize.c index 5d401a080c1..fe2146a1e64 100644 --- a/src/vppinfra/serialize.c +++ b/src/vppinfra/serialize.c @@ -1147,7 +1147,7 @@ unserialize_multiple_4 (serialize_main_t * m, #include static void -unix_file_write (serialize_main_header_t * m, serialize_stream_t * s) +clib_file_write (serialize_main_header_t * m, serialize_stream_t * s) { int fd, n; @@ -1168,7 +1168,7 @@ unix_file_write (serialize_main_header_t * m, serialize_stream_t * s) } static void -unix_file_read (serialize_main_header_t * m, serialize_stream_t * s) +clib_file_read (serialize_main_header_t * m, serialize_stream_t * s) { int fd, n; @@ -1188,7 +1188,7 @@ unix_file_read (serialize_main_header_t * m, serialize_stream_t * s) } static void -serialize_open_unix_file_descriptor_helper (serialize_main_t * m, int fd, +serialize_open_clib_file_descriptor_helper (serialize_main_t * m, int fd, uword is_read) { memset (m, 0, sizeof (m[0])); @@ -1200,24 +1200,24 @@ serialize_open_unix_file_descriptor_helper (serialize_main_t * m, int fd, _vec_len (m->stream.buffer) = 0; } - m->header.data_function = is_read ? unix_file_read : unix_file_write; + m->header.data_function = is_read ? clib_file_read : clib_file_write; m->stream.data_function_opaque = fd; } void -serialize_open_unix_file_descriptor (serialize_main_t * m, int fd) +serialize_open_clib_file_descriptor (serialize_main_t * m, int fd) { - serialize_open_unix_file_descriptor_helper (m, fd, /* is_read */ 0); + serialize_open_clib_file_descriptor_helper (m, fd, /* is_read */ 0); } void -unserialize_open_unix_file_descriptor (serialize_main_t * m, int fd) +unserialize_open_clib_file_descriptor (serialize_main_t * m, int fd) { - serialize_open_unix_file_descriptor_helper (m, fd, /* is_read */ 1); + serialize_open_clib_file_descriptor_helper (m, fd, /* is_read */ 1); } static clib_error_t * -serialize_open_unix_file_helper (serialize_main_t * m, char *file, +serialize_open_clib_file_helper (serialize_main_t * m, char *file, uword is_read) { int fd, mode; @@ -1227,20 +1227,20 @@ serialize_open_unix_file_helper (serialize_main_t * m, char *file, if (fd < 0) return clib_error_return_unix (0, "open `%s'", file); - serialize_open_unix_file_descriptor_helper (m, fd, is_read); + serialize_open_clib_file_descriptor_helper (m, fd, is_read); return 0; } clib_error_t * -serialize_open_unix_file (serialize_main_t * m, char *file) +serialize_open_clib_file (serialize_main_t * m, char *file) { - return serialize_open_unix_file_helper (m, file, /* is_read */ 0); + return serialize_open_clib_file_helper (m, file, /* is_read */ 0); } clib_error_t * -unserialize_open_unix_file (serialize_main_t * m, char *file) +unserialize_open_clib_file (serialize_main_t * m, char *file) { - return serialize_open_unix_file_helper (m, file, /* is_read */ 1); + return serialize_open_clib_file_helper (m, file, /* is_read */ 1); } #endif /* CLIB_UNIX */ diff --git a/src/vppinfra/serialize.h b/src/vppinfra/serialize.h index 6cc2372e6ab..90d615f60a4 100644 --- a/src/vppinfra/serialize.h +++ b/src/vppinfra/serialize.h @@ -416,11 +416,11 @@ void *serialize_close_vector (serialize_main_t * m); void unserialize_open_vector (serialize_main_t * m, u8 * vector); #ifdef CLIB_UNIX -clib_error_t *serialize_open_unix_file (serialize_main_t * m, char *file); -clib_error_t *unserialize_open_unix_file (serialize_main_t * m, char *file); +clib_error_t *serialize_open_clib_file (serialize_main_t * m, char *file); +clib_error_t *unserialize_open_clib_file (serialize_main_t * m, char *file); -void serialize_open_unix_file_descriptor (serialize_main_t * m, int fd); -void unserialize_open_unix_file_descriptor (serialize_main_t * m, int fd); +void serialize_open_clib_file_descriptor (serialize_main_t * m, int fd); +void unserialize_open_clib_file_descriptor (serialize_main_t * m, int fd); #endif /* CLIB_UNIX */ /* Main routines. */ diff --git a/src/vppinfra/test_serialize.c b/src/vppinfra/test_serialize.c index e00eec3268f..6351b050c6c 100644 --- a/src/vppinfra/test_serialize.c +++ b/src/vppinfra/test_serialize.c @@ -184,7 +184,7 @@ test_serialize_main (unformat_input_t * input) #ifdef CLIB_UNIX if (tm->dump_file) - serialize_open_unix_file (sm, tm->dump_file); + serialize_open_clib_file (sm, tm->dump_file); else #endif serialize_open_vector (sm, 0); @@ -217,7 +217,7 @@ test_serialize_main (unformat_input_t * input) #ifdef CLIB_UNIX if (tm->dump_file) { - if ((error = unserialize_open_unix_file (um, tm->dump_file))) + if ((error = unserialize_open_clib_file (um, tm->dump_file))) goto done; } else diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c index 168d7375915..770ed8b6807 100644 --- a/src/vppinfra/time.c +++ b/src/vppinfra/time.c @@ -91,7 +91,7 @@ clock_frequency_from_proc_filesystem (void) if (fd < 0) return cpu_freq; - unformat_init_unix_file (&input, fd); + unformat_init_clib_file (&input, fd); ppc_timebase = 0; while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) @@ -130,7 +130,7 @@ clock_frequency_from_sys_filesystem (void) if (fd < 0) goto done; - unformat_init_unix_file (&input, fd); + unformat_init_clib_file (&input, fd); unformat (&input, "%f", &cpu_freq); cpu_freq *= 1e3; /* measured in kHz */ unformat_free (&input); diff --git a/src/vppinfra/unformat.c b/src/vppinfra/unformat.c index 5b17562f359..e30c6d34c22 100644 --- a/src/vppinfra/unformat.c +++ b/src/vppinfra/unformat.c @@ -1037,7 +1037,7 @@ unformat_init_vector (unformat_input_t * input, u8 * vector_string) #ifdef CLIB_UNIX static uword -unix_file_fill_buffer (unformat_input_t * input) +clib_file_fill_buffer (unformat_input_t * input) { int fd = pointer_to_uword (input->fill_buffer_arg); uword l, n; @@ -1055,9 +1055,9 @@ unix_file_fill_buffer (unformat_input_t * input) } void -unformat_init_unix_file (unformat_input_t * input, int file_descriptor) +unformat_init_clib_file (unformat_input_t * input, int file_descriptor) { - unformat_init (input, unix_file_fill_buffer, + unformat_init (input, clib_file_fill_buffer, uword_to_pointer (file_descriptor, void *)); } diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c index 361015b4f67..f693102c65a 100644 --- a/src/vppinfra/unix-misc.c +++ b/src/vppinfra/unix-misc.c @@ -48,7 +48,7 @@ __thread uword __os_thread_index = 0; clib_error_t * -unix_file_n_bytes (char *file, uword * result) +clib_file_n_bytes (char *file, uword * result) { struct stat s; @@ -64,7 +64,7 @@ unix_file_n_bytes (char *file, uword * result) } clib_error_t * -unix_file_read_contents (char *file, u8 * result, uword n_bytes) +clib_file_read_contents (char *file, u8 * result, uword n_bytes) { int fd = -1; uword n_done, n_left; @@ -108,19 +108,19 @@ done: } clib_error_t * -unix_file_contents (char *file, u8 ** result) +clib_file_contents (char *file, u8 ** result) { uword n_bytes; clib_error_t *error = 0; u8 *v; - if ((error = unix_file_n_bytes (file, &n_bytes))) + if ((error = clib_file_n_bytes (file, &n_bytes))) return error; v = 0; vec_resize (v, n_bytes); - error = unix_file_read_contents (file, v, n_bytes); + error = clib_file_read_contents (file, v, n_bytes); if (error) vec_free (v); diff --git a/src/vppinfra/unix.h b/src/vppinfra/unix.h index 29114cfece6..5b82c23a3c0 100644 --- a/src/vppinfra/unix.h +++ b/src/vppinfra/unix.h @@ -41,14 +41,14 @@ #include /* Number of bytes in a Unix file. */ -clib_error_t *unix_file_n_bytes (char *file, uword * result); +clib_error_t *clib_file_n_bytes (char *file, uword * result); /* Read file contents into given buffer. */ -clib_error_t *unix_file_read_contents (char *file, u8 * result, +clib_error_t *clib_file_read_contents (char *file, u8 * result, uword n_bytes); /* Read and return contents of Unix file. */ -clib_error_t *unix_file_contents (char *file, u8 ** result); +clib_error_t *clib_file_contents (char *file, u8 ** result); /* As above but for /proc file system on Linux. */ clib_error_t *unix_proc_file_contents (char *file, u8 ** result); -- cgit 1.2.3-korg