From 905c14af2b1464840cea201daed005cb30513683 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 25 Sep 2017 08:47:59 -0400 Subject: Add binary API documentation Change-Id: Id1a5da12b13d87bacfa81094f471b95db40c39be Signed-off-by: Dave Barach --- doxygen/user_doc.md | 1 + src/vlibapi/api_common.h | 192 +++++++++++++++--------- src/vlibapi/api_doc.md | 349 +++++++++++++++++++++++++++++++++++++++++++ src/vlibmemory/memory_vlib.c | 4 +- 4 files changed, 475 insertions(+), 71 deletions(-) create mode 100644 src/vlibapi/api_doc.md diff --git a/doxygen/user_doc.md b/doxygen/user_doc.md index 952a72fc304..65b09f98c9e 100644 --- a/doxygen/user_doc.md +++ b/doxygen/user_doc.md @@ -18,3 +18,4 @@ Several modules provide operational, dataplane-user focused documentation. - @subpage srmpls_doc - @subpage sample_plugin_doc - @subpage nat64_doc +- @subpage api_doc diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index dc6761bc308..770cf47d188 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h @@ -20,71 +20,74 @@ #ifndef included_api_common_h #define included_api_common_h +/** \file API common definitions + * See api_doc.md for more info + */ + #include #include #include +/** API registration types + */ typedef enum { REGISTRATION_TYPE_FREE = 0, - REGISTRATION_TYPE_SHMEM, - REGISTRATION_TYPE_SOCKET_LISTEN, - REGISTRATION_TYPE_SOCKET_SERVER, - REGISTRATION_TYPE_SOCKET_CLIENT, + REGISTRATION_TYPE_SHMEM, /**< Shared memory connection */ + REGISTRATION_TYPE_SOCKET_LISTEN, /**< Socket listener */ + REGISTRATION_TYPE_SOCKET_SERVER, /**< Socket server */ + REGISTRATION_TYPE_SOCKET_CLIENT, /**< Socket client */ } vl_registration_type_t; +/** An API client registration, only in vpp/vlib */ + typedef struct vl_api_registration_ { - vl_registration_type_t registration_type; + vl_registration_type_t registration_type; /**< type */ - /* Index in VLIB's brain (not shared memory). */ + /** Index in VLIB's brain (not shared memory). 
*/ u32 vl_api_registration_pool_index; - u8 *name; + u8 *name; /**< Client name */ - /* - * The following groups of data could be unioned, but my fingers are - * going to be sore enough. - */ - - /* shared memory only */ + /** shared memory only: pointer to client input queue */ unix_shared_memory_queue_t *vl_input_queue; /* socket server and client */ - u32 clib_file_index; - i8 *unprocessed_input; - u32 unprocessed_msg_length; - u8 *output_vector; + u32 clib_file_index; /**< Socket only: file index */ + i8 *unprocessed_input; /**< Socket only: pending input */ + u32 unprocessed_msg_length; /**< Socket only: unprocessed length */ + u8 *output_vector; /**< Socket only: output vector */ /* socket client only */ - u32 server_handle; - u32 server_index; - + u32 server_handle; /**< Socket client only: server handle */ + u32 server_index; /**< Socket client only: server index */ } vl_api_registration_t; -/* Trace configuration for a single message */ +/** Trace configuration for a single message */ typedef struct { - int size; - int trace_enable; - int replay_enable; + int size; /**< for sanity checking */ + int trace_enable; /**< trace this message */ + int replay_enable; /**< This message can be replayed */ } trace_cfg_t; -/* - * API recording +/** + * API trace state */ typedef struct { - u8 endian; - u8 enabled; - u8 wrapped; + u8 endian; /**< trace endianness */ + u8 enabled; /**< trace is enabled */ + u8 wrapped; /**< trace has wrapped */ u8 pad; - u32 nitems; - u32 curindex; - u8 **traces; + u32 nitems; /**< Number of trace records */ + u32 curindex; /**< Current index in circular buffer */ + u8 **traces; /**< Trace ring */ } vl_api_trace_t; +/** Trace RX / TX enum */ typedef enum { VL_API_TRACE_TX, @@ -94,35 +97,38 @@ typedef enum #define VL_API_LITTLE_ENDIAN 0x00 #define VL_API_BIG_ENDIAN 0x01 +/** Message range (belonging to a plugin) */ typedef struct { - u8 *name; - u16 first_msg_id; - u16 last_msg_id; + u8 *name; /**< name of the plugin */ + u16 first_msg_id; 
/**< first assigned message ID */ + u16 last_msg_id; /**< last assigned message ID */ } vl_api_msg_range_t; +/** Message configuration definition */ typedef struct { - int id; - char *name; - u32 crc; - void *handler; - void *cleanup; - void *endian; - void *print; - int size; - int traced; - int replay; - int message_bounce; - int is_mp_safe; + int id; /**< the message ID */ + char *name; /**< the message name */ + u32 crc; /**< message definition CRC */ + void *handler; /**< the message handler */ + void *cleanup; /**< non-default message cleanup handler */ + void *endian; /**< message endian function */ + void *print; /**< message print function */ + int size; /**< message size */ + int traced; /**< is this message to be traced? */ + int replay; /**< is this message to be replayed? */ + int message_bounce; /**< do not free message after processing */ + int is_mp_safe; /**< handler is thread-safe, no worker thread barrier required */ } vl_msg_api_msg_config_t; +/** Message header structure */ typedef struct msgbuf_ { - unix_shared_memory_queue_t *q; - u32 data_len; - u32 gc_mark_timestamp; - u8 data[0]; + unix_shared_memory_queue_t *q; /**< message allocated in this shmem ring */ + u32 data_len; /**< message length not including header */ + u32 gc_mark_timestamp; /**< message garbage collector mark TS */ + u8 data[0]; /**< actual message begins here */ } msgbuf_t; /* api_shared.c prototypes */ @@ -171,101 +177,147 @@ typedef struct _vl_msg_api_init_function_list_elt vl_msg_api_init_function_t *f; } _vl_msg_api_function_list_elt_t; +/** API main structure, used by both vpp and binary API clients */ typedef struct { + /** Message handler vector */ void (**msg_handlers) (void *); + /** Platform-dependent (aka hardware) message handler vector */ int (**pd_msg_handlers) (void *, int); + + /** non-default message cleanup handler vector */ void (**msg_cleanup_handlers) (void *); + + /** Message endian handler vector */ void (**msg_endian_handlers) (void *); + + /** Message print function vector 
*/ void (**msg_print_handlers) (void *, void *); + + /** Message name vector */ const char **msg_names; + + /** Don't automatically free message buffer vector */ u8 *message_bounce; + + /** Message is mp safe vector */ u8 *is_mp_safe; + + /** Allocator ring vectors (in shared memory) */ struct ring_alloc_ *arings; + + /** Number of times that the ring allocator failed */ u32 ring_misses; + + /** Number of garbage-collected message buffers */ u32 garbage_collects; + + /** Number of missing clients / failed message sends */ u32 missing_clients; + + /** Received message trace configuration */ vl_api_trace_t *rx_trace; + + /** Sent message trace configuration */ vl_api_trace_t *tx_trace; + + /** Print every received message */ int msg_print_flag; + + /** Current trace configuration */ trace_cfg_t *api_trace_cfg; + + /** Current process PID */ int our_pid; + + /** Binary api segment descriptor */ svm_region_t *vlib_rp; + + /** Vector of all mapped shared-VM segments */ svm_region_t **mapped_shmem_regions; + + /** Binary API shared-memory segment header pointer */ struct vl_shmem_hdr_ *shmem_hdr; + + /** vlib/vpp only: vector of client registrations */ vl_api_registration_t **vl_clients; + /** vlib/vpp only: serialized (message, name, crc) table */ u8 *serialized_message_table_in_shmem; - /* For plugin msg allocator */ + /** First available message ID, for the plugin msg allocator */ u16 first_available_msg_id; - /* message range by name hash */ + /** Message range by name hash */ uword *msg_range_by_name; - /* vector of message ranges */ + /** vector of message ranges */ vl_api_msg_range_t *msg_ranges; - /* uid for the api shared memory region */ + /** uid for the api shared memory region */ int api_uid; - /* gid for the api shared memory region */ + + /** gid for the api shared memory region */ int api_gid; - /* base virtual address for global VM region */ + /** base virtual address for global VM region */ u64 global_baseva; - /* size of the global VM region */ + /** 
size of the global VM region */ u64 global_size; - /* size of the API region */ + /** size of the API region */ u64 api_size; - /* size of the global VM private mheap */ + /** size of the global VM private mheap */ u64 global_pvt_heap_size; - /* size of the api private mheap */ + /** size of the api private mheap */ u64 api_pvt_heap_size; - /* Client-only data structures */ + /** Peer input queue pointer */ unix_shared_memory_queue_t *vl_input_queue; - /* + /** * All VLIB-side message handlers use my_client_index to identify * the queue / client. This works in sim replay. */ int my_client_index; - /* + /** * This is the (shared VM) address of the registration, * don't use it to id the connection since it can't possibly * work in simulator replay. */ vl_api_registration_t *my_registration; + /** (Historical) signal-based queue non-empty signal, to be removed */ i32 vlib_signal; - /* vlib input queue length */ + /** vpp/vlib input queue length */ u32 vlib_input_queue_length; - /* client side message index hash table */ + /** client message index hash table */ uword *msg_index_by_name_and_crc; + /** Shared VM binary API region name */ const char *region_name; + + /** Chroot path to the shared memory API files */ const char *root_path; - /* Replay in progress? */ + /** Replay in progress? 
*/ int replay_in_progress; - /* Dump (msg-name, crc) snapshot here at startup */ + /** Dump (msg-name, crc) snapshot here at startup */ u8 *save_msg_table_filename; - /* List of API client reaper functions */ + /** List of API client reaper functions */ _vl_msg_api_function_list_elt_t *reaper_function_registrations; } api_main_t; extern api_main_t api_main; - #endif /* included_api_common_h */ /* diff --git a/src/vlibapi/api_doc.md b/src/vlibapi/api_doc.md new file mode 100644 index 00000000000..e620ee12728 --- /dev/null +++ b/src/vlibapi/api_doc.md @@ -0,0 +1,349 @@ +# Binary API support {#api_doc} + +VPP provides a binary API scheme to allow a wide variety of client codes to +program data-plane tables. As of this writing, there are hundreds of binary +APIs. + +Messages are defined in `*.api` files. Today, there are about 50 api files, +with more arriving as folks add programmable features. The API file compiler +sources reside in @ref src/tools/vppapigen . + +Here's a typical request/response message definition, from +@ref src/vnet/interface.api : + +``` + autoreply define sw_interface_set_flags + { + u32 client_index; + u32 context; + u32 sw_if_index; + /* 1 = up, 0 = down */ + u8 admin_up_down; + }; +``` + +To a first approximation, the API compiler renders this definition as +follows: + +``` + /****** Message ID / handler enum ******/ + #ifdef vl_msg_id + vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS, vl_api_sw_interface_set_flags_t_handler) + vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, vl_api_sw_interface_set_flags_reply_t_handler) + #endif + + /****** Message names ******/ + #ifdef vl_msg_name + vl_msg_name(vl_api_sw_interface_set_flags_t, 1) + vl_msg_name(vl_api_sw_interface_set_flags_reply_t, 1) + #endif + + /****** Message name, crc list ******/ + #ifdef vl_msg_name_crc_list + #define foreach_vl_msg_name_crc_interface \ + _(VL_API_SW_INTERFACE_SET_FLAGS, sw_interface_set_flags, f890584a) \ + _(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, 
sw_interface_set_flags_reply, dfbf3afa) \ + #endif + + /****** Typedefs *****/ + #ifdef vl_typedefs + typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags { + u16 _vl_msg_id; + u32 client_index; + u32 context; + u32 sw_if_index; + u8 admin_up_down; + }) vl_api_sw_interface_set_flags_t; + + typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags_reply { + u16 _vl_msg_id; + u32 context; + i32 retval; + }) vl_api_sw_interface_set_flags_reply_t; +``` + +To change the admin state of an interface, a binary api client sends a +@ref vl_api_sw_interface_set_flags_t to vpp, which will respond with a +@ref vl_api_sw_interface_set_flags_reply_t message. + +Multiple layers of software, transport types, and shared libraries +implement a variety of features: + +* API message allocation, tracing, pretty-printing, and replay. +* Message transport via global shared memory, pairwise/private shared + memory, and sockets. +* Barrier synchronization of worker threads across thread-unsafe + message handlers. + +Correctly-coded message handlers know nothing about the transport used to +deliver messages to/from vpp. It's reasonably straightforward to use multiple +API message transport types simultaneously. + +For historical reasons, binary api messages are (putatively) sent in network +byte order. As of this writing, we're seriously considering whether that +choice makes sense. + + +## Message Allocation + +Since binary API messages are always processed in order, we allocate messages +using a ring allocator whenever possible. This scheme is extremely fast when +compared with a traditional memory allocator, and doesn't cause heap +fragmentation. See +@ref src/vlibmemory/memory_shared.c @ref vl_msg_api_alloc_internal() . 
+ +Regardless of transport, binary api messages always follow a @ref msgbuf_t +header: + +``` + typedef struct msgbuf_ + { + unix_shared_memory_queue_t *q; + u32 data_len; + u32 gc_mark_timestamp; + u8 data[0]; + } msgbuf_t; +``` + +This structure makes it easy to trace messages without having to +decode them - simply save data_len bytes - and allows +@ref vl_msg_api_free() to rapidly dispose of message buffers: + +``` + void + vl_msg_api_free (void *a) + { + msgbuf_t *rv; + api_main_t *am = &api_main; + + rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data)); + + /* + * Here's the beauty of the scheme. Only one proc/thread has + * control of a given message buffer. To free a buffer, we just + * clear the queue field, and leave. No locks, no hits, no errors... + */ + if (rv->q) + { + rv->q = 0; + rv->gc_mark_timestamp = 0; + return; + } + + } +``` + +## Message Tracing and Replay + +It's extremely important that vpp can capture and replay sizeable binary API +traces. System-level issues involving hundreds of thousands of API +transactions can be re-run in a second or less. Partial replay allows one to +binary-search for the point where the wheels fall off. One can add scaffolding +to the data plane, to trigger when complex conditions obtain. + +With binary API trace, print, and replay, system-level bug reports of the form +"after 300,000 API transactions, the vpp data-plane stopped forwarding +traffic, FIX IT!" can be solved offline. + +More often than not, one discovers that a control-plane client +misprograms the data plane after a long time or under complex +circumstances. Without direct evidence, "it's a data-plane problem!" + +See @ref src/vlibmemory/memory_vlib.c @ref vl_msg_api_process_file() , +and @ref src/vlibapi/api_shared.c . 
See also the debug CLI command "api trace" + +## Client connection details + +Establishing a binary API connection to vpp from a C-language client +is easy: + +``` + int + connect_to_vpe (char *client_name, int client_message_queue_length) + { + vat_main_t *vam = &vat_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", client_name, + client_message_queue_length) < 0) + return -1; + + /* Memorize vpp's binary API message input queue address */ + vam->vl_input_queue = am->shmem_hdr->vl_input_queue; + /* And our client index */ + vam->my_client_index = am->my_client_index; + return 0; + } +``` + +32 is a typical value for client_message_queue_length. Vpp cannot +block when it needs to send an API message to a binary API client, and +the vpp-side binary API message handlers are very fast. When sending +asynchronous messages, make sure to scrape the binary API rx ring with +some enthusiasm. + +### binary API message RX pthread + +Calling @ref vl_client_connect_to_vlib spins up a binary API message RX +pthread: + +``` + static void * + rx_thread_fn (void *arg) + { + unix_shared_memory_queue_t *q; + memory_client_main_t *mm = &memory_client_main; + api_main_t *am = &api_main; + + q = am->vl_input_queue; + + /* So we can make the rx thread terminate cleanly */ + if (setjmp (mm->rx_thread_jmpbuf) == 0) + { + mm->rx_thread_jmpbuf_valid = 1; + while (1) + { + vl_msg_api_queue_handler (q); + } + } + pthread_exit (0); + } +``` + +To handle the binary API message queue yourself, use +@ref vl_client_connect_to_vlib_no_rx_pthread. + +In turn, vl_msg_api_queue_handler(...) uses mutex/condvar signalling +to wake up, process vpp -> client traffic, then sleep. Vpp supplies a +condvar broadcast when the vpp -> client API message queue transitions +from empty to nonempty. + +Vpp checks its own binary API input queue at a very high rate. 
Vpp +invokes message handlers in "process" context [aka cooperative +multitasking thread context] at a variable rate, depending on +data-plane packet processing requirements. + +## Client disconnection details + +To disconnect from vpp, call @ref vl_client_disconnect_from_vlib +. Please arrange to call this function if the client application +terminates abnormally. Vpp makes every effort to hold a decent funeral +for dead clients, but vpp can't guarantee to free leaked memory in the +shared binary API segment. + +## Sending binary API messages to vpp + +The point of the exercise is to send binary API messages to vpp, and +to receive replies from vpp. Many vpp binary APIs comprise a client +request message, and a simple status reply. For example, to +set the admin status of an interface, one codes: + +``` + vl_api_sw_interface_set_flags_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_SW_INTERFACE_SET_FLAGS); + mp->client_index = api_main.my_client_index; + mp->sw_if_index = clib_host_to_net_u32 (); + vl_msg_api_send (api_main.shmem_hdr->vl_input_queue, (u8 *)mp); +``` + +Key points: + +* Use @ref vl_msg_api_alloc to allocate message buffers + +* Allocated message buffers are not initialized, and must be presumed + to contain trash. + +* Don't forget to set the _vl_msg_id field! + +* As of this writing, binary API message IDs and data are sent in + network byte order + +* The client-library global data structure @ref api_main keeps track + of sufficient pointers and handles used to communicate with vpp + +## Receiving binary API messages from vpp + +Unless you've made other arrangements (see @ref +vl_client_connect_to_vlib_no_rx_pthread), *messages are received on a +separate rx pthread*. Synchronization with the client application main +thread is the responsibility of the application! 
+ +Set up message handlers about as follows: + +``` + #define vl_typedefs /* define message structures */ + #include + #undef vl_typedefs + + /* declare message handlers for each api */ + + #define vl_endianfun /* define endian functions */ + #include + #undef vl_endianfun + + /* instantiate all the print functions we know about */ + #define vl_print(handle, ...) + #define vl_printfun + #include + #undef vl_printfun + + /* Define a list of all messages that the client handles */ + #define foreach_vpe_api_reply_msg \ + _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) + + static clib_error_t * + my_api_hookup (vlib_main_t * vm) + { + api_main_t *am = &api_main; + + #define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; + #undef _ + + return 0; + } +``` + +The key API used to establish message handlers is @ref +vl_msg_api_set_handlers , which sets values in multiple parallel +vectors in the @ref api_main_t structure. As of this writing, not all +vector element values can be set through the API. You'll see sporadic +API message registrations followed by minor adjustments of this form: + +``` + /* + * Thread-safe API messages + */ + am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1; + am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1; +``` + + + + + + + + + + + + + + + + + + + + + diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 401f388a5be..b6b87529375 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -2348,7 +2348,9 @@ cleanup: ?*/ /*? - * Display a serialized API message decode table + * Display a serialized API message decode table, compare a saved + * decode table with the current image, to establish API differences. + * ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (dump_api_table_file, static) = -- cgit 1.2.3-korg