From 142d151f075a73706f914451bceef18f4f07c28f Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 26 May 2017 14:18:58 -0700 Subject: memif: master instance crashes when typing quit on slave When I type in 'quit' on the slave instance, the master instance crashes on this line. 0: /home/sluong/vpp-master/vpp/build-data/../src/vlib/unix/input.c:200 (linux_epoll_input) assertion `! pool_is_free (um->file_pool, _e)' fails Aborted (core dumped) Below is the decode from gdb line_number=0, fmt=0x7f57af6cc9a0 "%s:%d (%s) assertion `%s' fails") at /home/sluong/vpp-master/vpp/build-data/../src/vppinfra/error.c:143 vm=0x7f57af8e2400 , node=0x7f576d40ad80, frame=0x0) at /home/sluong/vpp-master/vpp/build-data/../src/vlib/unix/input.c:200 vm=0x7f57af8e2400 , node=0x7f576d40ad80, type=VLIB_NODE_TYPE_PRE_INPUT, dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x0, last_time_stamp=1525665215050617) at /home/sluong/vpp-master/vpp/build-data/../src/vlib/main.c:1016 vm=0x7f57af8e2400 , is_main=1) at /home/sluong/vpp-master/vpp/build-data/../src/vlib/main.c:1500 I am able to reproduce the problem consistently with the below procedure. 1. Create 3 memif interfaces between slave and master instances. 2. Type 'quit' on the slave. Neither crashes the first time. 3. Bring back the slave. Type 'quit' on the master. Neither crashes. 4. Bring back the master. Type 'quit' on the slave. The master crashes. There are two places where the interrupt line is disconnected and the unix file is removed via the call unix_file_del () 1. memif_int_fd_read_ready () 2. memif_disconnect () which is called from multiple places in memif. When the crash happens, the unix file was removed from memif_disconnect () via memif_conn_fd_read_ready () with size of the message == 0 in recvmsg (). It is noted that when the unix file was removed from memif_int_fd_read_ready (), it never crashes. It is a race condition. However, if I follow the aforementioned procedure, the crash always happens. 
The reason the crash happens when memif_disconnect () removes the unix file is that there may still be pending input in linux_epoll_input (). When linux_epoll_input () tries to access the unix file via line 200, unix_file_t *f = pool_elt_at_index (um->file_pool, i); it crashes. We could add code in linux_epoll_input () to avoid the crash if the file_pool entry at that index is already free. Or we could fix memif to not remove the unix file in memif_conn_fd_read_ready () when recvmsg () returns 0 bytes, and instead postpone the unix file deletion to memif_int_fd_read_ready (), after linux_epoll_input () has had a chance to run and empty the input stream. I chose to fix the problem using the latter approach. I split the function memif_disconnect () into two parts. For the code path in which memif_conn_fd_read_ready () calls memif_disconnect (), it does not remove the interrupt line's unix file. All other calls to memif_disconnect () will continue to do what they used to do, to avoid regressions. Please let me know if I should fix the problem another way. 
Change-Id: I8efe2a3d24c6581609bc7b6fe82c2b59c22d8e4b Signed-off-by: Steven --- src/plugins/memif/memif.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'src/plugins') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 41c882fd04b..44c5012e80e 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -85,8 +85,8 @@ memif_connect (vlib_main_t * vm, memif_if_t * mif) VNET_HW_INTERFACE_FLAG_LINK_UP); } -void -memif_disconnect (vlib_main_t * vm, memif_if_t * mif) +static void +memif_disconnect_do (vlib_main_t * vm, memif_if_t * mif) { vnet_main_t *vnm = vnet_get_main (); @@ -94,13 +94,6 @@ memif_disconnect (vlib_main_t * vm, memif_if_t * mif) if (mif->hw_if_index != ~0) vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); - if (mif->interrupt_line.index != ~0) - { - unix_file_del (&unix_main, - unix_main.file_pool + mif->interrupt_line.index); - mif->interrupt_line.index = ~0; - mif->interrupt_line.fd = -1; /* closed in unix_file_del */ - } if (mif->connection.index != ~0) { unix_file_del (&unix_main, unix_main.file_pool + mif->connection.index); @@ -112,6 +105,20 @@ memif_disconnect (vlib_main_t * vm, memif_if_t * mif) vec_free (mif->regions); } +void +memif_disconnect (vlib_main_t * vm, memif_if_t * mif) +{ + if (mif->interrupt_line.index != ~0) + { + unix_file_del (&unix_main, + unix_main.file_pool + mif->interrupt_line.index); + mif->interrupt_line.index = ~0; + mif->interrupt_line.fd = -1; /* closed in unix_file_del */ + } + + memif_disconnect_do (vm, mif); +} + static clib_error_t * memif_process_connect_req (memif_pending_conn_t * pending_conn, memif_msg_t * req, struct ucred *slave_cr, @@ -329,13 +336,19 @@ memif_conn_fd_read_ready (unix_file_t * uf) size = recvmsg (uf->file_descriptor, &mh, 0); if (size != sizeof (memif_msg_t)) { - if (size != 0) + if (size == 0) { - DEBUG_UNIX_LOG ("Malformed message received on fd %d", - uf->file_descriptor); - error = 
clib_error_return_unix (0, "recvmsg fd %d", - uf->file_descriptor); + if (pending_conn) + memif_remove_pending_conn (pending_conn); + else + memif_disconnect_do (vm, mif); + return error; } + + DEBUG_UNIX_LOG ("Malformed message received on fd %d", + uf->file_descriptor); + error = clib_error_return_unix (0, "recvmsg fd %d", + uf->file_descriptor); goto disconnect; } @@ -420,7 +433,8 @@ memif_int_fd_read_ready (unix_file_t * uf) mif->interrupt_line.index = ~0; mif->interrupt_line.fd = -1; } - vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, 0); + else + vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, 0); return 0; } -- cgit 1.2.3-korg