diff options
author | Steven <sluong@cisco.com> | 2017-09-26 15:58:24 -0700 |
---|---|---|
committer | steven luong <sluong@cisco.com> | 2017-10-06 04:20:02 +0000 |
commit | 43bb653d7fbc4903ea4960dac9c8a3ee8a9ed173 (patch) | |
tree | ca43c8e494e9c9aebd469f8d4224cb6936afe89e | |
parent | 348edb1c06c26bbb6f74ef91169cc5c8ce6cea37 (diff) |
tun/tap: Bad packets sent to kernel via tun/tap interface
It was observed that under heavy traffic, VPP accidentally sent traffic
with the wrong source and destination to the tun/tap interface. Traffic
appears to be sent to the wrong direction. This problem is only
seen when worker thread is configured.
When worker thread is used, TX and RX may reside in different
core. Yet both TX and RX threads are sharing the same global variable,
namely iovecs without any mutex or memory barrier protection.
This creates a race condition when heavy traffic is blasted to VPP,
like 1000 pps.
We could create a mutex or memory barrier to ensure atomic memory access.
But why bother? It is a lot cheaper to just decouple the iovecs such
that TX and RX have their own iovecs.
Change-Id: I86a5a19bd8de54d54f32e1f0845bae6a81bbf686
Signed-off-by: Steven <sluong@cisco.com>
(cherry picked from commit 4ff586d1c6fc5c40e1548cd6f221a8a7f3ad033b)
(cherry picked from commit 3fd57e67532bd55701bef7365adc17da229a44dc)
-rw-r--r-- | src/vnet/unix/tapcli.c | 25 | ||||
-rw-r--r-- | src/vnet/unix/tuntap.c | 26 |
2 files changed, 29 insertions, 22 deletions
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 0fc62f6c061..f39e2d7e75d 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -99,8 +99,11 @@ u8 * format_tapcli_rx_trace (u8 * s, va_list * va) * @brief TAPCLI main state struct */ typedef struct { - /** Vector of iovecs for readv/writev calls. */ - struct iovec * iovecs; + /** Vector of iovecs for readv calls. */ + struct iovec * rd_iovecs; + + /** Vector of iovecs for writev calls. */ + struct iovec * wr_iovecs; /** Vector of VLIB rx buffers to use. We allocate them in blocks of VLIB_FRAME_SIZE (256). */ @@ -201,11 +204,11 @@ tapcli_tx (vlib_main_t * vm, ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]); /* Re-set iovecs if present. */ - if (tm->iovecs) - _vec_len (tm->iovecs) = 0; + if (tm->wr_iovecs) + _vec_len (tm->wr_iovecs) = 0; /* VLIB buffer chain -> Unix iovec(s). */ - vec_add2 (tm->iovecs, iov, 1); + vec_add2 (tm->wr_iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; @@ -214,7 +217,7 @@ tapcli_tx (vlib_main_t * vm, do { b = vlib_get_buffer (vm, b->next_buffer); - vec_add2 (tm->iovecs, iov, 1); + vec_add2 (tm->wr_iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = b->current_length; @@ -222,7 +225,7 @@ tapcli_tx (vlib_main_t * vm, } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); } - if (writev (ti->unix_fd, tm->iovecs, vec_len (tm->iovecs)) < l) + if (writev (ti->unix_fd, tm->wr_iovecs, vec_len (tm->wr_iovecs)) < l) clib_unix_warning ("writev"); } @@ -294,14 +297,14 @@ static uword tapcli_rx_iface(vlib_main_t * vm, /* Allocate RX buffers from end of rx_buffers. Turn them into iovecs to pass to readv. */ - vec_validate (tm->iovecs, tm->mtu_buffers - 1); + vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1); for (j = 0; j < tm->mtu_buffers; j++) { b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]); - tm->iovecs[j].iov_base = b->data; - tm->iovecs[j].iov_len = buffer_size; + tm->rd_iovecs[j].iov_base = b->data; + tm->rd_iovecs[j].iov_len = buffer_size; } - n_bytes_left = readv (ti->unix_fd, tm->iovecs, tm->mtu_buffers); + n_bytes_left = readv (ti->unix_fd, tm->rd_iovecs, tm->mtu_buffers); n_bytes_in_packet = n_bytes_left; if (n_bytes_left <= 0) { if (errno != EAGAIN) { diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index ac674653df4..3b73c1ed6d1 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -70,8 +70,11 @@ typedef struct { * @brief TUNTAP node main state */ typedef struct { - /** Vector of iovecs for readv/writev calls. */ - struct iovec * iovecs; + /** Vector of iovecs for readv calls. */ + struct iovec * rd_iovecs; + + /** Vector of iovecs for writev calls. */ + struct iovec * wr_iovecs; /** Vector of VLIB rx buffers to use. We allocate them in blocks of VLIB_FRAME_SIZE (256). */ @@ -159,11 +162,11 @@ tuntap_tx (vlib_main_t * vm, } /* Re-set iovecs if present. */ - if (tm->iovecs) - _vec_len (tm->iovecs) = 0; + if (tm->wr_iovecs) + _vec_len (tm->wr_iovecs) = 0; /** VLIB buffer chain -> Unix iovec(s). */ - vec_add2 (tm->iovecs, iov, 1); + vec_add2 (tm->wr_iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; @@ -172,7 +175,7 @@ tuntap_tx (vlib_main_t * vm, do { b = vlib_get_buffer (vm, b->next_buffer); - vec_add2 (tm->iovecs, iov, 1); + vec_add2 (tm->wr_iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = b->current_length; @@ -180,7 +183,8 @@ tuntap_tx (vlib_main_t * vm, } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); } - if (writev (tm->dev_net_tun_fd, tm->iovecs, vec_len (tm->iovecs)) < l) + if (writev (tm->dev_net_tun_fd, tm->wr_iovecs, + vec_len (tm->wr_iovecs)) < l) clib_unix_warning ("writev"); n_bytes += l; @@ -255,15 +259,15 @@ tuntap_rx (vlib_main_t * vm, /** We should have enough buffers left for an MTU sized packet. */ ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers); - vec_validate (tm->iovecs, tm->mtu_buffers - 1); + vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1); for (i = 0; i < tm->mtu_buffers; i++) { b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]); - tm->iovecs[i].iov_base = b->data; - tm->iovecs[i].iov_len = buffer_size; + tm->rd_iovecs[i].iov_base = b->data; + tm->rd_iovecs[i].iov_len = buffer_size; } - n_bytes_left = readv (tm->dev_net_tun_fd, tm->iovecs, tm->mtu_buffers); + n_bytes_left = readv (tm->dev_net_tun_fd, tm->rd_iovecs, tm->mtu_buffers); n_bytes_in_packet = n_bytes_left; if (n_bytes_left <= 0) { |