tun/tap: Bad packets sent to kernel via tun/tap interface

It was observed that under heavy traffic, VPP accidentally sent traffic with the wrong source and destination to the tun/tap interface. Traffic appears to be sent to the wrong direction. This problem is only seen when worker thread is configured. When worker thread is used, TX and RX may reside in different core. Yet both TX and RX threads are sharing the same global variable, namely iovecs without any mutex or memory barrier protection. This creates a race condition when heavy traffic is blasted to VPP, like 1000 pps. We could create a mutex or memory barrier to ensure atomic memory access. But why bother? It is a lot cheaper to just decouple the iovecs such that TX and RX have their own iovecs. Change-Id: I86a5a19bd8de54d54f32e1f0845bae6a81bbf686 Signed-off-by: Steven <sluong@cisco.com> (cherry picked from commit 4ff586d1c6fc5c40e1548cd6f221a8a7f3ad033b) (cherry picked from commit 3fd57e67532bd55701bef7365adc17da229a44dc)
author: Steven <sluong@cisco.com> 2017-09-26 15:58:24 -0700
committer: steven luong <sluong@cisco.com> 2017-10-06 04:20:02 +0000
commit: 43bb653d7fbc4903ea4960dac9c8a3ee8a9ed173 (patch)
tree: ca43c8e494e9c9aebd469f8d4224cb6936afe89e
parent: 348edb1c06c26bbb6f74ef91169cc5c8ce6cea37 (diff)
2 files changed, 29 insertions, 22 deletions
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index 0fc62f6c061..f39e2d7e75d 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -99,8 +99,11 @@ u8 * format_tapcli_rx_trace (u8 * s, va_list * va)
  * @brief TAPCLI main state struct
  */
 typedef struct {
-  /** Vector of iovecs for readv/writev calls. */
-  struct iovec * iovecs;
+  /** Vector of iovecs for readv calls. */
+  struct iovec * rd_iovecs;
+
+  /** Vector of iovecs for writev calls. */
+  struct iovec * wr_iovecs;
 
   /** Vector of VLIB rx buffers to use.  We allocate them in blocks
      of VLIB_FRAME_SIZE (256). */
@@ -201,11 +204,11 @@ tapcli_tx (vlib_main_t * vm,
         ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]);
 
       /* Re-set iovecs if present. */
-      if (tm->iovecs)
-	_vec_len (tm->iovecs) = 0;
+      if (tm->wr_iovecs)
+	_vec_len (tm->wr_iovecs) = 0;
 
       /* VLIB buffer chain -> Unix iovec(s). */
-      vec_add2 (tm->iovecs, iov, 1);
+      vec_add2 (tm->wr_iovecs, iov, 1);
       iov->iov_base = b->data + b->current_data;
       iov->iov_len = l = b->current_length;
 
@@ -214,7 +217,7 @@ tapcli_tx (vlib_main_t * vm,
 	  do {
 	    b = vlib_get_buffer (vm, b->next_buffer);
 
-	    vec_add2 (tm->iovecs, iov, 1);
+	    vec_add2 (tm->wr_iovecs, iov, 1);
 
 	    iov->iov_base = b->data + b->current_data;
 	    iov->iov_len = b->current_length;
@@ -222,7 +225,7 @@ tapcli_tx (vlib_main_t * vm,
 	  } while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
 	}
 
-      if (writev (ti->unix_fd, tm->iovecs, vec_len (tm->iovecs)) < l)
+      if (writev (ti->unix_fd, tm->wr_iovecs, vec_len (tm->wr_iovecs)) < l)
 	clib_unix_warning ("writev");
     }
 
@@ -294,14 +297,14 @@ static uword tapcli_rx_iface(vlib_main_t * vm,
 
     /* Allocate RX buffers from end of rx_buffers.
            Turn them into iovecs to pass to readv. */
-    vec_validate (tm->iovecs, tm->mtu_buffers - 1);
+    vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1);
     for (j = 0; j < tm->mtu_buffers; j++) {
       b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]);
-      tm->iovecs[j].iov_base = b->data;
-      tm->iovecs[j].iov_len = buffer_size;
+      tm->rd_iovecs[j].iov_base = b->data;
+      tm->rd_iovecs[j].iov_len = buffer_size;
     }
 
-    n_bytes_left = readv (ti->unix_fd, tm->iovecs, tm->mtu_buffers);
+    n_bytes_left = readv (ti->unix_fd, tm->rd_iovecs, tm->mtu_buffers);
     n_bytes_in_packet = n_bytes_left;
     if (n_bytes_left <= 0) {
       if (errno != EAGAIN) {
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
index ac674653df4..3b73c1ed6d1 100644
--- a/src/vnet/unix/tuntap.c
+++ b/src/vnet/unix/tuntap.c
@@ -70,8 +70,11 @@ typedef struct {
  * @brief TUNTAP node main state
  */
 typedef struct {
-  /** Vector of iovecs for readv/writev calls. */
-  struct iovec * iovecs;
+  /** Vector of iovecs for readv calls. */
+  struct iovec * rd_iovecs;
+
+  /** Vector of iovecs for writev calls. */
+  struct iovec * wr_iovecs;
 
   /** Vector of VLIB rx buffers to use.  We allocate them in blocks
      of VLIB_FRAME_SIZE (256). */
@@ -159,11 +162,11 @@ tuntap_tx (vlib_main_t * vm,
         }
 
       /* Re-set iovecs if present. */
-      if (tm->iovecs)
-	_vec_len (tm->iovecs) = 0;
+      if (tm->wr_iovecs)
+	_vec_len (tm->wr_iovecs) = 0;
 
       /** VLIB buffer chain -> Unix iovec(s). */
-      vec_add2 (tm->iovecs, iov, 1);
+      vec_add2 (tm->wr_iovecs, iov, 1);
       iov->iov_base = b->data + b->current_data;
       iov->iov_len = l = b->current_length;
 
@@ -172,7 +175,7 @@ tuntap_tx (vlib_main_t * vm,
 	  do {
 	    b = vlib_get_buffer (vm, b->next_buffer);
 
-	    vec_add2 (tm->iovecs, iov, 1);
+	    vec_add2 (tm->wr_iovecs, iov, 1);
 
 	    iov->iov_base = b->data + b->current_data;
 	    iov->iov_len = b->current_length;
@@ -180,7 +183,8 @@ tuntap_tx (vlib_main_t * vm,
 	  } while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
 	}
 
-      if (writev (tm->dev_net_tun_fd, tm->iovecs, vec_len (tm->iovecs)) < l)
+      if (writev (tm->dev_net_tun_fd, tm->wr_iovecs,
+		  vec_len (tm->wr_iovecs)) < l)
 	clib_unix_warning ("writev");
 
       n_bytes += l;
@@ -255,15 +259,15 @@ tuntap_rx (vlib_main_t * vm,
     /** We should have enough buffers left for an MTU sized packet. */
     ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers);
 
-    vec_validate (tm->iovecs, tm->mtu_buffers - 1);
+    vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1);
     for (i = 0; i < tm->mtu_buffers; i++)
       {
 	b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]);
-	tm->iovecs[i].iov_base = b->data;
-	tm->iovecs[i].iov_len = buffer_size;
+	tm->rd_iovecs[i].iov_base = b->data;
+	tm->rd_iovecs[i].iov_len = buffer_size;
       }
 
-    n_bytes_left = readv (tm->dev_net_tun_fd, tm->iovecs, tm->mtu_buffers);
+    n_bytes_left = readv (tm->dev_net_tun_fd, tm->rd_iovecs, tm->mtu_buffers);
     n_bytes_in_packet = n_bytes_left;
     if (n_bytes_left <= 0)
       {
author	Steven <sluong@cisco.com>	2017-09-26 15:58:24 -0700
committer	steven luong <sluong@cisco.com>	2017-10-06 04:20:02 +0000
commit	43bb653d7fbc4903ea4960dac9c8a3ee8a9ed173 (patch)
tree	ca43c8e494e9c9aebd469f8d4224cb6936afe89e
parent	348edb1c06c26bbb6f74ef91169cc5c8ce6cea37 (diff)