| author | Steven <sluong@cisco.com> | 2017-09-26 15:58:24 -0700 |
|---|---|---|
| committer | Damjan Marion <dmarion.lists@gmail.com> | 2017-09-28 00:09:15 +0000 |
| commit | 4ff586d1c6fc5c40e1548cd6f221a8a7f3ad033b (patch) | |
| tree | bd2d739d9c4998b1581bae1a7be408b4bfba6ef4 | |
| parent | 75a17ecddc9dc579a3aecfc5b53cbb60e993965f (diff) | |
tun/tap: Bad packets sent to kernel via tun/tap interface
It was observed that under heavy traffic, VPP occasionally sent packets
with the wrong source and destination to the tun/tap interface; traffic
appeared to be going in the wrong direction. The problem is only seen
when worker threads are configured.
When worker threads are used, TX and RX may run on different cores, yet
both threads share the same global variable, the iovecs vector, without
any mutex or memory-barrier protection. This creates a race condition
when heavy traffic is blasted at VPP, e.g. 1000 pps.
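
To make the interleaving concrete, here is a minimal, single-threaded sketch of it (not VPP code; `shared_iovecs` and the pipe standing in for the tap fd are hypothetical): the TX path points the shared iovec slot at its packet, the RX path then reuses the same slot for its own buffer, and the subsequent writev() hands the kernel the wrong bytes.

```c
/* Hedged sketch of the interleaving described above, not VPP code.
 * A single shared iovec slot is repointed by the "RX" step before the
 * "TX" step calls writev(), so the kernel receives the wrong data.
 * shared_iovecs and the pipe in place of the tap fd are hypothetical. */
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

static struct iovec shared_iovecs[1];   /* one vector shared by TX and RX */

int main (void)
{
  int fds[2];
  if (pipe (fds) < 0)                   /* stands in for the tun/tap fd */
    return 1;

  char tx_packet[] = "intended packet";
  char rx_scratch[] = "stale rx buffer";

  /* TX thread: describe the packet it is about to writev(). */
  shared_iovecs[0].iov_base = tx_packet;
  shared_iovecs[0].iov_len = sizeof tx_packet;

  /* RX thread, running on another core, reuses the same slot for readv(). */
  shared_iovecs[0].iov_base = rx_scratch;
  shared_iovecs[0].iov_len = sizeof rx_scratch;

  /* TX thread resumes: the kernel now sees the RX scratch buffer. */
  if (writev (fds[1], shared_iovecs, 1) < 0)
    return 1;

  char seen[64] = { 0 };
  if (read (fds[0], seen, sizeof seen) < 0)
    return 1;
  printf ("kernel saw: \"%s\"\n", seen); /* prints "stale rx buffer" */
  return 0;
}
```

In the real code the two paths run concurrently on different cores, so the corruption is intermittent rather than deterministic as it is here.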
We could add a mutex or memory barrier to ensure atomic memory access,
but why bother? It is far cheaper to simply decouple the iovecs so that
TX and RX each have their own vector, as sketched below.
Change-Id: I86a5a19bd8de54d54f32e1f0845bae6a81bbf686
Signed-off-by: Steven <sluong@cisco.com>
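
A minimal sketch of the decoupling the patch applies follows, assuming hypothetical names (`tap_main_sketch_t`, `tx_once`, `rx_once`) and a pipe in place of the real tap fd: the RX path owns `rd_iovecs` and the TX path owns `wr_iovecs`, so no shared vector remains to race on.

```c
/* Hedged sketch of the fix: per-direction iovec state.  The names and the
 * pipe in place of the real tap fd are hypothetical, not the VPP structs. */
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

typedef struct {
  struct iovec rd_iovecs[4];   /* owned by the RX path (readv)  */
  struct iovec wr_iovecs[4];   /* owned by the TX path (writev) */
} tap_main_sketch_t;

static tap_main_sketch_t tm;

static void tx_once (int fd, char *pkt, size_t len)
{
  tm.wr_iovecs[0].iov_base = pkt;        /* TX touches wr_iovecs only */
  tm.wr_iovecs[0].iov_len = len;
  if (writev (fd, tm.wr_iovecs, 1) < (ssize_t) len)
    perror ("writev");
}

static void rx_once (int fd, char *buf, size_t len)
{
  tm.rd_iovecs[0].iov_base = buf;        /* RX touches rd_iovecs only */
  tm.rd_iovecs[0].iov_len = len;
  if (readv (fd, tm.rd_iovecs, 1) < 0)
    perror ("readv");
}

int main (void)
{
  int fds[2];
  if (pipe (fds) < 0)
    return 1;

  char pkt[] = "hello";
  char buf[64] = { 0 };
  tx_once (fds[1], pkt, sizeof pkt);
  rx_once (fds[0], buf, sizeof buf);
  printf ("received: %s\n", buf);
  return 0;
}
```

This mirrors the rd_iovecs/wr_iovecs split introduced in the diff below.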
| -rw-r--r-- | src/vnet/unix/tapcli.c | 25 |
|---|---|---|
| -rw-r--r-- | src/vnet/unix/tuntap.c | 26 |

2 files changed, 29 insertions, 22 deletions
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index 13154b3b034..ce386094c6e 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -99,8 +99,11 @@ u8 * format_tapcli_rx_trace (u8 * s, va_list * va)
  * @brief TAPCLI main state struct
  */
 typedef struct {
-  /** Vector of iovecs for readv/writev calls. */
-  struct iovec * iovecs;
+  /** Vector of iovecs for readv calls. */
+  struct iovec * rd_iovecs;
+
+  /** Vector of iovecs for writev calls. */
+  struct iovec * wr_iovecs;
 
   /** Vector of VLIB rx buffers to use.  We allocate them in blocks
      of VLIB_FRAME_SIZE (256). */
@@ -199,11 +202,11 @@ tapcli_tx (vlib_main_t * vm,
       ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]);
 
       /* Re-set iovecs if present. */
-      if (tm->iovecs)
-        _vec_len (tm->iovecs) = 0;
+      if (tm->wr_iovecs)
+        _vec_len (tm->wr_iovecs) = 0;
 
       /* VLIB buffer chain -> Unix iovec(s). */
-      vec_add2 (tm->iovecs, iov, 1);
+      vec_add2 (tm->wr_iovecs, iov, 1);
       iov->iov_base = b->data + b->current_data;
       iov->iov_len = l = b->current_length;
 
@@ -212,7 +215,7 @@ tapcli_tx (vlib_main_t * vm,
           do {
             b = vlib_get_buffer (vm, b->next_buffer);
 
-            vec_add2 (tm->iovecs, iov, 1);
+            vec_add2 (tm->wr_iovecs, iov, 1);
             iov->iov_base = b->data + b->current_data;
             iov->iov_len = b->current_length;
 
@@ -220,7 +223,7 @@ tapcli_tx (vlib_main_t * vm,
           } while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
         }
 
-      if (writev (ti->unix_fd, tm->iovecs, vec_len (tm->iovecs)) < l)
+      if (writev (ti->unix_fd, tm->wr_iovecs, vec_len (tm->wr_iovecs)) < l)
         clib_unix_warning ("writev");
     }
 
@@ -292,14 +295,14 @@ static uword tapcli_rx_iface(vlib_main_t * vm,
     /* Allocate RX buffers from end of rx_buffers.
        Turn them into iovecs to pass to readv. */
-    vec_validate (tm->iovecs, tm->mtu_buffers - 1);
+    vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1);
     for (j = 0; j < tm->mtu_buffers; j++) {
       b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]);
-      tm->iovecs[j].iov_base = b->data;
-      tm->iovecs[j].iov_len = buffer_size;
+      tm->rd_iovecs[j].iov_base = b->data;
+      tm->rd_iovecs[j].iov_len = buffer_size;
     }
 
-    n_bytes_left = readv (ti->unix_fd, tm->iovecs, tm->mtu_buffers);
+    n_bytes_left = readv (ti->unix_fd, tm->rd_iovecs, tm->mtu_buffers);
     n_bytes_in_packet = n_bytes_left;
     if (n_bytes_left <= 0) {
       if (errno != EAGAIN) {
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
index 9616feb2d51..dc5c2a890d8 100644
--- a/src/vnet/unix/tuntap.c
+++ b/src/vnet/unix/tuntap.c
@@ -71,8 +71,11 @@ typedef struct {
  * @brief TUNTAP node main state
  */
 typedef struct {
-  /** Vector of iovecs for readv/writev calls. */
-  struct iovec * iovecs;
+  /** Vector of iovecs for readv calls. */
+  struct iovec * rd_iovecs;
+
+  /** Vector of iovecs for writev calls. */
+  struct iovec * wr_iovecs;
 
   /** Vector of VLIB rx buffers to use.  We allocate them in blocks
      of VLIB_FRAME_SIZE (256). */
@@ -160,11 +163,11 @@ tuntap_tx (vlib_main_t * vm,
         }
 
       /* Re-set iovecs if present. */
-      if (tm->iovecs)
-        _vec_len (tm->iovecs) = 0;
+      if (tm->wr_iovecs)
+        _vec_len (tm->wr_iovecs) = 0;
 
       /** VLIB buffer chain -> Unix iovec(s). */
-      vec_add2 (tm->iovecs, iov, 1);
+      vec_add2 (tm->wr_iovecs, iov, 1);
       iov->iov_base = b->data + b->current_data;
       iov->iov_len = l = b->current_length;
 
@@ -173,7 +176,7 @@ tuntap_tx (vlib_main_t * vm,
           do {
             b = vlib_get_buffer (vm, b->next_buffer);
 
-            vec_add2 (tm->iovecs, iov, 1);
+            vec_add2 (tm->wr_iovecs, iov, 1);
             iov->iov_base = b->data + b->current_data;
             iov->iov_len = b->current_length;
 
@@ -181,7 +184,8 @@ tuntap_tx (vlib_main_t * vm,
           } while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
         }
 
-      if (writev (tm->dev_net_tun_fd, tm->iovecs, vec_len (tm->iovecs)) < l)
+      if (writev (tm->dev_net_tun_fd, tm->wr_iovecs,
+                  vec_len (tm->wr_iovecs)) < l)
         clib_unix_warning ("writev");
 
       n_bytes += l;
@@ -256,15 +260,15 @@ tuntap_rx (vlib_main_t * vm,
     /** We should have enough buffers left for an MTU sized packet. */
     ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers);
 
-    vec_validate (tm->iovecs, tm->mtu_buffers - 1);
+    vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1);
     for (i = 0; i < tm->mtu_buffers; i++) {
      b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]);
-     tm->iovecs[i].iov_base = b->data;
-     tm->iovecs[i].iov_len = buffer_size;
+     tm->rd_iovecs[i].iov_base = b->data;
+     tm->rd_iovecs[i].iov_len = buffer_size;
     }
 
-    n_bytes_left = readv (tm->dev_net_tun_fd, tm->iovecs, tm->mtu_buffers);
+    n_bytes_left = readv (tm->dev_net_tun_fd, tm->rd_iovecs, tm->mtu_buffers);
     n_bytes_in_packet = n_bytes_left;
     if (n_bytes_left <= 0) {
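
Both TX paths above flatten a chained VLIB buffer into one iovec per segment and then issue a single writev(). Here is a hedged, standalone sketch of that gather step, with a plain linked list standing in for vlib_buffer_t and a pipe standing in for the tap fd (all names hypothetical):

```c
/* Hedged sketch of the "buffer chain -> Unix iovec(s) -> writev" step used
 * by tapcli_tx/tuntap_tx above.  seg_t, chain_to_iovecs and the pipe are
 * hypothetical stand-ins, not VPP APIs. */
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

typedef struct seg {
  char *data;
  size_t len;
  struct seg *next;
} seg_t;

/* Fill one iovec per segment; return the total number of bytes described. */
static size_t chain_to_iovecs (seg_t *s, struct iovec *iov, int *n_iov)
{
  size_t total = 0;
  int n = 0;
  for (; s; s = s->next)
    {
      iov[n].iov_base = s->data;
      iov[n].iov_len = s->len;
      total += s->len;
      n++;
    }
  *n_iov = n;
  return total;
}

int main (void)
{
  int fds[2];
  if (pipe (fds) < 0)
    return 1;

  /* A two-segment "packet", like a chained VLIB buffer. */
  char a[] = "hdr|", b[] = "payload";
  seg_t s1 = { b, strlen (b), NULL };
  seg_t s0 = { a, strlen (a), &s1 };

  struct iovec iov[8];
  int n_iov;
  size_t total = chain_to_iovecs (&s0, iov, &n_iov);

  /* One writev() sends the whole chain without copying it together first. */
  if (writev (fds[1], iov, n_iov) < (ssize_t) total)
    perror ("writev");

  char out[64] = { 0 };
  if (read (fds[0], out, sizeof out) < 0)
    perror ("read");
  printf ("wrote \"%s\" in %d iovecs\n", out, n_iov);
  return 0;
}
```

Scatter-gather I/O lets the whole chain reach the kernel in one system call without first copying the segments into a contiguous buffer.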