summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Ivanov <anton.ivanov@cambridgegreys.com>2017-10-03 10:08:05 +0100
committerDamjan Marion <dmarion.lists@gmail.com>2017-10-04 09:42:23 +0000
commit3eab064e3fadaf2a6a128f167ad04ca0319b4e17 (patch)
tree3655480915e5d403efae47cbaec9d83262fb9fcb
parent28029530963223c5c3b94f7a2f9d1343662a1a04 (diff)
VPP-1001 - update AF Packet Driver for modern kernels
1. Add VNET headers support for checksumming - required to operate correctly on any recent Linux 2. Bypass QDISC on transmit - improves performance by ~ 5%. Enabled only if the macro is detected - apparently not present on archaic distributions. This still does not solve all issues with TSO - it can be fixed only by going to tpacket v3 and dynamic rx ring as well as significant changes in the TX (sendmmsg?). Change-Id: Iea14ade12586c0a8da49e6dd1012108a08bc85b3 Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
-rw-r--r--src/vnet/devices/af_packet/af_packet.c30
-rw-r--r--src/vnet/devices/af_packet/af_packet.h3
-rw-r--r--src/vnet/devices/af_packet/device.c30
-rw-r--r--src/vnet/devices/af_packet/node.c66
4 files changed, 119 insertions, 10 deletions
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
index 32696014727..fbcd488ac9b 100644
--- a/src/vnet/devices/af_packet/af_packet.c
+++ b/src/vnet/devices/af_packet/af_packet.c
@@ -23,6 +23,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
+#include <linux/virtio_net.h>
#include <vppinfra/linux/sysfs.h>
#include <vlib/vlib.h>
@@ -128,6 +129,7 @@ static int
create_packet_v2_sock (int host_if_index, tpacket_req_t * rx_req,
tpacket_req_t * tx_req, int *fd, u8 ** ring)
{
+ af_packet_main_t *apm = &af_packet_main;
int ret, err;
struct sockaddr_ll sll;
int ver = TPACKET_V2;
@@ -141,7 +143,31 @@ create_packet_v2_sock (int host_if_index, tpacket_req_t * rx_req,
ret = VNET_API_ERROR_SYSCALL_ERROR_1;
goto error;
}
-
+ int opt = 1;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_VNET_HDR, &opt, sizeof (opt)) != 0)
+ {
+ DBG_SOCK ("Failed to enable vnet headers on the socket");
+ if ((apm->flags & AF_PACKET_USES_VNET_HEADERS) != 0)
+ {
+ /* Should never happen - vnet was already enabled once,
+ * but we fail to reenable it on a new interface
+ **/
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
+ else
+ {
+ apm->flags |= AF_PACKET_USES_VNET_HEADERS;
+ }
+#ifdef PACKET_QDISC_BYPASS
+ opt = 1;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof (opt)) !=
+ 0)
+ {
+ DBG_SOCK ("Failed to bypass Linux QDISC");
+ }
+#endif
if ((err =
setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver))) < 0)
{
@@ -150,7 +176,7 @@ create_packet_v2_sock (int host_if_index, tpacket_req_t * rx_req,
goto error;
}
- int opt = 1;
+ opt = 1;
if ((err =
setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt))) < 0)
{
diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h
index 95c7e7cf5a7..f731427c6b5 100644
--- a/src/vnet/devices/af_packet/af_packet.h
+++ b/src/vnet/devices/af_packet/af_packet.h
@@ -19,6 +19,8 @@
#include <vppinfra/lock.h>
+#define AF_PACKET_USES_VNET_HEADERS 1
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -54,6 +56,7 @@ typedef struct
/* hash of host interface names */
mhash_t if_index_by_host_if_name;
+ u32 flags;
} af_packet_main_t;
af_packet_main_t af_packet_main;
diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c
index e01b1c71b32..a48ae5cf25d 100644
--- a/src/vnet/devices/af_packet/device.c
+++ b/src/vnet/devices/af_packet/device.c
@@ -23,6 +23,8 @@
#include <net/if.h>
#include <net/if_arp.h>
+#include <linux/virtio_net.h>
+
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/ip/ip.h>
@@ -50,7 +52,6 @@ static char *af_packet_tx_func_error_strings[] = {
#undef _
};
-
static u8 *
format_af_packet_device_name (u8 * s, va_list * args)
{
@@ -76,6 +77,23 @@ format_af_packet_tx_trace (u8 * s, va_list * args)
return s;
}
+
+static_always_inline void
+af_packet_buffer_tx_offload (vlib_buffer_t * b, struct virtio_net_hdr *vhdr)
+{
+ /* Sets the flags field of the virtio_net_hdr passed in. For now
+ * only that field is written and b is not examined: the data is
+ * just marked VIRTIO_NET_HDR_F_DATA_VALID. Per the original note:
+ * DPDK csums on input, tap presently operates in legacy
+ * compatibility mode where the kernel checksums CSUM_PARTIAL for
+ * it, and the af_packet input has been fixed.
+ * In the future, locally originated frames, etc. can be made to fit
+ * this convention so that they are not checksummed unless needed.
+ **/
+ vhdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+}
+
+
static uword
af_packet_interface_tx (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -102,6 +120,10 @@ af_packet_interface_tx (vlib_main_t * vm,
{
u32 len;
u32 offset = 0;
+ if (PREDICT_TRUE ((apm->flags & AF_PACKET_USES_VNET_HEADERS) != 0))
+ {
+ offset = sizeof (struct virtio_net_hdr);
+ }
vlib_buffer_t *b0;
n_left--;
u32 bi = buffers[0];
@@ -119,6 +141,12 @@ af_packet_interface_tx (vlib_main_t * vm,
do
{
b0 = vlib_get_buffer (vm, bi);
+ if (PREDICT_TRUE ((apm->flags & AF_PACKET_USES_VNET_HEADERS) != 0))
+ {
+ u8 *vh =
+ (u8 *) tph + TPACKET_ALIGN (sizeof (struct tpacket2_hdr));
+ af_packet_buffer_tx_offload (b0, (struct virtio_net_hdr *) vh);
+ }
len = b0->current_length;
clib_memcpy ((u8 *) tph +
TPACKET_ALIGN (sizeof (struct tpacket2_hdr)) + offset,
diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
index 99c91f38805..5301ad299f2 100644
--- a/src/vnet/devices/af_packet/node.c
+++ b/src/vnet/devices/af_packet/node.c
@@ -1,5 +1,4 @@
-/*
- *------------------------------------------------------------------
+/*------------------------------------------------------------------
* af_packet.c - linux kernel packet interface
*
* Copyright (c) 2016 Cisco and/or its affiliates.
@@ -18,6 +17,7 @@
*/
#include <linux/if_packet.h>
+#include <linux/virtio_net.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
@@ -155,9 +155,18 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
n_left_to_next)
{
+
+ struct virtio_net_hdr *vh =
+ (struct virtio_net_hdr *) (((u8 *) tph) + tph->tp_mac -
+ sizeof (struct virtio_net_hdr));
u32 data_len = tph->tp_snaplen;
u32 offset = 0;
u32 bi0 = 0, first_bi0 = 0, prev_bi0;
+ u32 vlan_len = 0;
+ ip_csum_t wsum = 0;
+ u16 *wsum_addr = NULL;
+ u32 do_vnet = apm->flags & AF_PACKET_USES_VNET_HEADERS;
+ u32 do_csum = tph->tp_status & TP_STATUS_CSUMNOTREADY;
while (data_len)
{
@@ -173,7 +182,6 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
/* copy data */
u32 bytes_to_copy =
data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
- u32 vlan_len = 0;
u32 bytes_copied = 0;
b0->current_data = 0;
/* Kernel removes VLAN headers, so reconstruct VLAN */
@@ -195,10 +203,50 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
bytes_copied = sizeof (ethernet_header_t);
}
}
- clib_memcpy (((u8 *) vlib_buffer_get_current (b0)) +
- bytes_copied + vlan_len,
- (u8 *) tph + tph->tp_mac + offset + bytes_copied,
- (bytes_to_copy - bytes_copied));
+ /* Check if the incoming skb is marked as CSUM_PARTIAL,
+ * If VNET Headers are enabled TP_STATUS_CSUMNOTREADY is
+ * equivalent to the vnet csum flag.
+ **/
+ if (PREDICT_TRUE ((do_vnet != 0) && (do_csum != 0)))
+ {
+ wsum_addr = (u16 *) (((u8 *) vlib_buffer_get_current (b0)) +
+ vlan_len + vh->csum_start +
+ vh->csum_offset);
+ if (bytes_copied <= vh->csum_start)
+ {
+ clib_memcpy (((u8 *) vlib_buffer_get_current (b0)) +
+ bytes_copied + vlan_len,
+ (u8 *) tph + tph->tp_mac + offset +
+ bytes_copied,
+ (vh->csum_start - bytes_copied));
+ wsum =
+ ip_csum_and_memcpy (wsum,
+ ((u8 *)
+ vlib_buffer_get_current (b0)) +
+ vh->csum_start + vlan_len,
+ (u8 *) tph + tph->tp_mac +
+ offset + vh->csum_start,
+ (bytes_to_copy - vh->csum_start));
+ }
+ else
+ {
+ wsum =
+ ip_csum_and_memcpy (wsum,
+ ((u8 *)
+ vlib_buffer_get_current (b0)) +
+ bytes_copied + vlan_len,
+ (u8 *) tph + tph->tp_mac +
+ offset + bytes_copied,
+ (bytes_to_copy - bytes_copied));
+ }
+ }
+ else
+ {
+ clib_memcpy (((u8 *) vlib_buffer_get_current (b0)) +
+ bytes_copied + vlan_len,
+ (u8 *) tph + tph->tp_mac + offset +
+ bytes_copied, (bytes_to_copy - bytes_copied));
+ }
/* fill buffer header */
b0->current_length = bytes_to_copy + vlan_len;
@@ -218,6 +266,10 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
offset += bytes_to_copy;
data_len -= bytes_to_copy;
}
+ if (PREDICT_TRUE ((do_vnet != 0) && (do_csum != 0)))
+ {
+ *wsum_addr = ~ip_csum_fold (wsum);
+ }
n_rx_packets++;
n_rx_bytes += tph->tp_snaplen;
to_next[0] = first_bi0;