author     Christian Ehrhardt <christian.ehrhardt@canonical.com>   2016-07-06 09:22:35 +0200
committer  Christian Ehrhardt <christian.ehrhardt@canonical.com>   2016-07-06 16:09:40 +0200
commit     8b25d1ad5d2264bdfc2818c7bda74ee2697df6db (patch)
tree       8c3c769777f7e66a2d1ba7dd7651b563cfde370b /drivers/net/virtio
parent     97f17497d162afdb82c8704bf097f0fee3724b2e (diff)
Imported Upstream version 16.07-rc1
Change-Id: I40a523e52f12e8496fdd69e902824b0226c303de
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Diffstat (limited to 'drivers/net/virtio')
-rw-r--r--  drivers/net/virtio/Makefile                        |    7
-rw-r--r--  drivers/net/virtio/virtio_ethdev.c                 |  515
-rw-r--r--  drivers/net/virtio/virtio_ethdev.h                 |    4
-rw-r--r--  drivers/net/virtio/virtio_logs.h                   |    6
-rw-r--r--  drivers/net/virtio/virtio_pci.c                    |  120
-rw-r--r--  drivers/net/virtio/virtio_pci.h                    |    9
-rw-r--r--  drivers/net/virtio/virtio_ring.h                   |    2
-rw-r--r--  drivers/net/virtio/virtio_rxtx.c                   |  361
-rw-r--r--  drivers/net/virtio/virtio_rxtx.h                   |   56
-rw-r--r--  drivers/net/virtio/virtio_rxtx_simple.c            |   93
-rw-r--r--  drivers/net/virtio/virtio_user/vhost.h             |  146
-rw-r--r--  drivers/net/virtio/virtio_user/vhost_user.c        |  426
-rw-r--r--  drivers/net/virtio/virtio_user/virtio_user_dev.c   |  333
-rw-r--r--  drivers/net/virtio/virtio_user/virtio_user_dev.h   |   62
-rw-r--r--  drivers/net/virtio/virtio_user_ethdev.c            |  440
-rw-r--r--  drivers/net/virtio/virtqueue.h                     |   80
16 files changed, 2183 insertions, 477 deletions
/*
----------------------------------------------------------------
Notice that the following BSD-style license applies to this one
file (memcheck.h) only. The rest of Valgrind is licensed under the
terms of the GNU General Public License, version 2, unless
otherwise indicated. See the COPYING file in the source
distribution for details.
----------------------------------------------------------------
This file is part of MemCheck, a heavyweight Valgrind tool for
detecting memory errors.
Copyright (C) 2000-2009 Julian Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
3. Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
4. The name of the author may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------
Notice that the above BSD-style license applies to this one file
(memcheck.h) only. The entire rest of Valgrind is licensed under
the terms of the GNU General Public License, version 2. See the
COPYING file in the source distribution for details.
----------------------------------------------------------------
*/
#ifndef __MEMCHECK_H
#define __MEMCHECK_H
/* This file is for inclusion into client (your!) code.
You can use these macros to manipulate and query memory permissions
inside your own programs.
See comment near the top of valgrind.h on how to use them.
*/
#include "valgrind.h"
/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !!
This enum comprises an ABI exported by Valgrind to programs
which use client requests. DO NOT CHANGE THE ORDER OF THESE
ENTRIES, NOR DELETE ANY -- add new ones at the end. */
typedef enum
{
VG_USERREQ__MAKE_MEM_NOACCESS = VG_USERREQ_TOOL_BASE ('M', 'C'),
VG_USERREQ__MAKE_MEM_UNDEFINED,
VG_USERREQ__MAKE_MEM_DEFINED,
VG_USERREQ__DISCARD,
VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE,
VG_USERREQ__CHECK_MEM_IS_DEFINED,
VG_USERREQ__DO_LEAK_CHECK,
VG_USERREQ__COUNT_LEAKS,
VG_USERREQ__GET_VBITS,
VG_USERREQ__SET_VBITS,
VG_USERREQ__CREATE_BLOCK,
VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE,
/* Not next to VG_USERREQ__COUNT_LEAKS because it was added later. */
VG_USERREQ__COUNT_LEAK_BLOCKS,
/* This is just for memcheck's internal use - don't use it */
_VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR
= VG_USERREQ_TOOL_BASE ('M', 'C') + 256
} Vg_MemCheckClientRequest;
/* Client-code macros to manipulate the state of memory. */
/* Mark memory at _qzz_addr as unaddressable for _qzz_len bytes. */
#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr,_qzz_len) \
(__extension__({unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
VG_USERREQ__MAKE_MEM_NOACCESS, \
_qzz_addr, _qzz_len, 0, 0, 0); \
_qzz_res; \
}))
/* Similarly, mark memory at _qzz_addr as addressable but undefined
for _qzz_len bytes. */
#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr,_qzz_len) \
(__extension__({unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
VG_USERREQ__MAKE_MEM_UNDEFINED, \
_qzz_addr, _qzz_len, 0, 0, 0); \
_qzz_res; \
}))
/* Similarly, mark memory at _qzz_addr as addressable and defined
for _qzz_len bytes. */
#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr,_qzz_len) \
(__extension__({unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
VG_USERREQ__MAKE_MEM_DEFINED, \
_qzz_addr, _qzz_len, 0, 0, 0); \
_qzz_res; \
}))
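/* Usage sketch: memory initialised behind Memcheck's back, e.g. by a
   device or a foreign runtime, can be declared defined afterwards.  The
   dma_read() helper is hypothetical and stands for any such producer.

       char buf[256];

       dma_read(buf, sizeof(buf));
       VALGRIND_MAKE_MEM_DEFINED(buf, sizeof(buf));

   Without the request, Memcheck would keep reporting later reads of
   buf as uses of uninitialised values. */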
/* Similar to VALGRIND_MAKE_MEM_DEFINED except that addressability is
not altered: bytes which are addressable are marked as defined,
but those which are not addressable are left unchanged. */
#define VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(_qzz_addr,_qzz_len) \
(__extension__({unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE, \
_qzz_addr, _qzz_len, 0, 0, 0); \
_qzz_res; \
}))
/* Create a block-description handle. The description is an ASCII
string which is included in any messages pertaining to addresses
within the specified memory range. Has no other effect on the
properties of the memory range. */
#define VALGRIND_CREATE_BLOCK(_qzz_addr,_qzz_len, _qzz_desc) \
(__extension__({unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
VG_USERREQ__CREATE_BLOCK, \
_qzz_addr, _qzz_len, _qzz_desc, \
0, 0); \
_qzz_res; \
}))
/* Discard a block-description-handle. Returns 1 for an
invalid handle, 0 for a valid handle. */
#define VALGRIND_DISCARD(_qzz_blkindex) \
(__extension__ ({unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
VG_USERREQ__DISCARD, \
0, _qzz_blkindex, 0, 0, 0); \
_qzz_res; \
}))
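/* Usage sketch: labelling a region so Memcheck error messages mention
   it by name; ring and ring_bytes are placeholders for whatever region
   the client wants described.

       unsigned handle =
           VALGRIND_CREATE_BLOCK(ring, ring_bytes, "my rx ring");
       ...
       VALGRIND_DISCARD(handle);
*/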
/* Client-code macros to check the state of memory. */
/* Check that memory at _qzz_addr is addressable for _qzz_len bytes.
If suitable addressability is not established, Valgrind prints an
error message and returns the address of the first offending byte.
Otherwise it returns zero. */
#define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(_qzz_addr,_qzz_len) \
(__extension__({unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE,\
_qzz_addr, _qzz_len, 0, 0, 0); \
_qzz_res; \
}))
/* Check that memory at _qzz_addr is addressable and defined for
_qzz_len bytes. If suitable addressability and definedness are not
established, Valgrind prints an error message and returns the
address of the first offending byte. Otherwise it returns zero. */
#define VALGRIND_CHECK_MEM_IS_DEFINED(_qzz_addr,_qzz_len) \
(__extension__({unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
VG_USERREQ__CHECK_MEM_IS_DEFINED, \
_qzz_addr, _qzz_len, 0, 0, 0); \
_qzz_res; \
}))
/* Use this macro to force the definedness and addressability of an
lvalue to be checked. If suitable addressability and definedness
are not established, Valgrind prints an error message and returns
the address of the first offending byte. Otherwise it returns
zero. */
#define VALGRIND_CHECK_VALUE_IS_DEFINED(__lvalue) \
VALGRIND_CHECK_MEM_IS_DEFINED( \
(volatile unsigned char *)&(__lvalue), \
(unsigned long)(sizeof (__lvalue)))
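/* Usage sketch: assert-style checks before handing data to hardware or
   to another process; fill_hdr() and struct pkt_hdr are hypothetical.

       struct pkt_hdr hdr;

       fill_hdr(&hdr);
       VALGRIND_CHECK_MEM_IS_DEFINED(&hdr, sizeof(hdr));
       VALGRIND_CHECK_VALUE_IS_DEFINED(hdr.flags);

   Both requests return zero when the check passes, and the address of
   the first offending byte (after printing an error) otherwise. */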
/* Do a full memory leak check (like --leak-check=full) mid-execution. */
#define VALGRIND_DO_LEAK_CHECK \
{unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
VG_USERREQ__DO_LEAK_CHECK, \
0, 0, 0, 0, 0); \
}
/* Do a summary memory leak check (like --leak-check=summary) mid-execution. */
#define VALGRIND_DO_QUICK_LEAK_CHECK \
{unsigned long _qzz_res; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
VG_USERREQ__DO_LEAK_CHECK, \
1, 0, 0, 0, 0); \
}
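/* Usage sketch: a test harness or signal handler can trigger a scan at
   a checkpoint instead of waiting for process exit.

       void checkpoint(void)
       {
           VALGRIND_DO_LEAK_CHECK;
       }
*/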
/* Return number of leaked, dubious, reachable and suppressed bytes found by
all previous leak checks. They must be lvalues. */
#define VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed) \
/* For safety on 64-bit platforms we assign the results to private
unsigned long variables, then assign these to the lvalues the user
specified, which works no matter what type 'leaked', 'dubious', etc
are. We also initialise '_qzz_leaked', etc because
VG_USERREQ__COUNT_LEAKS doesn't mark the values returned as
defined. */ \
{unsigned long _qzz_res; \
unsigned long _qzz_leaked = 0, _qzz_dubious = 0; \
unsigned long _qzz_reachable = 0, _qzz_suppressed = 0; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
VG_USERREQ__COUNT_LEAKS, \
&_qzz_leaked, &_qzz_dubious, \
&_qzz_reachable, &_qzz_suppressed, 0); \
leaked = _qzz_leaked; \
dubious = _qzz_dubious; \
reachable = _qzz_reachable; \
suppressed = _qzz_suppressed; \
}
/* Return number of leaked, dubious, reachable and suppressed bytes found by
all previous leak checks. They must be lvalues. */
#define VALGRIND_COUNT_LEAK_BLOCKS(leaked, dubious, reachable, suppressed) \
/* For safety on 64-bit platforms we assign the results to private
unsigned long variables, then assign these to the lvalues the user
specified, which works no matter what type 'leaked', 'dubious', etc
are. We also initialise '_qzz_leaked', etc because
VG_USERREQ__COUNT_LEAKS doesn't mark the values returned as
defined. */ \
{unsigned long _qzz_res; \
unsigned long _qzz_leaked = 0, _qzz_dubious = 0; \
unsigned long _qzz_reachable = 0, _qzz_suppressed = 0; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
VG_USERREQ__COUNT_LEAK_BLOCKS, \
&_qzz_leaked, &_qzz_dubious, \
&_qzz_reachable, &_qzz_suppressed, 0); \
leaked = _qzz_leaked; \
dubious = _qzz_dubious; \
reachable = _qzz_reachable; \
suppressed = _qzz_suppressed; \
}
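/* Usage sketch: asserting that a code path leaks nothing.  A leak check
   must have run first, since these counters report what previous checks
   found.

       unsigned long leaked, dubious, reachable, suppressed;

       VALGRIND_DO_QUICK_LEAK_CHECK;
       VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
       assert(leaked == 0);

   Note the arguments are plain lvalues that the macro assigns to, not
   pointers. */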
/* Get the validity data for addresses [zza..zza+zznbytes-1] and copy it
into the provided zzvbits array. Return values:
0 if not running on valgrind
1 success
2 [previously indicated unaligned arrays; these are now allowed]
3 if any parts of zza/zzvbits are not addressable.
The metadata is not copied in cases 0, 2 or 3 so it should be
impossible to segfault your system by using this call.
*/
#define VALGRIND_GET_VBITS(zza,zzvbits,zznbytes) \
(__extension__({unsigned long _qzz_res; \
char* czza = (char*)zza; \
char* czzvbits = (char*)zzvbits; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
VG_USERREQ__GET_VBITS, \
czza, czzvbits, zznbytes, 0, 0 ); \
_qzz_res; \
}))
/* Set the validity data for addresses [zza..zza+zznbytes-1], copying it
from the provided zzvbits array. Return values:
0 if not running on valgrind
1 success
2 [previously indicated unaligned arrays; these are now allowed]
3 if any parts of zza/zzvbits are not addressable.
The metadata is not copied in cases 0, 2 or 3 so it should be
impossible to segfault your system by using this call.
*/
#define VALGRIND_SET_VBITS(zza,zzvbits,zznbytes) \
(__extension__({unsigned long _qzz_res; \
char* czza = (char*)zza; \
char* czzvbits = (char*)zzvbits; \
VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
VG_USERREQ__SET_VBITS, \
czza, czzvbits, zznbytes, 0, 0 ); \
_qzz_res; \
}))
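/* Usage sketch: propagating definedness metadata across a copy made
   outside Memcheck's view; src is assumed to point at the client's
   64-byte source region.  One metadata byte covers one data byte, so
   the vbits buffer must be at least as large as the region.

       unsigned char vbits[64];
       unsigned char shadow[64];

       if (VALGRIND_GET_VBITS(src, vbits, 64) == 1) {
           memcpy(shadow, src, 64);
           VALGRIND_SET_VBITS(shadow, vbits, 64);
       }

   A return value of 0 simply means the program is running natively,
   outside Valgrind. */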
#endif /* __MEMCHECK_H */
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/
- struct rte_pci_device *pci_dev = dev->pci_dev; PMD_INIT_LOG(DEBUG, "virtio_dev_close"); @@ -499,7 +556,7 @@ virtio_dev_close(struct rte_eth_dev *dev) virtio_dev_stop(dev); /* reset the NIC */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); vtpci_reset(hw); virtio_dev_free_mbufs(dev); @@ -614,6 +671,7 @@ static const struct eth_dev_ops virtio_eth_dev_ops = { .dev_infos_get = virtio_dev_info_get, .stats_get = virtio_dev_stats_get, .xstats_get = virtio_dev_xstats_get, + .xstats_get_names = virtio_dev_xstats_get_names, .stats_reset = virtio_dev_stats_reset, .xstats_reset = virtio_dev_stats_reset, .link_update = virtio_dev_link_update, @@ -675,83 +733,121 @@ virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats) unsigned i; for (i = 0; i < dev->data->nb_tx_queues; i++) { - const struct virtqueue *txvq = dev->data->tx_queues[i]; + const struct virtnet_tx *txvq = dev->data->tx_queues[i]; if (txvq == NULL) continue; - stats->opackets += txvq->packets; - stats->obytes += txvq->bytes; - stats->oerrors += txvq->errors; + stats->opackets += txvq->stats.packets; + stats->obytes += txvq->stats.bytes; + stats->oerrors += txvq->stats.errors; if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { - stats->q_opackets[i] = txvq->packets; - stats->q_obytes[i] = txvq->bytes; + stats->q_opackets[i] = txvq->stats.packets; + stats->q_obytes[i] = txvq->stats.bytes; } } for (i = 0; i < dev->data->nb_rx_queues; i++) { - const struct virtqueue *rxvq = dev->data->rx_queues[i]; + const struct virtnet_rx *rxvq = dev->data->rx_queues[i]; if (rxvq == NULL) continue; - stats->ipackets += rxvq->packets; - stats->ibytes += rxvq->bytes; - stats->ierrors += rxvq->errors; + stats->ipackets += rxvq->stats.packets; + stats->ibytes += rxvq->stats.bytes; + stats->ierrors += rxvq->stats.errors; if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { - stats->q_ipackets[i] = rxvq->packets; - stats->q_ibytes[i] = rxvq->bytes; + stats->q_ipackets[i] = rxvq->stats.packets; + stats->q_ibytes[i] = rxvq->stats.bytes; } } stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; } +static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + __rte_unused unsigned limit) +{ + unsigned i; + unsigned count = 0; + unsigned t; + + unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + + dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; + + if (xstats_names != NULL) { + /* Note: limit checked in rte_eth_xstats_names() */ + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtqueue *rxvq = dev->data->rx_queues[i]; + if (rxvq == NULL) + continue; + for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "rx_q%u_%s", i, + rte_virtio_rxq_stat_strings[t].name); + count++; + } + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtqueue *txvq = dev->data->tx_queues[i]; + if (txvq == NULL) + continue; + for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "tx_q%u_%s", i, + rte_virtio_txq_stat_strings[t].name); + count++; + } + } + return count; + } + return nstats; +} + static int -virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstats *xstats, +virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, unsigned n) { unsigned i; unsigned count = 0; - unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_Q_XSTATS + - 
dev->data->nb_rx_queues * VIRTIO_NB_Q_XSTATS; + unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + + dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; if (n < nstats) return nstats; for (i = 0; i < dev->data->nb_rx_queues; i++) { - struct virtqueue *rxvq = dev->data->rx_queues[i]; + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; if (rxvq == NULL) continue; unsigned t; - for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) { - snprintf(xstats[count].name, sizeof(xstats[count].name), - "rx_q%u_%s", i, - rte_virtio_q_stat_strings[t].name); + for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { xstats[count].value = *(uint64_t *)(((char *)rxvq) + - rte_virtio_q_stat_strings[t].offset); + rte_virtio_rxq_stat_strings[t].offset); count++; } } for (i = 0; i < dev->data->nb_tx_queues; i++) { - struct virtqueue *txvq = dev->data->tx_queues[i]; + struct virtnet_tx *txvq = dev->data->tx_queues[i]; if (txvq == NULL) continue; unsigned t; - for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) { - snprintf(xstats[count].name, sizeof(xstats[count].name), - "tx_q%u_%s", i, - rte_virtio_q_stat_strings[t].name); + for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { xstats[count].value = *(uint64_t *)(((char *)txvq) + - rte_virtio_q_stat_strings[t].offset); + rte_virtio_txq_stat_strings[t].offset); count++; } } @@ -771,29 +867,31 @@ virtio_dev_stats_reset(struct rte_eth_dev *dev) unsigned int i; for (i = 0; i < dev->data->nb_tx_queues; i++) { - struct virtqueue *txvq = dev->data->tx_queues[i]; + struct virtnet_tx *txvq = dev->data->tx_queues[i]; if (txvq == NULL) continue; - txvq->packets = 0; - txvq->bytes = 0; - txvq->errors = 0; - txvq->multicast = 0; - txvq->broadcast = 0; - memset(txvq->size_bins, 0, sizeof(txvq->size_bins[0]) * 8); + txvq->stats.packets = 0; + txvq->stats.bytes = 0; + txvq->stats.errors = 0; + txvq->stats.multicast = 0; + txvq->stats.broadcast = 0; + memset(txvq->stats.size_bins, 0, + sizeof(txvq->stats.size_bins[0]) * 8); } for (i = 0; i < dev->data->nb_rx_queues; i++) { - struct virtqueue *rxvq = dev->data->rx_queues[i]; + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; if (rxvq == NULL) continue; - rxvq->packets = 0; - rxvq->bytes = 0; - rxvq->errors = 0; - rxvq->multicast = 0; - rxvq->broadcast = 0; - memset(rxvq->size_bins, 0, sizeof(rxvq->size_bins[0]) * 8); + rxvq->stats.packets = 0; + rxvq->stats.bytes = 0; + rxvq->stats.errors = 0; + rxvq->stats.multicast = 0; + rxvq->stats.broadcast = 0; + memset(rxvq->stats.size_bins, 0, + sizeof(rxvq->stats.size_bins[0]) * 8); } } @@ -827,7 +925,7 @@ virtio_mac_table_set(struct virtio_hw *hw, int err, len[2]; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { - PMD_DRV_LOG(INFO, "host does not support mac table\n"); + PMD_DRV_LOG(INFO, "host does not support mac table"); return; } @@ -1027,16 +1125,17 @@ rx_func_get(struct rte_eth_dev *eth_dev) * This function is based on probe() function in virtio_pci.c * It returns 0 on success. 
*/ -static int +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev) { struct virtio_hw *hw = eth_dev->data->dev_private; struct virtio_net_config *config; struct virtio_net_config local_config; struct rte_pci_device *pci_dev; + uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; int ret; - RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr)); + RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); eth_dev->dev_ops = &virtio_eth_dev_ops; eth_dev->tx_pkt_burst = &virtio_xmit_pkts; @@ -1057,9 +1156,11 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) pci_dev = eth_dev->pci_dev; - ret = vtpci_init(pci_dev, hw); - if (ret) - return ret; + if (pci_dev) { + ret = vtpci_init(pci_dev, hw, &dev_flags); + if (ret) + return ret; + } /* Reset the device although not necessary at startup */ vtpci_reset(hw); @@ -1074,9 +1175,10 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) /* If host does not support status then disable LSC */ if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) - pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + dev_flags &= ~RTE_ETH_DEV_INTR_LSC; rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = dev_flags; rx_func_get(eth_dev); @@ -1150,12 +1252,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d", hw->max_rx_queues, hw->max_tx_queues); - PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", + if (pci_dev) + PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); /* Setup interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) rte_intr_callback_register(&pci_dev->intr_handle, virtio_interrupt_handler, eth_dev); @@ -1184,13 +1287,14 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->tx_pkt_burst = NULL; eth_dev->rx_pkt_burst = NULL; - virtio_dev_queue_release(hw->cvq); + if (hw->cvq) + virtio_dev_queue_release(hw->cvq->vq); rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; /* reset interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) rte_intr_callback_unregister(&pci_dev->intr_handle, virtio_interrupt_handler, eth_dev); @@ -1240,7 +1344,6 @@ virtio_dev_configure(struct rte_eth_dev *dev) { const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; PMD_INIT_LOG(DEBUG, "configure"); @@ -1258,7 +1361,7 @@ virtio_dev_configure(struct rte_eth_dev *dev) return -ENOTSUP; } - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) { PMD_DRV_LOG(ERR, "failed to set config vector"); return -EBUSY; @@ -1273,11 +1376,12 @@ virtio_dev_start(struct rte_eth_dev *dev) { uint16_t nb_queues, i; struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; + struct virtnet_rx *rxvq; + struct virtnet_tx *txvq __rte_unused; /* check if lsc interrupt feature is enabled */ if (dev->data->dev_conf.intr_conf.lsc) { - if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) { + if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { PMD_DRV_LOG(ERR, "link status not supported by host"); return -ENOTSUP; } @@ -1313,16 +1417,22 @@ virtio_dev_start(struct rte_eth_dev *dev) 
PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues); - for (i = 0; i < nb_queues; i++) - virtqueue_notify(dev->data->rx_queues[i]); + for (i = 0; i < nb_queues; i++) { + rxvq = dev->data->rx_queues[i]; + virtqueue_notify(rxvq->vq); + } PMD_INIT_LOG(DEBUG, "Notified backend at initialization"); - for (i = 0; i < dev->data->nb_rx_queues; i++) - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxvq = dev->data->rx_queues[i]; + VIRTQUEUE_DUMP(rxvq->vq); + } - for (i = 0; i < dev->data->nb_tx_queues; i++) - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txvq = dev->data->tx_queues[i]; + VIRTQUEUE_DUMP(txvq->vq); + } return 0; } @@ -1333,14 +1443,14 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) int i, mbuf_num = 0; for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + PMD_INIT_LOG(DEBUG, "Before freeing rxq[%d] used and unused buf", i); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + VIRTQUEUE_DUMP(rxvq->vq); - PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", - i, dev->data->rx_queues[i]); - while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused( - dev->data->rx_queues[i])) != NULL) { + PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", i, rxvq); + while ((buf = virtqueue_detatch_unused(rxvq->vq)) != NULL) { rte_pktmbuf_free(buf); mbuf_num++; } @@ -1348,27 +1458,27 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num); PMD_INIT_LOG(DEBUG, "After freeing rxq[%d] used and unused buf", i); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + VIRTQUEUE_DUMP(rxvq->vq); } for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtnet_tx *txvq = dev->data->tx_queues[i]; + PMD_INIT_LOG(DEBUG, "Before freeing txq[%d] used and unused bufs", i); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + VIRTQUEUE_DUMP(txvq->vq); mbuf_num = 0; - while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused( - dev->data->tx_queues[i])) != NULL) { + while ((buf = virtqueue_detatch_unused(txvq->vq)) != NULL) { rte_pktmbuf_free(buf); - mbuf_num++; } PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num); PMD_INIT_LOG(DEBUG, "After freeing txq[%d] used and unused buf", i); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + VIRTQUEUE_DUMP(txvq->vq); } } @@ -1431,7 +1541,10 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct virtio_hw *hw = dev->data->dev_private; - dev_info->driver_name = dev->driver->pci_drv.name; + if (dev->pci_dev) + dev_info->driver_name = dev->driver->pci_drv.name; + else + dev_info->driver_name = "virtio-user PMD"; dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h index 66423a07..2ecec6eb 100644 --- a/drivers/net/virtio/virtio_ethdev.h +++ b/drivers/net/virtio/virtio_ethdev.h @@ -81,7 +81,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, uint16_t nb_desc, unsigned int socket_id, - struct virtqueue **pvq); + void **pvq); void virtio_dev_queue_release(struct virtqueue *vq); @@ -113,6 +113,8 @@ uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +int 
eth_virtio_dev_init(struct rte_eth_dev *eth_dev); + /* * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us * frames larger than 1514 bytes. We do not yet support software LRO diff --git a/drivers/net/virtio/virtio_logs.h b/drivers/net/virtio/virtio_logs.h index d6c33f7b..90a79eaa 100644 --- a/drivers/net/virtio/virtio_logs.h +++ b/drivers/net/virtio/virtio_logs.h @@ -47,14 +47,14 @@ #ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX #define PMD_RX_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args) + RTE_LOG(level, PMD, "%s() rx: " fmt "\n", __func__, ## args) #else #define PMD_RX_LOG(level, fmt, args...) do { } while(0) #endif #ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX #define PMD_TX_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args) + RTE_LOG(level, PMD, "%s() tx: " fmt "\n", __func__, ## args) #else #define PMD_TX_LOG(level, fmt, args...) do { } while(0) #endif @@ -62,7 +62,7 @@ #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER #define PMD_DRV_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args) + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) #else #define PMD_DRV_LOG(level, fmt, args...) do { } while(0) #endif diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index c007959f..f1a7ca7e 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -55,20 +55,103 @@ */ #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, + * and only accepts 32 bit page frame number. + * Check if the allocated physical memory exceeds 16TB. + */ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + +/* + * Since we are in legacy mode: + * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf + * + * "Note that this is possible because while the virtio header is PCI (i.e. + * little) endian, the device-specific region is encoded in the native endian of + * the guest (where such distinction is applicable)." + * + * For powerpc which supports both, qemu supposes that cpu is big endian and + * enforces this for the virtio-net stuff. 
+ */ static void legacy_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) { +#ifdef RTE_ARCH_PPC_64 + int size; + + while (length > 0) { + if (length >= 4) { + size = 4; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst); + } else if (length >= 2) { + size = 2; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst); + } else { + size = 1; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } + + dst = (char *)dst + size; + offset += size; + length -= size; + } +#else rte_eal_pci_ioport_read(&hw->io, dst, length, VIRTIO_PCI_CONFIG(hw) + offset); +#endif } static void legacy_write_dev_config(struct virtio_hw *hw, size_t offset, const void *src, int length) { +#ifdef RTE_ARCH_PPC_64 + union { + uint32_t u32; + uint16_t u16; + } tmp; + int size; + + while (length > 0) { + if (length >= 4) { + size = 4; + tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src); + rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } else if (length >= 2) { + size = 2; + tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src); + rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } else { + size = 1; + rte_eal_pci_ioport_write(&hw->io, src, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } + + src = (const char *)src + size; + offset += size; + length -= size; + } +#else rte_eal_pci_ioport_write(&hw->io, src, length, VIRTIO_PCI_CONFIG(hw) + offset); +#endif } static uint64_t @@ -143,15 +226,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return dst; } -static void +static int legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src; + if (!check_vq_phys_addr_ok(vq)) + return -1; + rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); - src = vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; } static void @@ -179,7 +267,7 @@ legacy_virtio_has_msix(const struct rte_pci_addr *loc) char dirname[PATH_MAX]; snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/msi_irqs", + "%s/" PCI_PRI_FMT "/msi_irqs", pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function); d = opendir(dirname); @@ -199,15 +287,15 @@ legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused) static int legacy_virtio_resource_init(struct rte_pci_device *pci_dev, - struct virtio_hw *hw) + struct virtio_hw *hw, uint32_t *dev_flags) { if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0) return -1; if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN) - pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC; + *dev_flags |= RTE_ETH_DEV_INTR_LSC; else - pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + *dev_flags &= ~RTE_ETH_DEV_INTR_LSC; return 0; } @@ -367,13 +455,16 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return io_read16(&hw->common_cfg->queue_size); } -static void +static int modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t desc_addr, avail_addr, used_addr; uint16_t notify_off; - desc_addr = vq->mz->phys_addr; + if (!check_vq_phys_addr_ok(vq)) + return -1; + + desc_addr = vq->vq_ring_mem; avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); used_addr = 
RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, ring[vq->vq_nentries]), @@ -400,6 +491,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", vq->notify_addr, notify_off); + + return 0; } static void @@ -626,11 +719,13 @@ next: * Return -1: * if there is error mapping with VFIO/UIO. * if port map error when driver type is KDRV_NONE. + * if whitelisted but driver type is KDRV_UNKNOWN. * Return 1 if kernel driver is managing the device. * Return 0 on success. */ int -vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw) +vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, + uint32_t *dev_flags) { hw->dev = dev; @@ -643,14 +738,15 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw) PMD_INIT_LOG(INFO, "modern virtio pci detected."); hw->vtpci_ops = &modern_ops; hw->modern = 1; - dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC; + *dev_flags |= RTE_ETH_DEV_INTR_LSC; return 0; } PMD_INIT_LOG(INFO, "trying with legacy virtio pci."); - if (legacy_virtio_resource_init(dev, hw) < 0) { + if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) { if (dev->kdrv == RTE_KDRV_UNKNOWN && - dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) { + (!dev->devargs || + dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) { PMD_INIT_LOG(INFO, "skip kernel managed virtio device."); return 1; diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index b69785ea..dd7693fe 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -40,6 +40,7 @@ #include <rte_ethdev.h> struct virtqueue; +struct virtnet_ctl; /* VirtIO PCI vendor/device ID. */ #define VIRTIO_PCI_VENDORID 0x1AF4 @@ -234,7 +235,7 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); - void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); + int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); }; @@ -242,7 +243,7 @@ struct virtio_pci_ops { struct virtio_net_config; struct virtio_hw { - struct virtqueue *cvq; + struct virtnet_ctl *cvq; struct rte_pci_ioport io; uint64_t guest_features; uint32_t max_tx_queues; @@ -260,6 +261,7 @@ struct virtio_hw { struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; const struct virtio_pci_ops *vtpci_ops; + void *virtio_user_dev; }; /* @@ -293,7 +295,8 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit) /* * Function declaration from virtio_pci.c */ -int vtpci_init(struct rte_pci_device *, struct virtio_hw *); +int vtpci_init(struct rte_pci_device *, struct virtio_hw *, + uint32_t *dev_flags); void vtpci_reset(struct virtio_hw *); void vtpci_reinit_complete(struct virtio_hw *); diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h index 447760a8..fcecc161 100644 --- a/drivers/net/virtio/virtio_ring.h +++ b/drivers/net/virtio/virtio_ring.h @@ -79,7 +79,7 @@ struct vring_used_elem { struct vring_used { uint16_t flags; - uint16_t idx; + volatile uint16_t idx; struct vring_used_elem ring[0]; }; diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index ef21d8e3..a27208e3 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -193,8 +193,7 @@ 
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) start_dp = vq->vq_ring.desc; start_dp[idx].addr = - (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - - hw->vtnet_hdr_size); + MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size; start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_WRITE; @@ -209,23 +208,24 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) } static inline void -virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, +virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, uint16_t needed, int use_indirect, int can_push) { struct vq_desc_extra *dxp; + struct virtqueue *vq = txvq->vq; struct vring_desc *start_dp; uint16_t seg_num = cookie->nb_segs; uint16_t head_idx, idx; - uint16_t head_size = txvq->hw->vtnet_hdr_size; + uint16_t head_size = vq->hw->vtnet_hdr_size; unsigned long offs; - head_idx = txvq->vq_desc_head_idx; + head_idx = vq->vq_desc_head_idx; idx = head_idx; - dxp = &txvq->vq_descx[idx]; + dxp = &vq->vq_descx[idx]; dxp->cookie = (void *)cookie; dxp->ndescs = needed; - start_dp = txvq->vq_ring.desc; + start_dp = vq->vq_ring.desc; if (can_push) { /* put on zero'd transmit header (no offloads) */ @@ -259,46 +259,32 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, + offsetof(struct virtio_tx_region, tx_hdr); start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; - start_dp[idx].len = txvq->hw->vtnet_hdr_size; + start_dp[idx].len = vq->hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_NEXT; idx = start_dp[idx].next; } do { - start_dp[idx].addr = rte_mbuf_data_dma_addr(cookie); + start_dp[idx].addr = MBUF_DATA_DMA_ADDR(cookie, vq->offset); start_dp[idx].len = cookie->data_len; start_dp[idx].flags = cookie->next ? 
VRING_DESC_F_NEXT : 0; idx = start_dp[idx].next; } while ((cookie = cookie->next) != NULL); - start_dp[idx].flags &= ~VRING_DESC_F_NEXT; - if (use_indirect) - idx = txvq->vq_ring.desc[head_idx].next; - - txvq->vq_desc_head_idx = idx; - if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) - txvq->vq_desc_tail_idx = idx; - txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed); - vq_update_avail_ring(txvq, head_idx); -} + idx = vq->vq_ring.desc[head_idx].next; -static inline struct rte_mbuf * -rte_rxmbuf_alloc(struct rte_mempool *mp) -{ - struct rte_mbuf *m; - - m = __rte_mbuf_raw_alloc(mp); - __rte_mbuf_sanity_check_raw(m, 0); - - return m; + vq->vq_desc_head_idx = idx; + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) + vq->vq_desc_tail_idx = idx; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); + vq_update_avail_ring(vq, head_idx); } static void -virtio_dev_vring_start(struct virtqueue *vq, int queue_type) +virtio_dev_vring_start(struct virtqueue *vq) { - struct rte_mbuf *m; - int i, nbufs, error, size = vq->vq_nentries; + int size = vq->vq_nentries; struct vring *vr = &vq->vq_ring; uint8_t *ring_mem = vq->vq_ring_virt_mem; @@ -322,30 +308,70 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type) * Disable device(host) interrupting guest */ virtqueue_disable_intr(vq); +} + +void +virtio_dev_cq_start(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (hw->cvq && hw->cvq->vq) { + virtio_dev_vring_start(hw->cvq->vq); + VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq); + } +} - /* Only rx virtqueue needs mbufs to be allocated at initialization */ - if (queue_type == VTNET_RQ) { - if (vq->mpool == NULL) +void +virtio_dev_rxtx_start(struct rte_eth_dev *dev) +{ + /* + * Start receive and transmit vrings + * - Setup vring structure for all queues + * - Initialize descriptor for the rx vring + * - Allocate blank mbufs for the each rx descriptor + * + */ + uint16_t i; + uint16_t desc_idx; + + PMD_INIT_FUNC_TRACE(); + + /* Start rx vring. 
*/ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + struct virtqueue *vq = rxvq->vq; + int error, nbufs; + struct rte_mbuf *m; + + virtio_dev_vring_start(vq); + if (rxvq->mpool == NULL) { rte_exit(EXIT_FAILURE, - "Cannot allocate initial mbufs for rx virtqueue"); + "Cannot allocate mbufs for rx virtqueue"); + } /* Allocate blank mbufs for the each rx descriptor */ nbufs = 0; error = ENOSPC; #ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) - for (i = 0; i < vq->vq_nentries; i++) { - vq->vq_ring.avail->ring[i] = i; - vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; + if (use_simple_rxtx) { + for (desc_idx = 0; desc_idx < vq->vq_nentries; + desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = desc_idx; + vq->vq_ring.desc[desc_idx].flags = + VRING_DESC_F_WRITE; } + } #endif - memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf)); - for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++) - vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf; + memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); + for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; + desc_idx++) { + vq->sw_ring[vq->vq_nentries + desc_idx] = + &rxvq->fake_mbuf; + } while (!virtqueue_full(vq)) { - m = rte_rxmbuf_alloc(vq->mpool); + m = rte_mbuf_raw_alloc(rxvq->mpool); if (m == NULL) break; @@ -368,64 +394,40 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type) vq_update_avail_idx(vq); PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); - } else if (queue_type == VTNET_TQ) { + + VIRTQUEUE_DUMP(vq); + } + + /* Start tx vring. */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtnet_tx *txvq = dev->data->tx_queues[i]; + struct virtqueue *vq = txvq->vq; + + virtio_dev_vring_start(vq); #ifdef RTE_MACHINE_CPUFLAG_SSSE3 if (use_simple_rxtx) { - int mid_idx = vq->vq_nentries >> 1; - for (i = 0; i < mid_idx; i++) { - vq->vq_ring.avail->ring[i] = i + mid_idx; - vq->vq_ring.desc[i + mid_idx].next = i; - vq->vq_ring.desc[i + mid_idx].addr = - vq->virtio_net_hdr_mem + + uint16_t mid_idx = vq->vq_nentries >> 1; + + for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = + desc_idx + mid_idx; + vq->vq_ring.desc[desc_idx + mid_idx].next = + desc_idx; + vq->vq_ring.desc[desc_idx + mid_idx].addr = + txvq->virtio_net_hdr_mem + offsetof(struct virtio_tx_region, tx_hdr); - vq->vq_ring.desc[i + mid_idx].len = + vq->vq_ring.desc[desc_idx + mid_idx].len = vq->hw->vtnet_hdr_size; - vq->vq_ring.desc[i + mid_idx].flags = + vq->vq_ring.desc[desc_idx + mid_idx].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].flags = 0; + vq->vq_ring.desc[desc_idx].flags = 0; } - for (i = mid_idx; i < vq->vq_nentries; i++) - vq->vq_ring.avail->ring[i] = i; + for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; + desc_idx++) + vq->vq_ring.avail->ring[desc_idx] = desc_idx; } #endif - } -} - -void -virtio_dev_cq_start(struct rte_eth_dev *dev) -{ - struct virtio_hw *hw = dev->data->dev_private; - - if (hw->cvq) { - virtio_dev_vring_start(hw->cvq, VTNET_CQ); - VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq); - } -} - -void -virtio_dev_rxtx_start(struct rte_eth_dev *dev) -{ - /* - * Start receive and transmit vrings - * - Setup vring structure for all queues - * - Initialize descriptor for the rx vring - * - Allocate blank mbufs for the each rx descriptor - * - */ - int i; - - PMD_INIT_FUNC_TRACE(); - - /* Start rx vring. 
*/ - for (i = 0; i < dev->data->nb_rx_queues; i++) { - virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); - } - - /* Start tx vring. */ - for (i = 0; i < dev->data->nb_tx_queues; i++) { - virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + VIRTQUEUE_DUMP(vq); } } @@ -438,24 +440,24 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, struct rte_mempool *mp) { uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; - struct virtqueue *vq; + struct virtnet_rx *rxvq; int ret; PMD_INIT_FUNC_TRACE(); ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx, - nb_desc, socket_id, &vq); + nb_desc, socket_id, (void **)&rxvq); if (ret < 0) { PMD_INIT_LOG(ERR, "rvq initialization failed"); return ret; } /* Create mempool for rx mbuf allocation */ - vq->mpool = mp; + rxvq->mpool = mp; - dev->data->rx_queues[queue_idx] = vq; + dev->data->rx_queues[queue_idx] = rxvq; #ifdef RTE_MACHINE_CPUFLAG_SSSE3 - virtio_rxq_vec_setup(vq); + virtio_rxq_vec_setup(rxvq); #endif return 0; @@ -464,7 +466,16 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, void virtio_dev_rx_queue_release(void *rxq) { - virtio_dev_queue_release(rxq); + struct virtnet_rx *rxvq = rxq; + struct virtqueue *vq = rxvq->vq; + /* rxvq is freed when vq is freed, and as mz should be freed after the + * del_queue, so we reserve the mz pointer first. + */ + const struct rte_memzone *mz = rxvq->mz; + + /* no need to free rxq as vq and rxq are allocated together */ + virtio_dev_queue_release(vq); + rte_memzone_free(mz); } /* @@ -486,6 +497,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, #ifdef RTE_MACHINE_CPUFLAG_SSSE3 struct virtio_hw *hw = dev->data->dev_private; #endif + struct virtnet_tx *txvq; struct virtqueue *vq; uint16_t tx_free_thresh; int ret; @@ -510,11 +522,12 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, #endif ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx, - nb_desc, socket_id, &vq); + nb_desc, socket_id, (void **)&txvq); if (ret < 0) { - PMD_INIT_LOG(ERR, "rvq initialization failed"); + PMD_INIT_LOG(ERR, "tvq initialization failed"); return ret; } + vq = txvq->vq; tx_free_thresh = tx_conf->tx_free_thresh; if (tx_free_thresh == 0) @@ -532,14 +545,24 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, vq->vq_free_thresh = tx_free_thresh; - dev->data->tx_queues[queue_idx] = vq; + dev->data->tx_queues[queue_idx] = txvq; return 0; } void virtio_dev_tx_queue_release(void *txq) { - virtio_dev_queue_release(txq); + struct virtnet_tx *txvq = txq; + struct virtqueue *vq = txvq->vq; + /* txvq is freed when vq is freed, and as mz should be freed after the + * del_queue, so we reserve the mz pointer first. 
+ */ + const struct rte_memzone *hdr_mz = txvq->virtio_net_hdr_mz; + const struct rte_memzone *mz = txvq->mz; + + virtio_dev_queue_release(vq); + rte_memzone_free(mz); + rte_memzone_free(hdr_mz); } static void @@ -558,34 +581,34 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) } static void -virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf) +virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) { uint32_t s = mbuf->pkt_len; struct ether_addr *ea; if (s == 64) { - vq->size_bins[1]++; + stats->size_bins[1]++; } else if (s > 64 && s < 1024) { uint32_t bin; /* count zeros, and offset into correct bin */ bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; - vq->size_bins[bin]++; + stats->size_bins[bin]++; } else { if (s < 64) - vq->size_bins[0]++; + stats->size_bins[0]++; else if (s < 1519) - vq->size_bins[6]++; + stats->size_bins[6]++; else if (s >= 1519) - vq->size_bins[7]++; + stats->size_bins[7]++; } ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *); if (is_multicast_ether_addr(ea)) { if (is_broadcast_ether_addr(ea)) - vq->broadcast++; + stats->broadcast++; else - vq->multicast++; + stats->multicast++; } } @@ -594,7 +617,8 @@ virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf) uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { - struct virtqueue *rxvq = rx_queue; + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; struct virtio_hw *hw; struct rte_mbuf *rxm, *new_mbuf; uint16_t nb_used, num, nb_rx; @@ -604,19 +628,19 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) uint32_t i, nb_enqueued; uint32_t hdr_size; - nb_used = VIRTQUEUE_NUSED(rxvq); + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? 
num : VIRTIO_MBUF_BURST_SZ); if (likely(num > DESC_PER_CACHELINE)) - num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); + num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); - num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num); + num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num); PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num); - hw = rxvq->hw; + hw = vq->hw; nb_rx = 0; nb_enqueued = 0; hdr_size = hw->vtnet_hdr_size; @@ -629,8 +653,8 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) { PMD_RX_LOG(ERR, "Packet drop"); nb_enqueued++; - virtio_discard_rxbuf(rxvq, rxm); - rxvq->errors++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; continue; } @@ -651,23 +675,23 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rx_pkts[nb_rx++] = rxm; - rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len; - virtio_update_packet_stats(rxvq, rxm); + rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len; + virtio_update_packet_stats(&rxvq->stats, rxm); } - rxvq->packets += nb_rx; + rxvq->stats.packets += nb_rx; /* Allocate new mbuf for the used descriptor */ error = ENOSPC; - while (likely(!virtqueue_full(rxvq))) { - new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); + while (likely(!virtqueue_full(vq))) { + new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); if (unlikely(new_mbuf == NULL)) { struct rte_eth_dev *dev = &rte_eth_devices[rxvq->port_id]; dev->data->rx_mbuf_alloc_failed++; break; } - error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf); + error = virtqueue_enqueue_recv_refill(vq, new_mbuf); if (unlikely(error)) { rte_pktmbuf_free(new_mbuf); break; @@ -676,11 +700,11 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) } if (likely(nb_enqueued)) { - vq_update_avail_idx(rxvq); + vq_update_avail_idx(vq); - if (unlikely(virtqueue_kick_prepare(rxvq))) { - virtqueue_notify(rxvq); - PMD_RX_LOG(DEBUG, "Notified\n"); + if (unlikely(virtqueue_kick_prepare(vq))) { + virtqueue_notify(vq); + PMD_RX_LOG(DEBUG, "Notified"); } } @@ -692,7 +716,8 @@ virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { - struct virtqueue *rxvq = rx_queue; + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; struct virtio_hw *hw; struct rte_mbuf *rxm, *new_mbuf; uint16_t nb_used, num, nb_rx; @@ -706,13 +731,13 @@ virtio_recv_mergeable_pkts(void *rx_queue, uint32_t seg_res; uint32_t hdr_size; - nb_used = VIRTQUEUE_NUSED(rxvq); + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); - PMD_RX_LOG(DEBUG, "used:%d\n", nb_used); + PMD_RX_LOG(DEBUG, "used:%d", nb_used); - hw = rxvq->hw; + hw = vq->hw; nb_rx = 0; i = 0; nb_enqueued = 0; @@ -727,22 +752,22 @@ virtio_recv_mergeable_pkts(void *rx_queue, if (nb_rx == nb_pkts) break; - num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1); + num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1); if (num != 1) continue; i++; - PMD_RX_LOG(DEBUG, "dequeue:%d\n", num); - PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]); + PMD_RX_LOG(DEBUG, "dequeue:%d", num); + PMD_RX_LOG(DEBUG, "packet len:%d", len[0]); rxm = rcv_pkts[0]; if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) { - PMD_RX_LOG(ERR, "Packet drop\n"); + PMD_RX_LOG(ERR, "Packet drop"); nb_enqueued++; - virtio_discard_rxbuf(rxvq, rxm); - rxvq->errors++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; continue; } @@ -773,18 +798,18 @@ virtio_recv_mergeable_pkts(void *rx_queue, */ uint16_t rcv_cnt = RTE_MIN(seg_res, 
RTE_DIM(rcv_pkts)); - if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) { + if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) { uint32_t rx_num = - virtqueue_dequeue_burst_rx(rxvq, + virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, rcv_cnt); i += rx_num; rcv_cnt = rx_num; } else { PMD_RX_LOG(ERR, - "No enough segments for packet.\n"); + "No enough segments for packet."); nb_enqueued++; - virtio_discard_rxbuf(rxvq, rxm); - rxvq->errors++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; break; } @@ -814,24 +839,24 @@ virtio_recv_mergeable_pkts(void *rx_queue, VIRTIO_DUMP_PACKET(rx_pkts[nb_rx], rx_pkts[nb_rx]->data_len); - rxvq->bytes += rx_pkts[nb_rx]->pkt_len; - virtio_update_packet_stats(rxvq, rx_pkts[nb_rx]); + rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len; + virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]); nb_rx++; } - rxvq->packets += nb_rx; + rxvq->stats.packets += nb_rx; /* Allocate new mbuf for the used descriptor */ error = ENOSPC; - while (likely(!virtqueue_full(rxvq))) { - new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); + while (likely(!virtqueue_full(vq))) { + new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); if (unlikely(new_mbuf == NULL)) { struct rte_eth_dev *dev = &rte_eth_devices[rxvq->port_id]; dev->data->rx_mbuf_alloc_failed++; break; } - error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf); + error = virtqueue_enqueue_recv_refill(vq, new_mbuf); if (unlikely(error)) { rte_pktmbuf_free(new_mbuf); break; @@ -840,10 +865,10 @@ virtio_recv_mergeable_pkts(void *rx_queue, } if (likely(nb_enqueued)) { - vq_update_avail_idx(rxvq); + vq_update_avail_idx(vq); - if (unlikely(virtqueue_kick_prepare(rxvq))) { - virtqueue_notify(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) { + virtqueue_notify(vq); PMD_RX_LOG(DEBUG, "Notified"); } } @@ -854,8 +879,9 @@ virtio_recv_mergeable_pkts(void *rx_queue, uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { - struct virtqueue *txvq = tx_queue; - struct virtio_hw *hw = txvq->hw; + struct virtnet_tx *txvq = tx_queue; + struct virtqueue *vq = txvq->vq; + struct virtio_hw *hw = vq->hw; uint16_t hdr_size = hw->vtnet_hdr_size; uint16_t nb_used, nb_tx; int error; @@ -864,11 +890,11 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) return nb_pkts; PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); - nb_used = VIRTQUEUE_NUSED(txvq); + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); - if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh)) - virtio_xmit_cleanup(txvq, nb_used); + if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh)) + virtio_xmit_cleanup(vq, nb_used); for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { struct rte_mbuf *txm = tx_pkts[nb_tx]; @@ -886,6 +912,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) /* optimize ring usage */ if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) && rte_mbuf_refcnt_read(txm) == 1 && + RTE_MBUF_DIRECT(txm) && txm->nb_segs == 1 && rte_pktmbuf_headroom(txm) >= hdr_size && rte_is_aligned(rte_pktmbuf_mtod(txm, char *), @@ -901,16 +928,16 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) * default => number of segments + 1 */ slots = use_indirect ? 
							1 : (txm->nb_segs + !can_push);
-		need = slots - txvq->vq_free_cnt;
+		need = slots - vq->vq_free_cnt;
 
 		/* Positive value indicates it need free vring descriptors */
 		if (unlikely(need > 0)) {
-			nb_used = VIRTQUEUE_NUSED(txvq);
+			nb_used = VIRTQUEUE_NUSED(vq);
 			virtio_rmb();
 			need = RTE_MIN(need, (int)nb_used);
 
-			virtio_xmit_cleanup(txvq, need);
-			need = slots - txvq->vq_free_cnt;
+			virtio_xmit_cleanup(vq, need);
+			need = slots - vq->vq_free_cnt;
 			if (unlikely(need > 0)) {
 				PMD_TX_LOG(ERR,
 					   "No free tx descriptors to transmit");
@@ -921,17 +948,17 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		/* Enqueue Packet buffers */
 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
 
-		txvq->bytes += txm->pkt_len;
-		virtio_update_packet_stats(txvq, txm);
+		txvq->stats.bytes += txm->pkt_len;
+		virtio_update_packet_stats(&txvq->stats, txm);
 	}
 
-	txvq->packets += nb_tx;
+	txvq->stats.packets += nb_tx;
 
 	if (likely(nb_tx)) {
-		vq_update_avail_idx(txvq);
+		vq_update_avail_idx(vq);
 
-		if (unlikely(virtqueue_kick_prepare(txvq))) {
-			virtqueue_notify(txvq);
+		if (unlikely(virtqueue_kick_prepare(vq))) {
+			virtqueue_notify(vq);
 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
 		}
 	}
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index a76c3e52..058b56a1 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -31,11 +31,65 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef _VIRTIO_RXTX_H_
+#define _VIRTIO_RXTX_H_
+
 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64
 
+struct virtnet_stats {
+	uint64_t	packets;
+	uint64_t	bytes;
+	uint64_t	errors;
+	uint64_t	multicast;
+	uint64_t	broadcast;
+	/* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
+	uint64_t	size_bins[8];
+};
+
+struct virtnet_rx {
+	struct virtqueue *vq;
+	/* dummy mbuf, for wraparound when processing RX ring. */
+	struct rte_mbuf fake_mbuf;
+	uint64_t mbuf_initializer; /**< value to init mbufs. */
+	struct rte_mempool *mpool; /**< mempool for mbuf allocation */
+
+	uint16_t queue_id;   /**< DPDK queue index. */
+	uint8_t port_id;     /**< Device port identifier. */
+
+	/* Statistics */
+	struct virtnet_stats stats;
+
+	const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
+};
+
+struct virtnet_tx {
+	struct virtqueue *vq;
+	/**< memzone to populate hdr. */
+	const struct rte_memzone *virtio_net_hdr_mz;
+	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+
+	uint16_t queue_id;   /**< DPDK queue index. */
+	uint8_t port_id;     /**< Device port identifier. */
+
+	/* Statistics */
+	struct virtnet_stats stats;
+
+	const struct rte_memzone *mz; /**< mem zone to populate TX ring. */
+};
+
+struct virtnet_ctl {
+	struct virtqueue *vq;
+	/**< memzone to populate hdr. */
+	const struct rte_memzone *virtio_net_hdr_mz;
+	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+	uint8_t port_id;              /**< Device port identifier. */
+	const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
+};
+
 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
-int virtio_rxq_vec_setup(struct virtqueue *rxq);
+int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
 
 int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
 	struct rte_mbuf *m);
 #endif
+#endif /* _VIRTIO_RXTX_H_ */
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 8f5293dd..242ad90d 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -80,8 +80,8 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
 	vq->sw_ring[desc_idx] = cookie;
 
 	start_dp = vq->vq_ring.desc;
-	start_dp[desc_idx].addr = (uint64_t)((uintptr_t)cookie->buf_physaddr +
-		RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size);
+	start_dp[desc_idx].addr = MBUF_DATA_DMA_ADDR(cookie, vq->offset) -
+		vq->hw->vtnet_hdr_size;
 	start_dp[desc_idx].len = cookie->buf_len -
 		RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
 
@@ -92,17 +92,18 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
 }
 
 static inline void
-virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
 {
 	int i;
 	uint16_t desc_idx;
 	struct rte_mbuf **sw_ring;
 	struct vring_desc *start_dp;
 	int ret;
+	struct virtqueue *vq = rxvq->vq;
 
-	desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
-	sw_ring = &rxvq->sw_ring[desc_idx];
-	start_dp = &rxvq->vq_ring.desc[desc_idx];
+	desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+	sw_ring = &vq->sw_ring[desc_idx];
+	start_dp = &vq->vq_ring.desc[desc_idx];
 
 	ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
 		RTE_VIRTIO_VPMD_RX_REARM_THRESH);
@@ -119,15 +120,15 @@ virtio_rxq_rearm_vec(struct virtqueue *rxvq)
 		*(uint64_t *)p = rxvq->mbuf_initializer;
 
 		start_dp[i].addr =
-			(uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
-			RTE_PKTMBUF_HEADROOM - rxvq->hw->vtnet_hdr_size);
+			MBUF_DATA_DMA_ADDR(sw_ring[i], vq->offset) -
+			vq->hw->vtnet_hdr_size;
 		start_dp[i].len = sw_ring[i]->buf_len -
-			RTE_PKTMBUF_HEADROOM + rxvq->hw->vtnet_hdr_size;
+			RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
 	}
 
-	rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
-	rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
-	vq_update_avail_idx(rxvq);
+	vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+	vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+	vq_update_avail_idx(vq);
 }
 
 /* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
@@ -143,7 +144,8 @@ uint16_t
 virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint16_t nb_pkts)
 {
-	struct virtqueue *rxvq = rx_queue;
+	struct virtnet_rx *rxvq = rx_queue;
+	struct virtqueue *vq = rxvq->vq;
 	uint16_t nb_used;
 	uint16_t desc_idx;
 	struct vring_used_elem *rused;
@@ -175,15 +177,14 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	len_adjust = _mm_set_epi16(
 		0, 0,
 		0,
-		(uint16_t)-rxvq->hw->vtnet_hdr_size,
-		0, (uint16_t)-rxvq->hw->vtnet_hdr_size,
+		(uint16_t)-vq->hw->vtnet_hdr_size,
+		0, (uint16_t)-vq->hw->vtnet_hdr_size,
 		0, 0);
 
 	if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
 		return 0;
 
-	nb_used = *(volatile uint16_t *)&rxvq->vq_ring.used->idx -
-		rxvq->vq_used_cons_idx;
+	nb_used = VIRTQUEUE_NUSED(vq);
 
 	rte_compiler_barrier();
 
@@ -193,17 +194,17 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
 	nb_used = RTE_MIN(nb_used, nb_pkts);
 
-	desc_idx = (uint16_t)(rxvq->vq_used_cons_idx & (rxvq->vq_nentries - 1));
-	rused = &rxvq->vq_ring.used->ring[desc_idx];
-	sw_ring = &rxvq->sw_ring[desc_idx];
-
sw_ring_end = &rxvq->sw_ring[rxvq->vq_nentries]; + desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + rused = &vq->vq_ring.used->ring[desc_idx]; + sw_ring = &vq->sw_ring[desc_idx]; + sw_ring_end = &vq->sw_ring[vq->vq_nentries]; _mm_prefetch((const void *)rused, _MM_HINT_T0); - if (rxvq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { + if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { virtio_rxq_rearm_vec(rxvq); - if (unlikely(virtqueue_kick_prepare(rxvq))) - virtqueue_notify(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); } for (nb_pkts_received = 0; @@ -286,9 +287,9 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, } } - rxvq->vq_used_cons_idx += nb_pkts_received; - rxvq->vq_free_cnt += nb_pkts_received; - rxvq->packets += nb_pkts_received; + vq->vq_used_cons_idx += nb_pkts_received; + vq->vq_free_cnt += nb_pkts_received; + rxvq->stats.packets += nb_pkts_received; return nb_pkts_received; } @@ -342,31 +343,32 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { - struct virtqueue *txvq = tx_queue; + struct virtnet_tx *txvq = tx_queue; + struct virtqueue *vq = txvq->vq; uint16_t nb_used; uint16_t desc_idx; struct vring_desc *start_dp; uint16_t nb_tail, nb_commit; int i; - uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1; + uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1; - nb_used = VIRTQUEUE_NUSED(txvq); + nb_used = VIRTQUEUE_NUSED(vq); rte_compiler_barrier(); if (nb_used >= VIRTIO_TX_FREE_THRESH) - virtio_xmit_cleanup(tx_queue); + virtio_xmit_cleanup(vq); - nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts); - desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max); - start_dp = txvq->vq_ring.desc; + nb_commit = nb_pkts = RTE_MIN((vq->vq_free_cnt >> 1), nb_pkts); + desc_idx = (uint16_t)(vq->vq_avail_idx & desc_idx_max); + start_dp = vq->vq_ring.desc; nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx); if (nb_commit >= nb_tail) { for (i = 0; i < nb_tail; i++) - txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; + vq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; for (i = 0; i < nb_tail; i++) { start_dp[desc_idx].addr = - rte_mbuf_data_dma_addr(*tx_pkts); + MBUF_DATA_DMA_ADDR(*tx_pkts, vq->offset); start_dp[desc_idx].len = (*tx_pkts)->pkt_len; tx_pkts++; desc_idx++; @@ -375,9 +377,10 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, desc_idx = 0; } for (i = 0; i < nb_commit; i++) - txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; + vq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; for (i = 0; i < nb_commit; i++) { - start_dp[desc_idx].addr = rte_mbuf_data_dma_addr(*tx_pkts); + start_dp[desc_idx].addr = + MBUF_DATA_DMA_ADDR(*tx_pkts, vq->offset); start_dp[desc_idx].len = (*tx_pkts)->pkt_len; tx_pkts++; desc_idx++; @@ -385,21 +388,21 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, rte_compiler_barrier(); - txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1); - txvq->vq_avail_idx += nb_pkts; - txvq->vq_ring.avail->idx = txvq->vq_avail_idx; - txvq->packets += nb_pkts; + vq->vq_free_cnt -= (uint16_t)(nb_pkts << 1); + vq->vq_avail_idx += nb_pkts; + vq->vq_ring.avail->idx = vq->vq_avail_idx; + txvq->stats.packets += nb_pkts; if (likely(nb_pkts)) { - if (unlikely(virtqueue_kick_prepare(txvq))) - virtqueue_notify(txvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); } return nb_pkts; } int __attribute__((cold)) -virtio_rxq_vec_setup(struct virtqueue *rxq) +virtio_rxq_vec_setup(struct virtnet_rx *rxq) { 
uintptr_t p; struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h new file mode 100644 index 00000000..7adb55f5 --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -0,0 +1,146 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include <stdint.h> +#include <linux/types.h> +#include <linux/ioctl.h> + +#include "../virtio_pci.h" +#include "../virtio_logs.h" +#include "../virtqueue.h" + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + uint64_t desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + uint64_t used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + uint64_t avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. 
+ */ + uint64_t log_guest_addr; +}; + +enum vhost_user_request { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_MAX +}; + +struct vhost_memory_region { + uint64_t guest_phys_addr; + uint64_t memory_size; /* bytes */ + uint64_t userspace_addr; + uint64_t mmap_offset; +}; + +struct vhost_memory { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory memory; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_USER_MQ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) + +int vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg); +int vhost_user_setup(const char *path); +int vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable); + +#endif diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c new file mode 100644 index 00000000..a2b0687f --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -0,0 +1,426 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/un.h> +#include <string.h> +#include <errno.h> + +#include "vhost.h" + +static int +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) +{ + int r; + struct msghdr msgh; + struct iovec iov; + size_t fd_size = fd_num * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + iov.iov_base = (uint8_t *)buf; + iov.iov_len = len; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fd_size); + + do { + r = sendmsg(fd, &msgh, 0); + } while (r < 0 && errno == EINTR); + + return r; +} + +static int +vhost_user_read(int fd, struct vhost_user_msg *msg) +{ + uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; + int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; + + ret = recv(fd, (void *)msg, sz_hdr, 0); + if (ret < sz_hdr) { + PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.", + ret, sz_hdr); + goto fail; + } + + /* validate msg flags */ + if (msg->flags != (valid_flags)) { + PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.", + msg->flags, valid_flags); + goto fail; + } + + sz_payload = msg->size; + if (sz_payload) { + ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); + if (ret < sz_payload) { + PMD_DRV_LOG(ERR, + "Failed to recv msg payload: %d instead of %d.", + ret, msg->size); + goto fail; + } + } + + return 0; + +fail: + return -1; +} + +struct hugepage_file_info { + uint64_t addr; /**< virtual addr */ + size_t size; /**< the file size */ + char path[PATH_MAX]; /**< path to backing file */ +}; + +/* Two possible options: + * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file + * array. This is simple but cannot be used in secondary process because + * secondary process will close and munmap that file. + * 2. Match HUGEFILE_FMT to find hugepage files directly. + * + * We choose option 2. 
+ */ +static int +get_hugepage_file_info(struct hugepage_file_info huges[], int max) +{ + int idx; + FILE *f; + char buf[BUFSIZ], *tmp, *tail; + char *str_underline, *str_start; + int huge_index; + uint64_t v_start, v_end; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + PMD_DRV_LOG(ERR, "cannot open /proc/self/maps"); + return -1; + } + + idx = 0; + while (fgets(buf, sizeof(buf), f) != NULL) { + if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { + PMD_DRV_LOG(ERR, "Failed to parse address"); + goto error; + } + + tmp = strchr(buf, ' ') + 1; /** skip address */ + tmp = strchr(tmp, ' ') + 1; /** skip perm */ + tmp = strchr(tmp, ' ') + 1; /** skip offset */ + tmp = strchr(tmp, ' ') + 1; /** skip dev */ + tmp = strchr(tmp, ' ') + 1; /** skip inode */ + while (*tmp == ' ') /** skip spaces */ + tmp++; + tail = strrchr(tmp, '\n'); /** remove newline if exists */ + if (tail) + *tail = '\0'; + + /* Match HUGEFILE_FMT, aka "%s/%smap_%d", + * which is defined in eal_filesystem.h + */ + str_underline = strrchr(tmp, '_'); + if (!str_underline) + continue; + + str_start = str_underline - strlen("map"); + if (str_start < tmp) + continue; + + if (sscanf(str_start, "map_%d", &huge_index) != 1) + continue; + + if (idx >= max) { + PMD_DRV_LOG(ERR, "Exceed maximum of %d", max); + goto error; + } + huges[idx].addr = v_start; + huges[idx].size = v_end - v_start; + strcpy(huges[idx].path, tmp); + idx++; + } + + fclose(f); + return idx; + +error: + fclose(f); + return -1; +} + +static int +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) +{ + int i, num; + struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS]; + struct vhost_memory_region *mr; + + num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS); + if (num < 0) { + PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user"); + return -1; + } + + for (i = 0; i < num; ++i) { + mr = &msg->payload.memory.regions[i]; + mr->guest_phys_addr = huges[i].addr; /* use vaddr! 
*/ + mr->userspace_addr = huges[i].addr; + mr->memory_size = huges[i].size; + mr->mmap_offset = 0; + fds[i] = open(huges[i].path, O_RDWR); + } + + msg->payload.memory.nregions = num; + msg->payload.memory.padding = 0; + + return 0; +} + +static struct vhost_user_msg m; + +static const char * const vhost_msg_strings[] = { + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + NULL, +}; + +int +vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg) +{ + struct vhost_user_msg msg; + struct vhost_vring_file *file = 0; + int need_reply = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fd_num = 0; + int i, len; + + RTE_SET_USED(m); + RTE_SET_USED(vhost_msg_strings); + + PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); + + msg.request = req; + msg.flags = VHOST_USER_VERSION; + msg.size = 0; + + switch (req) { + case VHOST_USER_GET_FEATURES: + need_reply = 1; + break; + + case VHOST_USER_SET_FEATURES: + case VHOST_USER_SET_LOG_BASE: + msg.payload.u64 = *((__u64 *)arg); + msg.size = sizeof(m.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + break; + + case VHOST_USER_SET_MEM_TABLE: + if (prepare_vhost_memory_user(&msg, fds) < 0) + return -1; + fd_num = msg.payload.memory.nregions; + msg.size = sizeof(m.payload.memory.nregions); + msg.size += sizeof(m.payload.memory.padding); + msg.size += fd_num * sizeof(struct vhost_memory_region); + break; + + case VHOST_USER_SET_LOG_FD: + fds[fd_num++] = *((int *)arg); + break; + + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + need_reply = 1; + break; + + case VHOST_USER_SET_VRING_ADDR: + memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); + msg.size = sizeof(m.payload.addr); + break; + + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + case VHOST_USER_SET_VRING_ERR: + file = arg; + msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; + msg.size = sizeof(m.payload.u64); + if (file->fd > 0) + fds[fd_num++] = file->fd; + else + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + break; + + default: + PMD_DRV_LOG(ERR, "trying to send unhandled msg type"); + return -1; + } + + len = VHOST_USER_HDR_SIZE + msg.size; + if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) { + PMD_DRV_LOG(ERR, "%s failed: %s", + vhost_msg_strings[req], strerror(errno)); + return -1; + } + + if (req == VHOST_USER_SET_MEM_TABLE) + for (i = 0; i < fd_num; ++i) + close(fds[i]); + + if (need_reply) { + if (vhost_user_read(vhostfd, &msg) < 0) { + PMD_DRV_LOG(ERR, "Received msg failed: %s", + strerror(errno)); + return -1; + } + + if (req != msg.request) { + 
PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + + switch (req) { + case VHOST_USER_GET_FEATURES: + if (msg.size != sizeof(m.payload.u64)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + *((__u64 *)arg) = msg.payload.u64; + break; + case VHOST_USER_GET_VRING_BASE: + if (msg.size != sizeof(m.payload.state)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + memcpy(arg, &msg.payload.state, + sizeof(struct vhost_vring_state)); + break; + default: + PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + } + + return 0; +} + +/** + * Set up environment to talk with a vhost user backend. + * @param path + * - The path to vhost user unix socket file. + * + * @return + * - (-1) if fail to set up; + * - (>=0) if successful, and it is the fd to vhostfd. + */ +int +vhost_user_setup(const char *path) +{ + int fd; + int flag; + struct sockaddr_un un; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno)); + return -1; + } + + flag = fcntl(fd, F_GETFD); + fcntl(fd, F_SETFD, flag | FD_CLOEXEC); + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { + PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); + close(fd); + return -1; + } + + return fd; +} + +int +vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable) +{ + int i; + + for (i = 0; i < 2; ++i) { + struct vhost_vring_state state = { + .index = pair_idx * 2 + i, + .num = enable, + }; + + if (vhost_user_sock(vhostfd, + VHOST_USER_SET_VRING_ENABLE, &state)) + return -1; + } + + return 0; +} diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 00000000..3d12a320 --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,333 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/eventfd.h> + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int callfd, kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = &dev->vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + /* May use invalid flag, but some backend leverages kickfd and callfd as + * criteria to judge if dev is alive. so finally we use real event_fd. + */ + callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno)); + return -1; + } + kickfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (kickfd < 0) { + close(callfd); + PMD_DRV_LOG(ERR, "kickfd error, %s\n", strerror(errno)); + return -1; + } + + /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come + * firstly because vhost depends on this msg to allocate virtqueue + * pair. + */ + file.index = queue_sel; + file.fd = callfd; + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_CALL, &file); + dev->callfds[queue_sel] = callfd; + + state.index = queue_sel; + state.num = vring->num; + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_NUM, &state); + + state.num = 0; /* no reservation */ + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_BASE, &state); + + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_ADDR, &addr); + + /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes + * lastly because vhost depends on this msg to judge if + * virtio is ready. + */ + file.fd = kickfd; + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_KICK, &file); + dev->kickfds[queue_sel] = kickfd; + + return 0; +} + +int +virtio_user_start_device(struct virtio_user_dev *dev) +{ + uint64_t features; + uint32_t i, queue_sel; + int ret; + + /* construct memory region inside each implementation */ + ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_MEM_TABLE, NULL); + if (ret < 0) + goto error; + + for (i = 0; i < dev->max_queue_pairs; ++i) { + queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX; + if (virtio_user_kick_queue(dev, queue_sel) < 0) { + PMD_DRV_LOG(INFO, "kick rx vq fails: %u", i); + goto error; + } + } + for (i = 0; i < dev->max_queue_pairs; ++i) { + queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX; + if (virtio_user_kick_queue(dev, queue_sel) < 0) { + PMD_DRV_LOG(INFO, "kick tx vq fails: %u", i); + goto error; + } + } + + /* After setup all virtqueues, we need to set_features so that these + * features can be set into each virtqueue in vhost side. 
And before + * that, make sure VHOST_USER_F_PROTOCOL_FEATURES is added if mq is + * enabled, and VIRTIO_NET_F_MAC is stripped. + */ + features = dev->features; + if (dev->max_queue_pairs > 1) + features |= VHOST_USER_MQ; + features &= ~(1ull << VIRTIO_NET_F_MAC); + ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_FEATURES, &features); + if (ret < 0) + goto error; + PMD_DRV_LOG(INFO, "set features: %" PRIx64, features); + + return 0; +error: + /* TODO: free resource here or caller to check */ + return -1; +} + +int virtio_user_stop_device(struct virtio_user_dev *dev) +{ + return vhost_user_sock(dev->vhostfd, VHOST_USER_RESET_OWNER, NULL); +} + +static inline void +parse_mac(struct virtio_user_dev *dev, const char *mac) +{ + int i, r; + uint32_t tmp[ETHER_ADDR_LEN]; + + if (!mac) + return; + + r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0], + &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]); + if (r == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + dev->mac_addr[i] = (uint8_t)tmp[i]; + dev->mac_specified = 1; + } else { + /* ignore the wrong mac, use random mac */ + PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac); + } +} + +int +virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac) +{ + strncpy(dev->path, path, PATH_MAX); + dev->max_queue_pairs = queues; + dev->queue_pairs = 1; /* mq disabled by default */ + dev->queue_size = queue_size; + dev->mac_specified = 0; + parse_mac(dev, mac); + dev->vhostfd = -1; + + dev->vhostfd = vhost_user_setup(dev->path); + if (dev->vhostfd < 0) { + PMD_INIT_LOG(ERR, "backend set up fails"); + return -1; + } + if (vhost_user_sock(dev->vhostfd, VHOST_USER_SET_OWNER, NULL) < 0) { + PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno)); + return -1; + } + + if (vhost_user_sock(dev->vhostfd, VHOST_USER_GET_FEATURES, + &dev->features) < 0) { + PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno)); + return -1; + } + if (dev->mac_specified) + dev->features |= (1ull << VIRTIO_NET_F_MAC); + + if (!cq) { + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Also disable features depends on VIRTIO_NET_F_CTRL_VQ */ + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN); + dev->features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); + dev->features &= ~(1ull << VIRTIO_NET_F_MQ); + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); + } else { + /* vhost user backend does not need to know ctrl-q, so + * actually we need add this bit into features. However, + * DPDK vhost-user does send features with this bit, so we + * check it instead of OR it for now. 
+ */ + if (!(dev->features & (1ull << VIRTIO_NET_F_CTRL_VQ))) + PMD_INIT_LOG(INFO, "vhost does not support ctrl-q"); + } + + if (dev->max_queue_pairs > 1) { + if (!(dev->features & VHOST_USER_MQ)) { + PMD_INIT_LOG(ERR, "MQ not supported by the backend"); + return -1; + } + } + + return 0; +} + +void +virtio_user_dev_uninit(struct virtio_user_dev *dev) +{ + uint32_t i; + + for (i = 0; i < dev->max_queue_pairs * 2; ++i) { + close(dev->callfds[i]); + close(dev->kickfds[i]); + } + + close(dev->vhostfd); +} + +static uint8_t +virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) +{ + uint16_t i; + uint8_t ret = 0; + + if (q_pairs > dev->max_queue_pairs) { + PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported", + q_pairs, dev->max_queue_pairs); + return -1; + } + + for (i = 0; i < q_pairs; ++i) + ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 1); + for (i = q_pairs; i < dev->max_queue_pairs; ++i) + ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 0); + + dev->queue_pairs = q_pairs; + + return ret; +} + +static uint32_t +virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, + uint16_t idx_hdr) +{ + struct virtio_net_ctrl_hdr *hdr; + virtio_net_ctrl_ack status = ~0; + uint16_t i, idx_data, idx_status; + uint32_t n_descs = 0; + + /* locate desc for header, data, and status */ + idx_data = vring->desc[idx_hdr].next; + n_descs++; + + i = idx_data; + while (vring->desc[i].flags == VRING_DESC_F_NEXT) { + i = vring->desc[i].next; + n_descs++; + } + + /* locate desc for status */ + idx_status = i; + n_descs++; + + hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr; + if (hdr->class == VIRTIO_NET_CTRL_MQ && + hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + uint16_t queues; + + queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr; + status = virtio_user_handle_mq(dev, queues); + } + + /* Update status */ + *(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status; + + return n_descs; +} + +void +virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) +{ + uint16_t avail_idx, desc_idx; + struct vring_used_elem *uep; + uint32_t n_descs; + struct vring *vring = &dev->vrings[queue_idx]; + + /* Consume avail ring, using used ring idx as first one */ + while (vring->used->idx != vring->avail->idx) { + avail_idx = (vring->used->idx) & (vring->num - 1); + desc_idx = vring->avail->ring[avail_idx]; + + n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx); + + /* Update used ring */ + uep = &vring->used->ring[avail_idx]; + uep->id = avail_idx; + uep->len = n_descs; + + vring->used->idx++; + } +} diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h new file mode 100644 index 00000000..33690b5c --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -0,0 +1,62 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_USER_DEV_H +#define _VIRTIO_USER_DEV_H + +#include <limits.h> +#include "../virtio_pci.h" +#include "../virtio_ring.h" + +struct virtio_user_dev { + int vhostfd; + int callfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + int kickfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + int mac_specified; + uint32_t max_queue_pairs; + uint32_t queue_pairs; + uint32_t queue_size; + uint64_t features; + uint8_t status; + uint8_t mac_addr[ETHER_ADDR_LEN]; + char path[PATH_MAX]; + struct vring vrings[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; +}; + +int virtio_user_start_device(struct virtio_user_dev *dev); +int virtio_user_stop_device(struct virtio_user_dev *dev); +int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac); +void virtio_user_dev_uninit(struct virtio_user_dev *dev); +void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx); +#endif diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c new file mode 100644 index 00000000..5ab24711 --- /dev/null +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -0,0 +1,440 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <sys/types.h> +#include <unistd.h> + +#include <rte_malloc.h> +#include <rte_kvargs.h> + +#include "virtio_ethdev.h" +#include "virtio_logs.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" +#include "virtio_user/virtio_user_dev.h" + +#define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev) + +static void +virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (offset == offsetof(struct virtio_net_config, mac) && + length == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + ((uint8_t *)dst)[i] = dev->mac_addr[i]; + return; + } + + if (offset == offsetof(struct virtio_net_config, status)) + *(uint16_t *)dst = dev->status; + + if (offset == offsetof(struct virtio_net_config, max_virtqueue_pairs)) + *(uint16_t *)dst = dev->max_queue_pairs; +} + +static void +virtio_user_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if ((offset == offsetof(struct virtio_net_config, mac)) && + (length == ETHER_ADDR_LEN)) + for (i = 0; i < ETHER_ADDR_LEN; ++i) + dev->mac_addr[i] = ((const uint8_t *)src)[i]; + else + PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d\n", + offset, length); +} + +static void +virtio_user_set_status(struct virtio_hw *hw, uint8_t status) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) + virtio_user_start_device(dev); + dev->status = status; +} + +static void +virtio_user_reset(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + virtio_user_stop_device(dev); +} + +static uint8_t +virtio_user_get_status(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + return dev->status; +} + +static uint64_t +virtio_user_get_features(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + return dev->features; +} + +static void +virtio_user_set_features(struct virtio_hw *hw, uint64_t features) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + dev->features = features; +} + +static uint8_t +virtio_user_get_isr(struct virtio_hw *hw __rte_unused) +{ + /* When config interrupt happens, driver calls this function to query + * what kinds of change happen. Interrupt mode not supported for now. + */ + return 0; +} + +static uint16_t +virtio_user_set_config_irq(struct virtio_hw *hw __rte_unused, + uint16_t vec __rte_unused) +{ + return VIRTIO_MSI_NO_VECTOR; +} + +/* This function is to get the queue size, aka, number of descs, of a specified + * queue. Different with the VHOST_USER_GET_QUEUE_NUM, which is used to get the + * max supported queues. 
+ */ +static uint16_t +virtio_user_get_queue_num(struct virtio_hw *hw, uint16_t queue_id __rte_unused) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + /* Currently, each queue has same queue size */ + return dev->queue_size; +} + +static int +virtio_user_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + uint16_t queue_idx = vq->vq_queue_index; + uint64_t desc_addr, avail_addr, used_addr; + + desc_addr = (uintptr_t)vq->vq_ring_virt_mem; + avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); + used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, + ring[vq->vq_nentries]), + VIRTIO_PCI_VRING_ALIGN); + + dev->vrings[queue_idx].num = vq->vq_nentries; + dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr; + dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr; + dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr; + + return 0; +} + +static void +virtio_user_del_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + /* For legacy devices, write 0 to VIRTIO_PCI_QUEUE_PFN port, QEMU + * correspondingly stops the ioeventfds, and reset the status of + * the device. + * For modern devices, set queue desc, avail, used in PCI bar to 0, + * not see any more behavior in QEMU. + * + * Here we just care about what information to deliver to vhost-user + * or vhost-kernel. So we just close ioeventfd for now. + */ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + close(dev->callfds[vq->vq_queue_index]); + close(dev->kickfds[vq->vq_queue_index]); +} + +static void +virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + uint64_t buf = 1; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (hw->cvq && (hw->cvq->vq == vq)) { + virtio_user_handle_cq(dev, vq->vq_queue_index); + return; + } + + if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) + PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", + strerror(errno)); +} + +static const struct virtio_pci_ops virtio_user_ops = { + .read_dev_cfg = virtio_user_read_dev_config, + .write_dev_cfg = virtio_user_write_dev_config, + .reset = virtio_user_reset, + .get_status = virtio_user_get_status, + .set_status = virtio_user_set_status, + .get_features = virtio_user_get_features, + .set_features = virtio_user_set_features, + .get_isr = virtio_user_get_isr, + .set_config_irq = virtio_user_set_config_irq, + .get_queue_num = virtio_user_get_queue_num, + .setup_queue = virtio_user_setup_queue, + .del_queue = virtio_user_del_queue, + .notify_queue = virtio_user_notify_queue, +}; + +static const char *valid_args[] = { +#define VIRTIO_USER_ARG_QUEUES_NUM "queues" + VIRTIO_USER_ARG_QUEUES_NUM, +#define VIRTIO_USER_ARG_CQ_NUM "cq" + VIRTIO_USER_ARG_CQ_NUM, +#define VIRTIO_USER_ARG_MAC "mac" + VIRTIO_USER_ARG_MAC, +#define VIRTIO_USER_ARG_PATH "path" + VIRTIO_USER_ARG_PATH, +#define VIRTIO_USER_ARG_QUEUE_SIZE "queue_size" + VIRTIO_USER_ARG_QUEUE_SIZE, + NULL +}; + +#define VIRTIO_USER_DEF_CQ_EN 0 +#define VIRTIO_USER_DEF_Q_NUM 1 +#define VIRTIO_USER_DEF_Q_SZ 256 + +static int +get_string_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + if (!value || !extra_args) + return -EINVAL; + + *(char **)extra_args = strdup(value); + + return 0; +} + +static int +get_integer_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + if (!value || !extra_args) + return -EINVAL; + + *(uint64_t *)extra_args = strtoull(value, NULL, 0); + + return 0; +} + 
+static struct rte_eth_dev * +virtio_user_eth_dev_alloc(const char *name) +{ + struct rte_eth_dev *eth_dev; + struct rte_eth_dev_data *data; + struct virtio_hw *hw; + struct virtio_user_dev *dev; + + eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + if (!eth_dev) { + PMD_INIT_LOG(ERR, "cannot alloc rte_eth_dev"); + return NULL; + } + + data = eth_dev->data; + + hw = rte_zmalloc(NULL, sizeof(*hw), 0); + if (!hw) { + PMD_INIT_LOG(ERR, "malloc virtio_hw failed"); + rte_eth_dev_release_port(eth_dev); + return NULL; + } + + dev = rte_zmalloc(NULL, sizeof(*dev), 0); + if (!dev) { + PMD_INIT_LOG(ERR, "malloc virtio_user_dev failed"); + rte_eth_dev_release_port(eth_dev); + rte_free(hw); + return NULL; + } + + hw->vtpci_ops = &virtio_user_ops; + hw->use_msix = 0; + hw->modern = 0; + hw->virtio_user_dev = dev; + data->dev_private = hw; + data->numa_node = SOCKET_ID_ANY; + data->kdrv = RTE_KDRV_NONE; + data->dev_flags = RTE_ETH_DEV_DETACHABLE; + eth_dev->pci_dev = NULL; + eth_dev->driver = NULL; + return eth_dev; +} + +/* Dev initialization routine. Invoked once for each virtio vdev at + * EAL init time, see rte_eal_dev_init(). + * Returns 0 on success. + */ +static int +virtio_user_pmd_devinit(const char *name, const char *params) +{ + struct rte_kvargs *kvlist; + struct rte_eth_dev *eth_dev; + struct virtio_hw *hw; + uint64_t queues = VIRTIO_USER_DEF_Q_NUM; + uint64_t cq = VIRTIO_USER_DEF_CQ_EN; + uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ; + char *path = NULL; + char *mac_addr = NULL; + int ret = -1; + + if (!params || params[0] == '\0') { + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio-user", + VIRTIO_USER_ARG_QUEUE_SIZE); + goto end; + } + + kvlist = rte_kvargs_parse(params, valid_args); + if (!kvlist) { + PMD_INIT_LOG(ERR, "error when parsing param"); + goto end; + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, + &get_string_arg, &path); + else { + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio-user\n", + VIRTIO_USER_ARG_QUEUE_SIZE); + goto end; + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, + &get_string_arg, &mac_addr); + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, + &get_integer_arg, &queue_size); + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, + &get_integer_arg, &queues); + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, + &get_integer_arg, &cq); + else if (queues > 1) + cq = 1; + + if (queues > 1 && cq == 0) { + PMD_INIT_LOG(ERR, "multi-q requires ctrl-q"); + goto end; + } + + eth_dev = virtio_user_eth_dev_alloc(name); + if (!eth_dev) { + PMD_INIT_LOG(ERR, "virtio-user fails to alloc device"); + goto end; + } + + hw = eth_dev->data->dev_private; + if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq, + queue_size, mac_addr) < 0) + goto end; + + /* previously called by rte_eal_pci_probe() for physical dev */ + if (eth_virtio_dev_init(eth_dev) < 0) { + PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); + goto end; + } + ret = 0; + +end: + if (path) + free(path); + if (mac_addr) + free(mac_addr); + return ret; +} + +/** Called by rte_eth_dev_detach() */ +static int +virtio_user_pmd_devuninit(const char *name) +{ + struct rte_eth_dev *eth_dev; + struct virtio_hw *hw; + struct virtio_user_dev *dev; + + if (!name) + return 
-EINVAL;
+
+	PMD_DRV_LOG(INFO, "Un-Initializing %s\n", name);
+	eth_dev = rte_eth_dev_allocated(name);
+	if (!eth_dev)
+		return -ENODEV;
+
+	/* make sure the device is stopped, queues freed */
+	rte_eth_dev_close(eth_dev->data->port_id);
+
+	hw = eth_dev->data->dev_private;
+	dev = hw->virtio_user_dev;
+	virtio_user_dev_uninit(dev);
+
+	rte_free(eth_dev->data->dev_private);
+	rte_free(eth_dev->data);
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+static struct rte_driver virtio_user_driver = {
+	.name   = "virtio-user",
+	.type   = PMD_VDEV,
+	.init   = virtio_user_pmd_devinit,
+	.uninit = virtio_user_pmd_devuninit,
+};
+
+PMD_REGISTER_DRIVER(virtio_user_driver);
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 4e9239e0..455aaafe 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -66,6 +66,14 @@ struct rte_mbuf;
 
 #define VIRTQUEUE_MAX_NAME_SZ 32
 
+#ifdef RTE_VIRTIO_USER
+#define MBUF_DATA_DMA_ADDR(mb, offset) \
+	((uint64_t)((uintptr_t)(*(void **)((uintptr_t)mb + offset)) \
+			+ (mb)->data_off))
+#else /* RTE_VIRTIO_USER */
+#define MBUF_DATA_DMA_ADDR(mb, offset) rte_mbuf_data_dma_addr(mb)
+#endif /* RTE_VIRTIO_USER */
+
 #define VTNET_SQ_RQ_QUEUE_IDX 0
 #define VTNET_SQ_TQ_QUEUE_IDX 1
 #define VTNET_SQ_CQ_QUEUE_IDX 2
@@ -153,23 +161,30 @@ struct virtio_pmd_ctrl {
 	uint8_t data[VIRTIO_MAX_CTRL_DATA];
 };
 
+struct vq_desc_extra {
+	void		*cookie;
+	uint16_t	ndescs;
+};
+
 struct virtqueue {
-	struct virtio_hw         *hw; /**< virtio_hw structure pointer. */
-	const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
-	const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */
-	struct rte_mempool       *mpool; /**< mempool for mbuf allocation */
-	uint16_t    queue_id;         /**< DPDK queue index. */
-	uint8_t     port_id;          /**< Device port identifier. */
-	uint16_t    vq_queue_index;   /**< PCI queue index */
-
-	void        *vq_ring_virt_mem; /**< linear address of vring*/
+	struct virtio_hw  *hw; /**< virtio_hw structure pointer. */
+	struct vring vq_ring;  /**< vring keeping desc, used and avail */
+	/**
+	 * Last consumed descriptor in the used table,
+	 * trails vq_ring.used->idx.
+	 */
+	uint16_t vq_used_cons_idx;
+	uint16_t vq_nentries;    /**< vring desc numbers */
+	uint16_t vq_free_cnt;    /**< num of desc available */
+	uint16_t vq_avail_idx;   /**< sync until needed */
+	uint16_t vq_free_thresh; /**< free threshold */
+
+	void *vq_ring_virt_mem;  /**< linear address of vring*/
 	unsigned int vq_ring_size;
-	phys_addr_t vq_ring_mem;      /**< physical address of vring */
-	struct vring vq_ring;         /**< vring keeping desc, used and avail */
-	uint16_t    vq_free_cnt;      /**< num of desc available */
-	uint16_t    vq_nentries;      /**< vring desc numbers */
-	uint16_t    vq_free_thresh;   /**< free threshold */
+
+	phys_addr_t vq_ring_mem; /**< physical address of vring */
+				 /**< use virtual address for virtio-user. */
+
 	/**
 	 * Head of the free chain in the descriptor table. If
 	 * there are no free descriptors, this will be set to
@@ -177,34 +192,12 @@ struct virtqueue {
 	 */
 	uint16_t  vq_desc_head_idx;
 	uint16_t  vq_desc_tail_idx;
-	/**
-	 * Last consumed descriptor in the used table,
-	 * trails vq_ring.used->idx.
-	 */
-	uint16_t vq_used_cons_idx;
-	uint16_t vq_avail_idx;
-	uint64_t mbuf_initializer; /**< value to init mbufs. */
-	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
-
-	struct rte_mbuf **sw_ring; /**< RX software ring. */
-	/* dummy mbuf, for wraparound when processing RX ring. */
-	struct rte_mbuf fake_mbuf;
-
-	/* Statistics */
-	uint64_t	packets;
-	uint64_t	bytes;
-	uint64_t	errors;
-	uint64_t	multicast;
-	uint64_t	broadcast;
-	/* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
-	uint64_t	size_bins[8];
-
-	uint16_t	*notify_addr;
-
-	struct vq_desc_extra {
-		void *cookie;
-		uint16_t ndescs;
-	} vq_descx[0];
+	uint16_t  vq_queue_index; /**< PCI queue index */
+	uint16_t  offset; /**< relative offset to obtain addr in mbuf */
+	uint16_t  *notify_addr;
+	int	  configured;
+	struct rte_mbuf **sw_ring; /**< RX software ring. */
+	struct vq_desc_extra vq_descx[0];
 };
 
 /* If multiqueue is provided by host, then we suppport it. */
@@ -302,7 +295,8 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
 	 * descriptor.
 	 */
 	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
-	vq->vq_ring.avail->ring[avail_idx] = desc_idx;
+	if (unlikely(vq->vq_ring.avail->ring[avail_idx] != desc_idx))
+		vq->vq_ring.avail->ring[avail_idx] = desc_idx;
 	vq->vq_avail_idx++;
 }
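A few notes on the less obvious pieces of this import. The final virtqueue.h hunk makes the avail-ring store conditional: on the simple Rx/Tx paths the descriptor mapping is effectively fixed, so ring[avail_idx] usually already holds desc_idx, and skipping the redundant store avoids re-dirtying a shared cache line that the backend may be polling. That rationale is inferred from the code; the patch itself does not state it.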
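On the new MBUF_DATA_DMA_ADDR() macro: with RTE_VIRTIO_USER the backend shares the process address space, so the descriptor address must be the mbuf's virtual buf_addr rather than buf_physaddr, and the per-queue vq->offset selects the field at run time. A minimal sketch of how it resolves, assuming (as set up elsewhere in this commit, not shown in this hunk) that offset is offsetof(struct rte_mbuf, buf_addr) for virtio-user and offsetof(struct rte_mbuf, buf_physaddr) otherwise, on a 64-bit target:

#include <stddef.h>
#include <stdint.h>
#include <rte_mbuf.h>

/* Equivalent of MBUF_DATA_DMA_ADDR(mb, offset) with the field choice
 * made explicit; 'offset' is a byte offset into struct rte_mbuf.
 */
static uint64_t
data_addr_sketch(struct rte_mbuf *mb, uint16_t offset)
{
	uint64_t base;

	if (offset == offsetof(struct rte_mbuf, buf_addr))
		base = (uint64_t)(uintptr_t)mb->buf_addr; /* virtual */
	else
		base = (uint64_t)mb->buf_physaddr;        /* physical */

	/* Packet data starts data_off bytes into the buffer. */
	return base + mb->data_off;
}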
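The vhost-user transport added in vhost_user.c frames every request as a fixed header (request, flags = VHOST_USER_VERSION, payload size) followed by size bytes of payload, with file descriptors passed out-of-band as SCM_RIGHTS ancillary data. A minimal bring-up sketch using only the three helpers exported by vhost.h; the socket path is hypothetical, and the backend is assumed to accept VHOST_USER_SET_VRING_ENABLE:

#include <stdint.h>

#include "vhost.h"

static int
vhost_user_bringup_sketch(void)
{
	uint64_t features;
	int vhostfd = vhost_user_setup("/tmp/vhost-user.sock");

	if (vhostfd < 0)
		return -1;

	/* Claim ownership of the backend before any other request. */
	if (vhost_user_sock(vhostfd, VHOST_USER_SET_OWNER, NULL) < 0)
		return -1;

	/* GET_FEATURES is a request/reply pair: the reply's u64
	 * payload is copied back through 'arg'.
	 */
	if (vhost_user_sock(vhostfd, VHOST_USER_GET_FEATURES, &features) < 0)
		return -1;

	/* Enable queue pair 0, i.e. vrings 0 (rx) and 1 (tx). */
	return vhost_user_enable_queue_pair(vhostfd, 0, 1);
}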
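get_hugepage_file_info() is easiest to follow with a concrete input, since it recovers hugepage regions by string-walking /proc/self/maps. A hypothetical entry for one 1 GB EAL hugepage file, and what the parser extracts from it:

/* Hypothetical /proc/self/maps entry (one line):
 *
 *   7f1c40000000-7f1c80000000 rw-s 00000000 00:1e 42 /dev/hugepages/rtemap_0
 *
 * sscanf("%PRIx64-%PRIx64")  -> v_start = 0x7f1c40000000,
 *                               v_end   = 0x7f1c80000000
 * five strchr(' ') hops      -> tmp points at "/dev/hugepages/rtemap_0"
 * strrchr('_') + "map_%d"    -> huge_index = 0, entry accepted
 *
 * Result: huges[0] = { .addr = 0x7f1c40000000,
 *                      .size = 0x40000000,   (v_end - v_start, 1 GB)
 *                      .path = "/dev/hugepages/rtemap_0" };
 */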
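The control queue is emulated in-process: virtio_user_notify_queue() routes a kick on the cvq to virtio_user_handle_cq() instead of an eventfd, and the handler walks a header/data/status descriptor chain. A sketch of the chain a driver would post for a VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command, with descriptor slots chosen arbitrarily and headers as included by virtio_user_dev.c above; note the handler compares flags with == VRING_DESC_F_NEXT, so non-final descriptors must carry exactly that flag:

#include <stdint.h>

#include "vhost.h"
#include "../virtio_ethdev.h"

static void
post_mq_cmd_sketch(struct vring *vr, struct virtio_net_ctrl_hdr *hdr,
		   uint16_t *pairs, virtio_net_ctrl_ack *status)
{
	hdr->class = VIRTIO_NET_CTRL_MQ;
	hdr->cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;

	vr->desc[0].addr = (uintptr_t)hdr;    /* command header */
	vr->desc[0].len = sizeof(*hdr);
	vr->desc[0].flags = VRING_DESC_F_NEXT;
	vr->desc[0].next = 1;

	vr->desc[1].addr = (uintptr_t)pairs;  /* data: queue pair count */
	vr->desc[1].len = sizeof(*pairs);
	vr->desc[1].flags = VRING_DESC_F_NEXT;
	vr->desc[1].next = 2;

	vr->desc[2].addr = (uintptr_t)status; /* status, written back */
	vr->desc[2].len = sizeof(*status);
	vr->desc[2].flags = VRING_DESC_F_WRITE;

	/* Addresses are process-virtual, which is exactly what the
	 * in-process handler dereferences.
	 */
	vr->avail->ring[vr->avail->idx & (vr->num - 1)] = 0;
	vr->avail->idx++;
}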
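Finally, usage: virtio_user_pmd_devinit() is reached through the EAL vdev path, so a device should be instantiable with something like --vdev='virtio-user0,path=/tmp/vhost-user.sock,queues=2,cq=1'. path is the only mandatory key; queues > 1 requires the control queue, and the defaults are queues=1, cq=0, queue_size=256. The socket path here is illustrative, and the exact vdev spelling depends on how EAL matches the "virtio-user" driver name.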