Diffstat (limited to 'examples')
134 files changed, 5922 insertions, 1266 deletions
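The most common mechanical change in this set, repeated across the bond, distributor, ethtool, exception_path, ip_fragmentation, ip_pipeline and ip_reassembly examples below, is that hard-coded descriptor counts are first passed through rte_eth_dev_adjust_nb_rx_tx_desc() so the PMD can clamp them to what the hardware supports before the queues are created. A minimal sketch of that pattern, assuming rte_eth_dev_configure() has already succeeded for the port (the ring sizes and function name here are illustrative, not from any one example):

#include <rte_ethdev.h>

#define RX_RING_SIZE 512	/* illustrative; each example keeps its own default */
#define TX_RING_SIZE 512

/* Assumes rte_eth_dev_configure() has already been called for this port. */
static int
setup_one_queue_pair(uint8_t port, struct rte_mempool *mbuf_pool)
{
	uint16_t nb_rxd = RX_RING_SIZE;
	uint16_t nb_txd = TX_RING_SIZE;
	int ret;

	/* Let the PMD clamp the requested counts to its supported range. */
	ret = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
	if (ret != 0)
		return ret;

	ret = rte_eth_rx_queue_setup(port, 0, nb_rxd,
			rte_eth_dev_socket_id(port), NULL, mbuf_pool);
	if (ret < 0)
		return ret;

	return rte_eth_tx_queue_setup(port, 0, nb_txd,
			rte_eth_dev_socket_id(port), NULL);
}

Passing the counts by pointer matters: the queues must then be set up with the possibly adjusted nb_rxd/nb_txd values rather than the original constants, which is exactly the substitution each hunk below performs.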
diff --git a/examples/Makefile b/examples/Makefile index 6298626b..28354ff8 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -32,7 +32,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk @@ -88,7 +88,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_HASH),y) DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += tep_termination endif DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += timer -DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost +DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost vhost_scsi DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += vhost_xen DIRS-y += vmdq DIRS-y += vmdq_dcb @@ -100,4 +100,6 @@ $(info vm_power_manager requires libvirt >= 0.9.3) endif endif +DIRS-y += eventdev_pipeline_sw_pmd + include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/bond/main.c b/examples/bond/main.c index 9a4ec807..2d019d43 100644 --- a/examples/bond/main.c +++ b/examples/bond/main.c @@ -53,7 +53,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -67,10 +66,8 @@ #include <rte_debug.h> #include <rte_ether.h> #include <rte_ethdev.h> -#include <rte_log.h> #include <rte_mempool.h> #include <rte_mbuf.h> -#include <rte_memcpy.h> #include <rte_ip.h> #include <rte_tcp.h> #include <rte_arp.h> @@ -177,6 +174,8 @@ static void slave_port_init(uint8_t portid, struct rte_mempool *mbuf_pool) { int retval; + uint16_t nb_rxd = RTE_RX_DESC_DEFAULT; + uint16_t nb_txd = RTE_TX_DESC_DEFAULT; if (portid >= rte_eth_dev_count()) rte_exit(EXIT_FAILURE, "Invalid port\n"); @@ -186,8 +185,13 @@ slave_port_init(uint8_t portid, struct rte_mempool *mbuf_pool) rte_exit(EXIT_FAILURE, "port %u: configuration failed (res=%d)\n", portid, retval); + retval = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd); + if (retval != 0) + rte_exit(EXIT_FAILURE, "port %u: rte_eth_dev_adjust_nb_rx_tx_desc " + "failed (res=%d)\n", portid, retval); + /* RX setup */ - retval = rte_eth_rx_queue_setup(portid, 0, RTE_RX_DESC_DEFAULT, + retval = rte_eth_rx_queue_setup(portid, 0, nb_rxd, rte_eth_dev_socket_id(portid), NULL, mbuf_pool); if (retval < 0) @@ -195,7 +199,7 @@ slave_port_init(uint8_t portid, struct rte_mempool *mbuf_pool) portid, retval); /* TX setup */ - retval = rte_eth_tx_queue_setup(portid, 0, RTE_TX_DESC_DEFAULT, + retval = rte_eth_tx_queue_setup(portid, 0, nb_txd, rte_eth_dev_socket_id(portid), NULL); if (retval < 0) @@ -221,6 +225,8 @@ bond_port_init(struct rte_mempool *mbuf_pool) { int retval; uint8_t i; + uint16_t nb_rxd = RTE_RX_DESC_DEFAULT; + uint16_t nb_txd = RTE_TX_DESC_DEFAULT; retval = rte_eth_bond_create("bond0", BONDING_MODE_ALB, 0 /*SOCKET_ID_ANY*/); @@ -235,8 +241,13 @@ bond_port_init(struct rte_mempool *mbuf_pool) rte_exit(EXIT_FAILURE, "port %u: configuration failed (res=%d)\n", BOND_PORT, retval); + retval = rte_eth_dev_adjust_nb_rx_tx_desc(BOND_PORT, &nb_rxd, &nb_txd); + if (retval != 0) + rte_exit(EXIT_FAILURE, "port %u: rte_eth_dev_adjust_nb_rx_tx_desc " + "failed (res=%d)\n", BOND_PORT, retval); + /* RX setup */ - retval = rte_eth_rx_queue_setup(BOND_PORT, 0, RTE_RX_DESC_DEFAULT, + retval = rte_eth_rx_queue_setup(BOND_PORT, 0, nb_rxd, rte_eth_dev_socket_id(BOND_PORT), NULL, mbuf_pool); if (retval < 0) @@ -244,7 +255,7 @@ bond_port_init(struct rte_mempool *mbuf_pool) BOND_PORT, retval); /* TX setup */ - 
retval = rte_eth_tx_queue_setup(BOND_PORT, 0, RTE_TX_DESC_DEFAULT, + retval = rte_eth_tx_queue_setup(BOND_PORT, 0, nb_txd, rte_eth_dev_socket_id(BOND_PORT), NULL); if (retval < 0) @@ -550,7 +561,7 @@ static void cmd_help_parsed(__attribute__((unused)) void *parsed_result, { cmdline_printf(cl, "ALB - link bonding mode 6 example\n" - "send IP - sends one ARPrequest thru bonding for IP.\n" + "send IP - sends one ARPrequest through bonding for IP.\n" "start - starts listening ARPs.\n" "stop - stops lcore_main.\n" "show - shows some bond info: ex. active slaves etc.\n" diff --git a/examples/cmdline/Makefile b/examples/cmdline/Makefile index 9ebe4355..5155a6c8 100644 --- a/examples/cmdline/Makefile +++ b/examples/cmdline/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/distributor/Makefile b/examples/distributor/Makefile index 6a5badaa..404993eb 100644 --- a/examples/distributor/Makefile +++ b/examples/distributor/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/distributor/main.c b/examples/distributor/main.c index 8071f919..87603d03 100644 --- a/examples/distributor/main.c +++ b/examples/distributor/main.c @@ -43,6 +43,7 @@ #include <rte_debug.h> #include <rte_prefetch.h> #include <rte_distributor.h> +#include <rte_pause.h> #define RX_RING_SIZE 512 #define TX_RING_SIZE 512 @@ -137,6 +138,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1; int retval; uint16_t q; + uint16_t nb_rxd = RX_RING_SIZE; + uint16_t nb_txd = TX_RING_SIZE; if (port >= rte_eth_dev_count()) return -1; @@ -145,8 +148,12 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if (retval != 0) + return retval; + for (q = 0; q < rxRings; q++) { - retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + retval = rte_eth_rx_queue_setup(port, q, nb_rxd, rte_eth_dev_socket_id(port), NULL, mbuf_pool); if (retval < 0) @@ -154,7 +161,7 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) } for (q = 0; q < txRings; q++) { - retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + retval = rte_eth_tx_queue_setup(port, q, nb_txd, rte_eth_dev_socket_id(port), NULL); if (retval < 0) diff --git a/examples/ethtool/ethtool-app/main.c b/examples/ethtool/ethtool-app/main.c index 6d50d463..bbab2f6e 100644 --- a/examples/ethtool/ethtool-app/main.c +++ b/examples/ethtool/ethtool-app/main.c @@ -122,6 +122,8 @@ static void setup_ports(struct app_config *app_cfg, int cnt_ports) struct rte_eth_conf cfg_port; struct rte_eth_dev_info dev_info; char str_name[16]; + uint16_t nb_rxd = PORT_RX_QUEUE_SIZE; + uint16_t nb_txd = PORT_TX_QUEUE_SIZE; memset(&cfg_port, 0, sizeof(cfg_port)); cfg_port.txmode.mq_mode = ETH_MQ_TX_NONE; @@ -154,15 +156,19 @@ static void setup_ports(struct app_config *app_cfg, int cnt_ports) if (rte_eth_dev_configure(idx_port, 1, 1, &cfg_port) < 0) rte_exit(EXIT_FAILURE, "rte_eth_dev_configure 
failed"); + if (rte_eth_dev_adjust_nb_rx_tx_desc(idx_port, &nb_rxd, + &nb_txd) < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_dev_adjust_nb_rx_tx_desc failed"); if (rte_eth_rx_queue_setup( - idx_port, 0, PORT_RX_QUEUE_SIZE, + idx_port, 0, nb_rxd, rte_eth_dev_socket_id(idx_port), NULL, ptr_port->pkt_pool) < 0) rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup failed" ); if (rte_eth_tx_queue_setup( - idx_port, 0, PORT_TX_QUEUE_SIZE, + idx_port, 0, nb_txd, rte_eth_dev_socket_id(idx_port), NULL) < 0) rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup failed" @@ -264,7 +270,7 @@ int main(int argc, char **argv) uint32_t id_core; uint32_t cnt_ports; - /* Init runtime enviornment */ + /* Init runtime environment */ cnt_args_parsed = rte_eal_init(argc, argv); if (cnt_args_parsed < 0) rte_exit(EXIT_FAILURE, "rte_eal_init(): Failed"); diff --git a/examples/ethtool/lib/rte_ethtool.c b/examples/ethtool/lib/rte_ethtool.c index 7e465206..252382cb 100644 --- a/examples/ethtool/lib/rte_ethtool.c +++ b/examples/ethtool/lib/rte_ethtool.c @@ -36,6 +36,7 @@ #include <rte_version.h> #include <rte_ethdev.h> #include <rte_ether.h> +#include <rte_pci.h> #ifdef RTE_LIBRTE_IXGBE_PMD #include <rte_pmd_ixgbe.h> #endif @@ -64,7 +65,7 @@ rte_ethtool_get_drvinfo(uint8_t port_id, struct ethtool_drvinfo *drvinfo) printf("firmware version get error: (%s)\n", strerror(-ret)); else if (ret > 0) printf("Insufficient fw version buffer size, " - "the minimun size should be %d\n", ret); + "the minimum size should be %d\n", ret); memset(&dev_info, 0, sizeof(dev_info)); rte_eth_dev_info_get(port_id, &dev_info); @@ -73,6 +74,7 @@ rte_ethtool_get_drvinfo(uint8_t port_id, struct ethtool_drvinfo *drvinfo) dev_info.driver_name); snprintf(drvinfo->version, sizeof(drvinfo->version), "%s", rte_version()); + /* TODO: replace bus_info by rte_devargs.name */ if (dev_info.pci_dev) snprintf(drvinfo->bus_info, sizeof(drvinfo->bus_info), "%04x:%02x:%02x.%x", diff --git a/examples/ethtool/lib/rte_ethtool.h b/examples/ethtool/lib/rte_ethtool.h index 2e79d453..18f44404 100644 --- a/examples/ethtool/lib/rte_ethtool.h +++ b/examples/ethtool/lib/rte_ethtool.h @@ -365,7 +365,7 @@ int rte_ethtool_net_vlan_rx_kill_vid(uint8_t port_id, uint16_t vid); int rte_ethtool_net_set_rx_mode(uint8_t port_id); /** - * Getting ring paramaters for Ethernet device. + * Getting ring parameters for Ethernet device. * * @param port_id * The port identifier of the Ethernet device. @@ -384,7 +384,7 @@ int rte_ethtool_get_ringparam(uint8_t port_id, struct ethtool_ringparam *ring_param); /** - * Setting ring paramaters for Ethernet device. + * Setting ring parameters for Ethernet device. * * @param port_id * The port identifier of the Ethernet device. diff --git a/examples/eventdev_pipeline_sw_pmd/Makefile b/examples/eventdev_pipeline_sw_pmd/Makefile new file mode 100644 index 00000000..de4e22c8 --- /dev/null +++ b/examples/eventdev_pipeline_sw_pmd/Makefile @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2016-2017 Intel Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = eventdev_pipeline_sw_pmd + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/eventdev_pipeline_sw_pmd/main.c b/examples/eventdev_pipeline_sw_pmd/main.c new file mode 100644 index 00000000..dd75cb7a --- /dev/null +++ b/examples/eventdev_pipeline_sw_pmd/main.c @@ -0,0 +1,1017 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <getopt.h> +#include <stdint.h> +#include <stdio.h> +#include <signal.h> +#include <sched.h> +#include <stdbool.h> + +#include <rte_eal.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_launch.h> +#include <rte_malloc.h> +#include <rte_random.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_eventdev.h> + +#define MAX_NUM_STAGES 8 +#define BATCH_SIZE 16 +#define MAX_NUM_CORE 64 + +struct prod_data { + uint8_t dev_id; + uint8_t port_id; + int32_t qid; + unsigned int num_nic_ports; +} __rte_cache_aligned; + +struct cons_data { + uint8_t dev_id; + uint8_t port_id; +} __rte_cache_aligned; + +static struct prod_data prod_data; +static struct cons_data cons_data; + +struct worker_data { + uint8_t dev_id; + uint8_t port_id; +} __rte_cache_aligned; + +struct fastpath_data { + volatile int done; + uint32_t rx_lock; + uint32_t tx_lock; + uint32_t sched_lock; + bool rx_single; + bool tx_single; + bool sched_single; + unsigned int rx_core[MAX_NUM_CORE]; + unsigned int tx_core[MAX_NUM_CORE]; + unsigned int sched_core[MAX_NUM_CORE]; + unsigned int worker_core[MAX_NUM_CORE]; + struct rte_eth_dev_tx_buffer *tx_buf[RTE_MAX_ETHPORTS]; +}; + +static struct fastpath_data *fdata; + +struct config_data { + unsigned int active_cores; + unsigned int num_workers; + int64_t num_packets; + unsigned int num_fids; + int queue_type; + int worker_cycles; + int enable_queue_priorities; + int quiet; + int dump_dev; + int dump_dev_signal; + unsigned int num_stages; + unsigned int worker_cq_depth; + int16_t next_qid[MAX_NUM_STAGES+2]; + int16_t qid[MAX_NUM_STAGES]; +}; + +static struct config_data cdata = { + .num_packets = (1L << 25), /* do ~32M packets */ + .num_fids = 512, + .queue_type = RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY, + .next_qid = {-1}, + .qid = {-1}, + .num_stages = 1, + .worker_cq_depth = 16 +}; + +static bool +core_in_use(unsigned int lcore_id) { + return (fdata->rx_core[lcore_id] || fdata->sched_core[lcore_id] || + fdata->tx_core[lcore_id] || fdata->worker_core[lcore_id]); +} + +static void +eth_tx_buffer_retry(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata) +{ + int port_id = (uintptr_t) userdata; + unsigned int _sent = 0; + + do { + /* Note: hard-coded TX queue */ + _sent += rte_eth_tx_burst(port_id, 0, &pkts[_sent], + unsent - _sent); + } while (_sent != unsent); +} + +static int +consumer(void) +{ + const uint64_t freq_khz = rte_get_timer_hz() / 1000; + struct rte_event packets[BATCH_SIZE]; + + static uint64_t received; + static uint64_t last_pkts; + static uint64_t last_time; + static uint64_t start_time; + unsigned int i, j; + uint8_t dev_id = cons_data.dev_id; + uint8_t port_id = cons_data.port_id; + + uint16_t n = rte_event_dequeue_burst(dev_id, port_id, + packets, RTE_DIM(packets), 0); + + if (n == 0) { + for (j = 0; j < rte_eth_dev_count(); j++) + rte_eth_tx_buffer_flush(j, 0, fdata->tx_buf[j]); + return 0; + } + if (start_time == 0) + last_time = start_time = rte_get_timer_cycles(); + + received += n; + for (i = 0; i < n; i++) { + uint8_t outport = packets[i].mbuf->port; + rte_eth_tx_buffer(outport, 0, fdata->tx_buf[outport], + packets[i].mbuf); + } + + /* Print out mpps every 1<22 packets */ + if (!cdata.quiet && received >= last_pkts + (1<<22)) { + const uint64_t now = rte_get_timer_cycles(); + const uint64_t total_ms = (now - start_time) / freq_khz; + const uint64_t delta_ms = (now - last_time) / freq_khz; + uint64_t delta_pkts = received - last_pkts; + + printf("# consumer RX=%"PRIu64", time %"PRIu64 "ms, " + "avg %.3f mpps [current 
%.3f mpps]\n", + received, + total_ms, + received / (total_ms * 1000.0), + delta_pkts / (delta_ms * 1000.0)); + last_pkts = received; + last_time = now; + } + + cdata.num_packets -= n; + if (cdata.num_packets <= 0) + fdata->done = 1; + + return 0; +} + +static int +producer(void) +{ + static uint8_t eth_port; + struct rte_mbuf *mbufs[BATCH_SIZE+2]; + struct rte_event ev[BATCH_SIZE+2]; + uint32_t i, num_ports = prod_data.num_nic_ports; + int32_t qid = prod_data.qid; + uint8_t dev_id = prod_data.dev_id; + uint8_t port_id = prod_data.port_id; + uint32_t prio_idx = 0; + + const uint16_t nb_rx = rte_eth_rx_burst(eth_port, 0, mbufs, BATCH_SIZE); + if (++eth_port == num_ports) + eth_port = 0; + if (nb_rx == 0) { + rte_pause(); + return 0; + } + + for (i = 0; i < nb_rx; i++) { + ev[i].flow_id = mbufs[i]->hash.rss; + ev[i].op = RTE_EVENT_OP_NEW; + ev[i].sched_type = cdata.queue_type; + ev[i].queue_id = qid; + ev[i].event_type = RTE_EVENT_TYPE_ETHDEV; + ev[i].sub_event_type = 0; + ev[i].priority = RTE_EVENT_DEV_PRIORITY_NORMAL; + ev[i].mbuf = mbufs[i]; + RTE_SET_USED(prio_idx); + } + + const int nb_tx = rte_event_enqueue_burst(dev_id, port_id, ev, nb_rx); + if (nb_tx != nb_rx) { + for (i = nb_tx; i < nb_rx; i++) + rte_pktmbuf_free(mbufs[i]); + } + + return 0; +} + +static inline void +schedule_devices(uint8_t dev_id, unsigned int lcore_id) +{ + if (fdata->rx_core[lcore_id] && (fdata->rx_single || + rte_atomic32_cmpset(&(fdata->rx_lock), 0, 1))) { + producer(); + rte_atomic32_clear((rte_atomic32_t *)&(fdata->rx_lock)); + } + + if (fdata->sched_core[lcore_id] && (fdata->sched_single || + rte_atomic32_cmpset(&(fdata->sched_lock), 0, 1))) { + rte_event_schedule(dev_id); + if (cdata.dump_dev_signal) { + rte_event_dev_dump(0, stdout); + cdata.dump_dev_signal = 0; + } + rte_atomic32_clear((rte_atomic32_t *)&(fdata->sched_lock)); + } + + if (fdata->tx_core[lcore_id] && (fdata->tx_single || + rte_atomic32_cmpset(&(fdata->tx_lock), 0, 1))) { + consumer(); + rte_atomic32_clear((rte_atomic32_t *)&(fdata->tx_lock)); + } +} + +static inline void +work(struct rte_mbuf *m) +{ + struct ether_hdr *eth; + struct ether_addr addr; + + /* change mac addresses on packet (to use mbuf data) */ + /* + * FIXME Swap mac address properly and also handle the + * case for both odd and even number of stages that the + * addresses end up the same at the end of the pipeline + */ + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + ether_addr_copy(ð->d_addr, &addr); + ether_addr_copy(&addr, ð->d_addr); + + /* do a number of cycles of work per packet */ + volatile uint64_t start_tsc = rte_rdtsc(); + while (rte_rdtsc() < start_tsc + cdata.worker_cycles) + rte_pause(); +} + +static int +worker(void *arg) +{ + struct rte_event events[BATCH_SIZE]; + + struct worker_data *data = (struct worker_data *)arg; + uint8_t dev_id = data->dev_id; + uint8_t port_id = data->port_id; + size_t sent = 0, received = 0; + unsigned int lcore_id = rte_lcore_id(); + + while (!fdata->done) { + uint16_t i; + + schedule_devices(dev_id, lcore_id); + + if (!fdata->worker_core[lcore_id]) { + rte_pause(); + continue; + } + + const uint16_t nb_rx = rte_event_dequeue_burst(dev_id, port_id, + events, RTE_DIM(events), 0); + + if (nb_rx == 0) { + rte_pause(); + continue; + } + received += nb_rx; + + for (i = 0; i < nb_rx; i++) { + + /* The first worker stage does classification */ + if (events[i].queue_id == cdata.qid[0]) + events[i].flow_id = events[i].mbuf->hash.rss + % cdata.num_fids; + + events[i].queue_id = cdata.next_qid[events[i].queue_id]; + events[i].op = 
RTE_EVENT_OP_FORWARD; + events[i].sched_type = cdata.queue_type; + + work(events[i].mbuf); + } + uint16_t nb_tx = rte_event_enqueue_burst(dev_id, port_id, + events, nb_rx); + while (nb_tx < nb_rx && !fdata->done) + nb_tx += rte_event_enqueue_burst(dev_id, port_id, + events + nb_tx, + nb_rx - nb_tx); + sent += nb_tx; + } + + if (!cdata.quiet) + printf(" worker %u thread done. RX=%zu TX=%zu\n", + rte_lcore_id(), received, sent); + + return 0; +} + +/* + * Parse the coremask given as argument (hexadecimal string) and fill + * the global configuration (core role and core count) with the parsed + * value. + */ +static int xdigit2val(unsigned char c) +{ + int val; + + if (isdigit(c)) + val = c - '0'; + else if (isupper(c)) + val = c - 'A' + 10; + else + val = c - 'a' + 10; + return val; +} + +static uint64_t +parse_coremask(const char *coremask) +{ + int i, j, idx = 0; + unsigned int count = 0; + char c; + int val; + uint64_t mask = 0; + const int32_t BITS_HEX = 4; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . + * Remove 0x/0X if exists. + */ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < MAX_NUM_CORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_HEX && idx < MAX_NUM_CORE; j++, idx++) { + if ((1 << j) & val) { + mask |= (1UL << idx); + count++; + } + } + } + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + if (count == 0) + return -1; + return mask; +} + +static struct option long_options[] = { + {"workers", required_argument, 0, 'w'}, + {"packets", required_argument, 0, 'n'}, + {"atomic-flows", required_argument, 0, 'f'}, + {"num_stages", required_argument, 0, 's'}, + {"rx-mask", required_argument, 0, 'r'}, + {"tx-mask", required_argument, 0, 't'}, + {"sched-mask", required_argument, 0, 'e'}, + {"cq-depth", required_argument, 0, 'c'}, + {"work-cycles", required_argument, 0, 'W'}, + {"queue-priority", no_argument, 0, 'P'}, + {"parallel", no_argument, 0, 'p'}, + {"ordered", no_argument, 0, 'o'}, + {"quiet", no_argument, 0, 'q'}, + {"dump", no_argument, 0, 'D'}, + {0, 0, 0, 0} +}; + +static void +usage(void) +{ + const char *usage_str = + " Usage: eventdev_demo [options]\n" + " Options:\n" + " -n, --packets=N Send N packets (default ~32M), 0 implies no limit\n" + " -f, --atomic-flows=N Use N random flows from 1 to N (default 16)\n" + " -s, --num_stages=N Use N atomic stages (default 1)\n" + " -r, --rx-mask=core mask Run NIC rx on CPUs in core mask\n" + " -w, --worker-mask=core mask Run worker on CPUs in core mask\n" + " -t, --tx-mask=core mask Run NIC tx on CPUs in core mask\n" + " -e --sched-mask=core mask Run scheduler on CPUs in core mask\n" + " -c --cq-depth=N Worker CQ depth (default 16)\n" + " -W --work-cycles=N Worker cycles (default 0)\n" + " -P --queue-priority Enable scheduler queue prioritization\n" + " -o, --ordered Use ordered scheduling\n" + " -p, --parallel Use parallel scheduling\n" + " -q, --quiet Minimize printed output\n" + " -D, --dump Print detailed statistics before exit" + "\n"; + fprintf(stderr, "%s", usage_str); + exit(1); +} + +static void +parse_app_args(int argc, char **argv) +{ + /* Parse cli options*/ + int option_index; + int c; + opterr = 0; + uint64_t rx_lcore_mask = 0; + uint64_t 
tx_lcore_mask = 0; + uint64_t sched_lcore_mask = 0; + uint64_t worker_lcore_mask = 0; + int i; + + for (;;) { + c = getopt_long(argc, argv, "r:t:e:c:w:n:f:s:poPqDW:", + long_options, &option_index); + if (c == -1) + break; + + int popcnt = 0; + switch (c) { + case 'n': + cdata.num_packets = (int64_t)atol(optarg); + if (cdata.num_packets == 0) + cdata.num_packets = INT64_MAX; + break; + case 'f': + cdata.num_fids = (unsigned int)atoi(optarg); + break; + case 's': + cdata.num_stages = (unsigned int)atoi(optarg); + break; + case 'c': + cdata.worker_cq_depth = (unsigned int)atoi(optarg); + break; + case 'W': + cdata.worker_cycles = (unsigned int)atoi(optarg); + break; + case 'P': + cdata.enable_queue_priorities = 1; + break; + case 'o': + cdata.queue_type = RTE_EVENT_QUEUE_CFG_ORDERED_ONLY; + break; + case 'p': + cdata.queue_type = RTE_EVENT_QUEUE_CFG_PARALLEL_ONLY; + break; + case 'q': + cdata.quiet = 1; + break; + case 'D': + cdata.dump_dev = 1; + break; + case 'w': + worker_lcore_mask = parse_coremask(optarg); + break; + case 'r': + rx_lcore_mask = parse_coremask(optarg); + popcnt = __builtin_popcountll(rx_lcore_mask); + fdata->rx_single = (popcnt == 1); + break; + case 't': + tx_lcore_mask = parse_coremask(optarg); + popcnt = __builtin_popcountll(tx_lcore_mask); + fdata->tx_single = (popcnt == 1); + break; + case 'e': + sched_lcore_mask = parse_coremask(optarg); + popcnt = __builtin_popcountll(sched_lcore_mask); + fdata->sched_single = (popcnt == 1); + break; + default: + usage(); + } + } + + if (worker_lcore_mask == 0 || rx_lcore_mask == 0 || + sched_lcore_mask == 0 || tx_lcore_mask == 0) { + printf("Core part of pipeline was not assigned any cores. " + "This will stall the pipeline, please check core masks " + "(use -h for details on setting core masks):\n" + "\trx: %"PRIu64"\n\ttx: %"PRIu64"\n\tsched: %"PRIu64 + "\n\tworkers: %"PRIu64"\n", + rx_lcore_mask, tx_lcore_mask, sched_lcore_mask, + worker_lcore_mask); + rte_exit(-1, "Fix core masks\n"); + } + if (cdata.num_stages == 0 || cdata.num_stages > MAX_NUM_STAGES) + usage(); + + for (i = 0; i < MAX_NUM_CORE; i++) { + fdata->rx_core[i] = !!(rx_lcore_mask & (1UL << i)); + fdata->tx_core[i] = !!(tx_lcore_mask & (1UL << i)); + fdata->sched_core[i] = !!(sched_lcore_mask & (1UL << i)); + fdata->worker_core[i] = !!(worker_lcore_mask & (1UL << i)); + + if (fdata->worker_core[i]) + cdata.num_workers++; + if (core_in_use(i)) + cdata.active_cores++; + } +} + +/* + * Initializes a given port using global settings and with the RX buffers + * coming from the mbuf_pool passed as a parameter. + */ +static inline int +port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + static const struct rte_eth_conf port_conf_default = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = ETHER_MAX_LEN + }, + .rx_adv_conf = { + .rss_conf = { + .rss_hf = ETH_RSS_IP | + ETH_RSS_TCP | + ETH_RSS_UDP, + } + } + }; + const uint16_t rx_rings = 1, tx_rings = 1; + const uint16_t rx_ring_size = 512, tx_ring_size = 512; + struct rte_eth_conf port_conf = port_conf_default; + int retval; + uint16_t q; + + if (port >= rte_eth_dev_count()) + return -1; + + /* Configure the Ethernet device. */ + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval != 0) + return retval; + + /* Allocate and set up 1 RX queue per Ethernet port. 
*/ + for (q = 0; q < rx_rings; q++) { + retval = rte_eth_rx_queue_setup(port, q, rx_ring_size, + rte_eth_dev_socket_id(port), NULL, mbuf_pool); + if (retval < 0) + return retval; + } + + /* Allocate and set up 1 TX queue per Ethernet port. */ + for (q = 0; q < tx_rings; q++) { + retval = rte_eth_tx_queue_setup(port, q, tx_ring_size, + rte_eth_dev_socket_id(port), NULL); + if (retval < 0) + return retval; + } + + /* Start the Ethernet port. */ + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + /* Display the port MAC address. */ + struct ether_addr addr; + rte_eth_macaddr_get(port, &addr); + printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 + " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", + (unsigned int)port, + addr.addr_bytes[0], addr.addr_bytes[1], + addr.addr_bytes[2], addr.addr_bytes[3], + addr.addr_bytes[4], addr.addr_bytes[5]); + + /* Enable RX in promiscuous mode for the Ethernet device. */ + rte_eth_promiscuous_enable(port); + + return 0; +} + +static int +init_ports(unsigned int num_ports) +{ + uint8_t portid; + unsigned int i; + + struct rte_mempool *mp = rte_pktmbuf_pool_create("packet_pool", + /* mbufs */ 16384 * num_ports, + /* cache_size */ 512, + /* priv_size*/ 0, + /* data_room_size */ RTE_MBUF_DEFAULT_BUF_SIZE, + rte_socket_id()); + + for (portid = 0; portid < num_ports; portid++) + if (port_init(portid, mp) != 0) + rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8 "\n", + portid); + + for (i = 0; i < num_ports; i++) { + void *userdata = (void *)(uintptr_t) i; + fdata->tx_buf[i] = + rte_malloc(NULL, RTE_ETH_TX_BUFFER_SIZE(32), 0); + if (fdata->tx_buf[i] == NULL) + rte_panic("Out of memory\n"); + rte_eth_tx_buffer_init(fdata->tx_buf[i], 32); + rte_eth_tx_buffer_set_err_callback(fdata->tx_buf[i], + eth_tx_buffer_retry, + userdata); + } + + return 0; +} + +struct port_link { + uint8_t queue_id; + uint8_t priority; +}; + +static int +setup_eventdev(struct prod_data *prod_data, + struct cons_data *cons_data, + struct worker_data *worker_data) +{ + const uint8_t dev_id = 0; + /* +1 stages is for a SINGLE_LINK TX stage */ + const uint8_t nb_queues = cdata.num_stages + 1; + /* + 2 is one port for producer and one for consumer */ + const uint8_t nb_ports = cdata.num_workers + 2; + struct rte_event_dev_config config = { + .nb_event_queues = nb_queues, + .nb_event_ports = nb_ports, + .nb_events_limit = 4096, + .nb_event_queue_flows = 1024, + .nb_event_port_dequeue_depth = 128, + .nb_event_port_enqueue_depth = 128, + }; + struct rte_event_port_conf wkr_p_conf = { + .dequeue_depth = cdata.worker_cq_depth, + .enqueue_depth = 64, + .new_event_threshold = 4096, + }; + struct rte_event_queue_conf wkr_q_conf = { + .event_queue_cfg = cdata.queue_type, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + struct rte_event_port_conf tx_p_conf = { + .dequeue_depth = 128, + .enqueue_depth = 128, + .new_event_threshold = 4096, + }; + const struct rte_event_queue_conf tx_q_conf = { + .priority = RTE_EVENT_DEV_PRIORITY_HIGHEST, + .event_queue_cfg = + RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY | + RTE_EVENT_QUEUE_CFG_SINGLE_LINK, + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + + struct port_link worker_queues[MAX_NUM_STAGES]; + struct port_link tx_queue; + unsigned int i; + + int ret, ndev = rte_event_dev_count(); + if (ndev < 1) { + printf("%d: No Eventdev Devices Found\n", __LINE__); + return -1; + } + + struct rte_event_dev_info dev_info; + ret = rte_event_dev_info_get(dev_id, &dev_info); + 
printf("\tEventdev %d: %s\n", dev_id, dev_info.driver_name); + + if (dev_info.max_event_port_dequeue_depth < + config.nb_event_port_dequeue_depth) + config.nb_event_port_dequeue_depth = + dev_info.max_event_port_dequeue_depth; + if (dev_info.max_event_port_enqueue_depth < + config.nb_event_port_enqueue_depth) + config.nb_event_port_enqueue_depth = + dev_info.max_event_port_enqueue_depth; + + ret = rte_event_dev_configure(dev_id, &config); + if (ret < 0) { + printf("%d: Error configuring device\n", __LINE__); + return -1; + } + + /* Q creation - one load balanced per pipeline stage*/ + printf(" Stages:\n"); + for (i = 0; i < cdata.num_stages; i++) { + if (rte_event_queue_setup(dev_id, i, &wkr_q_conf) < 0) { + printf("%d: error creating qid %d\n", __LINE__, i); + return -1; + } + cdata.qid[i] = i; + cdata.next_qid[i] = i+1; + worker_queues[i].queue_id = i; + if (cdata.enable_queue_priorities) { + /* calculate priority stepping for each stage, leaving + * headroom of 1 for the SINGLE_LINK TX below + */ + const uint32_t prio_delta = + (RTE_EVENT_DEV_PRIORITY_LOWEST-1) / nb_queues; + + /* higher priority for queues closer to tx */ + wkr_q_conf.priority = + RTE_EVENT_DEV_PRIORITY_LOWEST - prio_delta * i; + } + + const char *type_str = "Atomic"; + switch (wkr_q_conf.event_queue_cfg) { + case RTE_EVENT_QUEUE_CFG_ORDERED_ONLY: + type_str = "Ordered"; + break; + case RTE_EVENT_QUEUE_CFG_PARALLEL_ONLY: + type_str = "Parallel"; + break; + } + printf("\tStage %d, Type %s\tPriority = %d\n", i, type_str, + wkr_q_conf.priority); + } + printf("\n"); + + /* final queue for sending to TX core */ + if (rte_event_queue_setup(dev_id, i, &tx_q_conf) < 0) { + printf("%d: error creating qid %d\n", __LINE__, i); + return -1; + } + tx_queue.queue_id = i; + tx_queue.priority = RTE_EVENT_DEV_PRIORITY_HIGHEST; + + if (wkr_p_conf.dequeue_depth > config.nb_event_port_dequeue_depth) + wkr_p_conf.dequeue_depth = config.nb_event_port_dequeue_depth; + if (wkr_p_conf.enqueue_depth > config.nb_event_port_enqueue_depth) + wkr_p_conf.enqueue_depth = config.nb_event_port_enqueue_depth; + + /* set up one port per worker, linking to all stage queues */ + for (i = 0; i < cdata.num_workers; i++) { + struct worker_data *w = &worker_data[i]; + w->dev_id = dev_id; + if (rte_event_port_setup(dev_id, i, &wkr_p_conf) < 0) { + printf("Error setting up port %d\n", i); + return -1; + } + + uint32_t s; + for (s = 0; s < cdata.num_stages; s++) { + if (rte_event_port_link(dev_id, i, + &worker_queues[s].queue_id, + &worker_queues[s].priority, + 1) != 1) { + printf("%d: error creating link for port %d\n", + __LINE__, i); + return -1; + } + } + w->port_id = i; + } + + if (tx_p_conf.dequeue_depth > config.nb_event_port_dequeue_depth) + tx_p_conf.dequeue_depth = config.nb_event_port_dequeue_depth; + if (tx_p_conf.enqueue_depth > config.nb_event_port_enqueue_depth) + tx_p_conf.enqueue_depth = config.nb_event_port_enqueue_depth; + + /* port for consumer, linked to TX queue */ + if (rte_event_port_setup(dev_id, i, &tx_p_conf) < 0) { + printf("Error setting up port %d\n", i); + return -1; + } + if (rte_event_port_link(dev_id, i, &tx_queue.queue_id, + &tx_queue.priority, 1) != 1) { + printf("%d: error creating link for port %d\n", + __LINE__, i); + return -1; + } + /* port for producer, no links */ + struct rte_event_port_conf rx_p_conf = { + .dequeue_depth = 8, + .enqueue_depth = 8, + .new_event_threshold = 1200, + }; + + if (rx_p_conf.dequeue_depth > config.nb_event_port_dequeue_depth) + rx_p_conf.dequeue_depth = config.nb_event_port_dequeue_depth; + if 
(rx_p_conf.enqueue_depth > config.nb_event_port_enqueue_depth) + rx_p_conf.enqueue_depth = config.nb_event_port_enqueue_depth; + + if (rte_event_port_setup(dev_id, i + 1, &rx_p_conf) < 0) { + printf("Error setting up port %d\n", i); + return -1; + } + + *prod_data = (struct prod_data){.dev_id = dev_id, + .port_id = i + 1, + .qid = cdata.qid[0] }; + *cons_data = (struct cons_data){.dev_id = dev_id, + .port_id = i }; + + if (rte_event_dev_start(dev_id) < 0) { + printf("Error starting eventdev\n"); + return -1; + } + + return dev_id; +} + +static void +signal_handler(int signum) +{ + if (fdata->done) + rte_exit(1, "Exiting on signal %d\n", signum); + if (signum == SIGINT || signum == SIGTERM) { + printf("\n\nSignal %d received, preparing to exit...\n", + signum); + fdata->done = 1; + } + if (signum == SIGTSTP) + rte_event_dev_dump(0, stdout); +} + +static inline uint64_t +port_stat(int dev_id, int32_t p) +{ + char statname[64]; + snprintf(statname, sizeof(statname), "port_%u_rx", p); + return rte_event_dev_xstats_by_name_get(dev_id, statname, NULL); +} + +int +main(int argc, char **argv) +{ + struct worker_data *worker_data; + unsigned int num_ports; + int lcore_id; + int err; + + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + signal(SIGTSTP, signal_handler); + + err = rte_eal_init(argc, argv); + if (err < 0) + rte_panic("Invalid EAL arguments\n"); + + argc -= err; + argv += err; + + fdata = rte_malloc(NULL, sizeof(struct fastpath_data), 0); + if (fdata == NULL) + rte_panic("Out of memory\n"); + + /* Parse cli options*/ + parse_app_args(argc, argv); + + num_ports = rte_eth_dev_count(); + if (num_ports == 0) + rte_panic("No ethernet ports found\n"); + + const unsigned int cores_needed = cdata.active_cores; + + if (!cdata.quiet) { + printf(" Config:\n"); + printf("\tports: %u\n", num_ports); + printf("\tworkers: %u\n", cdata.num_workers); + printf("\tpackets: %"PRIi64"\n", cdata.num_packets); + printf("\tQueue-prio: %u\n", cdata.enable_queue_priorities); + if (cdata.queue_type == RTE_EVENT_QUEUE_CFG_ORDERED_ONLY) + printf("\tqid0 type: ordered\n"); + if (cdata.queue_type == RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY) + printf("\tqid0 type: atomic\n"); + printf("\tCores available: %u\n", rte_lcore_count()); + printf("\tCores used: %u\n", cores_needed); + } + + if (rte_lcore_count() < cores_needed) + rte_panic("Too few cores (%d < %d)\n", rte_lcore_count(), + cores_needed); + + const unsigned int ndevs = rte_event_dev_count(); + if (ndevs == 0) + rte_panic("No dev_id devs found. 
Pasl in a --vdev eventdev.\n"); + if (ndevs > 1) + fprintf(stderr, "Warning: More than one eventdev, using idx 0"); + + worker_data = rte_calloc(0, cdata.num_workers, + sizeof(worker_data[0]), 0); + if (worker_data == NULL) + rte_panic("rte_calloc failed\n"); + + int dev_id = setup_eventdev(&prod_data, &cons_data, worker_data); + if (dev_id < 0) + rte_exit(EXIT_FAILURE, "Error setting up eventdev\n"); + + prod_data.num_nic_ports = num_ports; + init_ports(num_ports); + + int worker_idx = 0; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (lcore_id >= MAX_NUM_CORE) + break; + + if (!fdata->rx_core[lcore_id] && + !fdata->worker_core[lcore_id] && + !fdata->tx_core[lcore_id] && + !fdata->sched_core[lcore_id]) + continue; + + if (fdata->rx_core[lcore_id]) + printf( + "[%s()] lcore %d executing NIC Rx, and using eventdev port %u\n", + __func__, lcore_id, prod_data.port_id); + + if (fdata->tx_core[lcore_id]) + printf( + "[%s()] lcore %d executing NIC Tx, and using eventdev port %u\n", + __func__, lcore_id, cons_data.port_id); + + if (fdata->sched_core[lcore_id]) + printf("[%s()] lcore %d executing scheduler\n", + __func__, lcore_id); + + if (fdata->worker_core[lcore_id]) + printf( + "[%s()] lcore %d executing worker, using eventdev port %u\n", + __func__, lcore_id, + worker_data[worker_idx].port_id); + + err = rte_eal_remote_launch(worker, &worker_data[worker_idx], + lcore_id); + if (err) { + rte_panic("Failed to launch worker on core %d\n", + lcore_id); + continue; + } + if (fdata->worker_core[lcore_id]) + worker_idx++; + } + + lcore_id = rte_lcore_id(); + + if (core_in_use(lcore_id)) + worker(&worker_data[worker_idx++]); + + rte_eal_mp_wait_lcore(); + + if (cdata.dump_dev) + rte_event_dev_dump(dev_id, stdout); + + if (!cdata.quiet && (port_stat(dev_id, worker_data[0].port_id) != + (uint64_t)-ENOTSUP)) { + printf("\nPort Workload distribution:\n"); + uint32_t i; + uint64_t tot_pkts = 0; + uint64_t pkts_per_wkr[RTE_MAX_LCORE] = {0}; + for (i = 0; i < cdata.num_workers; i++) { + pkts_per_wkr[i] = + port_stat(dev_id, worker_data[i].port_id); + tot_pkts += pkts_per_wkr[i]; + } + for (i = 0; i < cdata.num_workers; i++) { + float pc = pkts_per_wkr[i] * 100 / + ((float)tot_pkts); + printf("worker %i :\t%.1f %% (%"PRIu64" pkts)\n", + i, pc, pkts_per_wkr[i]); + } + + } + + return 0; +} diff --git a/examples/exception_path/Makefile b/examples/exception_path/Makefile index 4b6e0717..d16f74f6 100644 --- a/examples/exception_path/Makefile +++ b/examples/exception_path/Makefile @@ -33,18 +33,11 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk -ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") -$(info This application can only operate in a linuxapp environment, \ -please change the definition of the RTE_TARGET environment variable) -all: -clean: -else - # binary name APP = exception_path @@ -55,5 +48,3 @@ CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) include $(RTE_SDK)/mk/rte.extapp.mk - -endif diff --git a/examples/exception_path/main.c b/examples/exception_path/main.c index 89bf1cc0..e551e6d1 100644 --- a/examples/exception_path/main.c +++ b/examples/exception_path/main.c @@ -42,8 +42,10 @@ #include <getopt.h> #include <netinet/in.h> -#include <linux/if.h> +#include <net/if.h> +#ifdef RTE_EXEC_ENV_LINUXAPP #include <linux/if_tun.h> +#endif #include <fcntl.h> #include <sys/ioctl.h> #include 
<unistd.h> @@ -65,7 +67,6 @@ #include <rte_debug.h> #include <rte_ether.h> #include <rte_ethdev.h> -#include <rte_log.h> #include <rte_mempool.h> #include <rte_mbuf.h> #include <rte_string_fns.h> @@ -182,6 +183,7 @@ signal_handler(int signum) } } +#ifdef RTE_EXEC_ENV_LINUXAPP /* * Create a tap network interface, or use existing one with same name. * If name[0]='\0' then a name is automatically assigned and returned in name. @@ -214,6 +216,29 @@ static int tap_create(char *name) return fd; } +#else +/* + * Find a free tap network interface, or create a new one. + * The name is automatically assigned and returned in name. + */ +static int tap_create(char *name) +{ + int i, fd = -1; + char devname[PATH_MAX]; + + for (i = 0; i < 255; i++) { + snprintf(devname, sizeof(devname), "/dev/tap%d", i); + fd = open(devname, O_RDWR); + if (fd >= 0 || errno != EBUSY) + break; + } + + if (name) + snprintf(name, IFNAMSIZ, "tap%d", i); + + return fd; +} +#endif /* Main processing loop */ static int @@ -422,6 +447,8 @@ static void init_port(uint8_t port) { int ret; + uint16_t nb_rxd = NB_RXD; + uint16_t nb_txd = NB_TXD; /* Initialise device and RX/TX queues */ PRINT_INFO("Initialising port %u ...", (unsigned)port); @@ -431,14 +458,21 @@ init_port(uint8_t port) FATAL_ERROR("Could not configure port%u (%d)", (unsigned)port, ret); - ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, rte_eth_dev_socket_id(port), + ret = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if (ret < 0) + FATAL_ERROR("Could not adjust number of descriptors for port%u (%d)", + (unsigned)port, ret); + + ret = rte_eth_rx_queue_setup(port, 0, nb_rxd, + rte_eth_dev_socket_id(port), NULL, pktmbuf_pool); if (ret < 0) FATAL_ERROR("Could not setup up RX queue for port%u (%d)", (unsigned)port, ret); - ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, rte_eth_dev_socket_id(port), + ret = rte_eth_tx_queue_setup(port, 0, nb_txd, + rte_eth_dev_socket_id(port), NULL); if (ret < 0) FATAL_ERROR("Could not setup up TX queue for port%u (%d)", diff --git a/examples/helloworld/Makefile b/examples/helloworld/Makefile index d2cca7a7..c83ec01e 100644 --- a/examples/helloworld/Makefile +++ b/examples/helloworld/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/ip_fragmentation/Makefile b/examples/ip_fragmentation/Makefile index c321e6a1..4bc01abb 100644 --- a/examples/ip_fragmentation/Makefile +++ b/examples/ip_fragmentation/Makefile @@ -34,7 +34,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/ip_fragmentation/main.c b/examples/ip_fragmentation/main.c index 71c1d12f..8c0e1791 100644 --- a/examples/ip_fragmentation/main.c +++ b/examples/ip_fragmentation/main.c @@ -50,7 +50,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -960,6 +959,14 @@ main(int argc, char **argv) ret, portid); } + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) { + printf("\n"); + 
rte_exit(EXIT_FAILURE, "Cannot adjust number of " + "descriptors: err=%d, port=%d\n", ret, portid); + } + /* init one RX queue */ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, socket, NULL, @@ -1020,7 +1027,7 @@ main(int argc, char **argv) if (check_ptype(portid) == 0) { rte_eth_add_rx_callback(portid, 0, cb_parse_ptype, NULL); - printf("Add Rx callback funciton to detect L3 packet type by SW :" + printf("Add Rx callback function to detect L3 packet type by SW :" " port = %d\n", portid); } } diff --git a/examples/ip_pipeline/init.c b/examples/ip_pipeline/init.c index be148fca..7cde49a4 100644 --- a/examples/ip_pipeline/init.c +++ b/examples/ip_pipeline/init.c @@ -1003,16 +1003,30 @@ app_init_link(struct app_params *app) struct app_pktq_hwq_in_params *p_rxq = &app->hwq_in_params[j]; uint32_t rxq_link_id, rxq_queue_id; + uint16_t nb_rxd = p_rxq->size; sscanf(p_rxq->name, "RXQ%" PRIu32 ".%" PRIu32, &rxq_link_id, &rxq_queue_id); if (rxq_link_id != link_id) continue; + status = rte_eth_dev_adjust_nb_rx_tx_desc( + p_link->pmd_id, + &nb_rxd, + NULL); + if (status < 0) + rte_panic("%s (%" PRIu32 "): " + "%s adjust number of Rx descriptors " + "error (%" PRId32 ")\n", + p_link->name, + p_link->pmd_id, + p_rxq->name, + status); + status = rte_eth_rx_queue_setup( p_link->pmd_id, rxq_queue_id, - p_rxq->size, + nb_rxd, app_get_cpu_socket_id(p_link->pmd_id), &p_rxq->conf, app->mempool[p_rxq->mempool_id]); @@ -1030,16 +1044,30 @@ app_init_link(struct app_params *app) struct app_pktq_hwq_out_params *p_txq = &app->hwq_out_params[j]; uint32_t txq_link_id, txq_queue_id; + uint16_t nb_txd = p_txq->size; sscanf(p_txq->name, "TXQ%" PRIu32 ".%" PRIu32, &txq_link_id, &txq_queue_id); if (txq_link_id != link_id) continue; + status = rte_eth_dev_adjust_nb_rx_tx_desc( + p_link->pmd_id, + NULL, + &nb_txd); + if (status < 0) + rte_panic("%s (%" PRIu32 "): " + "%s adjust number of Tx descriptors " + "error (%" PRId32 ")\n", + p_link->name, + p_link->pmd_id, + p_txq->name, + status); + status = rte_eth_tx_queue_setup( p_link->pmd_id, txq_queue_id, - p_txq->size, + nb_txd, app_get_cpu_socket_id(p_link->pmd_id), &p_txq->conf); if (status < 0) diff --git a/examples/ip_pipeline/pipeline/hash_func.h b/examples/ip_pipeline/pipeline/hash_func.h index 9db7173f..b112369c 100644 --- a/examples/ip_pipeline/pipeline/hash_func.h +++ b/examples/ip_pipeline/pipeline/hash_func.h @@ -152,7 +152,7 @@ hash_xor_key64(void *key, __rte_unused uint32_t key_size, uint64_t seed) return (xor0 >> 32) ^ xor0; } -#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2) +#if defined(RTE_ARCH_X86_64) #include <x86intrin.h> diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c index 7ab0afed..8cb2f0c7 100644 --- a/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c +++ b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c @@ -76,7 +76,7 @@ static pipeline_msg_req_handler handlers[] = { pipeline_msg_req_invalid_handler, }; -static inline __attribute__((always_inline)) void +static __rte_always_inline void pkt_work_dma( struct rte_mbuf *pkt, void *arg, @@ -121,7 +121,7 @@ pkt_work_dma( } } -static inline __attribute__((always_inline)) void +static __rte_always_inline void pkt4_work_dma( struct rte_mbuf **pkts, void *arg, @@ -217,7 +217,7 @@ pkt4_work_dma( } } -static inline __attribute__((always_inline)) void +static __rte_always_inline void pkt_work_swap( struct rte_mbuf *pkt, void *arg) @@ -241,7 +241,7 @@ pkt_work_swap( } } -static inline 
__attribute__((always_inline)) void +static __rte_always_inline void pkt4_work_swap( struct rte_mbuf **pkts, void *arg) diff --git a/examples/ip_pipeline/pipeline/pipeline_routing_be.c b/examples/ip_pipeline/pipeline/pipeline_routing_be.c index 21ac7888..78317165 100644 --- a/examples/ip_pipeline/pipeline/pipeline_routing_be.c +++ b/examples/ip_pipeline/pipeline/pipeline_routing_be.c @@ -191,7 +191,7 @@ struct layout { dst->c = src->c; \ } -static inline __attribute__((always_inline)) void +static __rte_always_inline void pkt_work_routing( struct rte_mbuf *pkt, struct rte_pipeline_table_entry *table_entry, @@ -317,7 +317,7 @@ pkt_work_routing( } } -static inline __attribute__((always_inline)) void +static __rte_always_inline void pkt4_work_routing( struct rte_mbuf **pkts, struct rte_pipeline_table_entry **table_entries, diff --git a/examples/ip_reassembly/Makefile b/examples/ip_reassembly/Makefile index d9539a3a..85c64a38 100644 --- a/examples/ip_reassembly/Makefile +++ b/examples/ip_reassembly/Makefile @@ -34,7 +34,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c index c0f3ced6..e62636cb 100644 --- a/examples/ip_reassembly/main.c +++ b/examples/ip_reassembly/main.c @@ -51,7 +51,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -904,7 +903,7 @@ setup_queue_tbl(struct rx_queue *rxq, uint32_t lcore, uint32_t queue) nb_mbuf = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM; nb_mbuf *= (port_conf.rxmode.max_rx_pkt_len + BUF_SIZE - 1) / BUF_SIZE; nb_mbuf *= 2; /* ipv4 and ipv6 */ - nb_mbuf += RTE_TEST_RX_DESC_DEFAULT + RTE_TEST_TX_DESC_DEFAULT; + nb_mbuf += nb_rxd + nb_txd; nb_mbuf = RTE_MAX(nb_mbuf, (uint32_t)NB_MBUF); @@ -1088,6 +1087,14 @@ main(int argc, char **argv) rxq->portid = portid; rxq->lpm = socket_lpm[socket]; rxq->lpm6 = socket_lpm6[socket]; + + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors: err=%d, port=%d\n", + ret, portid); + if (setup_queue_tbl(rxq, rx_lcore_id, queueid) < 0) rte_exit(EXIT_FAILURE, "Failed to set up queue table\n"); qconf->n_rx_queue++; diff --git a/examples/ipsec-secgw/esp.c b/examples/ipsec-secgw/esp.c index e77afa0e..70bb81f7 100644 --- a/examples/ipsec-secgw/esp.c +++ b/examples/ipsec-secgw/esp.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -84,68 +84,79 @@ esp_inbound(struct rte_mbuf *m, struct ipsec_sa *sa, } sym_cop = get_sym_cop(cop); - sym_cop->m_src = m; - sym_cop->cipher.data.offset = ip_hdr_len + sizeof(struct esp_hdr) + - sa->iv_len; - sym_cop->cipher.data.length = payload_len; - - struct cnt_blk *icb; - uint8_t *aad; - uint8_t *iv = RTE_PTR_ADD(ip4, ip_hdr_len + sizeof(struct esp_hdr)); - - switch (sa->cipher_algo) { - case RTE_CRYPTO_CIPHER_NULL: - case RTE_CRYPTO_CIPHER_AES_CBC: - sym_cop->cipher.iv.data = iv; - sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(m, - ip_hdr_len + sizeof(struct esp_hdr)); - sym_cop->cipher.iv.length = sa->iv_len; - break; - case RTE_CRYPTO_CIPHER_AES_CTR: - case RTE_CRYPTO_CIPHER_AES_GCM: + + if (sa->aead_algo == RTE_CRYPTO_AEAD_AES_GCM) { + sym_cop->aead.data.offset = ip_hdr_len + sizeof(struct esp_hdr) + + sa->iv_len; + sym_cop->aead.data.length = payload_len; + + struct cnt_blk *icb; + uint8_t *aad; + uint8_t *iv = RTE_PTR_ADD(ip4, ip_hdr_len + sizeof(struct esp_hdr)); + icb = get_cnt_blk(m); icb->salt = sa->salt; memcpy(&icb->iv, iv, 8); icb->cnt = rte_cpu_to_be_32(1); - sym_cop->cipher.iv.data = (uint8_t *)icb; - sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(m, - (uint8_t *)icb - rte_pktmbuf_mtod(m, uint8_t *)); - sym_cop->cipher.iv.length = 16; - break; - default: - RTE_LOG(ERR, IPSEC_ESP, "unsupported cipher algorithm %u\n", - sa->cipher_algo); - return -EINVAL; - } - switch (sa->auth_algo) { - case RTE_CRYPTO_AUTH_NULL: - case RTE_CRYPTO_AUTH_SHA1_HMAC: - case RTE_CRYPTO_AUTH_SHA256_HMAC: - sym_cop->auth.data.offset = ip_hdr_len; - sym_cop->auth.data.length = sizeof(struct esp_hdr) + - sa->iv_len + payload_len; - break; - case RTE_CRYPTO_AUTH_AES_GCM: aad = get_aad(m); memcpy(aad, iv - sizeof(struct esp_hdr), 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = rte_pktmbuf_mtophys_offset(m, + sym_cop->aead.aad.data = aad; + sym_cop->aead.aad.phys_addr = rte_pktmbuf_mtophys_offset(m, aad - rte_pktmbuf_mtod(m, uint8_t *)); - sym_cop->auth.aad.length = 8; - break; - default: - RTE_LOG(ERR, IPSEC_ESP, "unsupported auth algorithm %u\n", - sa->auth_algo); - return -EINVAL; - } - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(m, void*, - rte_pktmbuf_pkt_len(m) - sa->digest_len); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, - rte_pktmbuf_pkt_len(m) - sa->digest_len); - sym_cop->auth.digest.length = sa->digest_len; + sym_cop->aead.digest.data = rte_pktmbuf_mtod_offset(m, void*, + rte_pktmbuf_pkt_len(m) - sa->digest_len); + sym_cop->aead.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, + rte_pktmbuf_pkt_len(m) - sa->digest_len); + } else { + sym_cop->cipher.data.offset = ip_hdr_len + sizeof(struct esp_hdr) + + sa->iv_len; + sym_cop->cipher.data.length = payload_len; + + struct cnt_blk *icb; + uint8_t *iv = RTE_PTR_ADD(ip4, ip_hdr_len + sizeof(struct esp_hdr)); + uint8_t *iv_ptr = rte_crypto_op_ctod_offset(cop, + uint8_t *, IV_OFFSET); + + switch (sa->cipher_algo) { + case RTE_CRYPTO_CIPHER_NULL: + case RTE_CRYPTO_CIPHER_AES_CBC: + /* Copy IV at the end of crypto operation */ + rte_memcpy(iv_ptr, iv, sa->iv_len); + break; + case RTE_CRYPTO_CIPHER_AES_CTR: + icb = get_cnt_blk(m); + icb->salt = sa->salt; + memcpy(&icb->iv, iv, 8); + icb->cnt = rte_cpu_to_be_32(1); + break; + default: + RTE_LOG(ERR, IPSEC_ESP, "unsupported cipher algorithm %u\n", + sa->cipher_algo); + return -EINVAL; + } + + switch (sa->auth_algo) { + case RTE_CRYPTO_AUTH_NULL: + case 
RTE_CRYPTO_AUTH_SHA1_HMAC: + case RTE_CRYPTO_AUTH_SHA256_HMAC: + sym_cop->auth.data.offset = ip_hdr_len; + sym_cop->auth.data.length = sizeof(struct esp_hdr) + + sa->iv_len + payload_len; + break; + default: + RTE_LOG(ERR, IPSEC_ESP, "unsupported auth algorithm %u\n", + sa->auth_algo); + return -EINVAL; + } + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(m, void*, + rte_pktmbuf_pkt_len(m) - sa->digest_len); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, + rte_pktmbuf_pkt_len(m) - sa->digest_len); + } return 0; } @@ -314,71 +325,87 @@ esp_outbound(struct rte_mbuf *m, struct ipsec_sa *sa, sym_cop = get_sym_cop(cop); sym_cop->m_src = m; - switch (sa->cipher_algo) { - case RTE_CRYPTO_CIPHER_NULL: - case RTE_CRYPTO_CIPHER_AES_CBC: - memset(iv, 0, sa->iv_len); - sym_cop->cipher.data.offset = ip_hdr_len + - sizeof(struct esp_hdr); - sym_cop->cipher.data.length = pad_payload_len + sa->iv_len; - break; - case RTE_CRYPTO_CIPHER_AES_CTR: - case RTE_CRYPTO_CIPHER_AES_GCM: + + if (sa->aead_algo == RTE_CRYPTO_AEAD_AES_GCM) { + uint8_t *aad; + *iv = sa->seq; - sym_cop->cipher.data.offset = ip_hdr_len + + sym_cop->aead.data.offset = ip_hdr_len + sizeof(struct esp_hdr) + sa->iv_len; - sym_cop->cipher.data.length = pad_payload_len; - break; - default: - RTE_LOG(ERR, IPSEC_ESP, "unsupported cipher algorithm %u\n", - sa->cipher_algo); - return -EINVAL; - } + sym_cop->aead.data.length = pad_payload_len; + + /* Fill pad_len using default sequential scheme */ + for (i = 0; i < pad_len - 2; i++) + padding[i] = i + 1; + padding[pad_len - 2] = pad_len - 2; + padding[pad_len - 1] = nlp; + + struct cnt_blk *icb = get_cnt_blk(m); + icb->salt = sa->salt; + icb->iv = sa->seq; + icb->cnt = rte_cpu_to_be_32(1); - /* Fill pad_len using default sequential scheme */ - for (i = 0; i < pad_len - 2; i++) - padding[i] = i + 1; - padding[pad_len - 2] = pad_len - 2; - padding[pad_len - 1] = nlp; - - struct cnt_blk *icb = get_cnt_blk(m); - icb->salt = sa->salt; - icb->iv = sa->seq; - icb->cnt = rte_cpu_to_be_32(1); - sym_cop->cipher.iv.data = (uint8_t *)icb; - sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(m, - (uint8_t *)icb - rte_pktmbuf_mtod(m, uint8_t *)); - sym_cop->cipher.iv.length = 16; - - uint8_t *aad; - - switch (sa->auth_algo) { - case RTE_CRYPTO_AUTH_NULL: - case RTE_CRYPTO_AUTH_SHA1_HMAC: - case RTE_CRYPTO_AUTH_SHA256_HMAC: - sym_cop->auth.data.offset = ip_hdr_len; - sym_cop->auth.data.length = sizeof(struct esp_hdr) + - sa->iv_len + pad_payload_len; - break; - case RTE_CRYPTO_AUTH_AES_GCM: aad = get_aad(m); memcpy(aad, esp, 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = rte_pktmbuf_mtophys_offset(m, + sym_cop->aead.aad.data = aad; + sym_cop->aead.aad.phys_addr = rte_pktmbuf_mtophys_offset(m, aad - rte_pktmbuf_mtod(m, uint8_t *)); - sym_cop->auth.aad.length = 8; - break; - default: - RTE_LOG(ERR, IPSEC_ESP, "unsupported auth algorithm %u\n", - sa->auth_algo); - return -EINVAL; - } - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(m, uint8_t *, + sym_cop->aead.digest.data = rte_pktmbuf_mtod_offset(m, uint8_t *, rte_pktmbuf_pkt_len(m) - sa->digest_len); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, + sym_cop->aead.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, rte_pktmbuf_pkt_len(m) - sa->digest_len); - sym_cop->auth.digest.length = sa->digest_len; + } else { + switch (sa->cipher_algo) { + case RTE_CRYPTO_CIPHER_NULL: + case RTE_CRYPTO_CIPHER_AES_CBC: + memset(iv, 0, sa->iv_len); + sym_cop->cipher.data.offset = ip_hdr_len + + sizeof(struct 
esp_hdr); + sym_cop->cipher.data.length = pad_payload_len + sa->iv_len; + break; + case RTE_CRYPTO_CIPHER_AES_CTR: + *iv = sa->seq; + sym_cop->cipher.data.offset = ip_hdr_len + + sizeof(struct esp_hdr) + sa->iv_len; + sym_cop->cipher.data.length = pad_payload_len; + break; + default: + RTE_LOG(ERR, IPSEC_ESP, "unsupported cipher algorithm %u\n", + sa->cipher_algo); + return -EINVAL; + } + + /* Fill pad_len using default sequential scheme */ + for (i = 0; i < pad_len - 2; i++) + padding[i] = i + 1; + padding[pad_len - 2] = pad_len - 2; + padding[pad_len - 1] = nlp; + + struct cnt_blk *icb = get_cnt_blk(m); + icb->salt = sa->salt; + icb->iv = sa->seq; + icb->cnt = rte_cpu_to_be_32(1); + + switch (sa->auth_algo) { + case RTE_CRYPTO_AUTH_NULL: + case RTE_CRYPTO_AUTH_SHA1_HMAC: + case RTE_CRYPTO_AUTH_SHA256_HMAC: + sym_cop->auth.data.offset = ip_hdr_len; + sym_cop->auth.data.length = sizeof(struct esp_hdr) + + sa->iv_len + pad_payload_len; + break; + default: + RTE_LOG(ERR, IPSEC_ESP, "unsupported auth algorithm %u\n", + sa->auth_algo); + return -EINVAL; + } + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(m, uint8_t *, + rte_pktmbuf_pkt_len(m) - sa->digest_len); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, + rte_pktmbuf_pkt_len(m) - sa->digest_len); + } return 0; } diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c index 8cbf6ac4..99dc270c 100644 --- a/examples/ipsec-secgw/ipsec-secgw.c +++ b/examples/ipsec-secgw/ipsec-secgw.c @@ -710,10 +710,12 @@ main_loop(__attribute__((unused)) void *dummy) qconf->inbound.sp6_ctx = socket_ctx[socket_id].sp_ip6_in; qconf->inbound.sa_ctx = socket_ctx[socket_id].sa_in; qconf->inbound.cdev_map = cdev_map_in; + qconf->inbound.session_pool = socket_ctx[socket_id].session_pool; qconf->outbound.sp4_ctx = socket_ctx[socket_id].sp_ip4_out; qconf->outbound.sp6_ctx = socket_ctx[socket_id].sp_ip6_out; qconf->outbound.sa_ctx = socket_ctx[socket_id].sa_out; qconf->outbound.cdev_map = cdev_map_out; + qconf->outbound.session_pool = socket_ctx[socket_id].session_pool; if (qconf->nb_rx_queue == 0) { RTE_LOG(INFO, IPSEC, "lcore %u has nothing to do\n", lcore_id); @@ -1238,6 +1240,13 @@ cryptodevs_init(void) printf("lcore/cryptodev/qp mappings:\n"); + uint32_t max_sess_sz = 0, sess_sz; + for (cdev_id = 0; cdev_id < rte_cryptodev_count(); cdev_id++) { + sess_sz = rte_cryptodev_get_private_session_size(cdev_id); + if (sess_sz > max_sess_sz) + max_sess_sz = sess_sz; + } + idx = 0; /* Start from last cdev id to give HW priority */ for (cdev_id = rte_cryptodev_count() - 1; cdev_id >= 0; cdev_id--) { @@ -1266,17 +1275,39 @@ cryptodevs_init(void) dev_conf.socket_id = rte_cryptodev_socket_id(cdev_id); dev_conf.nb_queue_pairs = qp; - dev_conf.session_mp.nb_objs = CDEV_MP_NB_OBJS; - dev_conf.session_mp.cache_size = CDEV_MP_CACHE_SZ; + + if (!socket_ctx[dev_conf.socket_id].session_pool) { + char mp_name[RTE_MEMPOOL_NAMESIZE]; + struct rte_mempool *sess_mp; + + snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, + "sess_mp_%u", dev_conf.socket_id); + sess_mp = rte_mempool_create(mp_name, + CDEV_MP_NB_OBJS, + max_sess_sz, + CDEV_MP_CACHE_SZ, + 0, NULL, NULL, NULL, + NULL, dev_conf.socket_id, + 0); + if (sess_mp == NULL) + rte_exit(EXIT_FAILURE, + "Cannot create session pool on socket %d\n", + dev_conf.socket_id); + else + printf("Allocated session pool on socket %d\n", + dev_conf.socket_id); + socket_ctx[dev_conf.socket_id].session_pool = sess_mp; + } if (rte_cryptodev_configure(cdev_id, &dev_conf)) - rte_panic("Failed to initialize crypodev 
%u\n", + rte_panic("Failed to initialize cryptodev %u\n", cdev_id); qp_conf.nb_descriptors = CDEV_QUEUE_DESC; for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) if (rte_cryptodev_queue_pair_setup(cdev_id, qp, - &qp_conf, dev_conf.socket_id)) + &qp_conf, dev_conf.socket_id, + socket_ctx[dev_conf.socket_id].session_pool)) rte_panic("Failed to setup queue %u for " "cdev_id %u\n", 0, cdev_id); @@ -1332,6 +1363,11 @@ port_init(uint8_t portid) rte_exit(EXIT_FAILURE, "Cannot configure device: " "err=%d, port=%d\n", ret, portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot adjust number of descriptors: " + "err=%d, port=%d\n", ret, portid); + /* init one TX queue per lcore */ tx_queueid = 0; for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { @@ -1433,7 +1469,7 @@ main(int32_t argc, char **argv) nb_lcores = rte_lcore_count(); - /* Replicate each contex per socket */ + /* Replicate each context per socket */ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { if (rte_lcore_is_enabled(lcore_id) == 0) continue; diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c index edca5f02..0afb9d67 100644 --- a/examples/ipsec-secgw/ipsec.c +++ b/examples/ipsec-secgw/ipsec.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ #include "esp.h" static inline int -create_session(struct ipsec_ctx *ipsec_ctx __rte_unused, struct ipsec_sa *sa) +create_session(struct ipsec_ctx *ipsec_ctx, struct ipsec_sa *sa) { struct rte_cryptodev_info cdev_info; unsigned long cdev_id_qp = 0; @@ -72,11 +72,15 @@ create_session(struct ipsec_ctx *ipsec_ctx __rte_unused, struct ipsec_sa *sa) ipsec_ctx->tbl[cdev_id_qp].qp); sa->crypto_session = rte_cryptodev_sym_session_create( - ipsec_ctx->tbl[cdev_id_qp].id, sa->xforms); + ipsec_ctx->session_pool); + rte_cryptodev_sym_session_init(ipsec_ctx->tbl[cdev_id_qp].id, + sa->crypto_session, sa->xforms, + ipsec_ctx->session_pool); rte_cryptodev_info_get(ipsec_ctx->tbl[cdev_id_qp].id, &cdev_info); if (cdev_info.sym.max_nb_sessions_per_qp > 0) { ret = rte_cryptodev_queue_pair_attach_sym_session( + ipsec_ctx->tbl[cdev_id_qp].id, ipsec_ctx->tbl[cdev_id_qp].qp, sa->crypto_session); if (ret < 0) { @@ -140,7 +144,6 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx, priv->cop.status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; rte_prefetch0(&priv->sym_cop); - priv->cop.sym = &priv->sym_cop; if ((unlikely(sa->crypto_session == NULL)) && create_session(ipsec_ctx, sa)) { diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h index fe426614..da1fb1b2 100644 --- a/examples/ipsec-secgw/ipsec.h +++ b/examples/ipsec-secgw/ipsec.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -48,6 +48,9 @@ #define MAX_DIGEST_SIZE 32 /* Bytes -- 256 bits */ +#define IV_OFFSET (sizeof(struct rte_crypto_op) + \ + sizeof(struct rte_crypto_sym_op)) + #define uint32_t_to_char(ip, a, b, c, d) do {\ *a = (uint8_t)(ip >> 24 & 0xff);\ *b = (uint8_t)(ip >> 16 & 0xff);\ @@ -72,7 +75,6 @@ struct rte_crypto_xform; struct ipsec_xform; -struct rte_cryptodev_session; struct rte_mbuf; struct ipsec_sa; @@ -100,6 +102,7 @@ struct ipsec_sa { struct rte_cryptodev_sym_session *crypto_session; enum rte_crypto_cipher_algorithm cipher_algo; enum rte_crypto_auth_algorithm auth_algo; + enum rte_crypto_aead_algorithm aead_algo; uint16_t digest_len; uint16_t iv_len; uint16_t block_size; @@ -118,10 +121,10 @@ struct ipsec_sa { } __rte_cache_aligned; struct ipsec_mbuf_metadata { - uint8_t buf[32]; struct ipsec_sa *sa; struct rte_crypto_op cop; struct rte_crypto_sym_op sym_cop; + uint8_t buf[32]; } __rte_cache_aligned; struct cdev_qp { @@ -140,6 +143,7 @@ struct ipsec_ctx { uint16_t nb_qps; uint16_t last_qp; struct cdev_qp tbl[MAX_QP_PER_LCORE]; + struct rte_mempool *session_pool; }; struct cdev_key { @@ -158,6 +162,7 @@ struct socket_ctx { struct rt_ctx *rt_ip4; struct rt_ctx *rt_ip6; struct rte_mempool *mbuf_pool; + struct rte_mempool *session_pool; }; struct cnt_blk { diff --git a/examples/ipsec-secgw/sa.c b/examples/ipsec-secgw/sa.c index 39624c49..7be0e628 100644 --- a/examples/ipsec-secgw/sa.c +++ b/examples/ipsec-secgw/sa.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -64,10 +64,20 @@ struct supported_auth_algo { enum rte_crypto_auth_algorithm algo; uint16_t digest_len; uint16_t key_len; - uint8_t aad_len; uint8_t key_not_req; }; +struct supported_aead_algo { + const char *keyword; + enum rte_crypto_aead_algorithm algo; + uint16_t iv_len; + uint16_t block_size; + uint16_t digest_len; + uint16_t key_len; + uint8_t aad_len; +}; + + const struct supported_cipher_algo cipher_algos[] = { { .keyword = "null", @@ -84,13 +94,6 @@ const struct supported_cipher_algo cipher_algos[] = { .key_len = 16 }, { - .keyword = "aes-128-gcm", - .algo = RTE_CRYPTO_CIPHER_AES_GCM, - .iv_len = 8, - .block_size = 4, - .key_len = 20 - }, - { .keyword = "aes-128-ctr", .algo = RTE_CRYPTO_CIPHER_AES_CTR, .iv_len = 8, @@ -118,13 +121,18 @@ const struct supported_auth_algo auth_algos[] = { .algo = RTE_CRYPTO_AUTH_SHA256_HMAC, .digest_len = 12, .key_len = 32 - }, + } +}; + +const struct supported_aead_algo aead_algos[] = { { .keyword = "aes-128-gcm", - .algo = RTE_CRYPTO_AUTH_AES_GCM, + .algo = RTE_CRYPTO_AEAD_AES_GCM, + .iv_len = 8, + .block_size = 4, + .key_len = 20, .digest_len = 16, .aad_len = 8, - .key_not_req = 1 } }; @@ -166,6 +174,22 @@ find_match_auth_algo(const char *auth_keyword) return NULL; } +static const struct supported_aead_algo * +find_match_aead_algo(const char *aead_keyword) +{ + size_t i; + + for (i = 0; i < RTE_DIM(aead_algos); i++) { + const struct supported_aead_algo *algo = + &aead_algos[i]; + + if (strcmp(aead_keyword, algo->keyword) == 0) + return algo; + } + + return NULL; +} + /** parse_key_string * parse x:x:x:x.... 
hex number key string into uint8_t *key * return: @@ -210,6 +234,7 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, uint32_t *ri /*rule index*/; uint32_t cipher_algo_p = 0; uint32_t auth_algo_p = 0; + uint32_t aead_algo_p = 0; uint32_t src_p = 0; uint32_t dst_p = 0; uint32_t mode_p = 0; @@ -319,8 +344,7 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, if (algo->algo == RTE_CRYPTO_CIPHER_AES_CBC) rule->salt = (uint32_t)rte_rand(); - if ((algo->algo == RTE_CRYPTO_CIPHER_AES_CTR) || - (algo->algo == RTE_CRYPTO_CIPHER_AES_GCM)) { + if (algo->algo == RTE_CRYPTO_CIPHER_AES_CTR) { key_len -= 4; rule->cipher_key_len = key_len; memcpy(&rule->salt, @@ -386,6 +410,61 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, continue; } + if (strcmp(tokens[ti], "aead_algo") == 0) { + const struct supported_aead_algo *algo; + uint32_t key_len; + + APP_CHECK_PRESENCE(aead_algo_p, tokens[ti], + status); + if (status->status < 0) + return; + + INCREMENT_TOKEN_INDEX(ti, n_tokens, status); + if (status->status < 0) + return; + + algo = find_match_aead_algo(tokens[ti]); + + APP_CHECK(algo != NULL, status, "unrecognized " + "input \"%s\"", tokens[ti]); + + rule->aead_algo = algo->algo; + rule->cipher_key_len = algo->key_len; + rule->digest_len = algo->digest_len; + rule->aad_len = algo->key_len; + rule->block_size = algo->block_size; + rule->iv_len = algo->iv_len; + + INCREMENT_TOKEN_INDEX(ti, n_tokens, status); + if (status->status < 0) + return; + + APP_CHECK(strcmp(tokens[ti], "aead_key") == 0, + status, "unrecognized input \"%s\", " + "expect \"aead_key\"", tokens[ti]); + if (status->status < 0) + return; + + INCREMENT_TOKEN_INDEX(ti, n_tokens, status); + if (status->status < 0) + return; + + key_len = parse_key_string(tokens[ti], + rule->cipher_key); + APP_CHECK(key_len == rule->cipher_key_len, status, + "unrecognized input \"%s\"", tokens[ti]); + if (status->status < 0) + return; + + key_len -= 4; + rule->cipher_key_len = key_len; + memcpy(&rule->salt, + &rule->cipher_key[key_len], 4); + + aead_algo_p = 1; + continue; + } + if (strcmp(tokens[ti], "src") == 0) { APP_CHECK_PRESENCE(src_p, tokens[ti], status); if (status->status < 0) @@ -477,13 +556,25 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, return; } - APP_CHECK(cipher_algo_p == 1, status, "missing cipher options"); - if (status->status < 0) - return; + if (aead_algo_p) { + APP_CHECK(cipher_algo_p == 0, status, + "AEAD used, no need for cipher options"); + if (status->status < 0) + return; - APP_CHECK(auth_algo_p == 1, status, "missing auth options"); - if (status->status < 0) - return; + APP_CHECK(auth_algo_p == 0, status, + "AEAD used, no need for auth options"); + if (status->status < 0) + return; + } else { + APP_CHECK(cipher_algo_p == 1, status, "missing cipher or AEAD options"); + if (status->status < 0) + return; + + APP_CHECK(auth_algo_p == 1, status, "missing auth or AEAD options"); + if (status->status < 0) + return; + } APP_CHECK(mode_p == 1, status, "missing mode option"); if (status->status < 0) @@ -514,6 +605,13 @@ print_one_sa_rule(const struct ipsec_sa *sa, int inbound) } } + for (i = 0; i < RTE_DIM(aead_algos); i++) { + if (aead_algos[i].algo == sa->aead_algo) { + printf("%s ", aead_algos[i].keyword); + break; + } + } + printf("mode:"); switch (sa->flags) { @@ -589,6 +687,7 @@ sa_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[], { struct ipsec_sa *sa; uint32_t i, idx; + uint16_t iv_length; for (i = 0; i < nb_entries; i++) { idx = SPI2IDX(entries[i].spi); @@ -607,56 +706,110 @@ sa_add_rules(struct sa_ctx 
*sa_ctx, const struct ipsec_sa entries[], sa->dst.ip.ip4 = rte_cpu_to_be_32(sa->dst.ip.ip4); } - if (inbound) { - sa_ctx->xf[idx].b.type = RTE_CRYPTO_SYM_XFORM_CIPHER; - sa_ctx->xf[idx].b.cipher.algo = sa->cipher_algo; - sa_ctx->xf[idx].b.cipher.key.data = sa->cipher_key; - sa_ctx->xf[idx].b.cipher.key.length = - sa->cipher_key_len; - sa_ctx->xf[idx].b.cipher.op = - RTE_CRYPTO_CIPHER_OP_DECRYPT; - sa_ctx->xf[idx].b.next = NULL; + if (sa->aead_algo == RTE_CRYPTO_AEAD_AES_GCM) { + iv_length = 16; + + if (inbound) { + sa_ctx->xf[idx].a.type = RTE_CRYPTO_SYM_XFORM_AEAD; + sa_ctx->xf[idx].a.aead.algo = sa->aead_algo; + sa_ctx->xf[idx].a.aead.key.data = sa->cipher_key; + sa_ctx->xf[idx].a.aead.key.length = + sa->cipher_key_len; + sa_ctx->xf[idx].a.aead.op = + RTE_CRYPTO_AEAD_OP_DECRYPT; + sa_ctx->xf[idx].a.next = NULL; + sa_ctx->xf[idx].a.aead.iv.offset = IV_OFFSET; + sa_ctx->xf[idx].a.aead.iv.length = iv_length; + sa_ctx->xf[idx].a.aead.aad_length = + sa->aad_len; + sa_ctx->xf[idx].a.aead.digest_length = + sa->digest_len; + } else { /* outbound */ + sa_ctx->xf[idx].a.type = RTE_CRYPTO_SYM_XFORM_AEAD; + sa_ctx->xf[idx].a.aead.algo = sa->aead_algo; + sa_ctx->xf[idx].a.aead.key.data = sa->cipher_key; + sa_ctx->xf[idx].a.aead.key.length = + sa->cipher_key_len; + sa_ctx->xf[idx].a.aead.op = + RTE_CRYPTO_AEAD_OP_ENCRYPT; + sa_ctx->xf[idx].a.next = NULL; + sa_ctx->xf[idx].a.aead.iv.offset = IV_OFFSET; + sa_ctx->xf[idx].a.aead.iv.length = iv_length; + sa_ctx->xf[idx].a.aead.aad_length = + sa->aad_len; + sa_ctx->xf[idx].a.aead.digest_length = + sa->digest_len; + } - sa_ctx->xf[idx].a.type = RTE_CRYPTO_SYM_XFORM_AUTH; - sa_ctx->xf[idx].a.auth.algo = sa->auth_algo; - sa_ctx->xf[idx].a.auth.add_auth_data_length = - sa->aad_len; - sa_ctx->xf[idx].a.auth.key.data = sa->auth_key; - sa_ctx->xf[idx].a.auth.key.length = - sa->auth_key_len; - sa_ctx->xf[idx].a.auth.digest_length = - sa->digest_len; - sa_ctx->xf[idx].a.auth.op = - RTE_CRYPTO_AUTH_OP_VERIFY; - - } else { /* outbound */ - sa_ctx->xf[idx].a.type = RTE_CRYPTO_SYM_XFORM_CIPHER; - sa_ctx->xf[idx].a.cipher.algo = sa->cipher_algo; - sa_ctx->xf[idx].a.cipher.key.data = sa->cipher_key; - sa_ctx->xf[idx].a.cipher.key.length = - sa->cipher_key_len; - sa_ctx->xf[idx].a.cipher.op = - RTE_CRYPTO_CIPHER_OP_ENCRYPT; - sa_ctx->xf[idx].a.next = NULL; - - sa_ctx->xf[idx].b.type = RTE_CRYPTO_SYM_XFORM_AUTH; - sa_ctx->xf[idx].b.auth.algo = sa->auth_algo; - sa_ctx->xf[idx].b.auth.add_auth_data_length = - sa->aad_len; - sa_ctx->xf[idx].b.auth.key.data = sa->auth_key; - sa_ctx->xf[idx].b.auth.key.length = - sa->auth_key_len; - sa_ctx->xf[idx].b.auth.digest_length = - sa->digest_len; - sa_ctx->xf[idx].b.auth.op = - RTE_CRYPTO_AUTH_OP_GENERATE; - } + sa->xforms = &sa_ctx->xf[idx].a; - sa_ctx->xf[idx].a.next = &sa_ctx->xf[idx].b; - sa_ctx->xf[idx].b.next = NULL; - sa->xforms = &sa_ctx->xf[idx].a; + print_one_sa_rule(sa, inbound); + } else { + switch (sa->cipher_algo) { + case RTE_CRYPTO_CIPHER_NULL: + case RTE_CRYPTO_CIPHER_AES_CBC: + iv_length = sa->iv_len; + break; + case RTE_CRYPTO_CIPHER_AES_CTR: + iv_length = 16; + break; + default: + RTE_LOG(ERR, IPSEC_ESP, + "unsupported cipher algorithm %u\n", + sa->cipher_algo); + return -EINVAL; + } + + if (inbound) { + sa_ctx->xf[idx].b.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + sa_ctx->xf[idx].b.cipher.algo = sa->cipher_algo; + sa_ctx->xf[idx].b.cipher.key.data = sa->cipher_key; + sa_ctx->xf[idx].b.cipher.key.length = + sa->cipher_key_len; + sa_ctx->xf[idx].b.cipher.op = + RTE_CRYPTO_CIPHER_OP_DECRYPT; + sa_ctx->xf[idx].b.next = NULL; 
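
The sa_add_rules() changes above collapse the old cipher+auth transform chain into a single AEAD transform for AES-GCM, and the per-packet IV is no longer passed as a pointer in each operation: the PMD now reads it from a fixed offset inside the crypto operation (IV_OFFSET, directly behind the rte_crypto_sym_op header). A minimal self-contained sketch of that transform layout follows; the helper name build_gcm_xform and the standalone framing are illustrative only, while the 16-byte IV region, 8-byte AAD and 16-byte digest mirror the values this hunk sets:

#include <stdint.h>
#include <string.h>
#include <rte_crypto.h>

#define IV_OFFSET	(sizeof(struct rte_crypto_op) + \
			 sizeof(struct rte_crypto_sym_op))

static void
build_gcm_xform(struct rte_crypto_sym_xform *xf, uint8_t *key,
		uint16_t key_len, int inbound)
{
	memset(xf, 0, sizeof(*xf));
	xf->type = RTE_CRYPTO_SYM_XFORM_AEAD;
	xf->next = NULL;
	xf->aead.algo = RTE_CRYPTO_AEAD_AES_GCM;
	xf->aead.op = inbound ? RTE_CRYPTO_AEAD_OP_DECRYPT :
			RTE_CRYPTO_AEAD_OP_ENCRYPT;
	xf->aead.key.data = key;	/* 4-byte salt already stripped */
	xf->aead.key.length = key_len;
	/* The IV lives inside the crypto op itself, so the transform
	 * only records its offset and length */
	xf->aead.iv.offset = IV_OFFSET;
	xf->aead.iv.length = 16;	/* salt + 8-byte IV + counter */
	xf->aead.aad_length = 8;	/* ESP SPI + sequence number */
	xf->aead.digest_length = 16;
}

A transform filled in this way is what create_session() later hands to rte_cryptodev_sym_session_init(), together with the per-socket session mempool this patch introduces.
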
+ sa_ctx->xf[idx].b.cipher.iv.offset = IV_OFFSET; + sa_ctx->xf[idx].b.cipher.iv.length = iv_length; + + sa_ctx->xf[idx].a.type = RTE_CRYPTO_SYM_XFORM_AUTH; + sa_ctx->xf[idx].a.auth.algo = sa->auth_algo; + sa_ctx->xf[idx].a.auth.key.data = sa->auth_key; + sa_ctx->xf[idx].a.auth.key.length = + sa->auth_key_len; + sa_ctx->xf[idx].a.auth.digest_length = + sa->digest_len; + sa_ctx->xf[idx].a.auth.op = + RTE_CRYPTO_AUTH_OP_VERIFY; + } else { /* outbound */ + sa_ctx->xf[idx].a.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + sa_ctx->xf[idx].a.cipher.algo = sa->cipher_algo; + sa_ctx->xf[idx].a.cipher.key.data = sa->cipher_key; + sa_ctx->xf[idx].a.cipher.key.length = + sa->cipher_key_len; + sa_ctx->xf[idx].a.cipher.op = + RTE_CRYPTO_CIPHER_OP_ENCRYPT; + sa_ctx->xf[idx].a.next = NULL; + sa_ctx->xf[idx].a.cipher.iv.offset = IV_OFFSET; + sa_ctx->xf[idx].a.cipher.iv.length = iv_length; + + sa_ctx->xf[idx].b.type = RTE_CRYPTO_SYM_XFORM_AUTH; + sa_ctx->xf[idx].b.auth.algo = sa->auth_algo; + sa_ctx->xf[idx].b.auth.key.data = sa->auth_key; + sa_ctx->xf[idx].b.auth.key.length = + sa->auth_key_len; + sa_ctx->xf[idx].b.auth.digest_length = + sa->digest_len; + sa_ctx->xf[idx].b.auth.op = + RTE_CRYPTO_AUTH_OP_GENERATE; + } - print_one_sa_rule(sa, inbound); + sa_ctx->xf[idx].a.next = &sa_ctx->xf[idx].b; + sa_ctx->xf[idx].b.next = NULL; + sa->xforms = &sa_ctx->xf[idx].a; + + print_one_sa_rule(sa, inbound); + } } return 0; diff --git a/examples/ipv4_multicast/Makefile b/examples/ipv4_multicast/Makefile index 44f0a3bb..1f7c53af 100644 --- a/examples/ipv4_multicast/Makefile +++ b/examples/ipv4_multicast/Makefile @@ -34,7 +34,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c index 96a4ab6e..9a13d353 100644 --- a/examples/ipv4_multicast/main.c +++ b/examples/ipv4_multicast/main.c @@ -49,7 +49,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -757,6 +756,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", ret, portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors: err=%d, port=%d\n", + ret, portid); + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); printf(", "); diff --git a/examples/kni/Makefile b/examples/kni/Makefile index 6800dd5c..08a4f0c5 100644 --- a/examples/kni/Makefile +++ b/examples/kni/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/kni/main.c b/examples/kni/main.c index 0be57d83..e3bc2fb7 100644 --- a/examples/kni/main.c +++ b/examples/kni/main.c @@ -65,7 +65,6 @@ #include <rte_debug.h> #include <rte_ether.h> #include <rte_ethdev.h> -#include <rte_log.h> #include <rte_mempool.h> #include <rte_mbuf.h> #include <rte_string_fns.h> @@ -605,6 +604,8 @@ static void init_port(uint8_t port) 
{ int ret; + uint16_t nb_rxd = NB_RXD; + uint16_t nb_txd = NB_TXD; /* Initialise device and RX/TX queues */ RTE_LOG(INFO, APP, "Initialising port %u ...\n", (unsigned)port); @@ -614,13 +615,18 @@ init_port(uint8_t port) rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)\n", (unsigned)port, ret); - ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, + ret = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not adjust number of descriptors " + "for port%u (%d)\n", (unsigned)port, ret); + + ret = rte_eth_rx_queue_setup(port, 0, nb_rxd, rte_eth_dev_socket_id(port), NULL, pktmbuf_pool); if (ret < 0) rte_exit(EXIT_FAILURE, "Could not setup up RX queue for " "port%u (%d)\n", (unsigned)port, ret); - ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, + ret = rte_eth_tx_queue_setup(port, 0, nb_txd, rte_eth_dev_socket_id(port), NULL); if (ret < 0) rte_exit(EXIT_FAILURE, "Could not setup up TX queue for " diff --git a/examples/l2fwd-cat/l2fwd-cat.c b/examples/l2fwd-cat/l2fwd-cat.c index 8cce33b8..c293bd9c 100644 --- a/examples/l2fwd-cat/l2fwd-cat.c +++ b/examples/l2fwd-cat/l2fwd-cat.c @@ -65,6 +65,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) const uint16_t rx_rings = 1, tx_rings = 1; int retval; uint16_t q; + uint16_t nb_rxd = RX_RING_SIZE; + uint16_t nb_txd = TX_RING_SIZE; if (port >= rte_eth_dev_count()) return -1; @@ -74,9 +76,13 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if (retval != 0) + return retval; + /* Allocate and set up 1 RX queue per Ethernet port. */ for (q = 0; q < rx_rings; q++) { - retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + retval = rte_eth_rx_queue_setup(port, q, nb_rxd, rte_eth_dev_socket_id(port), NULL, mbuf_pool); if (retval < 0) return retval; @@ -84,7 +90,7 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) /* Allocate and set up 1 TX queue per Ethernet port. 
*/ for (q = 0; q < tx_rings; q++) { - retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + retval = rte_eth_tx_queue_setup(port, q, nb_txd, rte_eth_dev_socket_id(port), NULL); if (retval < 0) return retval; diff --git a/examples/l2fwd-crypto/main.c b/examples/l2fwd-crypto/main.c index 94921935..f020be32 100644 --- a/examples/l2fwd-crypto/main.c +++ b/examples/l2fwd-crypto/main.c @@ -88,6 +88,12 @@ enum cdev_type { #define MAX_KEY_SIZE 128 #define MAX_PKT_BURST 32 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ +#define MAX_SESSIONS 32 +#define SESSION_POOL_CACHE_SIZE 0 + +#define MAXIMUM_IV_LENGTH 16 +#define IV_OFFSET (sizeof(struct rte_crypto_op) + \ + sizeof(struct rte_crypto_sym_op)) /* * Configurable number of RX/TX ring descriptors @@ -126,7 +132,8 @@ enum l2fwd_crypto_xform_chain { L2FWD_CRYPTO_CIPHER_HASH, L2FWD_CRYPTO_HASH_CIPHER, L2FWD_CRYPTO_CIPHER_ONLY, - L2FWD_CRYPTO_HASH_ONLY + L2FWD_CRYPTO_HASH_ONLY, + L2FWD_CRYPTO_AEAD }; struct l2fwd_key { @@ -135,6 +142,11 @@ struct l2fwd_key { phys_addr_t phys_addr; }; +struct l2fwd_iv { + uint8_t *data; + uint16_t length; +}; + /** l2fwd crypto application command line options */ struct l2fwd_crypto_options { unsigned portmask; @@ -151,14 +163,26 @@ struct l2fwd_crypto_options { unsigned ckey_param; int ckey_random_size; - struct l2fwd_key iv; - unsigned iv_param; - int iv_random_size; + struct l2fwd_iv cipher_iv; + unsigned int cipher_iv_param; + int cipher_iv_random_size; struct rte_crypto_sym_xform auth_xform; uint8_t akey_param; int akey_random_size; + struct l2fwd_iv auth_iv; + unsigned int auth_iv_param; + int auth_iv_random_size; + + struct rte_crypto_sym_xform aead_xform; + unsigned int aead_key_param; + int aead_key_random_size; + + struct l2fwd_iv aead_iv; + unsigned int aead_iv_param; + int aead_iv_random_size; + struct l2fwd_key aad; unsigned aad_param; int aad_random_size; @@ -169,6 +193,8 @@ struct l2fwd_crypto_options { char string_type[MAX_STR_LEN]; uint64_t cryptodev_mask; + + unsigned int mac_updating; }; /** l2fwd crypto lcore params */ @@ -179,16 +205,20 @@ struct l2fwd_crypto_params { unsigned digest_length; unsigned block_size; - struct l2fwd_key iv; + struct l2fwd_iv cipher_iv; + struct l2fwd_iv auth_iv; + struct l2fwd_iv aead_iv; struct l2fwd_key aad; struct rte_cryptodev_sym_session *session; uint8_t do_cipher; uint8_t do_hash; + uint8_t do_aead; uint8_t hash_verify; enum rte_crypto_cipher_algorithm cipher_algo; enum rte_crypto_auth_algorithm auth_algo; + enum rte_crypto_aead_algorithm aead_algo; }; /** lcore configuration */ @@ -223,6 +253,7 @@ static const struct rte_eth_conf port_conf = { struct rte_mempool *l2fwd_pktmbuf_pool; struct rte_mempool *l2fwd_crypto_op_pool; +struct rte_mempool *session_pool_socket[RTE_MAX_NUMA_NODES] = { 0 }; /* Per-port statistics struct */ struct l2fwd_port_statistics { @@ -444,6 +475,18 @@ l2fwd_simple_crypto_enqueue(struct rte_mbuf *m, rte_crypto_op_attach_sym_session(op, cparams->session); if (cparams->do_hash) { + if (cparams->auth_iv.length) { + uint8_t *iv_ptr = rte_crypto_op_ctod_offset(op, + uint8_t *, + IV_OFFSET + + cparams->cipher_iv.length); + /* + * Copy IV at the end of the crypto operation, + * after the cipher IV, if added + */ + rte_memcpy(iv_ptr, cparams->auth_iv.data, + cparams->auth_iv.length); + } if (!cparams->hash_verify) { /* Append space for digest to end of packet */ op->sym->auth.digest.data = (uint8_t *)rte_pktmbuf_append(m, @@ -455,7 +498,6 @@ l2fwd_simple_crypto_enqueue(struct rte_mbuf *m, op->sym->auth.digest.phys_addr = 
rte_pktmbuf_mtophys_offset(m, rte_pktmbuf_pkt_len(m) - cparams->digest_length); - op->sym->auth.digest.length = cparams->digest_length; /* For wireless algorithms, offset/length must be in bits */ if (cparams->auth_algo == RTE_CRYPTO_AUTH_SNOW3G_UIA2 || @@ -467,22 +509,14 @@ l2fwd_simple_crypto_enqueue(struct rte_mbuf *m, op->sym->auth.data.offset = ipdata_offset; op->sym->auth.data.length = data_len; } - - if (cparams->aad.length) { - op->sym->auth.aad.data = cparams->aad.data; - op->sym->auth.aad.phys_addr = cparams->aad.phys_addr; - op->sym->auth.aad.length = cparams->aad.length; - } else { - op->sym->auth.aad.data = NULL; - op->sym->auth.aad.phys_addr = 0; - op->sym->auth.aad.length = 0; - } } if (cparams->do_cipher) { - op->sym->cipher.iv.data = cparams->iv.data; - op->sym->cipher.iv.phys_addr = cparams->iv.phys_addr; - op->sym->cipher.iv.length = cparams->iv.length; + uint8_t *iv_ptr = rte_crypto_op_ctod_offset(op, uint8_t *, + IV_OFFSET); + /* Copy IV at the end of the crypto operation */ + rte_memcpy(iv_ptr, cparams->cipher_iv.data, + cparams->cipher_iv.length); /* For wireless algorithms, offset/length must be in bits */ if (cparams->cipher_algo == RTE_CRYPTO_CIPHER_SNOW3G_UEA2 || @@ -496,6 +530,33 @@ l2fwd_simple_crypto_enqueue(struct rte_mbuf *m, } } + if (cparams->do_aead) { + uint8_t *iv_ptr = rte_crypto_op_ctod_offset(op, uint8_t *, + IV_OFFSET); + /* Copy IV at the end of the crypto operation */ + rte_memcpy(iv_ptr, cparams->aead_iv.data, cparams->aead_iv.length); + + op->sym->aead.data.offset = ipdata_offset; + op->sym->aead.data.length = data_len; + + if (!cparams->hash_verify) { + /* Append space for digest to end of packet */ + op->sym->aead.digest.data = (uint8_t *)rte_pktmbuf_append(m, + cparams->digest_length); + } else { + op->sym->aead.digest.data = rte_pktmbuf_mtod(m, + uint8_t *) + ipdata_offset + data_len; + } + + op->sym->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, + rte_pktmbuf_pkt_len(m) - cparams->digest_length); + + if (cparams->aad.length) { + op->sym->aead.aad.data = cparams->aad.data; + op->sym->aead.aad.phys_addr = cparams->aad.phys_addr; + } + } + op->sym->m_src = m; return l2fwd_crypto_enqueue(op, cparams); @@ -549,21 +610,31 @@ l2fwd_send_packet(struct rte_mbuf *m, uint8_t port) } static void -l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid) +l2fwd_mac_updating(struct rte_mbuf *m, unsigned int dest_portid) { struct ether_hdr *eth; void *tmp; - unsigned dst_port; - dst_port = l2fwd_dst_ports[portid]; eth = rte_pktmbuf_mtod(m, struct ether_hdr *); /* 02:00:00:00:00:xx */ tmp = &eth->d_addr.addr_bytes[0]; - *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dest_portid << 40); /* src addr */ - ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr); + ether_addr_copy(&l2fwd_ports_eth_addr[dest_portid], &eth->s_addr); +} + +static void +l2fwd_simple_forward(struct rte_mbuf *m, unsigned int portid, + struct l2fwd_crypto_options *options) +{ + unsigned int dst_port; + + dst_port = l2fwd_dst_ports[portid]; + + if (options->mac_updating) + l2fwd_mac_updating(m, dst_port); l2fwd_send_packet(m, (uint8_t) dst_port); } @@ -587,12 +658,21 @@ generate_random_key(uint8_t *key, unsigned length) } static struct rte_cryptodev_sym_session * -initialize_crypto_session(struct l2fwd_crypto_options *options, - uint8_t cdev_id) +initialize_crypto_session(struct l2fwd_crypto_options *options, uint8_t cdev_id) { struct rte_crypto_sym_xform *first_xform; + struct rte_cryptodev_sym_session 
*session; + int retval = rte_cryptodev_socket_id(cdev_id); + + if (retval < 0) + return NULL; - if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH) { + uint8_t socket_id = (uint8_t) retval; + struct rte_mempool *sess_mp = session_pool_socket[socket_id]; + + if (options->xform_chain == L2FWD_CRYPTO_AEAD) { + first_xform = &options->aead_xform; + } else if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH) { first_xform = &options->cipher_xform; first_xform->next = &options->auth_xform; } else if (options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER) { @@ -604,8 +684,16 @@ initialize_crypto_session(struct l2fwd_crypto_options *options, first_xform = &options->auth_xform; } - /* Setup Cipher Parameters */ - return rte_cryptodev_sym_session_create(cdev_id, first_xform); + session = rte_cryptodev_sym_session_create(sess_mp); + + if (session == NULL) + return NULL; + + if (rte_cryptodev_sym_session_init(cdev_id, session, + first_xform, sess_mp) < 0) + return NULL; + + return session; } static void @@ -626,6 +714,7 @@ l2fwd_main_loop(struct l2fwd_crypto_options *options) US_PER_S * BURST_TX_DRAIN_US; struct l2fwd_crypto_params *cparams; struct l2fwd_crypto_params port_cparams[qconf->nb_crypto_devs]; + struct rte_cryptodev_sym_session *session; if (qconf->nb_rx_ports == 0) { RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id); @@ -644,8 +733,12 @@ l2fwd_main_loop(struct l2fwd_crypto_options *options) for (i = 0; i < qconf->nb_crypto_devs; i++) { port_cparams[i].do_cipher = 0; port_cparams[i].do_hash = 0; + port_cparams[i].do_aead = 0; switch (options->xform_chain) { + case L2FWD_CRYPTO_AEAD: + port_cparams[i].do_aead = 1; + break; case L2FWD_CRYPTO_CIPHER_HASH: case L2FWD_CRYPTO_HASH_CIPHER: port_cparams[i].do_cipher = 1; @@ -665,13 +758,41 @@ l2fwd_main_loop(struct l2fwd_crypto_options *options) port_cparams[i].block_size = options->block_size; if (port_cparams[i].do_hash) { + port_cparams[i].auth_iv.data = options->auth_iv.data; + port_cparams[i].auth_iv.length = options->auth_iv.length; + if (!options->auth_iv_param) + generate_random_key(port_cparams[i].auth_iv.data, + port_cparams[i].auth_iv.length); + if (options->auth_xform.auth.op == RTE_CRYPTO_AUTH_OP_VERIFY) + port_cparams[i].hash_verify = 1; + else + port_cparams[i].hash_verify = 0; + + port_cparams[i].auth_algo = options->auth_xform.auth.algo; port_cparams[i].digest_length = options->auth_xform.auth.digest_length; - if (options->auth_xform.auth.add_auth_data_length) { + /* Set IV parameters */ + if (options->auth_iv.length) { + options->auth_xform.auth.iv.offset = + IV_OFFSET + options->cipher_iv.length; + options->auth_xform.auth.iv.length = + options->auth_iv.length; + } + } + + if (port_cparams[i].do_aead) { + port_cparams[i].aead_iv.data = options->aead_iv.data; + port_cparams[i].aead_iv.length = options->aead_iv.length; + if (!options->aead_iv_param) + generate_random_key(port_cparams[i].aead_iv.data, + port_cparams[i].aead_iv.length); + port_cparams[i].aead_algo = options->aead_xform.aead.algo; + port_cparams[i].digest_length = + options->aead_xform.aead.digest_length; + if (options->aead_xform.aead.aad_length) { port_cparams[i].aad.data = options->aad.data; - port_cparams[i].aad.length = - options->auth_xform.auth.add_auth_data_length; port_cparams[i].aad.phys_addr = options->aad.phys_addr; + port_cparams[i].aad.length = options->aad.length; if (!options->aad_param) generate_random_key(port_cparams[i].aad.data, port_cparams[i].aad.length); @@ -679,30 +800,37 @@ l2fwd_main_loop(struct l2fwd_crypto_options *options) } 
else port_cparams[i].aad.length = 0; - if (options->auth_xform.auth.op == RTE_CRYPTO_AUTH_OP_VERIFY) + if (options->aead_xform.aead.op == RTE_CRYPTO_AEAD_OP_DECRYPT) port_cparams[i].hash_verify = 1; else port_cparams[i].hash_verify = 0; - port_cparams[i].auth_algo = options->auth_xform.auth.algo; + /* Set IV parameters */ + options->aead_xform.aead.iv.offset = IV_OFFSET; + options->aead_xform.aead.iv.length = options->aead_iv.length; } if (port_cparams[i].do_cipher) { - port_cparams[i].iv.data = options->iv.data; - port_cparams[i].iv.length = options->iv.length; - port_cparams[i].iv.phys_addr = options->iv.phys_addr; - if (!options->iv_param) - generate_random_key(port_cparams[i].iv.data, - port_cparams[i].iv.length); + port_cparams[i].cipher_iv.data = options->cipher_iv.data; + port_cparams[i].cipher_iv.length = options->cipher_iv.length; + if (!options->cipher_iv_param) + generate_random_key(port_cparams[i].cipher_iv.data, + port_cparams[i].cipher_iv.length); port_cparams[i].cipher_algo = options->cipher_xform.cipher.algo; + /* Set IV parameters */ + options->cipher_xform.cipher.iv.offset = IV_OFFSET; + options->cipher_xform.cipher.iv.length = + options->cipher_iv.length; } - port_cparams[i].session = initialize_crypto_session(options, + session = initialize_crypto_session(options, port_cparams[i].dev_id); + if (session == NULL) + rte_exit(EXIT_FAILURE, "Failed to initialize crypto session\n"); + + port_cparams[i].session = session; - if (port_cparams[i].session == NULL) - return; RTE_LOG(INFO, L2FWD, " -- lcoreid=%u cryptoid=%u\n", lcore_id, port_cparams[i].dev_id); } @@ -816,7 +944,8 @@ l2fwd_main_loop(struct l2fwd_crypto_options *options) m = ops_burst[j]->sym->m_src; rte_crypto_op_free(ops_burst[j]); - l2fwd_simple_forward(m, portid); + l2fwd_simple_forward(m, portid, + options); } } while (nb_rx == MAX_PKT_BURST); } @@ -842,25 +971,41 @@ l2fwd_crypto_usage(const char *prgname) " (0 to disable, 10 default, 86400 maximum)\n" " --cdev_type HW / SW / ANY\n" - " --chain HASH_CIPHER / CIPHER_HASH\n" + " --chain HASH_CIPHER / CIPHER_HASH / CIPHER_ONLY /" + " HASH_ONLY / AEAD\n" " --cipher_algo ALGO\n" " --cipher_op ENCRYPT / DECRYPT\n" " --cipher_key KEY (bytes separated with \":\")\n" " --cipher_key_random_size SIZE: size of cipher key when generated randomly\n" - " --iv IV (bytes separated with \":\")\n" - " --iv_random_size SIZE: size of IV when generated randomly\n" + " --cipher_iv IV (bytes separated with \":\")\n" + " --cipher_iv_random_size SIZE: size of cipher IV when generated randomly\n" " --auth_algo ALGO\n" " --auth_op GENERATE / VERIFY\n" " --auth_key KEY (bytes separated with \":\")\n" " --auth_key_random_size SIZE: size of auth key when generated randomly\n" + " --auth_iv IV (bytes separated with \":\")\n" + " --auth_iv_random_size SIZE: size of auth IV when generated randomly\n" + + " --aead_algo ALGO\n" + " --aead_op ENCRYPT / DECRYPT\n" + " --aead_key KEY (bytes separated with \":\")\n" + " --aead_key_random_size SIZE: size of AEAD key when generated randomly\n" + " --aead_iv IV (bytes separated with \":\")\n" + " --aead_iv_random_size SIZE: size of AEAD IV when generated randomly\n" " --aad AAD (bytes separated with \":\")\n" " --aad_random_size SIZE: size of AAD when generated randomly\n" + " --digest_size SIZE: size of digest to be generated/verified\n" " --sessionless\n" - " --cryptodev_mask MASK: hexadecimal bitmask of crypto devices to configure\n", + " --cryptodev_mask MASK: hexadecimal bitmask of crypto devices to configure\n" + + " --[no-]mac-updating: Enable or 
disable MAC addresses updating (enabled by default)\n" + " When enabled:\n" + " - The source MAC address is replaced by the TX port MAC address\n" + " - The destination MAC address is replaced by 02:00:00:00:00:TX_PORT_ID\n", prgname); } @@ -898,6 +1043,9 @@ parse_crypto_opt_chain(struct l2fwd_crypto_options *options, char *optarg) } else if (strcmp("HASH_ONLY", optarg) == 0) { options->xform_chain = L2FWD_CRYPTO_HASH_ONLY; return 0; + } else if (strcmp("AEAD", optarg) == 0) { + options->xform_chain = L2FWD_CRYPTO_AEAD; + return 0; } return -1; @@ -1005,6 +1153,32 @@ parse_auth_op(enum rte_crypto_auth_operation *op, char *optarg) } static int +parse_aead_algo(enum rte_crypto_aead_algorithm *algo, char *optarg) +{ + if (rte_cryptodev_get_aead_algo_enum(algo, optarg) < 0) { + RTE_LOG(ERR, USER1, "AEAD algorithm specified " + "not supported!\n"); + return -1; + } + + return 0; +} + +static int +parse_aead_op(enum rte_crypto_aead_operation *op, char *optarg) +{ + if (strcmp("ENCRYPT", optarg) == 0) { + *op = RTE_CRYPTO_AEAD_OP_ENCRYPT; + return 0; + } else if (strcmp("DECRYPT", optarg) == 0) { + *op = RTE_CRYPTO_AEAD_OP_DECRYPT; + return 0; + } + + printf("AEAD operation specified not supported!\n"); + return -1; +} +static int parse_cryptodev_mask(struct l2fwd_crypto_options *options, const char *q_arg) { @@ -1065,18 +1239,18 @@ l2fwd_crypto_parse_args_long_options(struct l2fwd_crypto_options *options, else if (strcmp(lgopts[option_index].name, "cipher_key_random_size") == 0) return parse_size(&options->ckey_random_size, optarg); - else if (strcmp(lgopts[option_index].name, "iv") == 0) { - options->iv_param = 1; - options->iv.length = - parse_key(options->iv.data, optarg); - if (options->iv.length > 0) + else if (strcmp(lgopts[option_index].name, "cipher_iv") == 0) { + options->cipher_iv_param = 1; + options->cipher_iv.length = + parse_key(options->cipher_iv.data, optarg); + if (options->cipher_iv.length > 0) return 0; else return -1; } - else if (strcmp(lgopts[option_index].name, "iv_random_size") == 0) - return parse_size(&options->iv_random_size, optarg); + else if (strcmp(lgopts[option_index].name, "cipher_iv_random_size") == 0) + return parse_size(&options->cipher_iv_random_size, optarg); /* Authentication options */ else if (strcmp(lgopts[option_index].name, "auth_algo") == 0) { @@ -1102,6 +1276,56 @@ l2fwd_crypto_parse_args_long_options(struct l2fwd_crypto_options *options, return parse_size(&options->akey_random_size, optarg); } + else if (strcmp(lgopts[option_index].name, "auth_iv") == 0) { + options->auth_iv_param = 1; + options->auth_iv.length = + parse_key(options->auth_iv.data, optarg); + if (options->auth_iv.length > 0) + return 0; + else + return -1; + } + + else if (strcmp(lgopts[option_index].name, "auth_iv_random_size") == 0) + return parse_size(&options->auth_iv_random_size, optarg); + + /* AEAD options */ + else if (strcmp(lgopts[option_index].name, "aead_algo") == 0) { + return parse_aead_algo(&options->aead_xform.aead.algo, + optarg); + } + + else if (strcmp(lgopts[option_index].name, "aead_op") == 0) + return parse_aead_op(&options->aead_xform.aead.op, + optarg); + + else if (strcmp(lgopts[option_index].name, "aead_key") == 0) { + options->aead_key_param = 1; + options->aead_xform.aead.key.length = + parse_key(options->aead_xform.aead.key.data, optarg); + if (options->aead_xform.aead.key.length > 0) + return 0; + else + return -1; + } + + else if (strcmp(lgopts[option_index].name, "aead_key_random_size") == 0) + return parse_size(&options->aead_key_random_size, optarg); 
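
Each of the *_key, *_iv and aad options above goes through the example's parse_key() helper, which converts a "bytes separated with :" string into raw bytes and returns the byte count used by the capability range checks further down. parse_key() itself is not visible in this hunk, so the following is only a hedged stand-in under the same contract (hypothetical name parse_hex_key; returns the parsed length or -1):

#include <stdint.h>
#include <stdlib.h>

static int
parse_hex_key(uint8_t *out, size_t max_len, const char *arg)
{
	char *end;
	size_t n = 0;

	while (n < max_len) {
		unsigned long byte = strtoul(arg, &end, 16);

		if (end == arg || byte > 0xff)
			return -1;		/* not a hex byte */
		out[n++] = (uint8_t)byte;
		if (*end == '\0')
			return (int)n;		/* whole string consumed */
		if (*end != ':')
			return -1;		/* unexpected separator */
		arg = end + 1;
	}
	return -1;				/* longer than max_len */
}

The length returned here is what the initialization code later validates against the device capability range (min/max/increment) via check_supported_size().
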
+ + + else if (strcmp(lgopts[option_index].name, "aead_iv") == 0) { + options->aead_iv_param = 1; + options->aead_iv.length = + parse_key(options->aead_iv.data, optarg); + if (options->aead_iv.length > 0) + return 0; + else + return -1; + } + + else if (strcmp(lgopts[option_index].name, "aead_iv_random_size") == 0) + return parse_size(&options->aead_iv_random_size, optarg); + else if (strcmp(lgopts[option_index].name, "aad") == 0) { options->aad_param = 1; options->aad.length = @@ -1128,6 +1352,16 @@ l2fwd_crypto_parse_args_long_options(struct l2fwd_crypto_options *options, else if (strcmp(lgopts[option_index].name, "cryptodev_mask") == 0) return parse_cryptodev_mask(options, optarg); + else if (strcmp(lgopts[option_index].name, "mac-updating") == 0) { + options->mac_updating = 1; + return 0; + } + + else if (strcmp(lgopts[option_index].name, "no-mac-updating") == 0) { + options->mac_updating = 0; + return 0; + } + return -1; } @@ -1220,9 +1454,9 @@ l2fwd_crypto_default_options(struct l2fwd_crypto_options *options) options->ckey_param = 0; options->ckey_random_size = -1; options->cipher_xform.cipher.key.length = 0; - options->iv_param = 0; - options->iv_random_size = -1; - options->iv.length = 0; + options->cipher_iv_param = 0; + options->cipher_iv_random_size = -1; + options->cipher_iv.length = 0; options->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; options->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; @@ -1233,16 +1467,36 @@ l2fwd_crypto_default_options(struct l2fwd_crypto_options *options) options->akey_param = 0; options->akey_random_size = -1; options->auth_xform.auth.key.length = 0; + options->auth_iv_param = 0; + options->auth_iv_random_size = -1; + options->auth_iv.length = 0; + + options->auth_xform.auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + options->auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + + /* AEAD Data */ + options->aead_xform.type = RTE_CRYPTO_SYM_XFORM_AEAD; + options->aead_xform.next = NULL; + options->aead_key_param = 0; + options->aead_key_random_size = -1; + options->aead_xform.aead.key.length = 0; + options->aead_iv_param = 0; + options->aead_iv_random_size = -1; + options->aead_iv.length = 0; + + options->auth_xform.aead.algo = RTE_CRYPTO_AEAD_AES_GCM; + options->auth_xform.aead.op = RTE_CRYPTO_AEAD_OP_ENCRYPT; + options->aad_param = 0; options->aad_random_size = -1; options->aad.length = 0; - options->digest_size = -1; - options->auth_xform.auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; - options->auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + options->digest_size = -1; options->type = CDEV_TYPE_ANY; options->cryptodev_mask = UINT64_MAX; + + options->mac_updating = 1; } static void @@ -1254,7 +1508,7 @@ display_cipher_info(struct l2fwd_crypto_options *options) rte_hexdump(stdout, "Cipher key:", options->cipher_xform.cipher.key.data, options->cipher_xform.cipher.key.length); - rte_hexdump(stdout, "IV:", options->iv.data, options->iv.length); + rte_hexdump(stdout, "IV:", options->cipher_iv.data, options->cipher_iv.length); } static void @@ -1262,10 +1516,23 @@ display_auth_info(struct l2fwd_crypto_options *options) { printf("\n---- Authentication information ---\n"); printf("Algorithm: %s\n", - rte_crypto_auth_algorithm_strings[options->auth_xform.cipher.algo]); + rte_crypto_auth_algorithm_strings[options->auth_xform.auth.algo]); rte_hexdump(stdout, "Auth key:", options->auth_xform.auth.key.data, options->auth_xform.auth.key.length); + rte_hexdump(stdout, "IV:", options->auth_iv.data, options->auth_iv.length); +} + +static void 
+display_aead_info(struct l2fwd_crypto_options *options) +{ + printf("\n---- AEAD information ---\n"); + printf("Algorithm: %s\n", + rte_crypto_aead_algorithm_strings[options->aead_xform.aead.algo]); + rte_hexdump(stdout, "AEAD key:", + options->aead_xform.aead.key.data, + options->aead_xform.aead.key.length); + rte_hexdump(stdout, "IV:", options->aead_iv.data, options->aead_iv.length); rte_hexdump(stdout, "AAD:", options->aad.data, options->aad.length); } @@ -1274,6 +1541,7 @@ l2fwd_crypto_options_print(struct l2fwd_crypto_options *options) { char string_cipher_op[MAX_STR_LEN]; char string_auth_op[MAX_STR_LEN]; + char string_aead_op[MAX_STR_LEN]; if (options->cipher_xform.cipher.op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) strcpy(string_cipher_op, "Encrypt"); @@ -1285,6 +1553,12 @@ l2fwd_crypto_options_print(struct l2fwd_crypto_options *options) else strcpy(string_auth_op, "Auth verify"); + if (options->aead_xform.aead.op == RTE_CRYPTO_AEAD_OP_ENCRYPT) + strcpy(string_aead_op, "Authenticated encryption"); + else + strcpy(string_aead_op, "Authenticated decryption"); + + printf("Options:-\nn"); printf("portmask: %x\n", options->portmask); printf("ports per lcore: %u\n", options->nb_ports_per_lcore); @@ -1303,14 +1577,21 @@ l2fwd_crypto_options_print(struct l2fwd_crypto_options *options) if (options->akey_param && (options->akey_random_size != -1)) printf("Auth key already parsed, ignoring size of random key\n"); - if (options->iv_param && (options->iv_random_size != -1)) - printf("IV already parsed, ignoring size of random IV\n"); + if (options->cipher_iv_param && (options->cipher_iv_random_size != -1)) + printf("Cipher IV already parsed, ignoring size of random IV\n"); + + if (options->auth_iv_param && (options->auth_iv_random_size != -1)) + printf("Auth IV already parsed, ignoring size of random IV\n"); if (options->aad_param && (options->aad_random_size != -1)) printf("AAD already parsed, ignoring size of random AAD\n"); printf("\nCrypto chain: "); switch (options->xform_chain) { + case L2FWD_CRYPTO_AEAD: + printf("Input --> %s --> Output\n", string_aead_op); + display_aead_info(options); + break; case L2FWD_CRYPTO_CIPHER_HASH: printf("Input --> %s --> %s --> Output\n", string_cipher_op, string_auth_op); @@ -1352,27 +1633,40 @@ l2fwd_crypto_parse_args(struct l2fwd_crypto_options *options, { "cipher_op", required_argument, 0, 0 }, { "cipher_key", required_argument, 0, 0 }, { "cipher_key_random_size", required_argument, 0, 0 }, + { "cipher_iv", required_argument, 0, 0 }, + { "cipher_iv_random_size", required_argument, 0, 0 }, { "auth_algo", required_argument, 0, 0 }, { "auth_op", required_argument, 0, 0 }, { "auth_key", required_argument, 0, 0 }, { "auth_key_random_size", required_argument, 0, 0 }, + { "auth_iv", required_argument, 0, 0 }, + { "auth_iv_random_size", required_argument, 0, 0 }, + + { "aead_algo", required_argument, 0, 0 }, + { "aead_op", required_argument, 0, 0 }, + { "aead_key", required_argument, 0, 0 }, + { "aead_key_random_size", required_argument, 0, 0 }, + { "aead_iv", required_argument, 0, 0 }, + { "aead_iv_random_size", required_argument, 0, 0 }, - { "iv", required_argument, 0, 0 }, - { "iv_random_size", required_argument, 0, 0 }, { "aad", required_argument, 0, 0 }, { "aad_random_size", required_argument, 0, 0 }, + { "digest_size", required_argument, 0, 0 }, { "sessionless", no_argument, 0, 0 }, { "cryptodev_mask", required_argument, 0, 0}, + { "mac-updating", no_argument, 0, 0}, + { "no-mac-updating", no_argument, 0, 0}, + { NULL, 0, 0, 0 } }; 
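
The paired mac-updating/no-mac-updating entries that close lgopts[] above are the usual getopt_long() idiom for a boolean long option: both take no argument, both are dispatched by name, and the last one on the command line wins. A compilable toy of just that dispatch (the main() framing and variable names are assumptions, not patch code):

#include <getopt.h>
#include <stdio.h>
#include <string.h>

int
main(int argc, char **argv)
{
	int opt, option_index;
	int mac_updating = 1;	/* enabled by default, as in the example */
	static struct option lgopts[] = {
		{ "mac-updating", no_argument, 0, 0 },
		{ "no-mac-updating", no_argument, 0, 0 },
		{ NULL, 0, 0, 0 }
	};

	while ((opt = getopt_long(argc, argv, "", lgopts,
			&option_index)) != EOF) {
		if (opt != 0)
			continue;	/* only named long options matter */
		if (strcmp(lgopts[option_index].name, "mac-updating") == 0)
			mac_updating = 1;
		else if (strcmp(lgopts[option_index].name,
				"no-mac-updating") == 0)
			mac_updating = 0;
	}
	printf("MAC updating %s\n", mac_updating ? "enabled" : "disabled");
	return 0;
}
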
l2fwd_crypto_default_options(options); - while ((opt = getopt_long(argc, argvopt, "p:q:st:", lgopts, + while ((opt = getopt_long(argc, argvopt, "p:q:sT:", lgopts, &option_index)) != EOF) { switch (opt) { /* long options */ @@ -1492,7 +1786,8 @@ check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) /* Check if device has to be HW/SW or any */ static int -check_type(struct l2fwd_crypto_options *options, struct rte_cryptodev_info *dev_info) +check_type(const struct l2fwd_crypto_options *options, + const struct rte_cryptodev_info *dev_info) { if (options->type == CDEV_TYPE_HW && (dev_info->feature_flags & RTE_CRYPTODEV_FF_HW_ACCELERATED)) @@ -1506,6 +1801,108 @@ check_type(struct l2fwd_crypto_options *options, struct rte_cryptodev_info *dev_ return -1; } +static const struct rte_cryptodev_capabilities * +check_device_support_cipher_algo(const struct l2fwd_crypto_options *options, + const struct rte_cryptodev_info *dev_info, + uint8_t cdev_id) +{ + unsigned int i = 0; + const struct rte_cryptodev_capabilities *cap = &dev_info->capabilities[0]; + enum rte_crypto_cipher_algorithm cap_cipher_algo; + enum rte_crypto_cipher_algorithm opt_cipher_algo = + options->cipher_xform.cipher.algo; + + while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) { + cap_cipher_algo = cap->sym.cipher.algo; + if (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER) { + if (cap_cipher_algo == opt_cipher_algo) { + if (check_type(options, dev_info) == 0) + break; + } + } + cap = &dev_info->capabilities[++i]; + } + + if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) { + printf("Algorithm %s not supported by cryptodev %u" + " or device not of preferred type (%s)\n", + rte_crypto_cipher_algorithm_strings[opt_cipher_algo], + cdev_id, + options->string_type); + return NULL; + } + + return cap; +} + +static const struct rte_cryptodev_capabilities * +check_device_support_auth_algo(const struct l2fwd_crypto_options *options, + const struct rte_cryptodev_info *dev_info, + uint8_t cdev_id) +{ + unsigned int i = 0; + const struct rte_cryptodev_capabilities *cap = &dev_info->capabilities[0]; + enum rte_crypto_auth_algorithm cap_auth_algo; + enum rte_crypto_auth_algorithm opt_auth_algo = + options->auth_xform.auth.algo; + + while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) { + cap_auth_algo = cap->sym.auth.algo; + if (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH) { + if (cap_auth_algo == opt_auth_algo) { + if (check_type(options, dev_info) == 0) + break; + } + } + cap = &dev_info->capabilities[++i]; + } + + if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) { + printf("Algorithm %s not supported by cryptodev %u" + " or device not of preferred type (%s)\n", + rte_crypto_auth_algorithm_strings[opt_auth_algo], + cdev_id, + options->string_type); + return NULL; + } + + return cap; +} + +static const struct rte_cryptodev_capabilities * +check_device_support_aead_algo(const struct l2fwd_crypto_options *options, + const struct rte_cryptodev_info *dev_info, + uint8_t cdev_id) +{ + unsigned int i = 0; + const struct rte_cryptodev_capabilities *cap = &dev_info->capabilities[0]; + enum rte_crypto_aead_algorithm cap_aead_algo; + enum rte_crypto_aead_algorithm opt_aead_algo = + options->aead_xform.aead.algo; + + while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) { + cap_aead_algo = cap->sym.aead.algo; + if (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD) { + if (cap_aead_algo == opt_aead_algo) { + if (check_type(options, dev_info) == 0) + break; + } + } + cap = &dev_info->capabilities[++i]; + } + + if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) { + 
printf("Algorithm %s not supported by cryptodev %u" + " or device not of preferred type (%s)\n", + rte_crypto_aead_algorithm_strings[opt_aead_algo], + cdev_id, + options->string_type); + return NULL; + } + + return cap; +} + /* Check if the device is enabled by cryptodev_mask */ static int check_cryptodev_mask(struct l2fwd_crypto_options *options, @@ -1539,16 +1936,53 @@ check_supported_size(uint16_t length, uint16_t min, uint16_t max, return -1; } + +static int +check_iv_param(const struct rte_crypto_param_range *iv_range_size, + unsigned int iv_param, int iv_random_size, + uint16_t *iv_length) +{ + /* + * Check if length of provided IV is supported + * by the algorithm chosen. + */ + if (iv_param) { + if (check_supported_size(*iv_length, + iv_range_size->min, + iv_range_size->max, + iv_range_size->increment) + != 0) { + printf("Unsupported IV length\n"); + return -1; + } + /* + * Check if length of IV to be randomly generated + * is supported by the algorithm chosen. + */ + } else if (iv_random_size != -1) { + if (check_supported_size(iv_random_size, + iv_range_size->min, + iv_range_size->max, + iv_range_size->increment) + != 0) { + printf("Unsupported IV length\n"); + return -1; + } + *iv_length = iv_random_size; + /* No size provided, use minimum size. */ + } else + *iv_length = iv_range_size->min; + + return 0; +} + static int initialize_cryptodevs(struct l2fwd_crypto_options *options, unsigned nb_ports, uint8_t *enabled_cdevs) { - unsigned i, cdev_id, cdev_count, enabled_cdev_count = 0; + unsigned int cdev_id, cdev_count, enabled_cdev_count = 0; const struct rte_cryptodev_capabilities *cap; - enum rte_crypto_auth_algorithm cap_auth_algo; - enum rte_crypto_auth_algorithm opt_auth_algo; - enum rte_crypto_cipher_algorithm cap_cipher_algo; - enum rte_crypto_cipher_algorithm opt_cipher_algo; + unsigned int sess_sz, max_sess_sz = 0; int retval; cdev_count = rte_cryptodev_count(); @@ -1557,18 +1991,28 @@ initialize_cryptodevs(struct l2fwd_crypto_options *options, unsigned nb_ports, return -1; } + for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) { + sess_sz = rte_cryptodev_get_private_session_size(cdev_id); + if (sess_sz > max_sess_sz) + max_sess_sz = sess_sz; + } + for (cdev_id = 0; cdev_id < cdev_count && enabled_cdev_count < nb_ports; cdev_id++) { struct rte_cryptodev_qp_conf qp_conf; struct rte_cryptodev_info dev_info; + retval = rte_cryptodev_socket_id(cdev_id); + + if (retval < 0) { + printf("Invalid crypto device id used\n"); + return -1; + } + + uint8_t socket_id = (uint8_t) retval; struct rte_cryptodev_config conf = { .nb_queue_pairs = 1, - .socket_id = SOCKET_ID_ANY, - .session_mp = { - .nb_objs = 2048, - .cache_size = 64 - } + .socket_id = socket_id, }; if (check_cryptodev_mask(options, (uint8_t)cdev_id)) @@ -1576,66 +2020,157 @@ initialize_cryptodevs(struct l2fwd_crypto_options *options, unsigned nb_ports, rte_cryptodev_info_get(cdev_id, &dev_info); - /* Set cipher parameters */ - if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH || - options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER || - options->xform_chain == L2FWD_CRYPTO_CIPHER_ONLY) { - /* Check if device supports cipher algo */ - i = 0; - opt_cipher_algo = options->cipher_xform.cipher.algo; - cap = &dev_info.capabilities[i]; - while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) { - cap_cipher_algo = cap->sym.cipher.algo; - if (cap->sym.xform_type == - RTE_CRYPTO_SYM_XFORM_CIPHER) { - if (cap_cipher_algo == opt_cipher_algo) { - if (check_type(options, &dev_info) == 0) - break; - } - } - cap = &dev_info.capabilities[++i]; + 
if (session_pool_socket[socket_id] == NULL) { + char mp_name[RTE_MEMPOOL_NAMESIZE]; + struct rte_mempool *sess_mp; + + snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, + "sess_mp_%u", socket_id); + + /* + * Create enough objects for session headers and + * device private data + */ + sess_mp = rte_mempool_create(mp_name, + MAX_SESSIONS * 2, + max_sess_sz, + SESSION_POOL_CACHE_SIZE, + 0, NULL, NULL, NULL, + NULL, socket_id, + 0); + + if (sess_mp == NULL) { + printf("Cannot create session pool on socket %d\n", + socket_id); + return -ENOMEM; } - if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) { - printf("Algorithm %s not supported by cryptodev %u" - " or device not of preferred type (%s)\n", - rte_crypto_cipher_algorithm_strings[opt_cipher_algo], - cdev_id, - options->string_type); + printf("Allocated session pool on socket %d\n", socket_id); + session_pool_socket[socket_id] = sess_mp; + } + + /* Set AEAD parameters */ + if (options->xform_chain == L2FWD_CRYPTO_AEAD) { + /* Check if device supports AEAD algo */ + cap = check_device_support_aead_algo(options, &dev_info, + cdev_id); + if (cap == NULL) continue; - } - options->block_size = cap->sym.cipher.block_size; + options->block_size = cap->sym.aead.block_size; + + check_iv_param(&cap->sym.aead.iv_size, + options->aead_iv_param, + options->aead_iv_random_size, + &options->aead_iv.length); + + /* + * Check if length of provided AEAD key is supported + * by the algorithm chosen. + */ + if (options->aead_key_param) { + if (check_supported_size( + options->aead_xform.aead.key.length, + cap->sym.aead.key_size.min, + cap->sym.aead.key_size.max, + cap->sym.aead.key_size.increment) + != 0) { + printf("Unsupported aead key length\n"); + return -1; + } + /* + * Check if length of the aead key to be randomly generated + * is supported by the algorithm chosen. + */ + } else if (options->aead_key_random_size != -1) { + if (check_supported_size(options->aead_key_random_size, + cap->sym.aead.key_size.min, + cap->sym.aead.key_size.max, + cap->sym.aead.key_size.increment) + != 0) { + printf("Unsupported aead key length\n"); + return -1; + } + options->aead_xform.aead.key.length = + options->aead_key_random_size; + /* No size provided, use minimum size. */ + } else + options->aead_xform.aead.key.length = + cap->sym.aead.key_size.min; + + if (!options->aead_key_param) + generate_random_key( + options->aead_xform.aead.key.data, + options->aead_xform.aead.key.length); + /* - * Check if length of provided IV is supported + * Check if length of provided AAD is supported * by the algorithm chosen. */ - if (options->iv_param) { - if (check_supported_size(options->iv.length, - cap->sym.cipher.iv_size.min, - cap->sym.cipher.iv_size.max, - cap->sym.cipher.iv_size.increment) + if (options->aad_param) { + if (check_supported_size(options->aad.length, + cap->sym.aead.aad_size.min, + cap->sym.aead.aad_size.max, + cap->sym.aead.aad_size.increment) != 0) { - printf("Unsupported IV length\n"); + printf("Unsupported AAD length\n"); return -1; } /* - * Check if length of IV to be randomly generated + * Check if length of AAD to be randomly generated * is supported by the algorithm chosen. 
*/ - } else if (options->iv_random_size != -1) { - if (check_supported_size(options->iv_random_size, - cap->sym.cipher.iv_size.min, - cap->sym.cipher.iv_size.max, - cap->sym.cipher.iv_size.increment) + } else if (options->aad_random_size != -1) { + if (check_supported_size(options->aad_random_size, + cap->sym.aead.aad_size.min, + cap->sym.aead.aad_size.max, + cap->sym.aead.aad_size.increment) != 0) { - printf("Unsupported IV length\n"); + printf("Unsupported AAD length\n"); return -1; } - options->iv.length = options->iv_random_size; + options->aad.length = options->aad_random_size; /* No size provided, use minimum size. */ } else - options->iv.length = cap->sym.cipher.iv_size.min; + options->aad.length = cap->sym.aead.aad_size.min; + + options->aead_xform.aead.aad_length = + options->aad.length; + + /* Check if digest size is supported by the algorithm. */ + if (options->digest_size != -1) { + if (check_supported_size(options->digest_size, + cap->sym.aead.digest_size.min, + cap->sym.aead.digest_size.max, + cap->sym.aead.digest_size.increment) + != 0) { + printf("Unsupported digest length\n"); + return -1; + } + options->aead_xform.aead.digest_length = + options->digest_size; + /* No size provided, use minimum size. */ + } else + options->aead_xform.aead.digest_length = + cap->sym.aead.digest_size.min; + } + + /* Set cipher parameters */ + if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH || + options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER || + options->xform_chain == L2FWD_CRYPTO_CIPHER_ONLY) { + /* Check if device supports cipher algo */ + cap = check_device_support_cipher_algo(options, &dev_info, + cdev_id); + if (cap == NULL) + continue; + + options->block_size = cap->sym.cipher.block_size; + + check_iv_param(&cap->sym.cipher.iv_size, + options->cipher_iv_param, + options->cipher_iv_random_size, + &options->cipher_iv.length); /* * Check if length of provided cipher key is supported @@ -1683,62 +2218,15 @@ initialize_cryptodevs(struct l2fwd_crypto_options *options, unsigned nb_ports, options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER || options->xform_chain == L2FWD_CRYPTO_HASH_ONLY) { /* Check if device supports auth algo */ - i = 0; - opt_auth_algo = options->auth_xform.auth.algo; - cap = &dev_info.capabilities[i]; - while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) { - cap_auth_algo = cap->sym.auth.algo; - if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH) && - (cap_auth_algo == opt_auth_algo) && - (check_type(options, &dev_info) == 0)) { - break; - } - cap = &dev_info.capabilities[++i]; - } - - if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) { - printf("Algorithm %s not supported by cryptodev %u" - " or device not of preferred type (%s)\n", - rte_crypto_auth_algorithm_strings[opt_auth_algo], - cdev_id, - options->string_type); + cap = check_device_support_auth_algo(options, &dev_info, + cdev_id); + if (cap == NULL) continue; - } - - /* - * Check if length of provided AAD is supported - * by the algorithm chosen. - */ - if (options->aad_param) { - if (check_supported_size(options->aad.length, - cap->sym.auth.aad_size.min, - cap->sym.auth.aad_size.max, - cap->sym.auth.aad_size.increment) - != 0) { - printf("Unsupported AAD length\n"); - return -1; - } - /* - * Check if length of AAD to be randomly generated - * is supported by the algorithm chosen.
- */ - } else if (options->aad_random_size != -1) { - if (check_supported_size(options->aad_random_size, - cap->sym.auth.aad_size.min, - cap->sym.auth.aad_size.max, - cap->sym.auth.aad_size.increment) - != 0) { - printf("Unsupported AAD length\n"); - return -1; - } - options->aad.length = options->aad_random_size; - /* No size provided, use minimum size. */ - } else - options->aad.length = cap->sym.auth.aad_size.min; - - options->auth_xform.auth.add_auth_data_length = - options->aad.length; + check_iv_param(&cap->sym.auth.iv_size, + options->auth_iv_param, + options->auth_iv_random_size, + &options->auth_iv.length); /* * Check if length of provided auth key is supported * by the algorithm chosen. @@ -1805,7 +2293,7 @@ initialize_cryptodevs(struct l2fwd_crypto_options *options, unsigned nb_ports, qp_conf.nb_descriptors = 2048; retval = rte_cryptodev_queue_pair_setup(cdev_id, 0, &qp_conf, - SOCKET_ID_ANY); + socket_id, session_pool_socket[socket_id]); if (retval < 0) { printf("Failed to setup queue pair %u on cryptodev %u", 0, cdev_id); @@ -1861,6 +2349,14 @@ initialize_ports(struct l2fwd_crypto_options *options) return -1; } + retval = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (retval < 0) { + printf("Cannot adjust number of descriptors: err=%d, port=%u\n", + retval, (unsigned) portid); + return -1; + } + /* init one RX queue */ fflush(stdout); retval = rte_eth_rx_queue_setup(portid, 0, nb_rxd, @@ -1940,16 +2436,27 @@ reserve_key_memory(struct l2fwd_crypto_options *options) if (options->cipher_xform.cipher.key.data == NULL) rte_exit(EXIT_FAILURE, "Failed to allocate memory for cipher key"); - options->auth_xform.auth.key.data = rte_malloc("auth key", MAX_KEY_SIZE, 0); if (options->auth_xform.auth.key.data == NULL) rte_exit(EXIT_FAILURE, "Failed to allocate memory for auth key"); - options->iv.data = rte_malloc("iv", MAX_KEY_SIZE, 0); - if (options->iv.data == NULL) - rte_exit(EXIT_FAILURE, "Failed to allocate memory for IV"); - options->iv.phys_addr = rte_malloc_virt2phy(options->iv.data); + options->aead_xform.aead.key.data = rte_malloc("aead key", + MAX_KEY_SIZE, 0); + if (options->aead_xform.aead.key.data == NULL) + rte_exit(EXIT_FAILURE, "Failed to allocate memory for AEAD key"); + + options->cipher_iv.data = rte_malloc("cipher iv", MAX_KEY_SIZE, 0); + if (options->cipher_iv.data == NULL) + rte_exit(EXIT_FAILURE, "Failed to allocate memory for cipher IV"); + + options->auth_iv.data = rte_malloc("auth iv", MAX_KEY_SIZE, 0); + if (options->auth_iv.data == NULL) + rte_exit(EXIT_FAILURE, "Failed to allocate memory for auth IV"); + + options->aead_iv.data = rte_malloc("aead_iv", MAX_KEY_SIZE, 0); + if (options->aead_iv.data == NULL) + rte_exit(EXIT_FAILURE, "Failed to allocate memory for AEAD iv"); options->aad.data = rte_malloc("aad", MAX_KEY_SIZE, 0); if (options->aad.data == NULL) @@ -1983,6 +2490,9 @@ main(int argc, char **argv) if (ret < 0) rte_exit(EXIT_FAILURE, "Invalid L2FWD-CRYPTO arguments\n"); + printf("MAC updating %s\n", + options.mac_updating ? 
"enabled" : "disabled"); + /* create the mbuf pool */ l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 512, sizeof(struct rte_crypto_op), @@ -1992,7 +2502,7 @@ main(int argc, char **argv) /* create crypto op pool */ l2fwd_crypto_op_pool = rte_crypto_op_pool_create("crypto_op_pool", - RTE_CRYPTO_OP_TYPE_SYMMETRIC, NB_MBUF, 128, 0, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, NB_MBUF, 128, MAXIMUM_IV_LENGTH, rte_socket_id()); if (l2fwd_crypto_op_pool == NULL) rte_exit(EXIT_FAILURE, "Cannot create crypto op pool\n"); diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c index e6e6c228..98936206 100644 --- a/examples/l2fwd-jobstats/main.c +++ b/examples/l2fwd-jobstats/main.c @@ -38,7 +38,6 @@ #include <ctype.h> #include <getopt.h> -#include <rte_alarm.h> #include <rte_common.h> #include <rte_log.h> #include <rte_malloc.h> @@ -46,7 +45,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -67,6 +65,7 @@ #include <rte_jobstats.h> #include <rte_timer.h> #include <rte_alarm.h> +#include <rte_pause.h> #define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1 @@ -883,6 +882,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, (unsigned) portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors: err=%d, port=%u\n", + ret, (unsigned) portid); + rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]); /* init one RX queue */ diff --git a/examples/l2fwd-keepalive/ka-agent/main.c b/examples/l2fwd-keepalive/ka-agent/main.c index be1c7f49..ba0ac352 100644 --- a/examples/l2fwd-keepalive/ka-agent/main.c +++ b/examples/l2fwd-keepalive/ka-agent/main.c @@ -31,7 +31,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <errno.h> #include <stdio.h> #include <string.h> #include <stdint.h> diff --git a/examples/l2fwd-keepalive/main.c b/examples/l2fwd-keepalive/main.c index 37453483..83bc542c 100644 --- a/examples/l2fwd-keepalive/main.c +++ b/examples/l2fwd-keepalive/main.c @@ -53,7 +53,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -677,6 +676,13 @@ main(int argc, char **argv) "Cannot configure device: err=%d, port=%u\n", ret, (unsigned) portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors: err=%d, port=%u\n", + ret, (unsigned) portid); + rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]); /* init one RX queue */ diff --git a/examples/l2fwd/Makefile b/examples/l2fwd/Makefile index 78feeeb8..8896ab45 100644 --- a/examples/l2fwd/Makefile +++ b/examples/l2fwd/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/l2fwd/main.c b/examples/l2fwd/main.c index f9667272..14263358 100644 --- a/examples/l2fwd/main.c +++ b/examples/l2fwd/main.c @@ -54,7 +54,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -666,6 +665,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, (unsigned) portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors: err=%d, port=%u\n", + ret, (unsigned) portid); + rte_eth_macaddr_get(portid,&l2fwd_ports_eth_addr[portid]); /* init one RX queue */ diff --git a/examples/l3fwd-acl/Makefile b/examples/l3fwd-acl/Makefile index a3473a83..3cd299f1 100644 --- a/examples/l3fwd-acl/Makefile +++ b/examples/l3fwd-acl/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c index ea0b5b1e..8eff4de4 100644 --- a/examples/l3fwd-acl/main.c +++ b/examples/l3fwd-acl/main.c @@ -49,7 +49,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -91,10 +90,10 @@ */ #define NB_MBUF RTE_MAX(\ - (nb_ports * nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ - nb_ports * nb_lcores * MAX_PKT_BURST + \ - nb_ports * n_tx_queue * RTE_TEST_TX_DESC_DEFAULT + \ - nb_lcores * MEMPOOL_CACHE_SIZE), \ + (nb_ports * nb_rx_queue * nb_rxd + \ + nb_ports * nb_lcores * MAX_PKT_BURST + \ + nb_ports * n_tx_queue * nb_txd + \ + nb_lcores * MEMPOOL_CACHE_SIZE), \ (unsigned)8192) #define MAX_PKT_BURST 32 @@ -1951,6 +1950,13 @@ main(int argc, char **argv) "Cannot configure device: err=%d, port=%d\n", ret, portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + 
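[The same two-line fix recurs in every example this series touches, and the NB_MBUF macros are reworked alongside it to size the mbuf pool from nb_rxd/nb_txd rather than the compile-time defaults, so the pool tracks whatever ring sizes the PMD actually accepts. The canonical sequence, as these patches apply it:

	/* Ring sizes are negotiated with the PMD, not assumed. */
	uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
	uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

	ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Cannot configure port %u\n", portid);

	/* May raise or lower nb_rxd/nb_txd to the device's limits. */
	ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Cannot adjust descriptors: port %u\n",
				portid);

	ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
			rte_eth_dev_socket_id(portid), NULL, mbuf_pool);
]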
rte_exit(EXIT_FAILURE, + "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n", + ret, portid); + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); printf(", "); diff --git a/examples/l3fwd-power/Makefile b/examples/l3fwd-power/Makefile index 783772a7..9c4f4430 100644 --- a/examples/l3fwd-power/Makefile +++ b/examples/l3fwd-power/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c index 9d57fdef..fd442f5e 100644 --- a/examples/l3fwd-power/main.c +++ b/examples/l3fwd-power/main.c @@ -52,7 +52,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -74,7 +73,6 @@ #include <rte_string_fns.h> #include <rte_timer.h> #include <rte_power.h> -#include <rte_eal.h> #include <rte_spinlock.h> #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 @@ -131,9 +129,9 @@ */ #define NB_MBUF RTE_MAX ( \ - (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ + (nb_ports*nb_rx_queue*nb_rxd + \ nb_ports*nb_lcores*MAX_PKT_BURST + \ - nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ + nb_ports*n_tx_queue*nb_txd + \ nb_lcores*MEMPOOL_CACHE_SIZE), \ (unsigned)8192) @@ -245,7 +243,7 @@ static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#ifdef RTE_ARCH_X86 #include <rte_hash_crc.h> #define DEFAULT_HASH_FUNC rte_hash_crc #else @@ -1726,6 +1724,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: " "err=%d, port=%d\n", ret, portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors: err=%d, port=%d\n", + ret, portid); + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); printf(", "); diff --git a/examples/l3fwd-vf/Makefile b/examples/l3fwd-vf/Makefile index d97611cf..989faf03 100644 --- a/examples/l3fwd-vf/Makefile +++ b/examples/l3fwd-vf/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/l3fwd-vf/main.c b/examples/l3fwd-vf/main.c index 797f722a..34e4a6be 100644 --- a/examples/l3fwd-vf/main.c +++ b/examples/l3fwd-vf/main.c @@ -50,7 +50,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_spinlock.h> @@ -99,11 +98,11 @@ * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum value of 8192 */ -#define NB_MBUF RTE_MAX ( \ - (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ - nb_ports*nb_lcores*MAX_PKT_BURST + \ - nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ - nb_lcores*MEMPOOL_CACHE_SIZE), \ +#define NB_MBUF RTE_MAX ( \ + (nb_ports*nb_rx_queue*nb_rxd + \ + nb_ports*nb_lcores*MAX_PKT_BURST + \ + nb_ports*n_tx_queue*nb_txd + \ + 
nb_lcores*MEMPOOL_CACHE_SIZE), \ (unsigned)8192) /* @@ -215,7 +214,7 @@ static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#ifdef RTE_ARCH_X86 #include <rte_hash_crc.h> #define DEFAULT_HASH_FUNC rte_hash_crc #else @@ -1010,6 +1009,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", ret, portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors: err=%d, port=%d\n", + ret, portid); + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); printf(", "); diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile index 5ce0ce05..d99a43ad 100644 --- a/examples/l3fwd/Makefile +++ b/examples/l3fwd/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h new file mode 100644 index 00000000..2867365d --- /dev/null +++ b/examples/l3fwd/l3fwd_common.h @@ -0,0 +1,293 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2017, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _L3FWD_COMMON_H_ +#define _L3FWD_COMMON_H_ + +#ifdef DO_RFC_1812_CHECKS + +#define IPV4_MIN_VER_IHL 0x45 +#define IPV4_MAX_VER_IHL 0x4f +#define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL) + +/* Minimum value of IPV4 total length (20B) in network byte order. */ +#define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8) + +/* + * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: + * - The IP version number must be 4. 
+ * - The IP header length field must be large enough to hold the + * minimum length legal IP datagram (20 bytes = 5 words). + * - The IP total length field must be large enough to hold the IP + * datagram header, whose length is specified in the IP header length + * field. + * If we encounter an invalid IPV4 packet, then set its destination port + * to BAD_PORT value. + */ +static __rte_always_inline void +rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) +{ + uint8_t ihl; + + if (RTE_ETH_IS_IPV4_HDR(ptype)) { + ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; + + ipv4_hdr->time_to_live--; + ipv4_hdr->hdr_checksum++; + + if (ihl > IPV4_MAX_VER_IHL_DIFF || + ((uint8_t)ipv4_hdr->total_length == 0 && + ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) + dp[0] = BAD_PORT; + + } +} + +#else +#define rfc1812_process(mb, dp, ptype) do { } while (0) +#endif /* DO_RFC_1812_CHECKS */ + +/* + * We group consecutive packets with the same destination port into one burst. + * To avoid extra latency this is done together with some other packet + * processing, but after we have made a final decision about its destination. + * To do this we maintain: + * pnum - array of number of consecutive packets with the same dest port for + * each packet in the input burst. + * lp - pointer to the last updated element in the pnum. + * dlp - dest port value lp corresponds to. + */ + +#define GRPSZ (1 << FWDSTEP) +#define GRPMSK (GRPSZ - 1) + +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ + if (likely((dlp) == (dcp)[(idx)])) { \ + (lp)[0]++; \ + } else { \ + (dlp) = (dcp)[idx]; \ + (lp) = (pn) + (idx); \ + (lp)[0] = 1; \ + } \ +} while (0) + +static const struct { + uint64_t pnum; /* prebuilt 4 values for pnum[]. */ + int32_t idx; /* index for new last updated element. */ + uint16_t lpv; /* add value to the last updated element.
*/ +} gptbl[GRPSZ] = { + { + /* 0: a != b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010001), + .idx = 4, + .lpv = 0, + }, + { + /* 1: a == b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010002), + .idx = 4, + .lpv = 1, + }, + { + /* 2: a != b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020001), + .idx = 4, + .lpv = 0, + }, + { + /* 3: a == b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020003), + .idx = 4, + .lpv = 2, + }, + { + /* 4: a != b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010001), + .idx = 4, + .lpv = 0, + }, + { + /* 5: a == b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010002), + .idx = 4, + .lpv = 1, + }, + { + /* 6: a != b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030001), + .idx = 4, + .lpv = 0, + }, + { + /* 7: a == b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030004), + .idx = 4, + .lpv = 3, + }, + { + /* 8: a != b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010001), + .idx = 3, + .lpv = 0, + }, + { + /* 9: a == b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010002), + .idx = 3, + .lpv = 1, + }, + { + /* 0xa: a != b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020001), + .idx = 3, + .lpv = 0, + }, + { + /* 0xb: a == b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020003), + .idx = 3, + .lpv = 2, + }, + { + /* 0xc: a != b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010001), + .idx = 2, + .lpv = 0, + }, + { + /* 0xd: a == b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010002), + .idx = 2, + .lpv = 1, + }, + { + /* 0xe: a != b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040001), + .idx = 1, + .lpv = 0, + }, + { + /* 0xf: a == b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040005), + .idx = 0, + .lpv = 4, + }, +}; + +static __rte_always_inline void +send_packetsx4(struct lcore_conf *qconf, uint8_t port, struct rte_mbuf *m[], + uint32_t num) +{ + uint32_t len, j, n; + + len = qconf->tx_mbufs[port].len; + + /* + * If TX buffer for that queue is empty, and we have enough packets, + * then send them straightway. + */ + if (num >= MAX_TX_BURST && len == 0) { + n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); + if (unlikely(n < num)) { + do { + rte_pktmbuf_free(m[n]); + } while (++n < num); + } + return; + } + + /* + * Put packets into TX buffer for that queue. + */ + + n = len + num; + n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num; + + j = 0; + switch (n % FWDSTEP) { + while (j < n) { + case 0: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + /* fallthrough */ + case 3: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + /* fallthrough */ + case 2: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + /* fallthrough */ + case 1: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + } + } + + len += n; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + + send_burst(qconf, MAX_PKT_BURST, port); + + /* copy rest of the packets into the TX buffer. 
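[A note on rfc1812_process() above: the bare ipv4_hdr->hdr_checksum++ is a simplified RFC 1141-style incremental update, valid on little-endian hosts, relying on TTL occupying the high-order byte of its 16-bit header word:

	/*
	 * time_to_live-- lowers the TTL/protocol word by 0x0100 in network
	 * order; the checksum field is stored big-endian in memory, so
	 * incrementing it by 1 as a host-order uint16_t on a little-endian
	 * CPU raises it by 0x0100 in network order, keeping the one's
	 * complement sum balanced (carry corner cases are ignored here,
	 * as in the code itself).
	 */
]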
*/ + len = num - n; + j = 0; + switch (len % FWDSTEP) { + while (j < len) { + case 0: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + /* fallthrough */ + case 3: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + /* fallthrough */ + case 2: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + /* fallthrough */ + case 1: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + } + } + } + + qconf->tx_mbufs[port].len = len; +} + +#endif /* _L3FWD_COMMON_H_ */ diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index 9cc44603..53d081bd 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -57,7 +57,7 @@ #include "l3fwd.h" -#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) #define EM_HASH_CRC 1 #endif @@ -246,7 +246,7 @@ static rte_xmm_t mask0; static rte_xmm_t mask1; static rte_xmm_t mask2; -#if defined(__SSE2__) +#if defined(RTE_MACHINE_CPUFLAG_SSE2) static inline xmm_t em_mask_key(void *key, xmm_t mask) { @@ -328,11 +328,11 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct) return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); } -#if defined(__SSE4_1__) +#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON #if defined(NO_HASH_MULTI_LOOKUP) -#include "l3fwd_em_sse.h" +#include "l3fwd_em_sequential.h" #else -#include "l3fwd_em_hlm_sse.h" +#include "l3fwd_em_hlm.h" #endif #else #include "l3fwd_em.h" @@ -614,7 +614,7 @@ em_parse_ptype(struct rte_mbuf *m) packet_type |= RTE_PTYPE_L4_UDP; } else packet_type |= RTE_PTYPE_L3_IPV4_EXT; - } else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + } else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { ipv6_hdr = (struct ipv6_hdr *)l3; if (ipv6_hdr->proto == IPPROTO_TCP) packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP; @@ -709,13 +709,13 @@ em_main_loop(__attribute__((unused)) void *dummy) if (nb_rx == 0) continue; -#if defined(__SSE4_1__) +#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON l3fwd_em_send_packets(nb_rx, pkts_burst, portid, qconf); #else l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst, portid, qconf); -#endif /* __SSE_4_1__ */ +#endif } } diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h index 2284bbd5..d509a1fc 100644 --- a/examples/l3fwd/l3fwd_em.h +++ b/examples/l3fwd/l3fwd_em.h @@ -34,7 +34,7 @@ #ifndef __L3FWD_EM_H__ #define __L3FWD_EM_H__ -static inline __attribute__((always_inline)) void +static __rte_always_inline void l3fwd_em_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qconf) { diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h new file mode 100644 index 00000000..520672d5 --- /dev/null +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -0,0 +1,218 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2017, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
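[The switch statements woven into the while loops of send_packetsx4() are a Duff's-device-style unrolled copy: control enters the unrolled body part-way through, so the n % FWDSTEP leftover packets need no separate cleanup loop. Functionally, the first of them is equivalent to this plain loop (sketch):

	/* Plain-loop equivalent of the unrolled copy in send_packetsx4(). */
	for (j = 0; j < n; j++)
		qconf->tx_mbufs[port].m_table[len + j] = m[j];
]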
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __L3FWD_EM_HLM_H__ +#define __L3FWD_EM_HLM_H__ + +#if defined RTE_ARCH_X86 +#include "l3fwd_sse.h" +#include "l3fwd_em_hlm_sse.h" +#elif defined RTE_MACHINE_CPUFLAG_NEON +#include "l3fwd_neon.h" +#include "l3fwd_em_hlm_neon.h" +#endif + +#ifdef RTE_ARCH_ARM64 +#define EM_HASH_LOOKUP_COUNT 16 +#else +#define EM_HASH_LOOKUP_COUNT 8 +#endif + + +static __rte_always_inline void +em_get_dst_port_ipv4xN(struct lcore_conf *qconf, struct rte_mbuf *m[], + uint8_t portid, uint16_t dst_port[]) +{ + int i; + int32_t ret[EM_HASH_LOOKUP_COUNT]; + union ipv4_5tuple_host key[EM_HASH_LOOKUP_COUNT]; + const void *key_array[EM_HASH_LOOKUP_COUNT]; + + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { + get_ipv4_5tuple(m[i], mask0.x, &key[i]); + key_array[i] = &key[i]; + } + + rte_hash_lookup_bulk(qconf->ipv4_lookup_struct, &key_array[0], + EM_HASH_LOOKUP_COUNT, ret); + + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { + dst_port[i] = (uint8_t) ((ret[i] < 0) ? + portid : ipv4_l3fwd_out_if[ret[i]]); + + if (dst_port[i] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[i]) == 0) + dst_port[i] = portid; + } +} + +static __rte_always_inline void +em_get_dst_port_ipv6xN(struct lcore_conf *qconf, struct rte_mbuf *m[], + uint8_t portid, uint16_t dst_port[]) +{ + int i; + int32_t ret[EM_HASH_LOOKUP_COUNT]; + union ipv6_5tuple_host key[EM_HASH_LOOKUP_COUNT]; + const void *key_array[EM_HASH_LOOKUP_COUNT]; + + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { + get_ipv6_5tuple(m[i], mask1.x, mask2.x, &key[i]); + key_array[i] = &key[i]; + } + + rte_hash_lookup_bulk(qconf->ipv6_lookup_struct, &key_array[0], + EM_HASH_LOOKUP_COUNT, ret); + + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { + dst_port[i] = (uint8_t) ((ret[i] < 0) ? 
+ portid : ipv6_l3fwd_out_if[ret[i]]); + + if (dst_port[i] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[i]) == 0) + dst_port[i] = portid; + } +} + +static __rte_always_inline uint16_t +em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, + uint8_t portid) +{ + uint8_t next_hop; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + uint32_t tcp_or_udp; + uint32_t l3_ptypes; + + tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); + l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK; + + if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) { + + /* Handle IPv4 headers.*/ + ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + + next_hop = em_get_ipv4_dst_port(ipv4_hdr, portid, + qconf->ipv4_lookup_struct); + + if (next_hop >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << next_hop) == 0) + next_hop = portid; + + return next_hop; + + } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) { + + /* Handle IPv6 headers.*/ + ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + next_hop = em_get_ipv6_dst_port(ipv6_hdr, portid, + qconf->ipv6_lookup_struct); + + if (next_hop >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << next_hop) == 0) + next_hop = portid; + + return next_hop; + + } + + return portid; +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. + */ +static inline void +l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, struct lcore_conf *qconf) +{ + int32_t i, j, pos; + uint16_t dst_port[MAX_PKT_BURST]; + + /* + * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets + * in groups of EM_HASH_LOOKUP_COUNT. + */ + int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT); + + for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], + struct ether_hdr *) + 1); + } + + for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { + + uint32_t pkt_type = RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP; + uint32_t l3_type, tcp_or_udp; + + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) + pkt_type &= pkts_burst[j + i]->packet_type; + + l3_type = pkt_type & RTE_PTYPE_L3_MASK; + tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); + + for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; + i < EM_HASH_LOOKUP_COUNT && pos < nb_rx; i++, pos++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[pos], + struct ether_hdr *) + 1); + } + + if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { + + em_get_dst_port_ipv4xN(qconf, &pkts_burst[j], portid, + &dst_port[j]); + + } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { + + em_get_dst_port_ipv6xN(qconf, &pkts_burst[j], portid, + &dst_port[j]); + + } else { + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) + dst_port[j + i] = em_get_dst_port(qconf, + pkts_burst[j + i], portid); + } + } + + for (; j < nb_rx; j++) + dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); + +} +#endif /* __L3FWD_EM_HLM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_hlm_neon.h b/examples/l3fwd/l3fwd_em_hlm_neon.h new file mode 100644 index 00000000..dae1acfb --- /dev/null +++ b/examples/l3fwd/l3fwd_em_hlm_neon.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2017, Linaro Limited + * All rights reserved. 
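[The generalized l3fwd_em_hlm.h above replaces the hand-unrolled x8 SSE code with loops over EM_HASH_LOOKUP_COUNT (16 on arm64, 8 elsewhere) and keys the bulk/scalar decision on an AND-reduction of the group's packet types: only type flags common to every packet survive, so a single branch can route the whole group to rte_hash_lookup_bulk(). In outline:

	/* Flags shared by all packets in the group survive the AND. */
	uint32_t pkt_type = RTE_PTYPE_L3_MASK |
			RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;

	for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
		pkt_type &= pkts_burst[j + i]->packet_type;

	if ((pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)) &&
			(pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) {
		/* whole group is IPv4 TCP/UDP: one bulk hash lookup */
	}
]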
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __L3FWD_EM_HLM_NEON_H__ +#define __L3FWD_EM_HLM_NEON_H__ + +#include <arm_neon.h> + +static inline void +get_ipv4_5tuple(struct rte_mbuf *m0, int32x4_t mask0, + union ipv4_5tuple_host *key) +{ + int32x4_t tmpdata0 = vld1q_s32(rte_pktmbuf_mtod_offset(m0, int32_t *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + + key->xmm = vandq_s32(tmpdata0, mask0); +} + +static inline void +get_ipv6_5tuple(struct rte_mbuf *m0, int32x4_t mask0, + int32x4_t mask1, union ipv6_5tuple_host *key) +{ + int32x4_t tmpdata0 = vld1q_s32( + rte_pktmbuf_mtod_offset(m0, int *, + sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len))); + + int32x4_t tmpdata1 = vld1q_s32( + rte_pktmbuf_mtod_offset(m0, int *, + sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len) + 8)); + + int32x4_t tmpdata2 = vld1q_s32( + rte_pktmbuf_mtod_offset(m0, int *, + sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len) + 16)); + + key->xmm[0] = vandq_s32(tmpdata0, mask0); + key->xmm[1] = tmpdata1; + key->xmm[2] = vandq_s32(tmpdata2, mask1); +} +#endif /* __L3FWD_EM_HLM_NEON_H__ */ diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h b/examples/l3fwd/l3fwd_em_hlm_sse.h index 7714a20c..0dd44dfa 100644 --- a/examples/l3fwd/l3fwd_em_hlm_sse.h +++ b/examples/l3fwd/l3fwd_em_hlm_sse.h @@ -36,102 +36,16 @@ #include "l3fwd_sse.h" -static inline __attribute__((always_inline)) void -em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8], - uint8_t portid, uint16_t dst_port[8]) +static __rte_always_inline void +get_ipv4_5tuple(struct rte_mbuf *m0, __m128i mask0, + union ipv4_5tuple_host *key) { - int32_t ret[8]; - union ipv4_5tuple_host key[8]; - __m128i data[8]; - - data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *, - sizeof(struct ether_hdr) + - offsetof(struct ipv4_hdr, time_to_live))); - data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *, - sizeof(struct ether_hdr) + - offsetof(struct ipv4_hdr, time_to_live))); - data[2] = 
_mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *, - sizeof(struct ether_hdr) + - offsetof(struct ipv4_hdr, time_to_live))); - data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *, - sizeof(struct ether_hdr) + - offsetof(struct ipv4_hdr, time_to_live))); - data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *, - sizeof(struct ether_hdr) + - offsetof(struct ipv4_hdr, time_to_live))); - data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *, - sizeof(struct ether_hdr) + - offsetof(struct ipv4_hdr, time_to_live))); - data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *, - sizeof(struct ether_hdr) + - offsetof(struct ipv4_hdr, time_to_live))); - data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *, + __m128i tmpdata0 = _mm_loadu_si128( + rte_pktmbuf_mtod_offset(m0, __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live))); - key[0].xmm = _mm_and_si128(data[0], mask0.x); - key[1].xmm = _mm_and_si128(data[1], mask0.x); - key[2].xmm = _mm_and_si128(data[2], mask0.x); - key[3].xmm = _mm_and_si128(data[3], mask0.x); - key[4].xmm = _mm_and_si128(data[4], mask0.x); - key[5].xmm = _mm_and_si128(data[5], mask0.x); - key[6].xmm = _mm_and_si128(data[6], mask0.x); - key[7].xmm = _mm_and_si128(data[7], mask0.x); - - const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], - &key[4], &key[5], &key[6], &key[7]}; - - rte_hash_lookup_bulk(qconf->ipv4_lookup_struct, &key_array[0], 8, ret); - - dst_port[0] = (uint8_t) ((ret[0] < 0) ? - portid : ipv4_l3fwd_out_if[ret[0]]); - dst_port[1] = (uint8_t) ((ret[1] < 0) ? - portid : ipv4_l3fwd_out_if[ret[1]]); - dst_port[2] = (uint8_t) ((ret[2] < 0) ? - portid : ipv4_l3fwd_out_if[ret[2]]); - dst_port[3] = (uint8_t) ((ret[3] < 0) ? - portid : ipv4_l3fwd_out_if[ret[3]]); - dst_port[4] = (uint8_t) ((ret[4] < 0) ? - portid : ipv4_l3fwd_out_if[ret[4]]); - dst_port[5] = (uint8_t) ((ret[5] < 0) ? - portid : ipv4_l3fwd_out_if[ret[5]]); - dst_port[6] = (uint8_t) ((ret[6] < 0) ? - portid : ipv4_l3fwd_out_if[ret[6]]); - dst_port[7] = (uint8_t) ((ret[7] < 0) ? 
- portid : ipv4_l3fwd_out_if[ret[7]]); - - if (dst_port[0] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[0]) == 0) - dst_port[0] = portid; - - if (dst_port[1] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[1]) == 0) - dst_port[1] = portid; - - if (dst_port[2] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[2]) == 0) - dst_port[2] = portid; - - if (dst_port[3] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[3]) == 0) - dst_port[3] = portid; - - if (dst_port[4] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[4]) == 0) - dst_port[4] = portid; - - if (dst_port[5] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[5]) == 0) - dst_port[5] = portid; - - if (dst_port[6] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[6]) == 0) - dst_port[6] = portid; - - if (dst_port[7] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[7]) == 0) - dst_port[7] = portid; - + key->xmm = _mm_and_si128(tmpdata0, mask0); } static inline void @@ -159,184 +73,4 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0, key->xmm[1] = tmpdata1; key->xmm[2] = _mm_and_si128(tmpdata2, mask1); } - -static inline __attribute__((always_inline)) void -em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8], - uint8_t portid, uint16_t dst_port[8]) -{ - int32_t ret[8]; - union ipv6_5tuple_host key[8]; - - get_ipv6_5tuple(m[0], mask1.x, mask2.x, &key[0]); - get_ipv6_5tuple(m[1], mask1.x, mask2.x, &key[1]); - get_ipv6_5tuple(m[2], mask1.x, mask2.x, &key[2]); - get_ipv6_5tuple(m[3], mask1.x, mask2.x, &key[3]); - get_ipv6_5tuple(m[4], mask1.x, mask2.x, &key[4]); - get_ipv6_5tuple(m[5], mask1.x, mask2.x, &key[5]); - get_ipv6_5tuple(m[6], mask1.x, mask2.x, &key[6]); - get_ipv6_5tuple(m[7], mask1.x, mask2.x, &key[7]); - - const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], - &key[4], &key[5], &key[6], &key[7]}; - - rte_hash_lookup_bulk(qconf->ipv6_lookup_struct, &key_array[0], 8, ret); - - dst_port[0] = (uint8_t) ((ret[0] < 0) ? - portid : ipv6_l3fwd_out_if[ret[0]]); - dst_port[1] = (uint8_t) ((ret[1] < 0) ? - portid : ipv6_l3fwd_out_if[ret[1]]); - dst_port[2] = (uint8_t) ((ret[2] < 0) ? - portid : ipv6_l3fwd_out_if[ret[2]]); - dst_port[3] = (uint8_t) ((ret[3] < 0) ? - portid : ipv6_l3fwd_out_if[ret[3]]); - dst_port[4] = (uint8_t) ((ret[4] < 0) ? - portid : ipv6_l3fwd_out_if[ret[4]]); - dst_port[5] = (uint8_t) ((ret[5] < 0) ? - portid : ipv6_l3fwd_out_if[ret[5]]); - dst_port[6] = (uint8_t) ((ret[6] < 0) ? - portid : ipv6_l3fwd_out_if[ret[6]]); - dst_port[7] = (uint8_t) ((ret[7] < 0) ? 
- portid : ipv6_l3fwd_out_if[ret[7]]); - - if (dst_port[0] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[0]) == 0) - dst_port[0] = portid; - - if (dst_port[1] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[1]) == 0) - dst_port[1] = portid; - - if (dst_port[2] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[2]) == 0) - dst_port[2] = portid; - - if (dst_port[3] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[3]) == 0) - dst_port[3] = portid; - - if (dst_port[4] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[4]) == 0) - dst_port[4] = portid; - - if (dst_port[5] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[5]) == 0) - dst_port[5] = portid; - - if (dst_port[6] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[6]) == 0) - dst_port[6] = portid; - - if (dst_port[7] >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port[7]) == 0) - dst_port[7] = portid; - -} - -static inline __attribute__((always_inline)) uint16_t -em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, - uint8_t portid) -{ - uint8_t next_hop; - struct ipv4_hdr *ipv4_hdr; - struct ipv6_hdr *ipv6_hdr; - uint32_t tcp_or_udp; - uint32_t l3_ptypes; - - tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); - l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK; - - if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) { - - /* Handle IPv4 headers.*/ - ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *, - sizeof(struct ether_hdr)); - - next_hop = em_get_ipv4_dst_port(ipv4_hdr, portid, - qconf->ipv4_lookup_struct); - - if (next_hop >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << next_hop) == 0) - next_hop = portid; - - return next_hop; - - } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) { - - /* Handle IPv6 headers.*/ - ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *, - sizeof(struct ether_hdr)); - - next_hop = em_get_ipv6_dst_port(ipv6_hdr, portid, - qconf->ipv6_lookup_struct); - - if (next_hop >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << next_hop) == 0) - next_hop = portid; - - return next_hop; - - } - - return portid; -} - -/* - * Buffer optimized handling of packets, invoked - * from main_loop. - */ -static inline void -l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint8_t portid, struct lcore_conf *qconf) -{ - int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; - - /* - * Send nb_rx - nb_rx%8 packets - * in groups of 8. 
- */ - int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8); - - for (j = 0; j < n; j += 8) { - - uint32_t pkt_type = - pkts_burst[j]->packet_type & - pkts_burst[j+1]->packet_type & - pkts_burst[j+2]->packet_type & - pkts_burst[j+3]->packet_type & - pkts_burst[j+4]->packet_type & - pkts_burst[j+5]->packet_type & - pkts_burst[j+6]->packet_type & - pkts_burst[j+7]->packet_type; - - uint32_t l3_type = pkt_type & RTE_PTYPE_L3_MASK; - uint32_t tcp_or_udp = pkt_type & - (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); - - if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { - - em_get_dst_port_ipv4x8(qconf, &pkts_burst[j], portid, &dst_port[j]); - - } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { - - em_get_dst_port_ipv6x8(qconf, &pkts_burst[j], portid, &dst_port[j]); - - } else { - dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); - dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j+1], portid); - dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j+2], portid); - dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j+3], portid); - dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j+4], portid); - dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j+5], portid); - dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j+6], portid); - dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j+7], portid); - } - } - - for (; j < nb_rx; j++) - dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); - - send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); - -} #endif /* __L3FWD_EM_SSE_HLM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_sse.h b/examples/l3fwd/l3fwd_em_sequential.h index c0a9725a..cb7c2abb 100644 --- a/examples/l3fwd/l3fwd_em_sse.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -31,8 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __L3FWD_EM_SSE_H__ -#define __L3FWD_EM_SSE_H__ +#ifndef __L3FWD_EM_SEQUENTIAL_H__ +#define __L3FWD_EM_SEQUENTIAL_H__ /** * @file @@ -43,9 +43,13 @@ * compilation time. 
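[The renamed sequential path below also gains a one-packet-ahead software pipeline: packet i+1's header is prefetched while packet i is classified. Note that the '+ 1' in its prefetch target is pointer arithmetic on struct ether_hdr *, so it warms the cache line holding the L3 header, not the next mbuf. An equivalent spelling (sketch):

	struct ether_hdr *eth = rte_pktmbuf_mtod(pkts_burst[i],
			struct ether_hdr *);

	/* Same address as 'eth + 1': first byte past the Ethernet header. */
	rte_prefetch0((char *)eth + sizeof(struct ether_hdr));
]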
*/ +#if defined RTE_ARCH_X86 #include "l3fwd_sse.h" +#elif defined RTE_MACHINE_CPUFLAG_NEON +#include "l3fwd_neon.h" +#endif -static inline __attribute__((always_inline)) uint16_t +static __rte_always_inline uint16_t em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, uint8_t portid) { @@ -101,12 +105,22 @@ static inline void l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid, struct lcore_conf *qconf) { - int32_t j; + int32_t i, j; uint16_t dst_port[MAX_PKT_BURST]; - for (j = 0; j < nb_rx; j++) + if (nb_rx > 0) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[0], + struct ether_hdr *) + 1); + } + + for (i = 1, j = 0; j < nb_rx; i++, j++) { + if (i < nb_rx) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], + struct ether_hdr *) + 1); + } dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + } send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } -#endif /* __L3FWD_EM_SSE_H__ */ +#endif /* __L3FWD_EM_SEQUENTIAL_H__ */ diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index f6212697..ff1e4035 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -104,8 +104,93 @@ static struct ipv6_l3fwd_lpm_route ipv6_l3fwd_lpm_route_array[] = { struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS]; struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS]; -#if defined(__SSE4_1__) +static inline uint16_t +lpm_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct) +{ + uint32_t next_hop; + struct rte_lpm *ipv4_l3fwd_lookup_struct = + (struct rte_lpm *)lookup_struct; + + return (uint16_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, + rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr), + &next_hop) == 0) ? next_hop : portid); +} + +static inline uint16_t +lpm_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct) +{ + uint32_t next_hop; + struct rte_lpm6 *ipv6_l3fwd_lookup_struct = + (struct rte_lpm6 *)lookup_struct; + + return (uint16_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct, + ((struct ipv6_hdr *)ipv6_hdr)->dst_addr, + &next_hop) == 0) ? next_hop : portid); +} + +static __rte_always_inline uint16_t +lpm_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, + uint8_t portid) +{ + struct ipv6_hdr *ipv6_hdr; + struct ipv4_hdr *ipv4_hdr; + struct ether_hdr *eth_hdr; + + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + + return lpm_get_ipv4_dst_port(ipv4_hdr, portid, + qconf->ipv4_lookup_struct); + } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + + return lpm_get_ipv6_dst_port(ipv6_hdr, portid, + qconf->ipv6_lookup_struct); + } + + return portid; +} + +/* + * lpm_get_dst_port optimized routine for packets where dst_ipv4 is already + * precalculated. If packet is ipv6 dst_addr is taken directly from packet + * header and dst_ipv4 value is not used. + */ +static __rte_always_inline uint16_t +lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt, + uint32_t dst_ipv4, uint8_t portid) +{ + uint32_t next_hop; + struct ipv6_hdr *ipv6_hdr; + struct ether_hdr *eth_hdr; + + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { + return (uint16_t) ((rte_lpm_lookup(qconf->ipv4_lookup_struct, + dst_ipv4, &next_hop) == 0) + ? 
next_hop : portid); + + } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + + return (uint16_t) ((rte_lpm6_lookup(qconf->ipv6_lookup_struct, + ipv6_hdr->dst_addr, &next_hop) == 0) + ? next_hop : portid); + + } + + return portid; +} + +#if defined(RTE_ARCH_X86) #include "l3fwd_lpm_sse.h" +#elif defined RTE_MACHINE_CPUFLAG_NEON +#include "l3fwd_lpm_neon.h" #else #include "l3fwd_lpm.h" #endif @@ -178,13 +263,13 @@ lpm_main_loop(__attribute__((unused)) void *dummy) if (nb_rx == 0) continue; -#if defined(__SSE4_1__) +#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON l3fwd_lpm_send_packets(nb_rx, pkts_burst, portid, qconf); #else l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst, portid, qconf); -#endif /* __SSE_4_1__ */ +#endif /* X86 */ } } diff --git a/examples/l3fwd/l3fwd_lpm.h b/examples/l3fwd/l3fwd_lpm.h index 258a82fe..55c3e832 100644 --- a/examples/l3fwd/l3fwd_lpm.h +++ b/examples/l3fwd/l3fwd_lpm.h @@ -34,37 +34,13 @@ #ifndef __L3FWD_LPM_H__ #define __L3FWD_LPM_H__ -static inline uint8_t -lpm_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct) -{ - uint32_t next_hop; - struct rte_lpm *ipv4_l3fwd_lookup_struct = - (struct rte_lpm *)lookup_struct; - - return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, - rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr), - &next_hop) == 0) ? next_hop : portid); -} - -static inline uint8_t -lpm_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct) -{ - uint32_t next_hop; - struct rte_lpm6 *ipv6_l3fwd_lookup_struct = - (struct rte_lpm6 *)lookup_struct; - - return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct, - ((struct ipv6_hdr *)ipv6_hdr)->dst_addr, - &next_hop) == 0) ? next_hop : portid); -} - -static inline __attribute__((always_inline)) void +static __rte_always_inline void l3fwd_lpm_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qconf) { struct ether_hdr *eth_hdr; struct ipv4_hdr *ipv4_hdr; - uint8_t dst_port; + uint16_t dst_port; eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h new file mode 100644 index 00000000..baedbfe8 --- /dev/null +++ b/examples/l3fwd/l3fwd_lpm_neon.h @@ -0,0 +1,193 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2017, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __L3FWD_LPM_NEON_H__ +#define __L3FWD_LPM_NEON_H__ + +#include <arm_neon.h> + +#include "l3fwd_neon.h" + +/* + * Read packet_type and destination IPV4 addresses from 4 mbufs. + */ +static inline void +processx4_step1(struct rte_mbuf *pkt[FWDSTEP], + int32x4_t *dip, + uint32_t *ipv4_flag) +{ + struct ipv4_hdr *ipv4_hdr; + struct ether_hdr *eth_hdr; + int32_t dst[FWDSTEP]; + + eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + dst[0] = ipv4_hdr->dst_addr; + ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4; + + eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + dst[1] = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[1]->packet_type; + + eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + dst[2] = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[2]->packet_type; + + eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + dst[3] = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[3]->packet_type; + + dip[0] = vld1q_s32(dst); +} + +/* + * Lookup into LPM for destination port. + * If lookup fails, use incoming port (portid) as destination port. + */ +static inline void +processx4_step2(const struct lcore_conf *qconf, + int32x4_t dip, + uint32_t ipv4_flag, + uint8_t portid, + struct rte_mbuf *pkt[FWDSTEP], + uint16_t dprt[FWDSTEP]) +{ + rte_xmm_t dst; + + dip = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(dip))); + + /* if all 4 packets are IPV4. */ + if (likely(ipv4_flag)) { + rte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dst.u32, + portid); + /* get rid of unused upper 16 bit for each dport. */ + vst1_s16((int16_t *)dprt, vqmovn_s32(dst.x)); + } else { + dst.x = dip; + dprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0], + dst.u32[0], portid); + dprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1], + dst.u32[1], portid); + dprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2], + dst.u32[2], portid); + dprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3], + dst.u32[3], portid); + } +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. 
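[processx4_step2() above byte-reverses the gathered addresses because dst_addr sits in the packet in network order while rte_lpm_lookupx4() expects host order; vrev32q_u8 is the vector counterpart of rte_be_to_cpu_32. Its all-IPv4 fast path is, in scalar terms (a sketch; dst[] holds the four addresses read by processx4_step1()):

	uint32_t next_hop;
	int i;

	for (i = 0; i < FWDSTEP; i++) {
		uint32_t ip = rte_be_to_cpu_32(dst[i]);	/* vrev32q_u8, per lane */

		if (rte_lpm_lookup(qconf->ipv4_lookup_struct, ip, &next_hop) != 0)
			next_hop = portid;	/* LPM miss: keep the input port */
		dprt[i] = next_hop;
	}
]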
+ */ +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, struct lcore_conf *qconf) +{ + int32_t i = 0, j = 0; + uint16_t dst_port[MAX_PKT_BURST]; + int32x4_t dip; + uint32_t ipv4_flag; + const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); + const int32_t m = nb_rx % FWDSTEP; + + if (k) { + for (i = 0; i < FWDSTEP; i++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], + struct ether_hdr *) + 1); + } + + for (j = 0; j != k - FWDSTEP; j += FWDSTEP) { + for (i = 0; i < FWDSTEP; i++) { + rte_prefetch0(rte_pktmbuf_mtod( + pkts_burst[j + i + FWDSTEP], + struct ether_hdr *) + 1); + } + + processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); + processx4_step2(qconf, dip, ipv4_flag, portid, + &pkts_burst[j], &dst_port[j]); + } + + processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); + processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], + &dst_port[j]); + + j += FWDSTEP; + } + + if (m) { + /* Prefetch last up to 3 packets one by one */ + switch (m) { + case 3: + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], + struct ether_hdr *) + 1); + j++; + /* fallthrough */ + case 2: + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], + struct ether_hdr *) + 1); + j++; + /* fallthrough */ + case 1: + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], + struct ether_hdr *) + 1); + j++; + } + + j -= m; + /* Classify last up to 3 packets one by one */ + switch (m) { + case 3: + dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], + portid); + j++; + /* fallthrough */ + case 2: + dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], + portid); + j++; + /* fallthrough */ + case 1: + dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], + portid); + } + } + + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); +} + +#endif /* __L3FWD_LPM_NEON_H__ */ diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h index aa06b6d3..4e294c84 100644 --- a/examples/l3fwd/l3fwd_lpm_sse.h +++ b/examples/l3fwd/l3fwd_lpm_sse.h @@ -36,72 +36,6 @@ #include "l3fwd_sse.h" -static inline __attribute__((always_inline)) uint16_t -lpm_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, - uint8_t portid) -{ - uint32_t next_hop; - struct ipv6_hdr *ipv6_hdr; - struct ipv4_hdr *ipv4_hdr; - struct ether_hdr *eth_hdr; - - if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { - - eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); - ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); - - return (uint16_t) ( - (rte_lpm_lookup(qconf->ipv4_lookup_struct, - rte_be_to_cpu_32(ipv4_hdr->dst_addr), - &next_hop) == 0) ? - next_hop : portid); - - } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { - - eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); - ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); - - return (uint16_t) ((rte_lpm6_lookup(qconf->ipv6_lookup_struct, - ipv6_hdr->dst_addr, &next_hop) == 0) - ? next_hop : portid); - - } - - return portid; -} - -/* - * lpm_get_dst_port optimized routine for packets where dst_ipv4 is already - * precalculated. If packet is ipv6 dst_addr is taken directly from packet - * header and dst_ipv4 value is not used. - */ -static inline __attribute__((always_inline)) uint16_t -lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt, - uint32_t dst_ipv4, uint8_t portid) -{ - uint32_t next_hop; - struct ipv6_hdr *ipv6_hdr; - struct ether_hdr *eth_hdr; - - if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { - return (uint16_t) ((rte_lpm_lookup(qconf->ipv4_lookup_struct, dst_ipv4, - &next_hop) == 0) ? 
next_hop : portid); - - } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { - - eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); - ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); - - return (uint16_t) ((rte_lpm6_lookup(qconf->ipv6_lookup_struct, - ipv6_hdr->dst_addr, &next_hop) == 0) - ? next_hop : portid); - - } - - return portid; - -} - /* * Read packet_type and destination IPV4 addresses from 4 mbufs. */ @@ -199,9 +133,11 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); j++; + /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); j++; + /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); j++; diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h new file mode 100644 index 00000000..42d50d3c --- /dev/null +++ b/examples/l3fwd/l3fwd_neon.h @@ -0,0 +1,259 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2017, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _L3FWD_NEON_H_ +#define _L3FWD_NEON_H_ + +#include "l3fwd.h" +#include "l3fwd_common.h" + +/* + * Update source and destination MAC addresses in the ethernet header. + * Perform RFC1812 checks and updates for IPV4 packets. 
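The NEON routine that follows realizes this with one 16-byte load, a lane blend, and a store per packet; in scalar terms it amounts to overwriting the first 12 bytes of the Ethernet header (destination and source MAC) from the per-port val_eth template while leaving the rest of the frame untouched. A sketch of that scalar equivalent (the mac_template type is hypothetical, not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical per-port template: new dst MAC followed by new src MAC. */
struct mac_template {
	uint8_t bytes[12];
};

/* Scalar model of the header rewrite: replace the 12 MAC bytes, keep
 * the EtherType and everything after it. */
static void
rewrite_macs(uint8_t *frame, const struct mac_template *t)
{
	memcpy(frame, t->bytes, sizeof(t->bytes));
}

int
main(void)
{
	uint8_t frame[14] = { 0 };
	struct mac_template t = {
		{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }
	};

	frame[12] = 0x08;	/* EtherType 0x0800 (IPv4), untouched below */
	rewrite_macs(frame, &t);
	printf("dst starts %u, ethertype byte still 0x%02x\n",
		frame[0], frame[12]);
	return 0;
}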
+ */ +static inline void +processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) +{ + uint32x4_t te[FWDSTEP]; + uint32x4_t ve[FWDSTEP]; + uint32_t *p[FWDSTEP]; + + p[0] = rte_pktmbuf_mtod(pkt[0], uint32_t *); + p[1] = rte_pktmbuf_mtod(pkt[1], uint32_t *); + p[2] = rte_pktmbuf_mtod(pkt[2], uint32_t *); + p[3] = rte_pktmbuf_mtod(pkt[3], uint32_t *); + + ve[0] = vreinterpretq_u32_s32(val_eth[dst_port[0]]); + te[0] = vld1q_u32(p[0]); + + ve[1] = vreinterpretq_u32_s32(val_eth[dst_port[1]]); + te[1] = vld1q_u32(p[1]); + + ve[2] = vreinterpretq_u32_s32(val_eth[dst_port[2]]); + te[2] = vld1q_u32(p[2]); + + ve[3] = vreinterpretq_u32_s32(val_eth[dst_port[3]]); + te[3] = vld1q_u32(p[3]); + + /* Update last 4 bytes */ + ve[0] = vsetq_lane_u32(vgetq_lane_u32(te[0], 3), ve[0], 3); + ve[1] = vsetq_lane_u32(vgetq_lane_u32(te[1], 3), ve[1], 3); + ve[2] = vsetq_lane_u32(vgetq_lane_u32(te[2], 3), ve[2], 3); + ve[3] = vsetq_lane_u32(vgetq_lane_u32(te[3], 3), ve[3], 3); + + vst1q_u32(p[0], ve[0]); + vst1q_u32(p[1], ve[1]); + vst1q_u32(p[2], ve[2]); + vst1q_u32(p[3], ve[3]); + + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1), + &dst_port[0], pkt[0]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1), + &dst_port[1], pkt[1]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1), + &dst_port[2], pkt[2]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1), + &dst_port[3], pkt[3]->packet_type); +} + +/* + * Group consecutive packets with the same destination port in bursts of 4. + * Suppose we have an array of destination ports: + * dst_port[] = {a, b, c, d, e, ... } + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. + * We do 4 comparisons at once and the result is a 4-bit mask. + * This mask is used as an index into a prebuilt array of pnum values. + */ +static inline uint16_t * +port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, + uint16x8_t dp2) +{ + union { + uint16_t u16[FWDSTEP + 1]; + uint64_t u64; + } *pnum = (void *)pn; + + int32_t v; + uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; + + dp1 = vceqq_u16(dp1, dp2); + dp1 = vandq_u16(dp1, mask); + v = vaddvq_u16(dp1); + + /* update last port counter. */ + lp[0] += gptbl[v].lpv; + + /* if dest port value has changed. */ + if (v != GRPMSK) { + pnum->u64 = gptbl[v].pnum; + pnum->u16[FWDSTEP] = 1; + lp = pnum->u16 + gptbl[v].idx; + } + + return lp; +} + +/** + * Process one packet: + * Update source and destination MAC addresses in the ethernet header. + * Perform RFC1812 checks and updates for IPV4 packets. + */ +static inline void +process_packet(struct rte_mbuf *pkt, uint16_t *dst_port) +{ + struct ether_hdr *eth_hdr; + uint32x4_t te, ve; + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + + te = vld1q_u32((uint32_t *)eth_hdr); + ve = vreinterpretq_u32_s32(val_eth[dst_port[0]]); + + + rfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port, + pkt->packet_type); + + ve = vcopyq_laneq_u32(ve, 3, te, 3); + vst1q_u32((uint32_t *)eth_hdr, ve); +} + +/** + * Send packets burst from pkts_burst to the ports in dst_port array + */ +static __rte_always_inline void +send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, + uint16_t dst_port[MAX_PKT_BURST], int nb_rx) +{ + int32_t k; + int j = 0; + uint16_t dlp; + uint16_t *lp; + uint16_t pnum[MAX_PKT_BURST + 1]; + + /* + * Finish packet processing and group consecutive + * packets with the same destination port.
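The vceqq_u16/vandq_u16/vaddvq_u16 sequence in port_groupx4() above computes, in one shot, the table index that a scalar loop would build like this (a sketch assuming FWDSTEP == 4, as in l3fwd):

#include <stdint.h>
#include <stdio.h>

/* Scalar model of the NEON mask: bit i of v is set iff dp1 lane i
 * equals dp2 lane i, i.e. iff dst_port[j+i] == dst_port[j+i+1]. */
static int
equality_mask(const uint16_t dp1[4], const uint16_t dp2[4])
{
	int v = 0, i;

	for (i = 0; i < 4; i++)
		if (dp1[i] == dp2[i])
			v |= 1 << i;
	return v;	/* 0..15, index into gptbl[] */
}

int
main(void)
{
	uint16_t dp1[4] = { 1, 1, 2, 2 };	/* <a, b, c, d> */
	uint16_t dp2[4] = { 1, 2, 2, 7 };	/* <b, c, d, e> */

	printf("mask = 0x%x\n", equality_mask(dp1, dp2));	/* 0x5 */
	return 0;
}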
+ */ + k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); + if (k != 0) { + uint16x8_t dp1, dp2; + + lp = pnum; + lp[0] = 1; + + processx4_step3(pkts_burst, dst_port); + + /* dp1: <d[0], d[1], d[2], d[3], ... > */ + dp1 = vld1q_u16(dst_port); + + for (j = FWDSTEP; j != k; j += FWDSTEP) { + processx4_step3(&pkts_burst[j], &dst_port[j]); + + /* + * dp2: + * <d[j-3], d[j-2], d[j-1], d[j], ... > + */ + dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]); + lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); + + /* + * dp1: + * <d[j], d[j+1], d[j+2], d[j+3], ... > + */ + dp1 = vextq_u16(dp1, dp1, FWDSTEP - 1); + } + + /* + * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... > + */ + dp2 = vextq_u16(dp1, dp1, 1); + dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3); + lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); + + /* + * remove values added by the last repeated + * dst port. + */ + lp[0]--; + dlp = dst_port[j - 1]; + } else { + /* set dlp and lp to values that are never used. */ + dlp = BAD_PORT - 1; + lp = pnum + MAX_PKT_BURST; + } + + /* Process up to last 3 packets one by one. */ + switch (nb_rx % FWDSTEP) { + case 3: + process_packet(pkts_burst[j], dst_port + j); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + /* fallthrough */ + case 2: + process_packet(pkts_burst[j], dst_port + j); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + /* fallthrough */ + case 1: + process_packet(pkts_burst[j], dst_port + j); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + } + + /* + * Send packets out through the destination port. + * Consecutive packets with the same destination port + * are already grouped together. + * If destination port for the packet equals BAD_PORT, + * then free the packet without sending it out. + */ + for (j = 0; j < nb_rx; j += k) { + + int32_t m; + uint16_t pn; + + pn = dst_port[j]; + k = pnum[j]; + + if (likely(pn != BAD_PORT)) + send_packetsx4(qconf, pn, pkts_burst + j, k); + else + for (m = j; m != j + k; m++) + rte_pktmbuf_free(pkts_burst[m]); + + } +} + +#endif /* _L3FWD_NEON_H_ */ diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h index 1afa1f00..831760f0 100644 --- a/examples/l3fwd/l3fwd_sse.h +++ b/examples/l3fwd/l3fwd_sse.h @@ -32,53 +32,11 @@ */ -#ifndef _L3FWD_COMMON_H_ -#define _L3FWD_COMMON_H_ +#ifndef _L3FWD_SSE_H_ +#define _L3FWD_SSE_H_ #include "l3fwd.h" - -#ifdef DO_RFC_1812_CHECKS - -#define IPV4_MIN_VER_IHL 0x45 -#define IPV4_MAX_VER_IHL 0x4f -#define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL) - -/* Minimum value of IPV4 total length (20B) in network byte order. */ -#define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8) - -/* - * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: - * - The IP version number must be 4. - * - The IP header length field must be large enough to hold the - * minimum length legal IP datagram (20 bytes = 5 words). - * - The IP total length field must be large enough to hold the IP - * datagram header, whose length is specified in the IP header length - * field. - * If we encounter invalid IPV4 packet, then set destination port for it - * to BAD_PORT value.
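The invariant that the send loop above depends on can be stated without vectors: after grouping, pnum[j] holds the length of the run of equal destination ports that starts at packet j, so the loop hops from run start to run start. A scalar sketch of that contract (the real code builds pnum incrementally through gptbl, with the same end result):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t dst_port[] = { 1, 1, 2, 2, 2, 7 };
	int nb_rx = 6, j;
	uint16_t pnum[6];

	/* Build run lengths right to left: pnum[j] = length of the run
	 * of equal ports starting at j. */
	pnum[nb_rx - 1] = 1;
	for (j = nb_rx - 2; j >= 0; j--)
		pnum[j] = (dst_port[j] == dst_port[j + 1]) ?
			pnum[j + 1] + 1 : 1;

	/* The send loop only reads pnum[] at run starts. */
	for (j = 0; j < nb_rx; j += pnum[j])
		printf("send %u packet(s) to port %u\n",
			pnum[j], dst_port[j]);
	return 0;
}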
- */ -static inline __attribute__((always_inline)) void -rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) -{ - uint8_t ihl; - - if (RTE_ETH_IS_IPV4_HDR(ptype)) { - ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; - - ipv4_hdr->time_to_live--; - ipv4_hdr->hdr_checksum++; - - if (ihl > IPV4_MAX_VER_IHL_DIFF || - ((uint8_t)ipv4_hdr->total_length == 0 && - ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) - dp[0] = BAD_PORT; - - } -} - -#else -#define rfc1812_process(mb, dp, ptype) do { } while (0) -#endif /* DO_RFC_1812_CHECKS */ +#include "l3fwd_common.h" /* * Update source and destination MAC addresses in the ethernet header. @@ -130,143 +88,16 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) } /* - * We group consecutive packets with the same destionation port into one burst. - * To avoid extra latency this is done together with some other packet - * processing, but after we made a final decision about packet's destination. - * To do this we maintain: - * pnum - array of number of consecutive packets with the same dest port for - * each packet in the input burst. - * lp - pointer to the last updated element in the pnum. - * dlp - dest port value lp corresponds to. - */ - -#define GRPSZ (1 << FWDSTEP) -#define GRPMSK (GRPSZ - 1) - -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ - if (likely((dlp) == (dcp)[(idx)])) { \ - (lp)[0]++; \ - } else { \ - (dlp) = (dcp)[idx]; \ - (lp) = (pn) + (idx); \ - (lp)[0] = 1; \ - } \ -} while (0) - -/* * Group consecutive packets with the same destination port in bursts of 4. * Suppose we have array of destionation ports: * dst_port[] = {a, b, c, d,, e, ... } * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. - * We doing 4 comparisions at once and the result is 4 bit mask. + * We do 4 comparisons at once and the result is a 4-bit mask. * This mask is used as an index into prebuild array of pnum values. */ static inline uint16_t * port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) { - static const struct { - uint64_t pnum; /* prebuild 4 values for pnum[]. */ - int32_t idx; /* index for new last updated elemnet. */ - uint16_t lpv; /* add value to the last updated element.
*/ - } gptbl[GRPSZ] = { - { - /* 0: a != b, b != c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100010001), - .idx = 4, - .lpv = 0, - }, - { - /* 1: a == b, b != c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100010002), - .idx = 4, - .lpv = 1, - }, - { - /* 2: a != b, b == c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100020001), - .idx = 4, - .lpv = 0, - }, - { - /* 3: a == b, b == c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100020003), - .idx = 4, - .lpv = 2, - }, - { - /* 4: a != b, b != c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200010001), - .idx = 4, - .lpv = 0, - }, - { - /* 5: a == b, b != c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200010002), - .idx = 4, - .lpv = 1, - }, - { - /* 6: a != b, b == c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200030001), - .idx = 4, - .lpv = 0, - }, - { - /* 7: a == b, b == c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200030004), - .idx = 4, - .lpv = 3, - }, - { - /* 8: a != b, b != c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100010001), - .idx = 3, - .lpv = 0, - }, - { - /* 9: a == b, b != c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100010002), - .idx = 3, - .lpv = 1, - }, - { - /* 0xa: a != b, b == c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100020001), - .idx = 3, - .lpv = 0, - }, - { - /* 0xb: a == b, b == c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100020003), - .idx = 3, - .lpv = 2, - }, - { - /* 0xc: a != b, b != c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300010001), - .idx = 2, - .lpv = 0, - }, - { - /* 0xd: a == b, b != c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300010002), - .idx = 2, - .lpv = 1, - }, - { - /* 0xe: a != b, b == c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300040001), - .idx = 1, - .lpv = 0, - }, - { - /* 0xf: a == b, b == c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300040005), - .idx = 0, - .lpv = 4, - }, - }; - union { uint16_t u16[FWDSTEP + 1]; uint64_t u64; @@ -314,88 +145,10 @@ process_packet(struct rte_mbuf *pkt, uint16_t *dst_port) _mm_storeu_si128((__m128i *)eth_hdr, te); } -static inline __attribute__((always_inline)) void -send_packetsx4(struct lcore_conf *qconf, uint8_t port, struct rte_mbuf *m[], - uint32_t num) -{ - uint32_t len, j, n; - - len = qconf->tx_mbufs[port].len; - - /* - * If TX buffer for that queue is empty, and we have enough packets, - * then send them straightway. - */ - if (num >= MAX_TX_BURST && len == 0) { - n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); - if (unlikely(n < num)) { - do { - rte_pktmbuf_free(m[n]); - } while (++n < num); - } - return; - } - - /* - * Put packets into TX buffer for that queue. - */ - - n = len + num; - n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num; - - j = 0; - switch (n % FWDSTEP) { - while (j < n) { - case 0: - qconf->tx_mbufs[port].m_table[len + j] = m[j]; - j++; - case 3: - qconf->tx_mbufs[port].m_table[len + j] = m[j]; - j++; - case 2: - qconf->tx_mbufs[port].m_table[len + j] = m[j]; - j++; - case 1: - qconf->tx_mbufs[port].m_table[len + j] = m[j]; - j++; - } - } - - len += n; - - /* enough pkts to be sent */ - if (unlikely(len == MAX_PKT_BURST)) { - - send_burst(qconf, MAX_PKT_BURST, port); - - /* copy rest of the packets into the TX buffer. 
*/ - len = num - n; - j = 0; - switch (len % FWDSTEP) { - while (j < len) { - case 0: - qconf->tx_mbufs[port].m_table[j] = m[n + j]; - j++; - case 3: - qconf->tx_mbufs[port].m_table[j] = m[n + j]; - j++; - case 2: - qconf->tx_mbufs[port].m_table[j] = m[n + j]; - j++; - case 1: - qconf->tx_mbufs[port].m_table[j] = m[n + j]; - j++; - } - } - } - - qconf->tx_mbufs[port].len = len; -} - /** * Send packets burst from pkts_burst to the ports in dst_port array */ -static inline __attribute__((always_inline)) void +static __rte_always_inline void send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, uint16_t dst_port[MAX_PKT_BURST], int nb_rx) { @@ -464,10 +217,12 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, process_packet(pkts_burst[j], dst_port + j); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; + /* fall-through */ case 2: process_packet(pkts_burst[j], dst_port + j); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; + /* fall-through */ case 1: process_packet(pkts_burst[j], dst_port + j); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); @@ -498,4 +253,4 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } -#endif /* _L3FWD_COMMON_H_ */ +#endif /* _L3FWD_SSE_H_ */ diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index fd6605bf..81995fdb 100644 --- a/examples/l3fwd/main.c +++ b/examples/l3fwd/main.c @@ -52,7 +52,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -522,10 +521,10 @@ static const struct option lgopts[] = { * value of 8192 */ #define NB_MBUF RTE_MAX( \ - (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ - nb_ports*nb_lcores*MAX_PKT_BURST + \ - nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ - nb_lcores*MEMPOOL_CACHE_SIZE), \ + (nb_ports*nb_rx_queue*nb_rxd + \ + nb_ports*nb_lcores*MAX_PKT_BURST + \ + nb_ports*n_tx_queue*nb_txd + \ + nb_lcores*MEMPOOL_CACHE_SIZE), \ (unsigned)8192) /* Parse the argument given in the command line of the application */ @@ -918,6 +917,13 @@ main(int argc, char **argv) "Cannot configure device: err=%d, port=%d\n", ret, portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors: err=%d, " + "port=%d\n", ret, portid); + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); printf(", "); diff --git a/examples/link_status_interrupt/Makefile b/examples/link_status_interrupt/Makefile index 9ecc7fc4..d5ee073a 100644 --- a/examples/link_status_interrupt/Makefile +++ b/examples/link_status_interrupt/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/link_status_interrupt/main.c b/examples/link_status_interrupt/main.c index 25da28eb..f4e3969a 100644 --- a/examples/link_status_interrupt/main.c +++ b/examples/link_status_interrupt/main.c @@ -37,7 +37,6 @@ #include <stdint.h> #include <inttypes.h> #include <sys/types.h> -#include <string.h> #include <sys/queue.h> #include <netinet/in.h> #include <setjmp.h> @@ -53,7 +52,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> 
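The switch-into-loop copies in the send_packetsx4() removed above (which, like the rest of this block, presumably moves into the l3fwd_common.h that l3fwd_sse.h now includes) are Duff's device: switching on n % FWDSTEP jumps into the middle of the 4-way unrolled body, so the n % 4 leftover copies need no separate tail loop. A generic, self-contained sketch of the pattern, valid for n > 0 as in the original usage:

#include <stdio.h>

/* Duff's-device copy in the same 0/3/2/1 case order as the DPDK code;
 * requires n > 0 (entering at case 0 copies one element before the
 * loop condition is first tested). */
static void
copy4(int *dst, const int *src, unsigned int n)
{
	unsigned int j = 0;

	switch (n % 4) {
		while (j < n) {
		case 0:
			dst[j] = src[j]; j++;
			/* fall-through */
		case 3:
			dst[j] = src[j]; j++;
			/* fall-through */
		case 2:
			dst[j] = src[j]; j++;
			/* fall-through */
		case 1:
			dst[j] = src[j]; j++;
		}
	}
}

int
main(void)
{
	int src[7] = { 0, 1, 2, 3, 4, 5, 6 }, dst[7] = { 0 };
	unsigned int i;

	copy4(dst, src, 7);	/* enters at case 3, then loops */
	for (i = 0; i < 7; i++)
		printf("%d ", dst[i]);
	printf("\n");
	return 0;
}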
#include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -469,14 +467,16 @@ lsi_parse_args(int argc, char **argv) * Pointer to(address of) the parameters. * * @return - * void. + * int. */ -static void -lsi_event_callback(uint8_t port_id, enum rte_eth_event_type type, void *param) +static int +lsi_event_callback(uint8_t port_id, enum rte_eth_event_type type, void *param, + void *ret_param) { struct rte_eth_link link; RTE_SET_USED(param); + RTE_SET_USED(ret_param); printf("\n\nIn registered callback...\n"); printf("Event type: %s\n", type == RTE_ETH_EVENT_INTR_LSC ? "LSC interrupt" : "unknown event"); @@ -488,6 +488,8 @@ lsi_event_callback(uint8_t port_id, enum rte_eth_event_type type, void *param) ("full-duplex") : ("half-duplex")); } else printf("Port %d Link Down\n\n", port_id); + + return 0; } /* Check the link status of all ports in up to 9s, and print them finally */ @@ -646,6 +648,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, (unsigned) portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%u\n", + ret, (unsigned) portid); + /* register lsi interrupt callback, need to be after * rte_eth_dev_configure(). if (intr_conf.lsc == 0), no * lsc interrupt will be present, and below callback to diff --git a/examples/load_balancer/Makefile b/examples/load_balancer/Makefile index 2c5fd9b0..f656e51c 100644 --- a/examples/load_balancer/Makefile +++ b/examples/load_balancer/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/load_balancer/config.c b/examples/load_balancer/config.c index 07f92a1a..50325095 100644 --- a/examples/load_balancer/config.c +++ b/examples/load_balancer/config.c @@ -49,7 +49,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> diff --git a/examples/load_balancer/init.c b/examples/load_balancer/init.c index abd05a31..717232e6 100644 --- a/examples/load_balancer/init.c +++ b/examples/load_balancer/init.c @@ -49,7 +49,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -430,6 +429,8 @@ app_init_nics(void) /* Init NIC ports and queues, then start the ports */ for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { struct rte_mempool *pool; + uint16_t nic_rx_ring_size; + uint16_t nic_tx_ring_size; n_rx_queues = app_get_nic_rx_queues_per_port(port); n_tx_queues = app.nic_tx_port_mask[port]; @@ -450,6 +451,17 @@ app_init_nics(void) } rte_eth_promiscuous_enable(port); + nic_rx_ring_size = app.nic_rx_ring_size; + nic_tx_ring_size = app.nic_tx_ring_size; + ret = rte_eth_dev_adjust_nb_rx_tx_desc( + port, &nic_rx_ring_size, &nic_tx_ring_size); + if (ret < 0) { + rte_panic("Cannot adjust number of descriptors for port %u (%d)\n", + (unsigned) port, ret); + } + app.nic_rx_ring_size = nic_rx_ring_size; + app.nic_tx_ring_size = nic_tx_ring_size; + /* Init RX queues */ for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) { if (app.nic_rx_queue_mask[port][queue] == 0) 
{ diff --git a/examples/load_balancer/main.c b/examples/load_balancer/main.c index c97bf6fa..65ceea4a 100644 --- a/examples/load_balancer/main.c +++ b/examples/load_balancer/main.c @@ -50,7 +50,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> diff --git a/examples/load_balancer/main.h b/examples/load_balancer/main.h index d98468a7..dc407555 100644 --- a/examples/load_balancer/main.h +++ b/examples/load_balancer/main.h @@ -56,7 +56,11 @@ #endif #ifndef APP_MAX_IO_LCORES +#if (APP_MAX_LCORES > 16) #define APP_MAX_IO_LCORES 16 +#else +#define APP_MAX_IO_LCORES APP_MAX_LCORES +#endif #endif #if (APP_MAX_IO_LCORES > APP_MAX_LCORES) #error "APP_MAX_IO_LCORES is too big" @@ -74,7 +78,11 @@ #endif #ifndef APP_MAX_WORKER_LCORES +#if (APP_MAX_LCORES > 16) #define APP_MAX_WORKER_LCORES 16 +#else +#define APP_MAX_WORKER_LCORES APP_MAX_LCORES +#endif #endif #if (APP_MAX_WORKER_LCORES > APP_MAX_LCORES) #error "APP_MAX_WORKER_LCORES is too big" diff --git a/examples/load_balancer/runtime.c b/examples/load_balancer/runtime.c index 7f918aa4..e54b7851 100644 --- a/examples/load_balancer/runtime.c +++ b/examples/load_balancer/runtime.c @@ -49,7 +49,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> diff --git a/examples/multi_process/Makefile b/examples/multi_process/Makefile index 6b315cc0..696633b9 100644 --- a/examples/multi_process/Makefile +++ b/examples/multi_process/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/multi_process/client_server_mp/Makefile b/examples/multi_process/client_server_mp/Makefile index 89cc6bf8..feb508a4 100644 --- a/examples/multi_process/client_server_mp/Makefile +++ b/examples/multi_process/client_server_mp/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/multi_process/client_server_mp/mp_client/Makefile b/examples/multi_process/client_server_mp/mp_client/Makefile index 2688fed0..2ee8cd2c 100644 --- a/examples/multi_process/client_server_mp/mp_client/Makefile +++ b/examples/multi_process/client_server_mp/mp_client/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment include $(RTE_SDK)/mk/rte.vars.mk # binary name diff --git a/examples/multi_process/client_server_mp/mp_client/client.c b/examples/multi_process/client_server_mp/mp_client/client.c index 01b535c2..f8453e57 100644 --- a/examples/multi_process/client_server_mp/mp_client/client.c +++ b/examples/multi_process/client_server_mp/mp_client/client.c @@ -50,11 +50,9 @@ #include <rte_branch_prediction.h> #include <rte_log.h> #include <rte_per_lcore.h> -#include <rte_launch.h> #include <rte_lcore.h> #include <rte_ring.h> 
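The rte_eth_dev_adjust_nb_rx_tx_desc() call threaded through all of these examples follows one pattern: configure the port, let the driver clamp the requested descriptor counts in place, then use the adjusted values for queue setup. A condensed sketch with a hypothetical port_init() (single RX/TX queue; the descriptor-adjust API is the one these hunks use, added in DPDK 17.05):

#include <rte_ethdev.h>

static int
port_init(uint8_t port_id, struct rte_mempool *pool)
{
	static const struct rte_eth_conf conf;	/* zeroed defaults */
	uint16_t nb_rxd = 128, nb_txd = 512;	/* requested sizes */
	int ret;

	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
	if (ret != 0)
		return ret;

	/* The driver rounds nb_rxd/nb_txd to supported values in place. */
	ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
	if (ret != 0)
		return ret;

	ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd,
			rte_eth_dev_socket_id(port_id), NULL, pool);
	if (ret < 0)
		return ret;

	ret = rte_eth_tx_queue_setup(port_id, 0, nb_txd,
			rte_eth_dev_socket_id(port_id), NULL);
	if (ret < 0)
		return ret;

	return rte_eth_dev_start(port_id);
}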
#include <rte_launch.h> -#include <rte_lcore.h> #include <rte_debug.h> #include <rte_mempool.h> #include <rte_mbuf.h> diff --git a/examples/multi_process/client_server_mp/mp_server/Makefile b/examples/multi_process/client_server_mp/mp_server/Makefile index c29e4783..5552999b 100644 --- a/examples/multi_process/client_server_mp/mp_server/Makefile +++ b/examples/multi_process/client_server_mp/mp_server/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/multi_process/client_server_mp/mp_server/init.c b/examples/multi_process/client_server_mp/mp_server/init.c index ad941a7a..0bc92921 100644 --- a/examples/multi_process/client_server_mp/mp_server/init.c +++ b/examples/multi_process/client_server_mp/mp_server/init.c @@ -123,8 +123,8 @@ init_port(uint8_t port_num) } }; const uint16_t rx_rings = 1, tx_rings = num_clients; - const uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT; - const uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT; + uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT; + uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT; uint16_t q; int retval; @@ -138,6 +138,11 @@ init_port(uint8_t port_num) &port_conf)) != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port_num, &rx_ring_size, + &tx_ring_size); + if (retval != 0) + return retval; + for (q = 0; q < rx_rings; q++) { retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size, rte_eth_dev_socket_id(port_num), diff --git a/examples/multi_process/client_server_mp/mp_server/main.c b/examples/multi_process/client_server_mp/mp_server/main.c index c2b0261d..7055b543 100644 --- a/examples/multi_process/client_server_mp/mp_server/main.c +++ b/examples/multi_process/client_server_mp/mp_server/main.c @@ -38,7 +38,6 @@ #include <stdint.h> #include <stdarg.h> #include <inttypes.h> -#include <inttypes.h> #include <sys/queue.h> #include <errno.h> #include <netinet/ip.h> @@ -47,7 +46,6 @@ #include <rte_memory.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_byteorder.h> #include <rte_launch.h> #include <rte_per_lcore.h> #include <rte_lcore.h> diff --git a/examples/multi_process/l2fwd_fork/Makefile b/examples/multi_process/l2fwd_fork/Makefile index ff257a35..11ae8ff4 100644 --- a/examples/multi_process/l2fwd_fork/Makefile +++ b/examples/multi_process/l2fwd_fork/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/multi_process/l2fwd_fork/flib.c b/examples/multi_process/l2fwd_fork/flib.c index 85bbc2d3..c22e983b 100644 --- a/examples/multi_process/l2fwd_fork/flib.c +++ b/examples/multi_process/l2fwd_fork/flib.c @@ -56,7 +56,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> diff --git a/examples/multi_process/l2fwd_fork/flib.h b/examples/multi_process/l2fwd_fork/flib.h index 711e3b6d..1064c9bb 100644 --- a/examples/multi_process/l2fwd_fork/flib.h +++ b/examples/multi_process/l2fwd_fork/flib.h @@ -120,7 +120,7 @@ int 
flib_register_slave_exit_notify(unsigned slave_id, /** * Assign a lcore ID to non-slave thread. Non-slave thread refers to thread that * not created by function rte_eal_remote_launch or rte_eal_mp_remote_launch. - * These threads can either bind lcore or float among differnt lcores. + * These threads can either be bound to an lcore or float among different lcores. * This lcore ID will be unique in multi-thread or multi-process DPDK running * environment, then it can benefit from using the cache mechanism provided in * mempool library. diff --git a/examples/multi_process/l2fwd_fork/main.c b/examples/multi_process/l2fwd_fork/main.c index d922522f..f8a626ba 100644 --- a/examples/multi_process/l2fwd_fork/main.c +++ b/examples/multi_process/l2fwd_fork/main.c @@ -53,7 +53,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_spinlock.h> @@ -937,7 +936,6 @@ main(int argc, char **argv) unsigned rx_lcore_id; unsigned nb_ports_in_mask = 0; unsigned i; - int flags = 0; uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc; /* Save cpu_affinity first, restore it in case it's floating process option */ @@ -987,7 +985,6 @@ main(int argc, char **argv) if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) continue; char buf_name[RTE_MEMPOOL_NAMESIZE]; - flags = MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET; snprintf(buf_name, RTE_MEMPOOL_NAMESIZE, MBUF_NAME, portid); l2fwd_pktmbuf_pool[portid] = rte_pktmbuf_pool_create(buf_name, NB_MBUF, 32, @@ -1082,6 +1079,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, (unsigned) portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%u\n", + ret, (unsigned) portid); + rte_eth_macaddr_get(portid,&l2fwd_ports_eth_addr[portid]); /* init one RX queue */ diff --git a/examples/multi_process/simple_mp/Makefile b/examples/multi_process/simple_mp/Makefile index 31ec0c80..7ac96f2f 100644 --- a/examples/multi_process/simple_mp/Makefile +++ b/examples/multi_process/simple_mp/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/multi_process/symmetric_mp/Makefile b/examples/multi_process/symmetric_mp/Makefile index c789f3c9..77d90c68 100644 --- a/examples/multi_process/symmetric_mp/Makefile +++ b/examples/multi_process/symmetric_mp/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/multi_process/symmetric_mp/main.c b/examples/multi_process/symmetric_mp/main.c index 0990d965..0f497910 100644 --- a/examples/multi_process/symmetric_mp/main.c +++ b/examples/multi_process/symmetric_mp/main.c @@ -61,7 +61,6 @@ #include <rte_eal.h> #include <rte_per_lcore.h> #include <rte_lcore.h> -#include <rte_debug.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> #include <rte_debug.h> @@ -229,6 +228,8 @@ smp_port_init(uint8_t port, struct rte_mempool *mbuf_pool, uint16_t
num_queues) struct rte_eth_dev_info info; int retval; uint16_t q; + uint16_t nb_rxd = RX_RING_SIZE; + uint16_t nb_txd = TX_RING_SIZE; if (rte_eal_process_type() == RTE_PROC_SECONDARY) return 0; @@ -246,8 +247,12 @@ smp_port_init(uint8_t port, struct rte_mempool *mbuf_pool, uint16_t num_queues) if (retval < 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if (retval < 0) + return retval; + for (q = 0; q < rx_rings; q ++) { - retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + retval = rte_eth_rx_queue_setup(port, q, nb_rxd, rte_eth_dev_socket_id(port), &info.default_rxconf, mbuf_pool); @@ -256,7 +261,7 @@ smp_port_init(uint8_t port, struct rte_mempool *mbuf_pool, uint16_t num_queues) } for (q = 0; q < tx_rings; q ++) { - retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + retval = rte_eth_tx_queue_setup(port, q, nb_txd, rte_eth_dev_socket_id(port), NULL); if (retval < 0) diff --git a/examples/netmap_compat/Makefile b/examples/netmap_compat/Makefile index 52d80869..fd4630af 100644 --- a/examples/netmap_compat/Makefile +++ b/examples/netmap_compat/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/netmap_compat/bridge/Makefile b/examples/netmap_compat/bridge/Makefile index 1d4ddfff..ce38a345 100644 --- a/examples/netmap_compat/bridge/Makefile +++ b/examples/netmap_compat/bridge/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define the RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/netmap_compat/lib/compat_netmap.c b/examples/netmap_compat/lib/compat_netmap.c index 112c551f..af2d9f3f 100644 --- a/examples/netmap_compat/lib/compat_netmap.c +++ b/examples/netmap_compat/lib/compat_netmap.c @@ -168,7 +168,7 @@ mbuf_to_slot(struct rte_mbuf *mbuf, struct netmap_ring *r, uint32_t index) /** * Given a Netmap ring and a slot index for that ring, construct a dpdk mbuf * from the data held in the buffer associated with the slot. - * Allocation/deallocation of the dpdk mbuf are the responsability of the + * Allocation/deallocation of the dpdk mbuf are the responsibility of the * caller. * Note that mbuf chains are not supported. 
*/ @@ -719,6 +719,15 @@ rte_netmap_init_port(uint8_t portid, const struct rte_netmap_port_conf *conf) return ret; } + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &rx_slots, &tx_slots); + + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Could not adjust number of descriptors for port %hhu\n", + portid); + return ret; + } + for (i = 0; i < conf->nr_tx_rings; i++) { ret = rte_eth_tx_queue_setup(portid, i, tx_slots, conf->socket_id, NULL); diff --git a/examples/packet_ordering/main.c b/examples/packet_ordering/main.c index 49ae35b8..b26c33df 100644 --- a/examples/packet_ordering/main.c +++ b/examples/packet_ordering/main.c @@ -290,6 +290,8 @@ configure_eth_port(uint8_t port_id) const uint8_t nb_ports = rte_eth_dev_count(); int ret; uint16_t q; + uint16_t nb_rxd = RX_DESC_PER_QUEUE; + uint16_t nb_txd = TX_DESC_PER_QUEUE; if (port_id > nb_ports) return -1; @@ -298,8 +300,12 @@ configure_eth_port(uint8_t port_id) if (ret != 0) return ret; + ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd); + if (ret != 0) + return ret; + for (q = 0; q < rxRings; q++) { - ret = rte_eth_rx_queue_setup(port_id, q, RX_DESC_PER_QUEUE, + ret = rte_eth_rx_queue_setup(port_id, q, nb_rxd, rte_eth_dev_socket_id(port_id), NULL, mbuf_pool); if (ret < 0) @@ -307,7 +313,7 @@ configure_eth_port(uint8_t port_id) } for (q = 0; q < txRings; q++) { - ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE, + ret = rte_eth_tx_queue_setup(port_id, q, nb_txd, rte_eth_dev_socket_id(port_id), NULL); if (ret < 0) return ret; diff --git a/examples/performance-thread/Makefile b/examples/performance-thread/Makefile index d19f8489..0c5edfdb 100644 --- a/examples/performance-thread/Makefile +++ b/examples/performance-thread/Makefile @@ -38,8 +38,8 @@ RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk -ifneq ($(CONFIG_RTE_ARCH),"x86_64") -$(error This application is only supported for x86_64 targets) +ifeq ($(filter y,$(CONFIG_RTE_ARCH_X86_64) $(CONFIG_RTE_ARCH_ARM64)),) +$(error This application is only supported for x86_64 and arm64 targets) endif DIRS-y += l3fwd-thread diff --git a/examples/performance-thread/common/arch/arm64/ctx.c b/examples/performance-thread/common/arch/arm64/ctx.c new file mode 100644 index 00000000..d0eacaa6 --- /dev/null +++ b/examples/performance-thread/common/arch/arm64/ctx.c @@ -0,0 +1,90 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_common.h> +#include <ctx.h> + +void +ctx_switch(struct ctx *new_ctx __rte_unused, struct ctx *curr_ctx __rte_unused) +{ + /* SAVE CURRENT CONTEXT */ + asm volatile ( + /* Save SP */ + "mov x3, sp\n" + "str x3, [x1, #0]\n" + + /* Save FP and LR */ + "stp x29, x30, [x1, #8]\n" + + /* Save Callee Saved Regs x19 - x28 */ + "stp x19, x20, [x1, #24]\n" + "stp x21, x22, [x1, #40]\n" + "stp x23, x24, [x1, #56]\n" + "stp x25, x26, [x1, #72]\n" + "stp x27, x28, [x1, #88]\n" + + /* + * Save bottom 64-bits of Callee Saved + * SIMD Regs v8 - v15 + */ + "stp d8, d9, [x1, #104]\n" + "stp d10, d11, [x1, #120]\n" + "stp d12, d13, [x1, #136]\n" + "stp d14, d15, [x1, #152]\n" + ); + + /* RESTORE NEW CONTEXT */ + asm volatile ( + /* Restore SP */ + "ldr x3, [x0, #0]\n" + "mov sp, x3\n" + + /* Restore FP and LR */ + "ldp x29, x30, [x0, #8]\n" + + /* Restore Callee Saved Regs x19 - x28 */ + "ldp x19, x20, [x0, #24]\n" + "ldp x21, x22, [x0, #40]\n" + "ldp x23, x24, [x0, #56]\n" + "ldp x25, x26, [x0, #72]\n" + "ldp x27, x28, [x0, #88]\n" + + /* + * Restore bottom 64-bits of Callee Saved + * SIMD Regs v8 - v15 + */ + "ldp d8, d9, [x0, #104]\n" + "ldp d10, d11, [x0, #120]\n" + "ldp d12, d13, [x0, #136]\n" + "ldp d14, d15, [x0, #152]\n" + ); +} diff --git a/examples/performance-thread/common/arch/arm64/ctx.h b/examples/performance-thread/common/arch/arm64/ctx.h new file mode 100644 index 00000000..38c86ce6 --- /dev/null +++ b/examples/performance-thread/common/arch/arm64/ctx.h @@ -0,0 +1,83 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CTX_H +#define CTX_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * CPU context registers + */ +struct ctx { + void *sp; /* 0 */ + void *fp; /* 8 */ + void *lr; /* 16 */ + + /* Callee Saved Generic Registers */ + void *r19; /* 24 */ + void *r20; /* 32 */ + void *r21; /* 40 */ + void *r22; /* 48 */ + void *r23; /* 56 */ + void *r24; /* 64 */ + void *r25; /* 72 */ + void *r26; /* 80 */ + void *r27; /* 88 */ + void *r28; /* 96 */ + + /* + * Callee Saved SIMD Registers. Only the bottom 64 bits + * of these registers need to be saved. + */ + void *v8; /* 104 */ + void *v9; /* 112 */ + void *v10; /* 120 */ + void *v11; /* 128 */ + void *v12; /* 136 */ + void *v13; /* 144 */ + void *v14; /* 152 */ + void *v15; /* 160 */ +}; + + +void +ctx_switch(struct ctx *new_ctx, struct ctx *curr_ctx); + + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_CTX_H_ */ diff --git a/examples/performance-thread/common/arch/arm64/stack.h b/examples/performance-thread/common/arch/arm64/stack.h new file mode 100644 index 00000000..fa3b31e9 --- /dev/null +++ b/examples/performance-thread/common/arch/arm64/stack.h @@ -0,0 +1,84 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STACK_H +#define STACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "lthread_int.h" + +/* + * Sets up the initial stack for the lthread.
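Since the ctx_switch() assembly above hard-codes every store offset ([x1, #0], [x1, #8], [x1, #24], ..., [x1, #152]), the struct ctx layout it pairs with must match field for field. A compile-time cross-check one could keep next to the struct (C11 _Static_assert; a sketch, not part of the patch):

#include <stddef.h>

#include "ctx.h"	/* the arm64 struct ctx shown above */

/* Offsets hard-coded in the ctx_switch() assembly. */
_Static_assert(offsetof(struct ctx, sp) == 0, "sp stored at #0");
_Static_assert(offsetof(struct ctx, fp) == 8, "fp/lr pair stored at #8");
_Static_assert(offsetof(struct ctx, r19) == 24, "x19..x28 start at #24");
_Static_assert(offsetof(struct ctx, r28) == 96, "x27/x28 pair ends at #96");
_Static_assert(offsetof(struct ctx, v8) == 104, "d8..d15 start at #104");
_Static_assert(offsetof(struct ctx, v15) == 160, "d14/d15 pair ends at #160");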
+ */ +static inline void +arch_set_stack(struct lthread *lt, void *func) +{ + void **stack_top = (void *)((char *)(lt->stack) + lt->stack_size); + + /* + * Align stack_top to 16 bytes. Arm64 has the constraint that the + * stack pointer must always be quad-word aligned. + */ + stack_top = (void **)(((unsigned long)(stack_top)) & ~0xfUL); + + /* + * First Stack Frame + */ + stack_top[0] = NULL; + stack_top[-1] = NULL; + + /* + * Initialize the context + */ + lt->ctx.fp = &stack_top[-1]; + lt->ctx.sp = &stack_top[-2]; + + /* + * Here only the address of _lthread_exec is saved as the link + * register value. The argument to _lthread_exec, i.e. the address of + * the lthread struct, is not saved. This is because the first + * argument to ctx_switch is the address of the new context, + * which also happens to be the address of the required lthread struct. + * So while returning from ctx_switch into _lthread_exec, parameter + * register x0 will always contain the required value. + */ + lt->ctx.lr = func; +} + +#ifdef __cplusplus +} +#endif + +#endif /* STACK_H_ */ diff --git a/examples/performance-thread/common/arch/x86/stack.h b/examples/performance-thread/common/arch/x86/stack.h new file mode 100644 index 00000000..98723ba3 --- /dev/null +++ b/examples/performance-thread/common/arch/x86/stack.h @@ -0,0 +1,94 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * Copyright(c) Cavium, Inc. 2017. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software are derived from the + * https://github.com/halayli/lthread project, which carries the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#ifndef STACK_H +#define STACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "lthread_int.h" + +/* + * Sets up the initial stack for the lthread. + */ +static inline void +arch_set_stack(struct lthread *lt, void *func) +{ + char *stack_top = (char *)(lt->stack) + lt->stack_size; + void **s = (void **)stack_top; + + /* set initial context */ + s[-3] = NULL; + s[-2] = (void *)lt; + lt->ctx.rsp = (void *)(stack_top - (4 * sizeof(void *))); + lt->ctx.rbp = (void *)(stack_top - (3 * sizeof(void *))); + lt->ctx.rip = func; +} + +#ifdef __cplusplus +} +#endif + +#endif /* STACK_H_ */ diff --git a/examples/performance-thread/common/common.mk b/examples/performance-thread/common/common.mk index f6cab771..f1f05fdd 100644 --- a/examples/performance-thread/common/common.mk +++ b/examples/performance-thread/common/common.mk @@ -37,8 +37,14 @@ MKFILE_PATH=$(abspath $(dir $(lastword $(MAKEFILE_LIST)))) -VPATH := $(MKFILE_PATH) $(MKFILE_PATH)/arch/x86 +ifeq ($(CONFIG_RTE_ARCH_X86_64),y) +ARCH_PATH += $(MKFILE_PATH)/arch/x86 +else ifeq ($(CONFIG_RTE_ARCH_ARM64),y) +ARCH_PATH += $(MKFILE_PATH)/arch/arm64 +endif + +VPATH := $(MKFILE_PATH) $(ARCH_PATH) SRCS-y += lthread.c lthread_sched.c lthread_cond.c lthread_tls.c lthread_mutex.c lthread_diag.c ctx.c -INCLUDES += -I$(MKFILE_PATH) -I$(MKFILE_PATH)/arch/x86/ +INCLUDES += -I$(MKFILE_PATH) -I$(ARCH_PATH) diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c index 062275a4..7d76c8c4 100644 --- a/examples/performance-thread/common/lthread.c +++ b/examples/performance-thread/common/lthread.c @@ -76,6 +76,7 @@ #include <rte_log.h> #include <ctx.h> +#include <stack.h> #include "lthread_api.h" #include "lthread.h" @@ -190,19 +191,11 @@ _lthread_init(struct lthread *lt, */ void _lthread_set_stack(struct lthread *lt, void *stack, size_t stack_size) { - char *stack_top = (char *)stack + stack_size; - void **s = (void **)stack_top; - /* set stack */ lt->stack = stack; lt->stack_size = stack_size; - /* set initial context */ - s[-3] = NULL; - s[-2] = (void *)lt; - lt->ctx.rsp = (void *)(stack_top - (4 * sizeof(void *))); - lt->ctx.rbp = (void *)(stack_top - (3 * sizeof(void *))); - lt->ctx.rip = (void *)_lthread_exec; + arch_set_stack(lt, _lthread_exec); } /* diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h index 3f7fb92d..e1da2462 100644 --- a/examples/performance-thread/common/lthread_int.h +++ b/examples/performance-thread/common/lthread_int.h @@ -59,7 +59,6 @@ * 
SUCH DAMAGE. */ #ifndef LTHREAD_INT_H -#include <lthread_api.h> #define LTHREAD_INT_H #ifdef __cplusplus diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c index c1bc6271..c06d3d51 100644 --- a/examples/performance-thread/common/lthread_mutex.c +++ b/examples/performance-thread/common/lthread_mutex.c @@ -173,7 +173,7 @@ int lthread_mutex_lock(struct lthread_mutex *m) return 0; } -/* try to lock a mutex but dont block */ +/* try to lock a mutex but don't block */ int lthread_mutex_trylock(struct lthread_mutex *m) { struct lthread *lt = THIS_LTHREAD; diff --git a/examples/performance-thread/common/lthread_pool.h b/examples/performance-thread/common/lthread_pool.h index fb0c578b..315a2e21 100644 --- a/examples/performance-thread/common/lthread_pool.h +++ b/examples/performance-thread/common/lthread_pool.h @@ -174,7 +174,7 @@ _qnode_pool_create(const char *name, int prealloc_size) { /* * Insert a node into the pool */ -static inline void __attribute__ ((always_inline)) +static __rte_always_inline void _qnode_pool_insert(struct qnode_pool *p, struct qnode *n) { n->next = NULL; @@ -198,7 +198,7 @@ _qnode_pool_insert(struct qnode_pool *p, struct qnode *n) * last item from the queue incurs the penalty of an atomic exchange. Since the * pool is maintained with a bulk pre-allocation the cost of this is amortised. */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _pool_remove(struct qnode_pool *p) { struct qnode *head; @@ -239,7 +239,7 @@ _pool_remove(struct qnode_pool *p) * This adds a retry to the _pool_remove function * defined above */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _qnode_pool_remove(struct qnode_pool *p) { struct qnode *n; @@ -259,7 +259,7 @@ _qnode_pool_remove(struct qnode_pool *p) * Allocate a node from the pool * If the pool is empty add mode nodes */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _qnode_alloc(void) { struct qnode_pool *p = (THIS_SCHED)->qnode_pool; @@ -304,7 +304,7 @@ _qnode_alloc(void) /* * free a queue node to the per scheduler pool from which it came */ -static inline void __attribute__ ((always_inline)) +static __rte_always_inline void _qnode_free(struct qnode *n) { struct qnode_pool *p = n->pool; diff --git a/examples/performance-thread/common/lthread_queue.h b/examples/performance-thread/common/lthread_queue.h index 4fc2074e..833ed92b 100644 --- a/examples/performance-thread/common/lthread_queue.h +++ b/examples/performance-thread/common/lthread_queue.h @@ -154,7 +154,7 @@ _lthread_queue_create(const char *name) /** * Return true if the queue is empty */ -static inline int __attribute__ ((always_inline)) +static __rte_always_inline int _lthread_queue_empty(struct lthread_queue *q) { return q->tail == q->head; @@ -185,7 +185,7 @@ RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched); * Insert a node into a queue * this implementation is multi producer safe */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _lthread_queue_insert_mp(struct lthread_queue *q, void *data) { @@ -219,7 +219,7 @@ _lthread_queue_insert_mp(struct lthread_queue * Insert an node into a queue in single producer mode * this implementation is NOT mult producer safe */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * 
_lthread_queue_insert_sp(struct lthread_queue *q, void *data) { @@ -247,7 +247,7 @@ _lthread_queue_insert_sp(struct lthread_queue /* * Remove a node from a queue */ -static inline void *__attribute__ ((always_inline)) +static __rte_always_inline void * _lthread_queue_poll(struct lthread_queue *q) { void *data = NULL; @@ -278,7 +278,7 @@ _lthread_queue_poll(struct lthread_queue *q) /* * Remove a node from a queue */ -static inline void *__attribute__ ((always_inline)) +static __rte_always_inline void * _lthread_queue_remove(struct lthread_queue *q) { void *data = NULL; diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c index c64c21ff..98291478 100644 --- a/examples/performance-thread/common/lthread_sched.c +++ b/examples/performance-thread/common/lthread_sched.c @@ -369,8 +369,8 @@ void lthread_scheduler_shutdown_all(void) /* * Resume a suspended lthread */ -static inline void -_lthread_resume(struct lthread *lt) __attribute__ ((always_inline)); +static __rte_always_inline void +_lthread_resume(struct lthread *lt); static inline void _lthread_resume(struct lthread *lt) { struct lthread_sched *sched = THIS_SCHED; diff --git a/examples/performance-thread/common/lthread_sched.h b/examples/performance-thread/common/lthread_sched.h index 7cddda9c..aa2f0c48 100644 --- a/examples/performance-thread/common/lthread_sched.h +++ b/examples/performance-thread/common/lthread_sched.h @@ -112,8 +112,8 @@ static inline uint64_t _sched_now(void) return 1; } -static inline void -_affinitize(void) __attribute__ ((always_inline)); +static __rte_always_inline void +_affinitize(void); static inline void _affinitize(void) { @@ -123,8 +123,8 @@ _affinitize(void) ctx_switch(&(THIS_SCHED)->ctx, <->ctx); } -static inline void -_suspend(void) __attribute__ ((always_inline)); +static __rte_always_inline void +_suspend(void); static inline void _suspend(void) { @@ -136,8 +136,8 @@ _suspend(void) (THIS_SCHED)->nb_blocked_threads--; } -static inline void -_reschedule(void) __attribute__ ((always_inline)); +static __rte_always_inline void +_reschedule(void); static inline void _reschedule(void) { diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c index 2d98473e..7954b974 100644 --- a/examples/performance-thread/l3fwd-thread/main.c +++ b/examples/performance-thread/l3fwd-thread/main.c @@ -52,7 +52,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -73,6 +72,7 @@ #include <rte_tcp.h> #include <rte_udp.h> #include <rte_string_fns.h> +#include <rte_pause.h> #include <cmdline_parse.h> #include <cmdline_parse_etheraddr.h> @@ -157,11 +157,7 @@ cb_parse_ptype(__rte_unused uint8_t port, __rte_unused uint16_t queue, * When set to one, optimized forwarding path is enabled. * Note that LPM optimisation path uses SSE4.1 instructions. 
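For context on the mechanical conversions in these hunks: __rte_always_inline is a then-new macro in rte_common.h which, as far as I can tell, expands to inline __attribute__((always_inline)), so it folds the separate forward declaration the old spelling needed into the definition itself:

/* Approximate definition from rte_common.h (DPDK >= 17.08): */
#define __rte_always_inline inline __attribute__((always_inline))

/* Old spelling: the attribute sits on a separate declaration. */
static inline int add_one(int x) __attribute__((always_inline));
static inline int add_one(int x) { return x + 1; }

/* New spelling: one line, same effect. */
static __rte_always_inline int add_two(int x) { return x + 2; }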
*/ -#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__)) -#define ENABLE_MULTI_BUFFER_OPTIMIZE 0 -#else #define ENABLE_MULTI_BUFFER_OPTIMIZE 1 -#endif #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) #include <rte_hash.h> @@ -188,10 +184,10 @@ cb_parse_ptype(__rte_unused uint8_t port, __rte_unused uint16_t queue, */ #define NB_MBUF RTE_MAX(\ - (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ - nb_ports*nb_lcores*MAX_PKT_BURST + \ - nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ - nb_lcores*MEMPOOL_CACHE_SIZE), \ + (nb_ports*nb_rx_queue*nb_rxd + \ + nb_ports*nb_lcores*MAX_PKT_BURST + \ + nb_ports*n_tx_queue*nb_txd + \ + nb_lcores*MEMPOOL_CACHE_SIZE), \ (unsigned)8192) #define MAX_PKT_BURST 32 @@ -225,7 +221,7 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; -static __m128i val_eth[RTE_MAX_ETHPORTS]; +static xmm_t val_eth[RTE_MAX_ETHPORTS]; /* replace first 12B of the ethernet header. */ #define MASK_ETH 0x3f @@ -362,13 +358,8 @@ static struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 #include <rte_hash_crc.h> #define DEFAULT_HASH_FUNC rte_hash_crc -#else -#include <rte_jhash.h> -#define DEFAULT_HASH_FUNC rte_jhash -#endif struct ipv4_5tuple { uint32_t ip_dst; @@ -485,17 +476,10 @@ ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len, t = k->proto; p = (const uint32_t *)&k->port_src; -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 init_val = rte_hash_crc_4byte(t, init_val); init_val = rte_hash_crc_4byte(k->ip_src, init_val); init_val = rte_hash_crc_4byte(k->ip_dst, init_val); init_val = rte_hash_crc_4byte(*p, init_val); -#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - init_val = rte_jhash_1word(t, init_val); - init_val = rte_jhash_1word(k->ip_src, init_val); - init_val = rte_jhash_1word(k->ip_dst, init_val); - init_val = rte_jhash_1word(*p, init_val); -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ return init_val; } @@ -506,16 +490,13 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, const union ipv6_5tuple_host *k; uint32_t t; const uint32_t *p; -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3; const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3; -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ k = data; t = k->proto; p = (const uint32_t *)&k->port_src; -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 ip_src0 = (const uint32_t *) k->ip_src; ip_src1 = (const uint32_t *)(k->ip_src + 4); ip_src2 = (const uint32_t *)(k->ip_src + 8); @@ -534,12 +515,6 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, init_val = rte_hash_crc_4byte(*ip_dst2, init_val); init_val = rte_hash_crc_4byte(*ip_dst3, init_val); init_val = rte_hash_crc_4byte(*p, init_val); -#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - init_val = rte_jhash_1word(t, init_val); - init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); - init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); - init_val = rte_jhash_1word(*p, init_val); -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ return init_val; } @@ -720,7 +695,7 @@ send_single_packet(struct rte_mbuf *m, uint8_t port) #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) -static inline __attribute__((always_inline)) void +static __rte_always_inline void send_packetsx4(uint8_t port, struct rte_mbuf *m[], uint32_t num) { @@ -761,12 +736,15 @@ send_packetsx4(uint8_t port, case 0: 
qconf->tx_mbufs[port].m_table[len + j] = m[j]; j++; + /* fall-through */ case 3: qconf->tx_mbufs[port].m_table[len + j] = m[j]; j++; + /* fall-through */ case 2: qconf->tx_mbufs[port].m_table[len + j] = m[j]; j++; + /* fall-through */ case 1: qconf->tx_mbufs[port].m_table[len + j] = m[j]; j++; @@ -788,12 +766,15 @@ send_packetsx4(uint8_t port, case 0: qconf->tx_mbufs[port].m_table[j] = m[n + j]; j++; + /* fall-through */ case 3: qconf->tx_mbufs[port].m_table[j] = m[n + j]; j++; + /* fall-through */ case 2: qconf->tx_mbufs[port].m_table[j] = m[n + j]; j++; + /* fall-through */ case 1: qconf->tx_mbufs[port].m_table[j] = m[n + j]; j++; @@ -1281,7 +1262,7 @@ simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid) } #endif /* APP_LOOKUP_METHOD */ -static inline __attribute__((always_inline)) void +static __rte_always_inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid) { struct ether_hdr *eth_hdr; @@ -1369,7 +1350,7 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid) * If we encounter invalid IPV4 packet, then set destination port for it * to BAD_PORT value. */ -static inline __attribute__((always_inline)) void +static __rte_always_inline void rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) { uint8_t ihl; @@ -1397,7 +1378,7 @@ rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) -static inline __attribute__((always_inline)) uint16_t +static __rte_always_inline uint16_t get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint8_t portid) { uint32_t next_hop; @@ -1598,7 +1579,7 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) * Suppose we have array of destionation ports: * dst_port[] = {a, b, c, d,, e, ... } * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. - * We doing 4 comparisions at once and the result is 4 bit mask. + * We doing 4 comparisons at once and the result is 4 bit mask. * This mask is used as an index into prebuild array of pnum values. 
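
[Editor's note] The /* fall-through */ markers added to send_packetsx4() and process_burst() document that the missing break statements in these Duff's-device-style remainder switches are intentional, which also satisfies compilers that warn on implicit fall-through (e.g. GCC 7's -Wimplicit-fallthrough). A stripped-down sketch of the idiom, with hypothetical names:

        #include <stdint.h>

        /*
         * Copy the remainder (num % 4) of a burst: each case deliberately
         * falls into the next, so entering at "case 3" copies three items,
         * at "case 2" two, and so on.
         */
        static void
        copy_tail(void *dst[], void * const src[], uint32_t num)
        {
                uint32_t j = 0;

                switch (num & 3) {
                case 3:
                        dst[j] = src[j]; j++;
                        /* fall-through */
                case 2:
                        dst[j] = src[j]; j++;
                        /* fall-through */
                case 1:
                        dst[j] = src[j]; j++;
                        /* fall-through */
                case 0:
                        break;
                }
        }
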
*/ static inline uint16_t * @@ -1860,10 +1841,12 @@ process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx, process_packet(pkts_burst[j], dst_port + j, portid); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; + /* fall-through */ case 2: process_packet(pkts_burst[j], dst_port + j, portid); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; + /* fall-through */ case 1: process_packet(pkts_burst[j], dst_port + j, portid); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); @@ -3587,6 +3570,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", ret, portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n", + ret, portid); + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); printf(", "); diff --git a/examples/ptpclient/ptpclient.c b/examples/ptpclient/ptpclient.c index a80961d3..ddfcdb83 100644 --- a/examples/ptpclient/ptpclient.c +++ b/examples/ptpclient/ptpclient.c @@ -210,6 +210,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) const uint16_t tx_rings = 1; int retval; uint16_t q; + uint16_t nb_rxd = RX_RING_SIZE; + uint16_t nb_txd = TX_RING_SIZE; if (port >= rte_eth_dev_count()) return -1; @@ -219,9 +221,13 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if (retval != 0) + return retval; + /* Allocate and set up 1 RX queue per Ethernet port. */ for (q = 0; q < rx_rings; q++) { - retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + retval = rte_eth_rx_queue_setup(port, q, nb_rxd, rte_eth_dev_socket_id(port), NULL, mbuf_pool); if (retval < 0) @@ -237,7 +243,7 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) txconf = &dev_info.default_txconf; txconf->txq_flags = 0; - retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + retval = rte_eth_tx_queue_setup(port, q, nb_txd, rte_eth_dev_socket_id(port), txconf); if (retval < 0) return retval; diff --git a/examples/qos_meter/Makefile b/examples/qos_meter/Makefile index 5113a129..de1f12ce 100644 --- a/examples/qos_meter/Makefile +++ b/examples/qos_meter/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/qos_meter/main.c b/examples/qos_meter/main.c index d8a2107d..b0909f6a 100644 --- a/examples/qos_meter/main.c +++ b/examples/qos_meter/main.c @@ -308,6 +308,8 @@ int main(int argc, char **argv) { uint32_t lcore_id; + uint16_t nb_rxd = NIC_RX_QUEUE_DESC; + uint16_t nb_txd = NIC_TX_QUEUE_DESC; int ret; /* EAL init */ @@ -337,13 +339,18 @@ main(int argc, char **argv) if (ret < 0) rte_exit(EXIT_FAILURE, "Port %d configuration error (%d)\n", port_rx, ret); - ret = rte_eth_rx_queue_setup(port_rx, NIC_RX_QUEUE, NIC_RX_QUEUE_DESC, + ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_rx, &nb_rxd, &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Port %d adjust number of descriptors error (%d)\n", + port_rx, ret); + + ret = rte_eth_rx_queue_setup(port_rx, NIC_RX_QUEUE, nb_rxd, rte_eth_dev_socket_id(port_rx), NULL, pool); if (ret < 0) rte_exit(EXIT_FAILURE, "Port %d RX queue setup error (%d)\n", port_rx, ret); - ret = 
rte_eth_tx_queue_setup(port_rx, NIC_TX_QUEUE, NIC_TX_QUEUE_DESC, + ret = rte_eth_tx_queue_setup(port_rx, NIC_TX_QUEUE, nb_txd, rte_eth_dev_socket_id(port_rx), NULL); if (ret < 0) @@ -353,13 +360,20 @@ main(int argc, char **argv) if (ret < 0) rte_exit(EXIT_FAILURE, "Port %d configuration error (%d)\n", port_tx, ret); - ret = rte_eth_rx_queue_setup(port_tx, NIC_RX_QUEUE, NIC_RX_QUEUE_DESC, + nb_rxd = NIC_RX_QUEUE_DESC; + nb_txd = NIC_TX_QUEUE_DESC; + ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_tx, &nb_rxd, &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Port %d adjust number of descriptors error (%d)\n", + port_tx, ret); + + ret = rte_eth_rx_queue_setup(port_tx, NIC_RX_QUEUE, nb_rxd, rte_eth_dev_socket_id(port_tx), NULL, pool); if (ret < 0) rte_exit(EXIT_FAILURE, "Port %d RX queue setup error (%d)\n", port_tx, ret); - ret = rte_eth_tx_queue_setup(port_tx, NIC_TX_QUEUE, NIC_TX_QUEUE_DESC, + ret = rte_eth_tx_queue_setup(port_tx, NIC_TX_QUEUE, nb_txd, rte_eth_dev_socket_id(port_tx), NULL); if (ret < 0) diff --git a/examples/qos_sched/Makefile b/examples/qos_sched/Makefile index e41ac500..56829c21 100644 --- a/examples/qos_sched/Makefile +++ b/examples/qos_sched/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c index 476a0ee1..2350d64f 100644 --- a/examples/qos_sched/args.c +++ b/examples/qos_sched/args.c @@ -245,6 +245,7 @@ app_parse_flow_conf(const char *conf_str) struct flow_conf *pconf; uint64_t mask; + memset(vals, 0, sizeof(vals)); ret = app_parse_opt_vals(conf_str, ',', 6, vals); if (ret < 4 || ret > 5) return ret; diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c index fe0221c6..a82cbd7d 100644 --- a/examples/qos_sched/init.c +++ b/examples/qos_sched/init.c @@ -106,6 +106,8 @@ app_init_port(uint8_t portid, struct rte_mempool *mp) struct rte_eth_link link; struct rte_eth_rxconf rx_conf; struct rte_eth_txconf tx_conf; + uint16_t rx_size; + uint16_t tx_size; /* check if port already initialized (multistream configuration) */ if (app_inited_port_mask & (1u << portid)) @@ -132,6 +134,15 @@ app_init_port(uint8_t portid, struct rte_mempool *mp) rte_exit(EXIT_FAILURE, "Cannot configure device: " "err=%d, port=%"PRIu8"\n", ret, portid); + rx_size = ring_conf.rx_size; + tx_size = ring_conf.tx_size; + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &rx_size, &tx_size); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_adjust_nb_rx_tx_desc: " + "err=%d, port=%"PRIu8"\n", ret, portid); + ring_conf.rx_size = rx_size; + ring_conf.tx_size = tx_size; + /* init one RX queue */ fflush(stdout); ret = rte_eth_rx_queue_setup(portid, 0, (uint16_t)ring_conf.rx_size, diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h index c7490c61..8d02e1ad 100644 --- a/examples/qos_sched/main.h +++ b/examples/qos_sched/main.h @@ -69,8 +69,13 @@ extern "C" { #define BURST_TX_DRAIN_US 100 #ifndef APP_MAX_LCORE +#if (RTE_MAX_LCORE > 64) #define APP_MAX_LCORE 64 +#else +#define APP_MAX_LCORE RTE_MAX_LCORE +#endif #endif + #define MAX_DATA_STREAMS (APP_MAX_LCORE/2) #define MAX_SCHED_SUBPORTS 8 #define MAX_SCHED_PIPES 4096 diff --git a/examples/quota_watermark/Makefile b/examples/quota_watermark/Makefile index 17fe473b..40a01fa4 100644 --- 
a/examples/quota_watermark/Makefile +++ b/examples/quota_watermark/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/quota_watermark/qw/Makefile b/examples/quota_watermark/qw/Makefile index fac9328d..627897ce 100644 --- a/examples/quota_watermark/qw/Makefile +++ b/examples/quota_watermark/qw/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/quota_watermark/qw/init.c b/examples/quota_watermark/qw/init.c index b6264fcf..083a37a9 100644 --- a/examples/quota_watermark/qw/init.c +++ b/examples/quota_watermark/qw/init.c @@ -76,6 +76,8 @@ static struct rte_eth_fc_conf fc_conf = { void configure_eth_port(uint8_t port_id) { int ret; + uint16_t nb_rxd = RX_DESC_PER_QUEUE; + uint16_t nb_txd = TX_DESC_PER_QUEUE; rte_eth_dev_stop(port_id); @@ -84,8 +86,14 @@ void configure_eth_port(uint8_t port_id) rte_exit(EXIT_FAILURE, "Cannot configure port %u (error %d)\n", (unsigned int) port_id, ret); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors for port %u (error %d)\n", + (unsigned int) port_id, ret); + /* Initialize the port's RX queue */ - ret = rte_eth_rx_queue_setup(port_id, 0, RX_DESC_PER_QUEUE, + ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd, rte_eth_dev_socket_id(port_id), NULL, mbuf_pool); @@ -95,7 +103,7 @@ void configure_eth_port(uint8_t port_id) (unsigned int) port_id, ret); /* Initialize the port's TX queue */ - ret = rte_eth_tx_queue_setup(port_id, 0, TX_DESC_PER_QUEUE, + ret = rte_eth_tx_queue_setup(port_id, 0, nb_txd, rte_eth_dev_socket_id(port_id), NULL); if (ret < 0) diff --git a/examples/quota_watermark/qwctl/Makefile b/examples/quota_watermark/qwctl/Makefile index 1ca2f1e9..e0f0083d 100644 --- a/examples/quota_watermark/qwctl/Makefile +++ b/examples/quota_watermark/qwctl/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/rxtx_callbacks/main.c b/examples/rxtx_callbacks/main.c index 048b23f5..66992405 100644 --- a/examples/rxtx_callbacks/main.c +++ b/examples/rxtx_callbacks/main.c @@ -101,6 +101,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) { struct rte_eth_conf port_conf = port_conf_default; const uint16_t rx_rings = 1, tx_rings = 1; + uint16_t nb_rxd = RX_RING_SIZE; + uint16_t nb_txd = TX_RING_SIZE; int retval; uint16_t q; @@ -111,15 +113,19 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if (retval != 0) + return retval; + for (q = 0; q < rx_rings; q++) { - retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + retval = rte_eth_rx_queue_setup(port, q, nb_rxd, rte_eth_dev_socket_id(port), NULL, mbuf_pool); if (retval 
< 0) return retval; } for (q = 0; q < tx_rings; q++) { - retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + retval = rte_eth_tx_queue_setup(port, q, nb_txd, rte_eth_dev_socket_id(port), NULL); if (retval < 0) return retval; diff --git a/examples/server_node_efd/node/node.c b/examples/server_node_efd/node/node.c index f780b926..86e57c89 100644 --- a/examples/server_node_efd/node/node.c +++ b/examples/server_node_efd/node/node.c @@ -53,8 +53,6 @@ #include <rte_launch.h> #include <rte_lcore.h> #include <rte_ring.h> -#include <rte_launch.h> -#include <rte_lcore.h> #include <rte_debug.h> #include <rte_mempool.h> #include <rte_mbuf.h> diff --git a/examples/server_node_efd/server/init.c b/examples/server_node_efd/server/init.c index 82457b44..d114e5bf 100644 --- a/examples/server_node_efd/server/init.c +++ b/examples/server_node_efd/server/init.c @@ -130,8 +130,8 @@ init_port(uint8_t port_num) } }; const uint16_t rx_rings = 1, tx_rings = num_nodes; - const uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT; - const uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT; + uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT; + uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT; uint16_t q; int retval; @@ -147,6 +147,11 @@ init_port(uint8_t port_num) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port_num, &rx_ring_size, + &tx_ring_size); + if (retval != 0) + return retval; + for (q = 0; q < rx_rings; q++) { retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size, rte_eth_dev_socket_id(port_num), diff --git a/examples/server_node_efd/server/main.c b/examples/server_node_efd/server/main.c index 597b4c25..dcdc0a48 100644 --- a/examples/server_node_efd/server/main.c +++ b/examples/server_node_efd/server/main.c @@ -38,7 +38,6 @@ #include <stdint.h> #include <stdarg.h> #include <inttypes.h> -#include <inttypes.h> #include <sys/queue.h> #include <errno.h> #include <netinet/ip.h> @@ -47,7 +46,6 @@ #include <rte_memory.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_byteorder.h> #include <rte_launch.h> #include <rte_per_lcore.h> #include <rte_lcore.h> diff --git a/examples/skeleton/basicfwd.c b/examples/skeleton/basicfwd.c index c89822cb..b4d50de8 100644 --- a/examples/skeleton/basicfwd.c +++ b/examples/skeleton/basicfwd.c @@ -61,6 +61,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) { struct rte_eth_conf port_conf = port_conf_default; const uint16_t rx_rings = 1, tx_rings = 1; + uint16_t nb_rxd = RX_RING_SIZE; + uint16_t nb_txd = TX_RING_SIZE; int retval; uint16_t q; @@ -72,9 +74,13 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if (retval != 0) + return retval; + /* Allocate and set up 1 RX queue per Ethernet port. */ for (q = 0; q < rx_rings; q++) { - retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + retval = rte_eth_rx_queue_setup(port, q, nb_rxd, rte_eth_dev_socket_id(port), NULL, mbuf_pool); if (retval < 0) return retval; @@ -82,7 +88,7 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) /* Allocate and set up 1 TX queue per Ethernet port. 
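
[Editor's note] This hunk shows the pattern that recurs through nearly every example in the series: rte_eth_dev_adjust_nb_rx_tx_desc() is slotted between rte_eth_dev_configure() and the queue-setup calls so the requested descriptor counts get clamped, up or down, to what the port actually supports; the formerly const ring-size locals become writable because the call updates them in place. A condensed sketch of the sequence (the constants and helper name are illustrative):

        #include <rte_ethdev.h>

        #define RX_RING_SIZE 128
        #define TX_RING_SIZE 512

        static int
        setup_port(uint8_t port, struct rte_mempool *mp,
                   const struct rte_eth_conf *conf)
        {
                uint16_t nb_rxd = RX_RING_SIZE;         /* requested... */
                uint16_t nb_txd = TX_RING_SIZE;
                int ret;

                ret = rte_eth_dev_configure(port, 1, 1, conf);
                if (ret != 0)
                        return ret;

                /* ...replaced with what the hardware can actually provide */
                ret = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
                if (ret != 0)
                        return ret;

                ret = rte_eth_rx_queue_setup(port, 0, nb_rxd,
                                rte_eth_dev_socket_id(port), NULL, mp);
                if (ret < 0)
                        return ret;

                return rte_eth_tx_queue_setup(port, 0, nb_txd,
                                rte_eth_dev_socket_id(port), NULL);
        }

The same motive explains two earlier hunks: qos_meter resets nb_rxd/nb_txd to the defaults before adjusting the second port, since the first call may have rewritten them, and l3fwd-thread now sizes NB_MBUF from the runtime nb_rxd/nb_txd rather than the compile-time defaults.
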
*/ for (q = 0; q < tx_rings; q++) { - retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + retval = rte_eth_tx_queue_setup(port, q, nb_txd, rte_eth_dev_socket_id(port), NULL); if (retval < 0) return retval; diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c index cd6e3f1c..aee36c6e 100644 --- a/examples/tep_termination/main.c +++ b/examples/tep_termination/main.c @@ -50,6 +50,7 @@ #include <rte_string_fns.h> #include <rte_malloc.h> #include <rte_vhost.h> +#include <rte_pause.h> #include "main.h" #include "vxlan.h" @@ -559,7 +560,7 @@ check_ports_num(unsigned max_nb_ports) * This function routes the TX packet to the correct interface. This may be a local device * or the physical port. */ -static inline void __attribute__((always_inline)) +static __rte_always_inline void virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m) { struct mbuf_table *tx_q; diff --git a/examples/tep_termination/vxlan_setup.c b/examples/tep_termination/vxlan_setup.c index b57c0451..050bb32d 100644 --- a/examples/tep_termination/vxlan_setup.c +++ b/examples/tep_termination/vxlan_setup.c @@ -135,8 +135,8 @@ vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool) uint16_t q; struct rte_eth_dev_info dev_info; uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count(); - const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT; - const uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT; + uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT; + uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT; struct rte_eth_udp_tunnel tunnel_udp; struct rte_eth_rxconf *rxconf; struct rte_eth_txconf *txconf; @@ -166,6 +166,11 @@ vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size, + &tx_ring_size); + if (retval != 0) + return retval; + /* Setup the queues. 
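
[Editor's note] tep_termination and vhost also gain an explicit #include <rte_pause.h>. These programs spin-wait in their drain and device-removal loops, and rte_pause(), the CPU relax hint behind such loops, now lives in its own header rather than arriving transitively; that reading of the include change is my inference, not stated in the patch. A minimal spin-wait sketch:

        #include <rte_atomic.h>
        #include <rte_pause.h>

        /*
         * Spin until another lcore clears the flag; rte_pause() emits a
         * relax hint (PAUSE on x86) so the busy loop is friendlier to the
         * sibling hyper-thread and the memory pipeline.
         */
        static void
        wait_until_clear(rte_atomic16_t *flag)
        {
                while (rte_atomic16_read(flag) != 0)
                        rte_pause();
        }
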
*/ for (q = 0; q < rx_rings; q++) { retval = rte_eth_rx_queue_setup(port, q, rx_ring_size, diff --git a/examples/timer/Makefile b/examples/timer/Makefile index af12b7ba..7db48ec6 100644 --- a/examples/timer/Makefile +++ b/examples/timer/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile index af7be99a..add9f27b 100644 --- a/examples/vhost/Makefile +++ b/examples/vhost/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/vhost/main.c b/examples/vhost/main.c index e07f8669..4d1589d0 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -52,6 +52,7 @@ #include <rte_vhost.h> #include <rte_ip.h> #include <rte_tcp.h> +#include <rte_pause.h> #include "main.h" @@ -338,6 +339,19 @@ port_init(uint8_t port) return retval; } + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size, + &tx_ring_size); + if (retval != 0) { + RTE_LOG(ERR, VHOST_PORT, "Failed to adjust number of descriptors " + "for port %u: %s.\n", port, strerror(-retval)); + return retval; + } + if (rx_ring_size > RTE_TEST_RX_DESC_DEFAULT) { + RTE_LOG(ERR, VHOST_PORT, "Mbuf pool has an insufficient size " + "for Rx queues on port %u.\n", port); + return -1; + } + /* Setup the queues. */ for (q = 0; q < rx_rings; q ++) { retval = rte_eth_rx_queue_setup(port, q, rx_ring_size, @@ -691,7 +705,7 @@ static unsigned check_ports_num(unsigned nb_ports) return valid_num_ports; } -static inline struct vhost_dev *__attribute__((always_inline)) +static __rte_always_inline struct vhost_dev * find_vhost_dev(struct ether_addr *mac) { struct vhost_dev *vdev; @@ -791,7 +805,7 @@ unlink_vmdq(struct vhost_dev *vdev) } } -static inline void __attribute__((always_inline)) +static __rte_always_inline void virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev, struct rte_mbuf *m) { @@ -815,7 +829,7 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev, * Check if the packet destination MAC address is for a local device. If so then put * the packet on that devices RX queue. If not then return. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) { struct ether_hdr *pkt_hdr; @@ -851,7 +865,7 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) * Check if the destination MAC of a packet is one local VM, * and get its vlan tag, and offset if it is. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m, uint32_t *offset, uint16_t *vlan_tag) { @@ -919,7 +933,7 @@ free_pkts(struct rte_mbuf **pkts, uint16_t n) rte_pktmbuf_free(pkts[n]); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void do_drain_mbuf_table(struct mbuf_table *tx_q) { uint16_t count; @@ -936,7 +950,7 @@ do_drain_mbuf_table(struct mbuf_table *tx_q) * This function routes the TX packet to the correct interface. 
This * may be a local device or the physical port. */ -static inline void __attribute__((always_inline)) +static __rte_always_inline void virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) { struct mbuf_table *tx_q; @@ -1024,7 +1038,7 @@ queue2nic: } -static inline void __attribute__((always_inline)) +static __rte_always_inline void drain_mbuf_table(struct mbuf_table *tx_q) { static uint64_t prev_tsc; @@ -1044,7 +1058,7 @@ drain_mbuf_table(struct mbuf_table *tx_q) } } -static inline void __attribute__((always_inline)) +static __rte_always_inline void drain_eth_rx(struct vhost_dev *vdev) { uint16_t rx_count, enqueue_count; @@ -1088,7 +1102,7 @@ drain_eth_rx(struct vhost_dev *vdev) free_pkts(pkts, rx_count); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void drain_virtio_tx(struct vhost_dev *vdev) { struct rte_mbuf *pkts[MAX_PKT_BURST]; diff --git a/examples/vhost/virtio_net.c b/examples/vhost/virtio_net.c index 5e1ed44a..1ab57f52 100644 --- a/examples/vhost/virtio_net.c +++ b/examples/vhost/virtio_net.c @@ -80,7 +80,7 @@ vs_vhost_net_remove(struct vhost_dev *dev) free(dev->mem); } -static inline int __attribute__((always_inline)) +static __rte_always_inline int enqueue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr, struct rte_mbuf *m, uint16_t desc_idx) { @@ -217,7 +217,7 @@ vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id, return count; } -static inline int __attribute__((always_inline)) +static __rte_always_inline int dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr, struct rte_mbuf *m, uint16_t desc_idx, struct rte_mempool *mbuf_pool) diff --git a/examples/vhost_scsi/Makefile b/examples/vhost_scsi/Makefile new file mode 100644 index 00000000..0306a6ae --- /dev/null +++ b/examples/vhost_scsi/Makefile @@ -0,0 +1,59 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2017 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
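
[Editor's note] One subtlety in the vhost/main.c hunk above: rte_eth_dev_adjust_nb_rx_tx_desc() can round a request up to the device's minimum, so after adjusting, the example rejects any Rx ring that grew beyond RTE_TEST_RX_DESC_DEFAULT, because the mbuf pool was dimensioned from that default and a larger ring could exhaust it. A sketch of the guard (the constant's value here is illustrative):

        #include <rte_ethdev.h>

        #define RTE_TEST_RX_DESC_DEFAULT 1024   /* pool was sized from this */

        static int
        adjust_and_check(uint8_t port, uint16_t *nb_rxd, uint16_t *nb_txd)
        {
                int ret = rte_eth_dev_adjust_nb_rx_tx_desc(port, nb_rxd, nb_txd);

                if (ret != 0)
                        return ret;
                /* a ring the driver enlarged would starve the mbuf pool */
                if (*nb_rxd > RTE_TEST_RX_DESC_DEFAULT)
                        return -1;
                return 0;
        }
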
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(info This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +all: +else + +# binary name +APP = vhost-scsi + +# all source are stored in SRCS-y +SRCS-y := scsi.c vhost_scsi.c + +CFLAGS += -O2 -D_FILE_OFFSET_BITS=64 +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -D_GNU_SOURCE + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif diff --git a/examples/vhost_scsi/scsi.c b/examples/vhost_scsi/scsi.c new file mode 100644 index 00000000..54d3104e --- /dev/null +++ b/examples/vhost_scsi/scsi.c @@ -0,0 +1,539 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * This work is largely based on the "vhost-user-scsi" implementation by + * SPDK(https://github.com/spdk/spdk). 
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <assert.h> +#include <ctype.h> +#include <string.h> +#include <stddef.h> + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_byteorder.h> + +#include "vhost_scsi.h" +#include "scsi_spec.h" + +#define INQ_OFFSET(field) (offsetof(struct scsi_cdb_inquiry_data, field) + \ + sizeof(((struct scsi_cdb_inquiry_data *)0x0)->field)) + +static void +vhost_strcpy_pad(void *dst, const char *src, size_t size, int pad) +{ + size_t len; + + len = strlen(src); + if (len < size) { + memcpy(dst, src, len); + memset((char *)dst + len, pad, size - len); + } else { + memcpy(dst, src, size); + } +} + +static int +vhost_hex2bin(char ch) +{ + if ((ch >= '0') && (ch <= '9')) + return ch - '0'; + ch = tolower(ch); + if ((ch >= 'a') && (ch <= 'f')) + return ch - 'a' + 10; + return (int)ch; +} + +static void +vhost_bdev_scsi_set_naa_ieee_extended(const char *name, uint8_t *buf) +{ + int i, value, count = 0; + uint64_t *temp64, local_value; + + for (i = 0; (i < 16) && (name[i] != '\0'); i++) { + value = vhost_hex2bin(name[i]); + if (i % 2) + buf[count++] |= value << 4; + else + buf[count] = value; + } + + local_value = *(uint64_t *)buf; + /* + * see spc3r23 7.6.3.6.2, + * NAA IEEE Extended identifer format + */ + local_value &= 0x0fff000000ffffffull; + /* NAA 02, and 00 03 47 for IEEE Intel */ + local_value |= 0x2000000347000000ull; + + temp64 = (uint64_t *)buf; + *temp64 = rte_cpu_to_be_64(local_value); +} + +static void +scsi_task_build_sense_data(struct vhost_scsi_task *task, int sk, + int asc, int ascq) +{ + uint8_t *cp; + int resp_code; + + resp_code = 0x70; /* Current + Fixed format */ + + /* Sense Data */ + cp = (uint8_t *)task->resp->sense; + + /* VALID(7) RESPONSE CODE(6-0) */ + cp[0] = 0x80 | resp_code; + /* Obsolete */ + cp[1] = 0; + /* FILEMARK(7) EOM(6) ILI(5) SENSE KEY(3-0) */ + cp[2] = sk & 0xf; + /* INFORMATION */ + memset(&cp[3], 0, 4); + + /* ADDITIONAL SENSE LENGTH */ + cp[7] = 10; + + /* COMMAND-SPECIFIC INFORMATION */ + memset(&cp[8], 0, 4); + /* ADDITIONAL SENSE CODE */ + cp[12] = asc; + /* ADDITIONAL SENSE CODE QUALIFIER */ + cp[13] = ascq; + /* FIELD REPLACEABLE UNIT CODE */ + cp[14] = 0; + + /* SKSV(7) SENSE KEY SPECIFIC(6-0,7-0,7-0) */ + cp[15] = 0; + cp[16] = 0; + cp[17] = 0; + + /* SenseLength */ + task->resp->sense_len = 18; +} + +static void +scsi_task_set_status(struct vhost_scsi_task *task, int sc, int sk, + int asc, int ascq) +{ + if (sc == SCSI_STATUS_CHECK_CONDITION) + scsi_task_build_sense_data(task, sk, asc, ascq); + task->resp->status = sc; +} + +static int +vhost_bdev_scsi_inquiry_command(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task) +{ + int hlen = 0; + uint32_t alloc_len = 0; + uint16_t len = 0; + uint16_t *temp16; + int pc; + int pd; + int evpd; + int i; + uint8_t *buf; + struct scsi_cdb_inquiry *inq; + + inq = (struct scsi_cdb_inquiry *)task->req->cdb; + + assert(task->iovs_cnt == 1); + + /* At least 36Bytes for inquiry command */ + if (task->data_len < 0x24) + goto inq_error; + + pd = SPC_PERIPHERAL_DEVICE_TYPE_DISK; + pc = inq->page_code; + evpd = inq->evpd & 0x1; + + if (!evpd && pc) + goto inq_error; + + if (evpd) { + struct scsi_vpd_page *vpage = (struct scsi_vpd_page *) + task->iovs[0].iov_base; + + /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */ + vpage->peripheral = pd; + /* PAGE CODE */ + vpage->page_code = pc; + + switch (pc) { + case SPC_VPD_SUPPORTED_VPD_PAGES: + hlen = 4; + vpage->params[0] = 
SPC_VPD_SUPPORTED_VPD_PAGES; + vpage->params[1] = SPC_VPD_UNIT_SERIAL_NUMBER; + vpage->params[2] = SPC_VPD_DEVICE_IDENTIFICATION; + len = 3; + /* PAGE LENGTH */ + vpage->alloc_len = rte_cpu_to_be_16(len); + break; + case SPC_VPD_UNIT_SERIAL_NUMBER: + hlen = 4; + strncpy((char *)vpage->params, bdev->name, 32); + vpage->alloc_len = rte_cpu_to_be_16(32); + break; + case SPC_VPD_DEVICE_IDENTIFICATION: + buf = vpage->params; + struct scsi_desig_desc *desig; + + hlen = 4; + /* NAA designator */ + desig = (struct scsi_desig_desc *)buf; + desig->code_set = SPC_VPD_CODE_SET_BINARY; + desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; + desig->type = SPC_VPD_IDENTIFIER_TYPE_NAA; + desig->association = SPC_VPD_ASSOCIATION_LOGICAL_UNIT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 8; + vhost_bdev_scsi_set_naa_ieee_extended(bdev->name, + desig->desig); + len = sizeof(struct scsi_desig_desc) + 8; + + buf += sizeof(struct scsi_desig_desc) + desig->len; + + /* T10 Vendor ID designator */ + desig = (struct scsi_desig_desc *)buf; + desig->code_set = SPC_VPD_CODE_SET_ASCII; + desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; + desig->type = SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID; + desig->association = SPC_VPD_ASSOCIATION_LOGICAL_UNIT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 8 + 16 + 32; + strncpy((char *)desig->desig, "INTEL", 8); + vhost_strcpy_pad((char *)&desig->desig[8], + bdev->product_name, 16, ' '); + strncpy((char *)&desig->desig[24], bdev->name, 32); + len += sizeof(struct scsi_desig_desc) + 8 + 16 + 32; + + buf += sizeof(struct scsi_desig_desc) + desig->len; + + /* SCSI Device Name designator */ + desig = (struct scsi_desig_desc *)buf; + desig->code_set = SPC_VPD_CODE_SET_UTF8; + desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; + desig->type = SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME; + desig->association = SPC_VPD_ASSOCIATION_TARGET_DEVICE; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = snprintf((char *)desig->desig, + 255, "%s", bdev->name); + len += sizeof(struct scsi_desig_desc) + desig->len; + + buf += sizeof(struct scsi_desig_desc) + desig->len; + vpage->alloc_len = rte_cpu_to_be_16(len); + break; + default: + goto inq_error; + } + + } else { + struct scsi_cdb_inquiry_data *inqdata = + (struct scsi_cdb_inquiry_data *)task->iovs[0].iov_base; + /* Standard INQUIRY data */ + /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */ + inqdata->peripheral = pd; + + /* RMB(7) */ + inqdata->rmb = 0; + + /* VERSION */ + /* See SPC3/SBC2/MMC4/SAM2 for more details */ + inqdata->version = SPC_VERSION_SPC3; + + /* NORMACA(5) HISUP(4) RESPONSE DATA FORMAT(3-0) */ + /* format 2 */ /* hierarchical support */ + inqdata->response = 2 | 1 << 4; + + hlen = 5; + + /* SCCS(7) ACC(6) TPGS(5-4) 3PC(3) PROTECT(0) */ + /* Not support TPGS */ + inqdata->flags = 0; + + /* MULTIP */ + inqdata->flags2 = 0x10; + + /* WBUS16(5) SYNC(4) LINKED(3) CMDQUE(1) VS(0) */ + /* CMDQUE */ + inqdata->flags3 = 0x2; + + /* T10 VENDOR IDENTIFICATION */ + strncpy((char *)inqdata->t10_vendor_id, "INTEL", 8); + + /* PRODUCT IDENTIFICATION */ + strncpy((char *)inqdata->product_id, bdev->product_name, 16); + + /* PRODUCT REVISION LEVEL */ + strncpy((char *)inqdata->product_rev, "0001", 4); + + /* Standard inquiry data ends here. Only populate + * remaining fields if alloc_len indicates enough + * space to hold it. 
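
[Editor's note] The inquiry handler above keys everything off two CDB fields: the EVPD bit selects vital-product-data pages versus standard INQUIRY data, and PAGE CODE picks the page. For reference, a sketch that decodes those fields from the 6-byte CDB using the scsi_cdb_inquiry layout this patch declares in scsi_spec.h below; note the ALLOCATION LENGTH field is big-endian on the wire:

        #include <stdint.h>
        #include <rte_byteorder.h>

        struct scsi_cdb_inquiry {       /* as declared in scsi_spec.h below */
                uint8_t opcode;         /* 0x12, SPC_INQUIRY */
                uint8_t evpd;           /* bit 0: vital product data requested */
                uint8_t page_code;      /* which VPD page, when EVPD is set */
                uint16_t alloc_len;     /* big-endian allocation length */
                uint8_t control;
        };

        /*
         * Returns nonzero when a VPD page is requested; fills out which
         * page and how many bytes the initiator allowed us to return.
         */
        static int
        inquiry_decode(const struct scsi_cdb_inquiry *inq,
                       uint8_t *page, uint16_t *alloc_len)
        {
                *page = inq->page_code;
                *alloc_len = rte_be_to_cpu_16(inq->alloc_len);
                return inq->evpd & 0x1;
        }
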
+ */ + len = INQ_OFFSET(product_rev) - 5; + + if (alloc_len >= INQ_OFFSET(vendor)) { + /* Vendor specific */ + memset(inqdata->vendor, 0x20, 20); + len += sizeof(inqdata->vendor); + } + + if (alloc_len >= INQ_OFFSET(ius)) { + /* CLOCKING(3-2) QAS(1) IUS(0) */ + inqdata->ius = 0; + len += sizeof(inqdata->ius); + } + + if (alloc_len >= INQ_OFFSET(reserved)) { + /* Reserved */ + inqdata->reserved = 0; + len += sizeof(inqdata->reserved); + } + + /* VERSION DESCRIPTOR 1-8 */ + if (alloc_len >= INQ_OFFSET(reserved) + 2) { + temp16 = (uint16_t *)&inqdata->desc[0]; + *temp16 = rte_cpu_to_be_16(0x0960); + len += 2; + } + + if (alloc_len >= INQ_OFFSET(reserved) + 4) { + /* SPC-3 (no version claimed) */ + temp16 = (uint16_t *)&inqdata->desc[2]; + *temp16 = rte_cpu_to_be_16(0x0300); + len += 2; + } + + if (alloc_len >= INQ_OFFSET(reserved) + 6) { + /* SBC-2 (no version claimed) */ + temp16 = (uint16_t *)&inqdata->desc[4]; + *temp16 = rte_cpu_to_be_16(0x0320); + len += 2; + } + + if (alloc_len >= INQ_OFFSET(reserved) + 8) { + /* SAM-2 (no version claimed) */ + temp16 = (uint16_t *)&inqdata->desc[6]; + *temp16 = rte_cpu_to_be_16(0x0040); + len += 2; + } + + if (alloc_len > INQ_OFFSET(reserved) + 8) { + i = alloc_len - (INQ_OFFSET(reserved) + 8); + if (i > 30) + i = 30; + memset(&inqdata->desc[8], 0, i); + len += i; + } + + /* ADDITIONAL LENGTH */ + inqdata->add_len = len; + } + + /* STATUS GOOD */ + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + return hlen + len; + +inq_error: + scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION, + SCSI_SENSE_ILLEGAL_REQUEST, + SCSI_ASC_INVALID_FIELD_IN_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return 0; +} + +static int +vhost_bdev_scsi_readwrite(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task, + uint64_t lba, __rte_unused uint32_t xfer_len) +{ + uint32_t i; + uint64_t offset; + uint32_t nbytes = 0; + + offset = lba * bdev->blocklen; + + for (i = 0; i < task->iovs_cnt; i++) { + if (task->dxfer_dir == SCSI_DIR_TO_DEV) + memcpy(bdev->data + offset, task->iovs[i].iov_base, + task->iovs[i].iov_len); + else + memcpy(task->iovs[i].iov_base, bdev->data + offset, + task->iovs[i].iov_len); + offset += task->iovs[i].iov_len; + nbytes += task->iovs[i].iov_len; + } + + return nbytes; +} + +static int +vhost_bdev_scsi_process_block(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task) +{ + uint64_t lba, *temp64; + uint32_t xfer_len, *temp32; + uint16_t *temp16; + uint8_t *cdb = (uint8_t *)task->req->cdb; + + switch (cdb[0]) { + case SBC_READ_6: + case SBC_WRITE_6: + lba = (uint64_t)cdb[1] << 16; + lba |= (uint64_t)cdb[2] << 8; + lba |= (uint64_t)cdb[3]; + xfer_len = cdb[4]; + if (xfer_len == 0) + xfer_len = 256; + return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len); + + case SBC_READ_10: + case SBC_WRITE_10: + temp32 = (uint32_t *)&cdb[2]; + lba = rte_be_to_cpu_32(*temp32); + temp16 = (uint16_t *)&cdb[7]; + xfer_len = rte_be_to_cpu_16(*temp16); + return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len); + + case SBC_READ_12: + case SBC_WRITE_12: + temp32 = (uint32_t *)&cdb[2]; + lba = rte_be_to_cpu_32(*temp32); + temp32 = (uint32_t *)&cdb[6]; + xfer_len = rte_be_to_cpu_32(*temp32); + return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len); + + case SBC_READ_16: + case SBC_WRITE_16: + temp64 = (uint64_t *)&cdb[2]; + lba = rte_be_to_cpu_64(*temp64); + temp32 = (uint32_t *)&cdb[10]; + xfer_len = rte_be_to_cpu_32(*temp32); + return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len); + + case SBC_READ_CAPACITY_10: { + uint8_t 
buffer[8]; + + if (bdev->blockcnt - 1 > 0xffffffffULL) + memset(buffer, 0xff, 4); + else { + temp32 = (uint32_t *)buffer; + *temp32 = rte_cpu_to_be_32(bdev->blockcnt - 1); + } + temp32 = (uint32_t *)&buffer[4]; + *temp32 = rte_cpu_to_be_32(bdev->blocklen); + memcpy(task->iovs[0].iov_base, buffer, sizeof(buffer)); + task->resp->status = SCSI_STATUS_GOOD; + return sizeof(buffer); + } + + case SBC_SYNCHRONIZE_CACHE_10: + case SBC_SYNCHRONIZE_CACHE_16: + task->resp->status = SCSI_STATUS_GOOD; + return 0; + } + + scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION, + SCSI_SENSE_ILLEGAL_REQUEST, + SCSI_ASC_INVALID_FIELD_IN_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return 0; +} + +int +vhost_bdev_process_scsi_commands(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task) +{ + int len; + uint8_t *data; + uint64_t *temp64, fmt_lun = 0; + uint32_t *temp32; + const uint8_t *lun; + uint8_t *cdb = (uint8_t *)task->req->cdb; + + lun = (const uint8_t *)task->req->lun; + /* only 1 LUN supported */ + if (lun[0] != 1 || lun[1] >= 1) + return -1; + + switch (cdb[0]) { + case SPC_INQUIRY: + len = vhost_bdev_scsi_inquiry_command(bdev, task); + task->data_len = len; + break; + case SPC_REPORT_LUNS: + data = (uint8_t *)task->iovs[0].iov_base; + fmt_lun |= (0x0ULL & 0x00ffULL) << 48; + temp64 = (uint64_t *)&data[8]; + *temp64 = rte_cpu_to_be_64(fmt_lun); + temp32 = (uint32_t *)data; + *temp32 = rte_cpu_to_be_32(8); + task->data_len = 16; + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + break; + case SPC_MODE_SELECT_6: + case SPC_MODE_SELECT_10: + /* don't support it now */ + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + break; + case SPC_MODE_SENSE_6: + case SPC_MODE_SENSE_10: + /* don't support it now */ + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + break; + case SPC_TEST_UNIT_READY: + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + break; + default: + len = vhost_bdev_scsi_process_block(bdev, task); + task->data_len = len; + } + + return 0; +} diff --git a/examples/vhost_scsi/scsi_spec.h b/examples/vhost_scsi/scsi_spec.h new file mode 100644 index 00000000..60d761cb --- /dev/null +++ b/examples/vhost_scsi/scsi_spec.h @@ -0,0 +1,493 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * SCSI specification definition + * refer http://www.t10.org/drafts.htm#SPC_Family for SPC-3 and SBC-3 + */ + +#ifndef _SCSI_SPEC_H +#define _SCSI_SPEC_H + +#include <stdint.h> + +enum scsi_group_code { + SCSI_6BYTE_CMD = 0x00, + SCSI_10BYTE_CMD = 0x20, + SCSI_10BYTE_CMD2 = 0x40, + SCSI_16BYTE_CMD = 0x80, + SCSI_12BYTE_CMD = 0xa0, +}; + +#define SCSI_GROUP_MASK 0xe0 +#define SCSI_OPCODE_MASK 0x1f + +enum scsi_status { + SCSI_STATUS_GOOD = 0x00, + SCSI_STATUS_CHECK_CONDITION = 0x02, + SCSI_STATUS_CONDITION_MET = 0x04, + SCSI_STATUS_BUSY = 0x08, + SCSI_STATUS_INTERMEDIATE = 0x10, + SCSI_STATUS_INTERMEDIATE_CONDITION_MET = 0x14, + SCSI_STATUS_RESERVATION_CONFLICT = 0x18, + SCSI_STATUS_Obsolete = 0x22, + SCSI_STATUS_TASK_SET_FULL = 0x28, + SCSI_STATUS_ACA_ACTIVE = 0x30, + SCSI_STATUS_TASK_ABORTED = 0x40, +}; + +enum scsi_sense { + SCSI_SENSE_NO_SENSE = 0x00, + SCSI_SENSE_RECOVERED_ERROR = 0x01, + SCSI_SENSE_NOT_READY = 0x02, + SCSI_SENSE_MEDIUM_ERROR = 0x03, + SCSI_SENSE_HARDWARE_ERROR = 0x04, + SCSI_SENSE_ILLEGAL_REQUEST = 0x05, + SCSI_SENSE_UNIT_ATTENTION = 0x06, + SCSI_SENSE_DATA_PROTECT = 0x07, + SCSI_SENSE_BLANK_CHECK = 0x08, + SCSI_SENSE_VENDOR_SPECIFIC = 0x09, + SCSI_SENSE_COPY_ABORTED = 0x0a, + SCSI_SENSE_ABORTED_COMMAND = 0x0b, + SCSI_SENSE_VOLUME_OVERFLOW = 0x0d, + SCSI_SENSE_MISCOMPARE = 0x0e, +}; + +enum scsi_asc { + SCSI_ASC_NO_ADDITIONAL_SENSE = 0x00, + SCSI_ASC_PERIPHERAL_DEVICE_WRITE_FAULT = 0x03, + SCSI_ASC_LOGICAL_UNIT_NOT_READY = 0x04, + SCSI_ASC_WARNING = 0x0b, + SCSI_ASC_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x10, + SCSI_ASC_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x10, + SCSI_ASC_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x10, + SCSI_ASC_UNRECOVERED_READ_ERROR = 0x11, + SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION = 0x1d, + SCSI_ASC_INVALID_COMMAND_OPERATION_CODE = 0x20, + SCSI_ASC_ACCESS_DENIED = 0x20, + SCSI_ASC_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE = 0x21, + SCSI_ASC_INVALID_FIELD_IN_CDB = 0x24, + SCSI_ASC_LOGICAL_UNIT_NOT_SUPPORTED = 0x25, + SCSI_ASC_WRITE_PROTECTED = 0x27, + SCSI_ASC_FORMAT_COMMAND_FAILED = 0x31, + SCSI_ASC_INTERNAL_TARGET_FAILURE = 0x44, +}; + +enum scsi_ascq { + SCSI_ASCQ_CAUSE_NOT_REPORTABLE = 0x00, + SCSI_ASCQ_BECOMING_READY = 0x01, + SCSI_ASCQ_FORMAT_COMMAND_FAILED = 0x01, + SCSI_ASCQ_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x01, + SCSI_ASCQ_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x02, + SCSI_ASCQ_NO_ACCESS_RIGHTS = 0x02, + SCSI_ASCQ_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x03, + SCSI_ASCQ_POWER_LOSS_EXPECTED = 0x08, + SCSI_ASCQ_INVALID_LU_IDENTIFIER = 0x09, +}; + +enum spc_opcode { + /* SPC3 related */ + SPC_ACCESS_CONTROL_IN = 0x86, + SPC_ACCESS_CONTROL_OUT = 0x87, + SPC_EXTENDED_COPY = 0x83, + SPC_INQUIRY = 0x12, + SPC_LOG_SELECT = 0x4c, + SPC_LOG_SENSE = 0x4d, + SPC_MODE_SELECT_6 = 0x15, + SPC_MODE_SELECT_10 = 0x55, + SPC_MODE_SENSE_6 = 0x1a, + SPC_MODE_SENSE_10 = 0x5a, + SPC_PERSISTENT_RESERVE_IN = 0x5e, + SPC_PERSISTENT_RESERVE_OUT = 0x5f, + SPC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e, + SPC_READ_ATTRIBUTE = 0x8c, + 
SPC_READ_BUFFER = 0x3c, + SPC_RECEIVE_COPY_RESULTS = 0x84, + SPC_RECEIVE_DIAGNOSTIC_RESULTS = 0x1c, + SPC_REPORT_LUNS = 0xa0, + SPC_REQUEST_SENSE = 0x03, + SPC_SEND_DIAGNOSTIC = 0x1d, + SPC_TEST_UNIT_READY = 0x00, + SPC_WRITE_ATTRIBUTE = 0x8d, + SPC_WRITE_BUFFER = 0x3b, + + SPC_SERVICE_ACTION_IN_12 = 0xab, + SPC_SERVICE_ACTION_OUT_12 = 0xa9, + SPC_SERVICE_ACTION_IN_16 = 0x9e, + SPC_SERVICE_ACTION_OUT_16 = 0x9f, + + SPC_VARIABLE_LENGTH = 0x7f, + + SPC_MO_CHANGE_ALIASES = 0x0b, + SPC_MO_SET_DEVICE_IDENTIFIER = 0x06, + SPC_MO_SET_PRIORITY = 0x0e, + SPC_MO_SET_TARGET_PORT_GROUPS = 0x0a, + SPC_MO_SET_TIMESTAMP = 0x0f, + SPC_MI_REPORT_ALIASES = 0x0b, + SPC_MI_REPORT_DEVICE_IDENTIFIER = 0x05, + SPC_MI_REPORT_PRIORITY = 0x0e, + SPC_MI_REPORT_SUPPORTED_OPERATION_CODES = 0x0c, + SPC_MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS = 0x0d, + SPC_MI_REPORT_TARGET_PORT_GROUPS = 0x0a, + SPC_MI_REPORT_TIMESTAMP = 0x0f, + + /* SPC2 related (Obsolete) */ + SPC2_RELEASE_6 = 0x17, + SPC2_RELEASE_10 = 0x57, + SPC2_RESERVE_6 = 0x16, + SPC2_RESERVE_10 = 0x56, +}; + +enum scc_opcode { + SCC_MAINTENANCE_IN = 0xa3, + SCC_MAINTENANCE_OUT = 0xa4, +}; + +enum sbc_opcode { + SBC_COMPARE_AND_WRITE = 0x89, + SBC_FORMAT_UNIT = 0x04, + SBC_GET_LBA_STATUS = 0x0012009e, + SBC_ORWRITE_16 = 0x8b, + SBC_PRE_FETCH_10 = 0x34, + SBC_PRE_FETCH_16 = 0x90, + SBC_READ_6 = 0x08, + SBC_READ_10 = 0x28, + SBC_READ_12 = 0xa8, + SBC_READ_16 = 0x88, + SBC_READ_ATTRIBUTE = 0x8c, + SBC_READ_BUFFER = 0x3c, + SBC_READ_CAPACITY_10 = 0x25, + SBC_READ_DEFECT_DATA_10 = 0x37, + SBC_READ_DEFECT_DATA_12 = 0xb7, + SBC_READ_LONG_10 = 0x3e, + SBC_REASSIGN_BLOCKS = 0x07, + SBC_SANITIZE = 0x48, + SBC_START_STOP_UNIT = 0x1b, + SBC_SYNCHRONIZE_CACHE_10 = 0x35, + SBC_SYNCHRONIZE_CACHE_16 = 0x91, + SBC_UNMAP = 0x42, + SBC_VERIFY_10 = 0x2f, + SBC_VERIFY_12 = 0xaf, + SBC_VERIFY_16 = 0x8f, + SBC_WRITE_6 = 0x0a, + SBC_WRITE_10 = 0x2a, + SBC_WRITE_12 = 0xaa, + SBC_WRITE_16 = 0x8a, + SBC_WRITE_AND_VERIFY_10 = 0x2e, + SBC_WRITE_AND_VERIFY_12 = 0xae, + SBC_WRITE_AND_VERIFY_16 = 0x8e, + SBC_WRITE_LONG_10 = 0x3f, + SBC_WRITE_SAME_10 = 0x41, + SBC_WRITE_SAME_16 = 0x93, + SBC_XDREAD_10 = 0x52, + SBC_XDWRITE_10 = 0x50, + SBC_XDWRITEREAD_10 = 0x53, + SBC_XPWRITE_10 = 0x51, + + SBC_SAI_READ_CAPACITY_16 = 0x10, + SBC_SAI_READ_LONG_16 = 0x11, + SBC_SAO_WRITE_LONG_16 = 0x11, + + SBC_VL_READ_32 = 0x0009, + SBC_VL_VERIFY_32 = 0x000a, + SBC_VL_WRITE_32 = 0x000b, + SBC_VL_WRITE_AND_VERIFY_32 = 0x000c, + SBC_VL_WRITE_SAME_32 = 0x000d, + SBC_VL_XDREAD_32 = 0x0003, + SBC_VL_XDWRITE_32 = 0x0004, + SBC_VL_XDWRITEREAD_32 = 0x0007, + SBC_VL_XPWRITE_32 = 0x0006, +}; + +enum mmc_opcode { + /* MMC6 */ + MMC_READ_DISC_STRUCTURE = 0xad, + + /* MMC4 */ + MMC_BLANK = 0xa1, + MMC_CLOSE_TRACK_SESSION = 0x5b, + MMC_ERASE_10 = 0x2c, + MMC_FORMAT_UNIT = 0x04, + MMC_GET_CONFIGURATION = 0x46, + MMC_GET_EVENT_STATUS_NOTIFICATION = 0x4a, + MMC_GET_PERFORMANCE = 0xac, + MMC_INQUIRY = 0x12, + MMC_LOAD_UNLOAD_MEDIUM = 0xa6, + MMC_MECHANISM_STATUS = 0xbd, + MMC_MODE_SELECT_10 = 0x55, + MMC_MODE_SENSE_10 = 0x5a, + MMC_PAUSE_RESUME = 0x4b, + MMC_PLAY_AUDIO_10 = 0x45, + MMC_PLAY_AUDIO_12 = 0xa5, + MMC_PLAY_AUDIO_MSF = 0x47, + MMC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e, + MMC_READ_10 = 0x28, + MMC_READ_12 = 0xa8, + MMC_READ_BUFFER = 0x3c, + MMC_READ_BUFFER_CAPACITY = 0x5c, + MMC_READ_CAPACITY = 0x25, + MMC_READ_CD = 0xbe, + MMC_READ_CD_MSF = 0xb9, + MMC_READ_DISC_INFORMATION = 0x51, + MMC_READ_DVD_STRUCTURE = 0xad, + MMC_READ_FORMAT_CAPACITIES = 0x23, + MMC_READ_SUB_CHANNEL = 0x42, + MMC_READ_TOC_PMA_ATIP = 0x43, + 
MMC_READ_TRACK_INFORMATION = 0x52, + MMC_REPAIR_TRACK = 0x58, + MMC_REPORT_KEY = 0xa4, + MMC_REQUEST_SENSE = 0x03, + MMC_RESERVE_TRACK = 0x53, + MMC_SCAN = 0xba, + MMC_SEEK_10 = 0x2b, + MMC_SEND_CUE_SHEET = 0x5d, + MMC_SEND_DVD_STRUCTURE = 0xbf, + MMC_SEND_KEY = 0xa3, + MMC_SEND_OPC_INFORMATION = 0x54, + MMC_SET_CD_SPEED = 0xbb, + MMC_SET_READ_AHEAD = 0xa7, + MMC_SET_STREAMING = 0xb6, + MMC_START_STOP_UNIT = 0x1b, + MMC_STOP_PLAY_SCAN = 0x4e, + MMC_SYNCHRONIZE_CACHE = 0x35, + MMC_TEST_UNIT_READY = 0x00, + MMC_VERIFY_10 = 0x2f, + MMC_WRITE_10 = 0xa2, + MMC_WRITE_12 = 0xaa, + MMC_WRITE_AND_VERIFY_10 = 0x2e, + MMC_WRITE_BUFFER = 0x3b, +}; + +enum ssc_opcode { + SSC_ERASE_6 = 0x19, + SSC_FORMAT_MEDIUM = 0x04, + SSC_LOAD_UNLOAD = 0x1b, + SSC_LOCATE_10 = 0x2b, + SSC_LOCATE_16 = 0x92, + SSC_MOVE_MEDIUM_ATTACHED = 0xa7, + SSC_READ_6 = 0x08, + SSC_READ_BLOCK_LIMITS = 0x05, + SSC_READ_ELEMENT_STATUS_ATTACHED = 0xb4, + SSC_READ_POSITION = 0x34, + SSC_READ_REVERSE_6 = 0x0f, + SSC_RECOVER_BUFFERED_DATA = 0x14, + SSC_REPORT_DENSITY_SUPPORT = 0x44, + SSC_REWIND = 0x01, + SSC_SET_CAPACITY = 0x0b, + SSC_SPACE_6 = 0x11, + SSC_SPACE_16 = 0x91, + SSC_VERIFY_6 = 0x13, + SSC_WRITE_6 = 0x0a, + SSC_WRITE_FILEMARKS_6 = 0x10, +}; + +enum spc_vpd { + SPC_VPD_DEVICE_IDENTIFICATION = 0x83, + SPC_VPD_EXTENDED_INQUIRY_DATA = 0x86, + SPC_VPD_MANAGEMENT_NETWORK_ADDRESSES = 0x85, + SPC_VPD_MODE_PAGE_POLICY = 0x87, + SPC_VPD_SCSI_PORTS = 0x88, + SPC_VPD_SOFTWARE_INTERFACE_IDENTIFICATION = 0x84, + SPC_VPD_SUPPORTED_VPD_PAGES = 0x00, + SPC_VPD_UNIT_SERIAL_NUMBER = 0x80, + SPC_VPD_BLOCK_LIMITS = 0xb0, + SPC_VPD_BLOCK_DEV_CHARS = 0xb1, + SPC_VPD_BLOCK_THIN_PROVISION = 0xb2, +}; + +enum { + SPC_PERIPHERAL_DEVICE_TYPE_DISK = 0x00, + SPC_PERIPHERAL_DEVICE_TYPE_TAPE = 0x01, + SPC_PERIPHERAL_DEVICE_TYPE_DVD = 0x05, + SPC_PERIPHERAL_DEVICE_TYPE_CHANGER = 0x08, + + SPC_VERSION_NONE = 0x00, + SPC_VERSION_SPC = 0x03, + SPC_VERSION_SPC2 = 0x04, + SPC_VERSION_SPC3 = 0x05, + SPC_VERSION_SPC4 = 0x06, + + SPC_PROTOCOL_IDENTIFIER_FC = 0x00, + SPC_PROTOCOL_IDENTIFIER_PSCSI = 0x01, + SPC_PROTOCOL_IDENTIFIER_SSA = 0x02, + SPC_PROTOCOL_IDENTIFIER_IEEE1394 = 0x03, + SPC_PROTOCOL_IDENTIFIER_RDMA = 0x04, + SPC_PROTOCOL_IDENTIFIER_ISCSI = 0x05, + SPC_PROTOCOL_IDENTIFIER_SAS = 0x06, + SPC_PROTOCOL_IDENTIFIER_ADT = 0x07, + SPC_PROTOCOL_IDENTIFIER_ATA = 0x08, + + SPC_VPD_CODE_SET_BINARY = 0x01, + SPC_VPD_CODE_SET_ASCII = 0x02, + SPC_VPD_CODE_SET_UTF8 = 0x03, + + SPC_VPD_ASSOCIATION_LOGICAL_UNIT = 0x00, + SPC_VPD_ASSOCIATION_TARGET_PORT = 0x01, + SPC_VPD_ASSOCIATION_TARGET_DEVICE = 0x02, + + SPC_VPD_IDENTIFIER_TYPE_VENDOR_SPECIFIC = 0x00, + SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID = 0x01, + SPC_VPD_IDENTIFIER_TYPE_EUI64 = 0x02, + SPC_VPD_IDENTIFIER_TYPE_NAA = 0x03, + SPC_VPD_IDENTIFIER_TYPE_RELATIVE_TARGET_PORT = 0x04, + SPC_VPD_IDENTIFIER_TYPE_TARGET_PORT_GROUP = 0x05, + SPC_VPD_IDENTIFIER_TYPE_LOGICAL_UNIT_GROUP = 0x06, + SPC_VPD_IDENTIFIER_TYPE_MD5_LOGICAL_UNIT = 0x07, + SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME = 0x08, +}; + +struct scsi_cdb_inquiry { + uint8_t opcode; + uint8_t evpd; + uint8_t page_code; + uint16_t alloc_len; + uint8_t control; +}; + +struct scsi_cdb_inquiry_data { + uint8_t peripheral; + uint8_t rmb; + uint8_t version; + uint8_t response; + uint8_t add_len; + uint8_t flags; + uint8_t flags2; + uint8_t flags3; + uint8_t t10_vendor_id[8]; + uint8_t product_id[16]; + uint8_t product_rev[4]; + uint8_t vendor[20]; + uint8_t ius; + uint8_t reserved; + uint8_t desc[]; +}; + +struct scsi_vpd_page { + uint8_t peripheral; + uint8_t page_code; + 
uint16_t alloc_len; + uint8_t params[]; +}; + +#define SCSI_VEXT_REF_CHK 0x01 +#define SCSI_VEXT_APP_CHK 0x02 +#define SCSI_VEXT_GRD_CHK 0x04 +#define SCSI_VEXT_SIMPSUP 0x01 +#define SCSI_VEXT_ORDSUP 0x02 +#define SCSI_VEXT_HEADSUP 0x04 +#define SCSI_VEXT_PRIOR_SUP 0x08 +#define SCSI_VEXT_GROUP_SUP 0x10 +#define SCSI_VEXT_UASK_SUP 0x20 +#define SCSI_VEXT_V_SUP 0x01 +#define SCSI_VEXT_NV_SUP 0x02 +#define SCSI_VEXT_CRD_SUP 0x04 +#define SCSI_VEXT_WU_SUP 0x08 + +struct scsi_vpd_ext_inquiry { + uint8_t peripheral; + uint8_t page_code; + uint16_t alloc_len; + uint8_t check; + uint8_t sup; + uint8_t sup2; + uint8_t luiclr; + uint8_t cbcs; + uint8_t micro_dl; + uint8_t reserved[54]; +}; + +#define SPC_VPD_DESIG_PIV 0x80 + +/* designation descriptor */ +struct scsi_desig_desc { + uint8_t code_set : 4; + uint8_t protocol_id : 4; + uint8_t type : 4; + uint8_t association : 2; + uint8_t reserved0 : 1; + uint8_t piv : 1; + uint8_t reserved1; + uint8_t len; + uint8_t desig[]; +}; + +/* mode page policy descriptor */ +struct scsi_mpage_policy_desc { + uint8_t page_code; + uint8_t sub_page_code; + uint8_t policy; + uint8_t reserved; +}; + +/* target port descriptor */ +struct scsi_tgt_port_desc { + uint8_t code_set; + uint8_t desig_type; + uint8_t reserved; + uint8_t len; + uint8_t designator[]; +}; + +/* SCSI port designation descriptor */ +struct scsi_port_desc { + uint16_t reserved; + uint16_t rel_port_id; + uint16_t reserved2; + uint16_t init_port_len; + uint16_t init_port_id; + uint16_t reserved3; + uint16_t tgt_desc_len; + uint8_t tgt_desc[]; +}; + +/* SCSI UNMAP block descriptor */ +struct scsi_unmap_bdesc { + /* UNMAP LOGICAL BLOCK ADDRESS */ + uint64_t lba; + + /* NUMBER OF LOGICAL BLOCKS */ + uint32_t block_count; + + /* RESERVED */ + uint32_t reserved; +}; + +#define SCSI_UNMAP_LBPU (1 << 7) +#define SCSI_UNMAP_LBPWS (1 << 6) +#define SCSI_UNMAP_LBPWS10 (1 << 5) + +#define SCSI_UNMAP_FULL_PROVISIONING 0x00 +#define SCSI_UNMAP_RESOURCE_PROVISIONING 0x01 +#define SCSI_UNMAP_THIN_PROVISIONING 0x02 + +#endif /* _SCSI_SPEC_H */ diff --git a/examples/vhost_scsi/vhost_scsi.c b/examples/vhost_scsi/vhost_scsi.c new file mode 100644 index 00000000..b4f1f8d2 --- /dev/null +++ b/examples/vhost_scsi/vhost_scsi.c @@ -0,0 +1,474 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <unistd.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <semaphore.h> +#include <linux/virtio_scsi.h> +#include <linux/virtio_ring.h> + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_vhost.h> + +#include "vhost_scsi.h" +#include "scsi_spec.h" + +#define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\ + (1 << VIRTIO_RING_F_EVENT_IDX) |\ + (1 << VIRTIO_SCSI_F_INOUT) |\ + (1 << VIRTIO_SCSI_F_CHANGE)) + +/* Path to folder where character device will be created. Can be set by user. */ +static char dev_pathname[PATH_MAX] = ""; + +static struct vhost_scsi_ctrlr *g_vhost_ctrlr; +static int g_should_stop; +static sem_t exit_sem; + +static struct vhost_scsi_ctrlr * +vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name) +{ + /* currently we only support 1 socket file fd */ + return g_vhost_ctrlr; +} + +static uint64_t gpa_to_vva(int vid, uint64_t gpa) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + int ret = 0; + + ret = rte_vhost_get_ifname(vid, path, PATH_MAX); + if (ret) { + fprintf(stderr, "Cannot get socket name\n"); + assert(ret == 0); + } + + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Controller is not ready\n"); + assert(ctrlr != NULL); + } + + assert(ctrlr->mem != NULL); + + return rte_vhost_gpa_to_vva(ctrlr->mem, gpa); +} + +static struct vring_desc * +descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc) +{ + return &vq_desc[cur_desc->next]; +} + +static bool +descriptor_has_next(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_NEXT); +} + +static bool +descriptor_is_wr(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_WRITE); +} + +static void +submit_completion(struct vhost_scsi_task *task) +{ + struct rte_vhost_vring *vq; + struct vring_used *used; + + vq = task->vq; + used = vq->used; + /* Fill out the next entry in the "used" ring. id = the + * index of the descriptor that contained the SCSI request. + * len = the total amount of data transferred for the SCSI + * request. We must report the correct len, for variable + * length SCSI CDBs, where we may return less data than + * allocated by the guest VM. + */ + used->ring[used->idx & (vq->size - 1)].id = task->req_idx; + used->ring[used->idx & (vq->size - 1)].len = task->data_len; + used->idx++; + + /* Send an interrupt back to the guest VM so that it knows + * a completion is ready to be processed. 
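+ * Note: on weakly ordered CPUs a write barrier would normally be + * required between filling the used ring entry and updating + * used->idx; this minimal example relies on x86 store ordering.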
+ */ + eventfd_write(vq->callfd, (eventfd_t)1); +} + +static void +vhost_process_read_payload_chain(struct vhost_scsi_task *task) +{ + void *data; + + task->iovs_cnt = 0; + task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); + + while (descriptor_has_next(task->desc)) { + task->desc = descriptor_get_next(task->vq->desc, task->desc); + data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); + task->iovs[task->iovs_cnt].iov_base = data; + task->iovs[task->iovs_cnt].iov_len = task->desc->len; + task->data_len += task->desc->len; + task->iovs_cnt++; + } +} + +static void +vhost_process_write_payload_chain(struct vhost_scsi_task *task) +{ + void *data; + + task->iovs_cnt = 0; + + do { + data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); + task->iovs[task->iovs_cnt].iov_base = data; + task->iovs[task->iovs_cnt].iov_len = task->desc->len; + task->data_len += task->desc->len; + task->iovs_cnt++; + task->desc = descriptor_get_next(task->vq->desc, task->desc); + } while (descriptor_has_next(task->desc)); + + task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); +} + +static struct vhost_block_dev * +vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial, + uint32_t blk_size, uint64_t blk_cnt, + bool wce_enable) +{ + struct vhost_block_dev *bdev; + + bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE); + if (!bdev) + return NULL; + + strncpy(bdev->name, bdev_name, sizeof(bdev->name)); + strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name)); + bdev->blocklen = blk_size; + bdev->blockcnt = blk_cnt; + bdev->write_cache = wce_enable; + + /* use memory as disk storage space */ + bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0); + if (!bdev->data) { + fprintf(stderr, "not enough reserved huge memory for the disk\n"); + rte_free(bdev); + return NULL; + } + + return bdev; +} + +static void +process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx) +{ + int ret; + struct vhost_scsi_queue *scsi_vq; + struct rte_vhost_vring *vq; + + scsi_vq = &ctrlr->bdev->queues[q_idx]; + vq = &scsi_vq->vq; + ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq); + assert(ret == 0); + + while (vq->avail->idx != scsi_vq->last_used_idx) { + int req_idx; + uint16_t last_idx; + struct vhost_scsi_task *task; + + last_idx = scsi_vq->last_used_idx & (vq->size - 1); + req_idx = vq->avail->ring[last_idx]; + + task = rte_zmalloc(NULL, sizeof(*task), 0); + assert(task != NULL); + + task->ctrlr = ctrlr; + task->bdev = ctrlr->bdev; + task->vq = vq; + task->req_idx = req_idx; + task->desc = &task->vq->desc[task->req_idx]; + + /* does not support indirect descriptors */ + assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0); + scsi_vq->last_used_idx++; + + task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); + + task->desc = descriptor_get_next(task->vq->desc, task->desc); + if (!descriptor_has_next(task->desc)) { + task->dxfer_dir = SCSI_DIR_NONE; + task->resp = (void *)(uintptr_t) + gpa_to_vva(task->bdev->vid, + task->desc->addr); + + } else if (!descriptor_is_wr(task->desc)) { + task->dxfer_dir = SCSI_DIR_TO_DEV; + vhost_process_write_payload_chain(task); + } else { + task->dxfer_dir = SCSI_DIR_FROM_DEV; + vhost_process_read_payload_chain(task); + } + + ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task); + if (ret) { + /* command could not be processed: report a bad target */ + task->resp->response = VIRTIO_SCSI_S_BAD_TARGET; + } else { + /* command succeeded */ + task->resp->response = VIRTIO_SCSI_S_OK; + 
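/* status 0 is the SCSI GOOD status; resid 0 means the full transfer completed */ + 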
task->resp->status = 0; + task->resp->resid = 0; + } + submit_completion(task); + rte_free(task); + } +} + +/* Main framework for processing IOs */ +static void * +ctrlr_worker(void *arg) +{ + uint32_t idx, num; + struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg; + cpu_set_t cpuset; + pthread_t thread; + + thread = pthread_self(); + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + + num = rte_vhost_get_vring_num(ctrlr->bdev->vid); + fprintf(stdout, "Ctrlr Worker Thread Started with %u vrings\n", num); + + if (num != NUM_OF_SCSI_QUEUES) { + fprintf(stderr, "Only 1 IO queue is supported\n"); + exit(1); + } + + while (!g_should_stop && ctrlr->bdev != NULL) { + /* At least 3 vrings: queue 0 is the control queue and queue 1 + * the event queue, so IO requests start at queue 2. TMF and + * hotplug are not supported by this example. + */ + for (idx = 2; idx < num; idx++) + process_requestq(ctrlr, idx); + } + + fprintf(stdout, "Ctrlr Worker Thread Exiting\n"); + sem_post(&exit_sem); + return NULL; +} + +static int +new_device(int vid) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + struct vhost_scsi_queue *scsi_vq; + struct rte_vhost_vring *vq; + pthread_t tid; + int i, ret; + + ret = rte_vhost_get_ifname(vid, path, PATH_MAX); + if (ret) { + fprintf(stderr, "Cannot get socket name\n"); + return -1; + } + + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Controller is not ready\n"); + return -1; + } + + ret = rte_vhost_get_mem_table(vid, &ctrlr->mem); + if (ret) { + fprintf(stderr, "Failed to get controller memory regions\n"); + return -1; + } + assert(ctrlr->mem != NULL); + + /* hardcoded block device: 32768 blocks of 4096 bytes (128 MiB), + * backed by memory + */ + ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0", + 4096, 32768, 0); + if (!ctrlr->bdev) + return -1; + + ctrlr->bdev->vid = vid; + + /* Disable Notifications */ + for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) { + rte_vhost_enable_guest_notification(vid, i, 0); + /* restore used index */ + scsi_vq = &ctrlr->bdev->queues[i]; + vq = &scsi_vq->vq; + ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq); + assert(ret == 0); + scsi_vq->last_used_idx = vq->used->idx; + scsi_vq->last_avail_idx = vq->used->idx; + } + + g_should_stop = 0; + fprintf(stdout, "New Device %s, Device ID %d\n", path, vid); + if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) != 0) { + fprintf(stderr, "Worker thread creation failed\n"); + return -1; + } + pthread_detach(tid); + return 0; +} + +static void +destroy_device(int vid) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + + rte_vhost_get_ifname(vid, path, PATH_MAX); + fprintf(stdout, "Destroy %s Device ID %d\n", path, vid); + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Destroy failed: controller not found\n"); + return; + } + ctrlr->bdev = NULL; + g_should_stop = 1; + + sem_wait(&exit_sem); +} + +static const struct vhost_device_ops vhost_scsi_device_ops = { + .new_device = new_device, + .destroy_device = destroy_device, +}; + +static struct vhost_scsi_ctrlr * +vhost_scsi_ctrlr_construct(const char *ctrlr_name) +{ + int ret; + struct vhost_scsi_ctrlr *ctrlr; + char *path; + char cwd[PATH_MAX]; + + /* always use current directory */ + path = getcwd(cwd, PATH_MAX); + if (!path) { + fprintf(stderr, "Cannot get current working directory\n"); + return NULL; + } + snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name); + + if (access(dev_pathname, F_OK) != -1) { + if (unlink(dev_pathname) != 0) + 
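/* a stale socket that cannot be removed would break the driver registration below */ + 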
rte_exit(EXIT_FAILURE, "Cannot remove %s.\n", + dev_pathname); + } + + if (rte_vhost_driver_register(dev_pathname, 0) != 0) { + fprintf(stderr, "Failed to register vhost driver for %s\n", dev_pathname); + return NULL; + } + + fprintf(stdout, "socket file: %s created\n", dev_pathname); + + ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES); + if (ret != 0) { + fprintf(stderr, "Failed to set vhost driver features\n"); + return NULL; + } + + ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE); + if (!ctrlr) + return NULL; + + rte_vhost_driver_callback_register(dev_pathname, + &vhost_scsi_device_ops); + + return ctrlr; +} + +static void +signal_handler(__rte_unused int signum) +{ + if (access(dev_pathname, F_OK) == 0) + unlink(dev_pathname); + exit(0); +} + +int main(int argc, char *argv[]) +{ + int ret; + + signal(SIGINT, signal_handler); + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + + g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket"); + if (g_vhost_ctrlr == NULL) { + fprintf(stderr, "Failed to construct vhost scsi controller\n"); + return -1; + } + + if (sem_init(&exit_sem, 0, 0) < 0) { + fprintf(stderr, "Failed to initialize exit_sem\n"); + return -1; + } + + if (rte_vhost_driver_start(dev_pathname) < 0) + rte_exit(EXIT_FAILURE, "Failed to start vhost driver.\n"); + + /* sleep until the signal handler exits the application */ + while (1) + sleep(1); + + return 0; +} + diff --git a/examples/vhost_scsi/vhost_scsi.h b/examples/vhost_scsi/vhost_scsi.h new file mode 100644 index 00000000..edb416da --- /dev/null +++ b/examples/vhost_scsi/vhost_scsi.h @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VHOST_SCSI_H_ +#define _VHOST_SCSI_H_ + +#include <sys/uio.h> +#include <stdint.h> +#include <linux/virtio_scsi.h> +#include <linux/virtio_ring.h> + +#include <rte_vhost.h> + +struct vhost_scsi_queue { + struct rte_vhost_vring vq; + uint16_t last_avail_idx; + uint16_t last_used_idx; +}; + +#define NUM_OF_SCSI_QUEUES 3 + +struct vhost_block_dev { + /** ID for vhost library. */ + int vid; + /** Queues for the block device */ + struct vhost_scsi_queue queues[NUM_OF_SCSI_QUEUES]; + /** Unique name for this block device. */ + char name[64]; + + /** Unique product name for this kind of block device. */ + char product_name[256]; + + /** Size in bytes of a logical block for the backend */ + uint32_t blocklen; + + /** Number of blocks */ + uint64_t blockcnt; + + /** write cache enabled, not used at the moment */ + int write_cache; + + /** use memory as disk storage space */ + uint8_t *data; +}; + +struct vhost_scsi_ctrlr { + /** Only support 1 LUN for the example */ + struct vhost_block_dev *bdev; + /** VM memory region */ + struct rte_vhost_memory *mem; +} __rte_cache_aligned; + +#define VHOST_SCSI_MAX_IOVS 128 + +enum scsi_data_dir { + SCSI_DIR_NONE = 0, + SCSI_DIR_TO_DEV = 1, + SCSI_DIR_FROM_DEV = 2, +}; + +struct vhost_scsi_task { + int req_idx; + uint32_t dxfer_dir; + uint32_t data_len; + struct virtio_scsi_cmd_req *req; + struct virtio_scsi_cmd_resp *resp; + struct iovec iovs[VHOST_SCSI_MAX_IOVS]; + uint32_t iovs_cnt; + struct vring_desc *desc; + struct rte_vhost_vring *vq; + struct vhost_block_dev *bdev; + struct vhost_scsi_ctrlr *ctrlr; +}; + +int vhost_bdev_process_scsi_commands(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task); + +#endif /* _VHOST_SCSI_H_ */ diff --git a/examples/vhost_xen/Makefile b/examples/vhost_xen/Makefile index 47e14898..ad2466aa 100644 --- a/examples/vhost_xen/Makefile +++ b/examples/vhost_xen/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/vhost_xen/main.c b/examples/vhost_xen/main.c index d9ef140f..eba4d35a 100644 --- a/examples/vhost_xen/main.c +++ b/examples/vhost_xen/main.c @@ -48,6 +48,7 @@ #include <rte_ethdev.h> #include <rte_log.h> #include <rte_string_fns.h> +#include <rte_pause.h> #include "main.h" #include "virtio-net.h" @@ -278,7 +279,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) struct rte_eth_rxconf *rxconf; struct rte_eth_conf port_conf; uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count(); - const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT, tx_ring_size = RTE_TEST_TX_DESC_DEFAULT; + uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT; + uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT; int retval; uint16_t q; @@ -306,6 +308,17 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size, + &tx_ring_size); + if (retval != 0) + return retval; + if (rx_ring_size > RTE_TEST_RX_DESC_DEFAULT || + tx_ring_size > RTE_TEST_TX_DESC_DEFAULT) { + RTE_LOG(ERR, VHOST_PORT, "Mbuf pool has an insufficient size for " + "port %u.\n", port); + return -1; + } + rte_eth_dev_info_get(port, &dev_info); rxconf = &dev_info.default_rxconf; rxconf->rx_drop_en = 1; @@ -510,7 +523,7 @@ static unsigned check_ports_num(unsigned nb_ports) * Function to 
convert guest physical addresses to vhost virtual addresses. This * is used to convert virtio buffer addresses. */ -static inline uint64_t __attribute__((always_inline)) +static __rte_always_inline uint64_t gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa) { struct virtio_memory_regions *region; @@ -534,10 +547,10 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa) /* * This function adds buffers to the virtio devices RX virtqueue. Buffers can * be received from the physical port or from another virtio device. A packet - * count is returned to indicate the number of packets that were succesfully + * count is returned to indicate the number of packets that were successfully * added to the RX queue. */ -static inline uint32_t __attribute__((always_inline)) +static __rte_always_inline uint32_t virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count) { struct vhost_virtqueue *vq; @@ -662,7 +675,7 @@ virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count) /* * Compares a packet destination MAC address to a device MAC address. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb) { return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0; @@ -757,7 +770,7 @@ unlink_vmdq(struct virtio_net *dev) * Check if the packet destination MAC address is for a local device. If so then put * the packet on that devices RX queue. If not then return. */ -static inline unsigned __attribute__((always_inline)) +static __rte_always_inline unsigned virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m) { struct virtio_net_data_ll *dev_ll; @@ -814,7 +827,7 @@ virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m) * This function routes the TX packet to the correct interface. This may be a local device * or the physical port. */ -static inline void __attribute__((always_inline)) +static __rte_always_inline void virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, uint16_t vlan_tag) { struct mbuf_table *tx_q; @@ -883,7 +896,7 @@ virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool * return; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool) { struct rte_mbuf m; diff --git a/examples/vhost_xen/xenstore_parse.c b/examples/vhost_xen/xenstore_parse.c index 26d24320..ab089f1b 100644 --- a/examples/vhost_xen/xenstore_parse.c +++ b/examples/vhost_xen/xenstore_parse.c @@ -293,7 +293,7 @@ err: } /* - * This function maps grant node of vring or mbuf pool to a continous virtual address space, + * This function maps grant node of vring or mbuf pool to a continuous virtual address space, * and returns mapped address, pfn array, index array * @param gntnode * Pointer to grant node @@ -460,7 +460,7 @@ cleanup_mempool(struct xen_mempool *mempool) /* * process mempool node idx#_mempool_gref, idx = 0, 1, 2... - * untill we encounter a node that doesn't exist. + * until we encounter a node that doesn't exist. 
*/ int parse_mempoolnode(struct xen_guest *guest) diff --git a/examples/vm_power_manager/guest_cli/main.c b/examples/vm_power_manager/guest_cli/main.c index 5ac98ed3..ac2b1fa5 100644 --- a/examples/vm_power_manager/guest_cli/main.c +++ b/examples/vm_power_manager/guest_cli/main.c @@ -35,12 +35,10 @@ #include <stdio.h> #include <string.h> #include <stdint.h> -#include <errno.h> #include <sys/epoll.h> #include <fcntl.h> #include <unistd.h> #include <stdlib.h> -#include <unistd.h> #include <errno.h> */ #include <signal.h> diff --git a/examples/vm_power_manager/main.c b/examples/vm_power_manager/main.c index 97178d14..c33fcc93 100644 --- a/examples/vm_power_manager/main.c +++ b/examples/vm_power_manager/main.c @@ -34,12 +34,10 @@ #include <stdio.h> #include <string.h> #include <stdint.h> -#include <errno.h> #include <sys/epoll.h> #include <fcntl.h> #include <unistd.h> #include <stdlib.h> -#include <unistd.h> #include <signal.h> #include <errno.h> diff --git a/examples/vmdq/Makefile b/examples/vmdq/Makefile index 198e3bfe..50172822 100644 --- a/examples/vmdq/Makefile +++ b/examples/vmdq/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/vmdq/main.c b/examples/vmdq/main.c index f639355d..8949a115 100644 --- a/examples/vmdq/main.c +++ b/examples/vmdq/main.c @@ -49,7 +49,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -63,10 +62,8 @@ #include <rte_debug.h> #include <rte_ether.h> #include <rte_ethdev.h> -#include <rte_log.h> #include <rte_mempool.h> #include <rte_mbuf.h> -#include <rte_memcpy.h> #define MAX_QUEUES 1024 /* @@ -195,7 +192,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) struct rte_eth_rxconf *rxconf; struct rte_eth_conf port_conf; uint16_t rxRings, txRings; - const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT, txRingSize = RTE_TEST_TX_DESC_DEFAULT; + uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT; + uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT; int retval; uint16_t q; uint16_t queues_per_pool; @@ -253,6 +251,17 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize, + &txRingSize); + if (retval != 0) + return retval; + if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, + RTE_TEST_TX_DESC_DEFAULT)) { + printf("Mbuf pool has an insufficient size for port %u.\n", + port); + return -1; + } + rte_eth_dev_info_get(port, &dev_info); rxconf = &dev_info.default_rxconf; rxconf->rx_drop_en = 1; diff --git a/examples/vmdq_dcb/Makefile b/examples/vmdq_dcb/Makefile index 8c51131b..0c200a98 100644 --- a/examples/vmdq_dcb/Makefile +++ b/examples/vmdq_dcb/Makefile @@ -33,7 +33,7 @@ ifeq ($(RTE_SDK),) $(error "Please define RTE_SDK environment variable") endif -# Default target, can be overriden by command line or environment +# Default target, can be overridden by command line or environment RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk diff --git a/examples/vmdq_dcb/main.c b/examples/vmdq_dcb/main.c index 35ffffad..b6ebccb2 100644 --- a/examples/vmdq_dcb/main.c +++ b/examples/vmdq_dcb/main.c @@ -49,7 +49,6 @@ #include 
<rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -63,10 +62,8 @@ #include <rte_debug.h> #include <rte_ether.h> #include <rte_ethdev.h> -#include <rte_log.h> #include <rte_mempool.h> #include <rte_mbuf.h> -#include <rte_memcpy.h> /* basic constants used in application */ #define MAX_QUEUES 1024 @@ -227,8 +224,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) { struct rte_eth_dev_info dev_info; struct rte_eth_conf port_conf = {0}; - const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT; - const uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT; + uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT; + uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT; int retval; uint16_t q; uint16_t queues_per_pool; @@ -299,6 +296,17 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize, + &txRingSize); + if (retval != 0) + return retval; + if (RTE_MAX(rxRingSize, txRingSize) > + RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, RTE_TEST_TX_DESC_DEFAULT)) { + printf("Mbuf pool has an insufficient size for port %u.\n", + port); + return -1; + } + for (q = 0; q < num_queues; q++) { retval = rte_eth_rx_queue_setup(port, q, rxRingSize, rte_eth_dev_socket_id(port), |