diff options
Diffstat (limited to 'examples')
283 files changed, 100675 insertions, 0 deletions
diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 00000000..b28b30e7 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,98 @@ +# BSD LICENSE +# +# Copyright(c) 2014 6WIND S.A. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of 6WIND S.A. nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += bond +DIRS-y += cmdline +DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += distributor +ifneq ($(ICP_ROOT),) +DIRS-y += dpdk_qat +endif +DIRS-y += ethtool +DIRS-y += exception_path +DIRS-y += helloworld +DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += ip_pipeline +ifeq ($(CONFIG_RTE_LIBRTE_LPM),y) +DIRS-$(CONFIG_RTE_IP_FRAG) += ip_reassembly +DIRS-$(CONFIG_RTE_IP_FRAG) += ip_fragmentation +endif +ifeq ($(CONFIG_RTE_LIBRTE_ACL)$(CONFIG_RTE_LIBRTE_HASH)$(CONFIG_RTE_LIBRTE_LPM),yyy) +DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += ipsec-secgw +endif +DIRS-y += ipv4_multicast +DIRS-$(CONFIG_RTE_LIBRTE_KNI) += kni +DIRS-y += l2fwd +ifneq ($(PQOS_INSTALL_PATH),) +DIRS-y += l2fwd-cat +endif +DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += l2fwd-crypto +DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem +DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += l2fwd-jobstats +DIRS-y += l2fwd-keepalive +DIRS-$(CONFIG_RTE_LIBRTE_LPM) += l3fwd +DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl +ifeq ($(CONFIG_RTE_LIBRTE_LPM),y) +DIRS-$(CONFIG_RTE_LIBRTE_POWER) += l3fwd-power +DIRS-y += l3fwd-vf +endif +DIRS-y += link_status_interrupt +DIRS-$(CONFIG_RTE_LIBRTE_LPM) += load_balancer +DIRS-y += multi_process +DIRS-y += netmap_compat/bridge +DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += packet_ordering +DIRS-$(CONFIG_RTE_LIBRTE_IEEE1588) += ptpclient +DIRS-$(CONFIG_RTE_LIBRTE_METER) += qos_meter +DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += qos_sched +DIRS-y += quota_watermark +DIRS-$(CONFIG_RTE_ETHDEV_RXTX_CALLBACKS) += rxtx_callbacks +DIRS-y += skeleton +DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += tep_termination +DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += timer +DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost +DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += vhost_xen +DIRS-y += vmdq +DIRS-y += vmdq_dcb +ifeq 
($(CONFIG_RTE_LIBRTE_POWER), y) +ifeq ($(shell pkg-config --atleast-version=0.9.3 libvirt; echo $$?), 0) +DIRS-y += vm_power_manager +else +$(info vm_power_manager requires libvirt >= 0.9.3) +endif +endif + +include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/bond/Makefile b/examples/bond/Makefile new file mode 100644 index 00000000..626d79d9 --- /dev/null +++ b/examples/bond/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = bond_app + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +CFLAGS += -O3 + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/bond/main.c b/examples/bond/main.c new file mode 100644 index 00000000..53bd0441 --- /dev/null +++ b/examples/bond/main.c @@ -0,0 +1,786 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <sys/queue.h> +#include <sys/socket.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdarg.h> +#include <inttypes.h> +#include <getopt.h> +#include <termios.h> +#include <unistd.h> +#include <pthread.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_log.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_memcpy.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_arp.h> +#include <rte_spinlock.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_ipaddr.h> +#include <cmdline_parse_etheraddr.h> +#include <cmdline_socket.h> +#include <cmdline.h> + +#include "main.h" + +#include <rte_devargs.h> + + +#include "rte_byteorder.h" +#include "rte_cpuflags.h" +#include "rte_eth_bond.h" + +#define RTE_LOGTYPE_DCB RTE_LOGTYPE_USER1 + +#define NB_MBUF (1024*8) + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ +#define BURST_RX_INTERVAL_NS (10) /* RX poll interval ~100ns */ + +/* + * RX and TX Prefetch, Host, and Write-back threshold values should be + * carefully set for optimal performance. Consult the network + * controller's datasheet and supporting DPDK documentation for guidance + * on how these parameters should be set. 
+ */ +#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ +#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ +#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ +#define RX_FTHRESH (MAX_PKT_BURST * 2)/**< Default values of RX free threshold reg. */ + +/* + * These default values are optimized for use with the Intel(R) 82599 10 GbE + * Controller and the DPDK ixgbe PMD. Consider using other values for other + * network controllers and/or network drivers. + */ +#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ +#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ +#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_RX_DESC_DEFAULT 128 +#define RTE_TX_DESC_DEFAULT 512 + +#define BOND_IP_1 7 +#define BOND_IP_2 0 +#define BOND_IP_3 0 +#define BOND_IP_4 10 + +/* not defined under linux */ +#ifndef NIPQUAD +#define NIPQUAD_FMT "%u.%u.%u.%u" +#endif + +#define MAX_PORTS 4 +#define PRINT_MAC(addr) printf("%02"PRIx8":%02"PRIx8":%02"PRIx8 \ + ":%02"PRIx8":%02"PRIx8":%02"PRIx8, \ + addr.addr_bytes[0], addr.addr_bytes[1], addr.addr_bytes[2], \ + addr.addr_bytes[3], addr.addr_bytes[4], addr.addr_bytes[5]) + +uint8_t slaves[RTE_MAX_ETHPORTS]; +uint8_t slaves_count; + +static uint8_t BOND_PORT = 0xff; + +static struct rte_mempool *mbuf_pool; + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_NONE, + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP, + }, + }, + .txmode = { + .mq_mode = 
ETH_MQ_TX_NONE, + }, +}; + +static void +slave_port_init(uint8_t portid, struct rte_mempool *mbuf_pool) +{ + int retval; + + if (portid >= rte_eth_dev_count()) + rte_exit(EXIT_FAILURE, "Invalid port\n"); + + retval = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (retval != 0) + rte_exit(EXIT_FAILURE, "port %u: configuration failed (res=%d)\n", + portid, retval); + + /* RX setup */ + retval = rte_eth_rx_queue_setup(portid, 0, RTE_RX_DESC_DEFAULT, + rte_eth_dev_socket_id(portid), NULL, + mbuf_pool); + if (retval < 0) + rte_exit(retval, " port %u: RX queue 0 setup failed (res=%d)", + portid, retval); + + /* TX setup */ + retval = rte_eth_tx_queue_setup(portid, 0, RTE_TX_DESC_DEFAULT, + rte_eth_dev_socket_id(portid), NULL); + + if (retval < 0) + rte_exit(retval, "port %u: TX queue 0 setup failed (res=%d)", + portid, retval); + + retval = rte_eth_dev_start(portid); + if (retval < 0) + rte_exit(retval, + "Start port %d failed (res=%d)", + portid, retval); + + struct ether_addr addr; + + rte_eth_macaddr_get(portid, &addr); + printf("Port %u MAC: ", (unsigned)portid); + PRINT_MAC(addr); + printf("\n"); +} + +static void +bond_port_init(struct rte_mempool *mbuf_pool) +{ + int retval; + uint8_t i; + + retval = rte_eth_bond_create("bond0", BONDING_MODE_ALB, + 0 /*SOCKET_ID_ANY*/); + if (retval < 0) + rte_exit(EXIT_FAILURE, + "Faled to create bond port\n"); + + BOND_PORT = (uint8_t)retval; + + retval = rte_eth_dev_configure(BOND_PORT, 1, 1, &port_conf); + if (retval != 0) + rte_exit(EXIT_FAILURE, "port %u: configuration failed (res=%d)\n", + BOND_PORT, retval); + + /* RX setup */ + retval = rte_eth_rx_queue_setup(BOND_PORT, 0, RTE_RX_DESC_DEFAULT, + rte_eth_dev_socket_id(BOND_PORT), NULL, + mbuf_pool); + if (retval < 0) + rte_exit(retval, " port %u: RX queue 0 setup failed (res=%d)", + BOND_PORT, retval); + + /* TX setup */ + retval = rte_eth_tx_queue_setup(BOND_PORT, 0, RTE_TX_DESC_DEFAULT, + rte_eth_dev_socket_id(BOND_PORT), NULL); + + if (retval < 0) + 
rte_exit(retval, "port %u: TX queue 0 setup failed (res=%d)", + BOND_PORT, retval); + + for (i = 0; i < slaves_count; i++) { + if (rte_eth_bond_slave_add(BOND_PORT, slaves[i]) == -1) + rte_exit(-1, "Oooops! adding slave (%u) to bond (%u) failed!\n", + slaves[i], BOND_PORT); + + } + + retval = rte_eth_dev_start(BOND_PORT); + if (retval < 0) + rte_exit(retval, "Start port %d failed (res=%d)", BOND_PORT, retval); + + rte_eth_promiscuous_enable(BOND_PORT); + + struct ether_addr addr; + + rte_eth_macaddr_get(BOND_PORT, &addr); + printf("Port %u MAC: ", (unsigned)BOND_PORT); + PRINT_MAC(addr); + printf("\n"); +} + +static inline size_t +get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto) +{ + size_t vlan_offset = 0; + + if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { + struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); + + vlan_offset = sizeof(struct vlan_hdr); + *proto = vlan_hdr->eth_proto; + + if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { + vlan_hdr = vlan_hdr + 1; + + *proto = vlan_hdr->eth_proto; + vlan_offset += sizeof(struct vlan_hdr); + } + } + return vlan_offset; +} + +struct global_flag_stru_t { + int LcoreMainIsRunning; + int LcoreMainCore; + uint32_t port_packets[4]; + rte_spinlock_t lock; +}; +struct global_flag_stru_t global_flag_stru; +struct global_flag_stru_t *global_flag_stru_p = &global_flag_stru; + +/* + * Main thread that does the work, reading from INPUT_PORT + * and writing to OUTPUT_PORT + */ +static int lcore_main(__attribute__((unused)) void *arg1) +{ + struct rte_mbuf *pkts[MAX_PKT_BURST] __rte_cache_aligned; + struct ether_addr d_addr; + + struct ether_hdr *eth_hdr; + struct arp_hdr *arp_hdr; + struct ipv4_hdr *ipv4_hdr; + uint16_t ether_type, offset; + + uint16_t rx_cnt; + uint32_t bond_ip; + int i = 0; + uint8_t is_free; + + bond_ip = BOND_IP_1 | (BOND_IP_2 << 8) | + (BOND_IP_3 << 16) | (BOND_IP_4 << 24); + + rte_spinlock_trylock(&global_flag_stru_p->lock); + + while (global_flag_stru_p->LcoreMainIsRunning) { + 
rte_spinlock_unlock(&global_flag_stru_p->lock); + rx_cnt = rte_eth_rx_burst(BOND_PORT, 0, pkts, MAX_PKT_BURST); + is_free = 0; + + /* If didn't receive any packets, wait and go to next iteration */ + if (rx_cnt == 0) { + rte_delay_us(50); + continue; + } + + /* Search incoming data for ARP packets and prepare response */ + for (i = 0; i < rx_cnt; i++) { + if (rte_spinlock_trylock(&global_flag_stru_p->lock) == 1) { + global_flag_stru_p->port_packets[0]++; + rte_spinlock_unlock(&global_flag_stru_p->lock); + } + eth_hdr = rte_pktmbuf_mtod(pkts[i], struct ether_hdr *); + ether_type = eth_hdr->ether_type; + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) + printf("VLAN taged frame, offset:"); + offset = get_vlan_offset(eth_hdr, &ether_type); + if (offset > 0) + printf("%d\n", offset); + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { + if (rte_spinlock_trylock(&global_flag_stru_p->lock) == 1) { + global_flag_stru_p->port_packets[1]++; + rte_spinlock_unlock(&global_flag_stru_p->lock); + } + arp_hdr = (struct arp_hdr *)((char *)(eth_hdr + 1) + offset); + if (arp_hdr->arp_data.arp_tip == bond_ip) { + if (arp_hdr->arp_op == rte_cpu_to_be_16(ARP_OP_REQUEST)) { + arp_hdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); + /* Switch src and dst data and set bonding MAC */ + ether_addr_copy(&eth_hdr->s_addr, &eth_hdr->d_addr); + rte_eth_macaddr_get(BOND_PORT, &eth_hdr->s_addr); + ether_addr_copy(&arp_hdr->arp_data.arp_sha, &arp_hdr->arp_data.arp_tha); + arp_hdr->arp_data.arp_tip = arp_hdr->arp_data.arp_sip; + rte_eth_macaddr_get(BOND_PORT, &d_addr); + ether_addr_copy(&d_addr, &arp_hdr->arp_data.arp_sha); + arp_hdr->arp_data.arp_sip = bond_ip; + rte_eth_tx_burst(BOND_PORT, 0, &pkts[i], 1); + is_free = 1; + } else { + rte_eth_tx_burst(BOND_PORT, 0, NULL, 0); + } + } + } else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + if (rte_spinlock_trylock(&global_flag_stru_p->lock) == 1) { + global_flag_stru_p->port_packets[2]++; + rte_spinlock_unlock(&global_flag_stru_p->lock); + } + 
ipv4_hdr = (struct ipv4_hdr *)((char *)(eth_hdr + 1) + offset); + if (ipv4_hdr->dst_addr == bond_ip) { + ether_addr_copy(&eth_hdr->s_addr, &eth_hdr->d_addr); + rte_eth_macaddr_get(BOND_PORT, &eth_hdr->s_addr); + ipv4_hdr->dst_addr = ipv4_hdr->src_addr; + ipv4_hdr->src_addr = bond_ip; + rte_eth_tx_burst(BOND_PORT, 0, &pkts[i], 1); + } + + } + + /* Free processed packets */ + if (is_free == 0) + rte_pktmbuf_free(pkts[i]); + } + rte_spinlock_trylock(&global_flag_stru_p->lock); + } + rte_spinlock_unlock(&global_flag_stru_p->lock); + printf("BYE lcore_main\n"); + return 0; +} + +struct cmd_obj_send_result { + cmdline_fixed_string_t action; + cmdline_ipaddr_t ip; +}; +static inline void get_string(struct cmd_obj_send_result *res, char *buf, uint8_t size) +{ + snprintf(buf, size, NIPQUAD_FMT, + ((unsigned)((unsigned char *)&(res->ip.addr.ipv4))[0]), + ((unsigned)((unsigned char *)&(res->ip.addr.ipv4))[1]), + ((unsigned)((unsigned char *)&(res->ip.addr.ipv4))[2]), + ((unsigned)((unsigned char *)&(res->ip.addr.ipv4))[3]) + ); +} +static void cmd_obj_send_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + + struct cmd_obj_send_result *res = parsed_result; + char ip_str[INET6_ADDRSTRLEN]; + + struct rte_mbuf *created_pkt; + struct ether_hdr *eth_hdr; + struct arp_hdr *arp_hdr; + + uint32_t bond_ip; + size_t pkt_size; + + if (res->ip.family == AF_INET) + get_string(res, ip_str, INET_ADDRSTRLEN); + else + cmdline_printf(cl, "Wrong IP format. 
Only IPv4 is supported\n"); + + bond_ip = BOND_IP_1 | (BOND_IP_2 << 8) | + (BOND_IP_3 << 16) | (BOND_IP_4 << 24); + + created_pkt = rte_pktmbuf_alloc(mbuf_pool); + pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr); + created_pkt->data_len = pkt_size; + created_pkt->pkt_len = pkt_size; + + eth_hdr = rte_pktmbuf_mtod(created_pkt, struct ether_hdr *); + rte_eth_macaddr_get(BOND_PORT, &eth_hdr->s_addr); + memset(&eth_hdr->d_addr, 0xFF, ETHER_ADDR_LEN); + eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP); + + arp_hdr = (struct arp_hdr *)((char *)eth_hdr + sizeof(struct ether_hdr)); + arp_hdr->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER); + arp_hdr->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + arp_hdr->arp_hln = ETHER_ADDR_LEN; + arp_hdr->arp_pln = sizeof(uint32_t); + arp_hdr->arp_op = rte_cpu_to_be_16(ARP_OP_REQUEST); + + rte_eth_macaddr_get(BOND_PORT, &arp_hdr->arp_data.arp_sha); + arp_hdr->arp_data.arp_sip = bond_ip; + memset(&arp_hdr->arp_data.arp_tha, 0, ETHER_ADDR_LEN); + arp_hdr->arp_data.arp_tip = + ((unsigned char *)&res->ip.addr.ipv4)[0] | + (((unsigned char *)&res->ip.addr.ipv4)[1] << 8) | + (((unsigned char *)&res->ip.addr.ipv4)[2] << 16) | + (((unsigned char *)&res->ip.addr.ipv4)[3] << 24); + rte_eth_tx_burst(BOND_PORT, 0, &created_pkt, 1); + + rte_delay_ms(100); + cmdline_printf(cl, "\n"); +} + +cmdline_parse_token_string_t cmd_obj_action_send = + TOKEN_STRING_INITIALIZER(struct cmd_obj_send_result, action, "send"); +cmdline_parse_token_ipaddr_t cmd_obj_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_obj_send_result, ip); + +cmdline_parse_inst_t cmd_obj_send = { + .f = cmd_obj_send_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "send client_ip", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_obj_action_send, + (void *)&cmd_obj_ip, + NULL, + }, +}; + +struct cmd_start_result { + cmdline_fixed_string_t start; +}; + +static void cmd_start_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline 
*cl, + __attribute__((unused)) void *data) +{ + int slave_core_id = rte_lcore_id(); + + rte_spinlock_trylock(&global_flag_stru_p->lock); + if (global_flag_stru_p->LcoreMainIsRunning == 0) { + if (lcore_config[global_flag_stru_p->LcoreMainCore].state != WAIT) { + rte_spinlock_unlock(&global_flag_stru_p->lock); + return; + } + rte_spinlock_unlock(&global_flag_stru_p->lock); + } else { + cmdline_printf(cl, "lcore_main already running on core:%d\n", + global_flag_stru_p->LcoreMainCore); + rte_spinlock_unlock(&global_flag_stru_p->lock); + return; + } + + /* start lcore main on core != master_core - ARP response thread */ + slave_core_id = rte_get_next_lcore(rte_lcore_id(), 1, 0); + if ((slave_core_id >= RTE_MAX_LCORE) || (slave_core_id == 0)) + return; + + rte_spinlock_trylock(&global_flag_stru_p->lock); + global_flag_stru_p->LcoreMainIsRunning = 1; + rte_spinlock_unlock(&global_flag_stru_p->lock); + cmdline_printf(cl, + "Starting lcore_main on core %d:%d " + "Our IP:%d.%d.%d.%d\n", + slave_core_id, + rte_eal_remote_launch(lcore_main, NULL, slave_core_id), + BOND_IP_1, + BOND_IP_2, + BOND_IP_3, + BOND_IP_4 + ); +} + +cmdline_parse_token_string_t cmd_start_start = + TOKEN_STRING_INITIALIZER(struct cmd_start_result, start, "start"); + +cmdline_parse_inst_t cmd_start = { + .f = cmd_start_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "starts listening if not started at startup", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_start_start, + NULL, + }, +}; + +struct cmd_help_result { + cmdline_fixed_string_t help; +}; + +static void cmd_help_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + cmdline_printf(cl, + "ALB - link bonding mode 6 example\n" + "send IP - sends one ARPrequest thru bonding for IP.\n" + "start - starts listening ARPs.\n" + "stop - stops lcore_main.\n" + "show - shows some bond info: ex. 
active slaves etc.\n" + "help - prints help.\n" + "quit - terminate all threads and quit.\n" + ); +} + +cmdline_parse_token_string_t cmd_help_help = + TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help"); + +cmdline_parse_inst_t cmd_help = { + .f = cmd_help_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "show help", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_help_help, + NULL, + }, +}; + +struct cmd_stop_result { + cmdline_fixed_string_t stop; +}; + +static void cmd_stop_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + rte_spinlock_trylock(&global_flag_stru_p->lock); + if (global_flag_stru_p->LcoreMainIsRunning == 0) { + cmdline_printf(cl, + "lcore_main not running on core:%d\n", + global_flag_stru_p->LcoreMainCore); + rte_spinlock_unlock(&global_flag_stru_p->lock); + return; + } + global_flag_stru_p->LcoreMainIsRunning = 0; + rte_eal_wait_lcore(global_flag_stru_p->LcoreMainCore); + cmdline_printf(cl, + "lcore_main stopped on core:%d\n", + global_flag_stru_p->LcoreMainCore); + rte_spinlock_unlock(&global_flag_stru_p->lock); +} + +cmdline_parse_token_string_t cmd_stop_stop = + TOKEN_STRING_INITIALIZER(struct cmd_stop_result, stop, "stop"); + +cmdline_parse_inst_t cmd_stop = { + .f = cmd_stop_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "this command do not handle any arguments", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_stop_stop, + NULL, + }, +}; + +struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + rte_spinlock_trylock(&global_flag_stru_p->lock); + if (global_flag_stru_p->LcoreMainIsRunning == 0) { + cmdline_printf(cl, + "lcore_main not running on core:%d\n", + global_flag_stru_p->LcoreMainCore); + 
rte_spinlock_unlock(&global_flag_stru_p->lock); + cmdline_quit(cl); + return; + } + global_flag_stru_p->LcoreMainIsRunning = 0; + rte_eal_wait_lcore(global_flag_stru_p->LcoreMainCore); + cmdline_printf(cl, + "lcore_main stopped on core:%d\n", + global_flag_stru_p->LcoreMainCore); + rte_spinlock_unlock(&global_flag_stru_p->lock); + cmdline_quit(cl); +} + +cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + +cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "this command do not handle any arguments", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_quit_quit, + NULL, + }, +}; + +struct cmd_show_result { + cmdline_fixed_string_t show; +}; + +static void cmd_show_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + uint8_t slaves[16] = {0}; + uint8_t len = 16; + struct ether_addr addr; + uint8_t i = 0; + + while (i < slaves_count) { + rte_eth_macaddr_get(i, &addr); + PRINT_MAC(addr); + printf("\n"); + i++; + } + + rte_spinlock_trylock(&global_flag_stru_p->lock); + cmdline_printf(cl, + "Active_slaves:%d " + "packets received:Tot:%d Arp:%d IPv4:%d\n", + rte_eth_bond_active_slaves_get(BOND_PORT, slaves, len), + global_flag_stru_p->port_packets[0], + global_flag_stru_p->port_packets[1], + global_flag_stru_p->port_packets[2]); + rte_spinlock_unlock(&global_flag_stru_p->lock); +} + +cmdline_parse_token_string_t cmd_show_show = + TOKEN_STRING_INITIALIZER(struct cmd_show_result, show, "show"); + +cmdline_parse_inst_t cmd_show = { + .f = cmd_show_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "this command do not handle any arguments", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_show_show, + NULL, + }, +}; + +/****** CONTEXT (list of instruction) */ + +cmdline_parse_ctx_t main_ctx[] = { + 
(cmdline_parse_inst_t *)&cmd_start, + (cmdline_parse_inst_t *)&cmd_obj_send, + (cmdline_parse_inst_t *)&cmd_stop, + (cmdline_parse_inst_t *)&cmd_show, + (cmdline_parse_inst_t *)&cmd_quit, + (cmdline_parse_inst_t *)&cmd_help, + NULL, +}; + +/* prompt function, called from main on MASTER lcore */ +static void prompt(__attribute__((unused)) void *arg1) +{ + struct cmdline *cl; + + cl = cmdline_stdin_new(main_ctx, "bond6>"); + if (cl != NULL) { + cmdline_interact(cl); + cmdline_stdin_exit(cl); + } +} + +/* Main function, does initialisation and calls the per-lcore functions */ +int +main(int argc, char *argv[]) +{ + int ret; + uint8_t nb_ports, i; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + rte_eal_devargs_dump(stdout); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "Give at least one port\n"); + else if (nb_ports > MAX_PORTS) + rte_exit(EXIT_FAILURE, "You can have max 4 ports\n"); + + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NB_MBUF, 32, + 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* initialize all ports */ + slaves_count = nb_ports; + for (i = 0; i < nb_ports; i++) { + slave_port_init(i, mbuf_pool); + slaves[i] = i; + } + + bond_port_init(mbuf_pool); + + rte_spinlock_init(&global_flag_stru_p->lock); + int slave_core_id = rte_lcore_id(); + + /* check state of lcores */ + RTE_LCORE_FOREACH_SLAVE(slave_core_id) { + if (lcore_config[slave_core_id].state != WAIT) + return -EBUSY; + } + /* start lcore main on core != master_core - ARP response thread */ + slave_core_id = rte_get_next_lcore(rte_lcore_id(), 1, 0); + if ((slave_core_id >= RTE_MAX_LCORE) || (slave_core_id == 0)) + return -EPERM; + + global_flag_stru_p->LcoreMainIsRunning = 1; + global_flag_stru_p->LcoreMainCore = slave_core_id; + printf("Starting lcore_main on core 
%d:%d Our IP:%d.%d.%d.%d\n", + slave_core_id, + rte_eal_remote_launch((lcore_function_t *)lcore_main, + NULL, + slave_core_id), + BOND_IP_1, + BOND_IP_2, + BOND_IP_3, + BOND_IP_4 + ); + + /* Start prompt for user interact */ + prompt(NULL); + + rte_delay_ms(100); + return 0; +} diff --git a/examples/bond/main.h b/examples/bond/main.h new file mode 100644 index 00000000..ea331e56 --- /dev/null +++ b/examples/bond/main.h @@ -0,0 +1,39 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +int main(int argc, char *argv[]); + +#endif /* ifndef _MAIN_H_ */ diff --git a/examples/cmdline/Makefile b/examples/cmdline/Makefile new file mode 100644 index 00000000..9ebe4355 --- /dev/null +++ b/examples/cmdline/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = cmdline + +# all sources are stored in SRCS-y +SRCS-y := main.c commands.c parse_obj_list.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +CFLAGS_parse_obj_list.o := -D_GNU_SOURCE + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/cmdline/commands.c b/examples/cmdline/commands.c new file mode 100644 index 00000000..f3ba2476 --- /dev/null +++ b/examples/cmdline/commands.c @@ -0,0 +1,283 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org> + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <stdarg.h> +#include <errno.h> +#include <netinet/in.h> +#include <termios.h> +#ifndef __linux__ + #ifdef __FreeBSD__ + #include <sys/socket.h> + #else + #include <net/socket.h> + #endif +#endif + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_ipaddr.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline.h> + +#include <rte_string_fns.h> + +#include "parse_obj_list.h" + +struct object_list global_obj_list; + +/* not defined under linux */ +#ifndef NIPQUAD +#define NIPQUAD_FMT "%u.%u.%u.%u" +#define NIPQUAD(addr) \ + (unsigned)((unsigned char *)&addr)[0], \ + (unsigned)((unsigned char *)&addr)[1], \ + (unsigned)((unsigned char *)&addr)[2], \ + (unsigned)((unsigned char *)&addr)[3] +#endif + +#ifndef NIP6 +#define NIP6_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x" +#define NIP6(addr) \ + (unsigned)((addr).s6_addr[0]), \ + (unsigned)((addr).s6_addr[1]), \ + (unsigned)((addr).s6_addr[2]), \ + (unsigned)((addr).s6_addr[3]), \ + (unsigned)((addr).s6_addr[4]), \ + (unsigned)((addr).s6_addr[5]), \ + 
(unsigned)((addr).s6_addr[6]), \ + (unsigned)((addr).s6_addr[7]), \ + (unsigned)((addr).s6_addr[8]), \ + (unsigned)((addr).s6_addr[9]), \ + (unsigned)((addr).s6_addr[10]), \ + (unsigned)((addr).s6_addr[11]), \ + (unsigned)((addr).s6_addr[12]), \ + (unsigned)((addr).s6_addr[13]), \ + (unsigned)((addr).s6_addr[14]), \ + (unsigned)((addr).s6_addr[15]) +#endif + + +/**********************************************************/ + +struct cmd_obj_del_show_result { + cmdline_fixed_string_t action; + struct object *obj; +}; + +static void cmd_obj_del_show_parsed(void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_obj_del_show_result *res = parsed_result; + char ip_str[INET6_ADDRSTRLEN]; + + if (res->obj->ip.family == AF_INET) + snprintf(ip_str, sizeof(ip_str), NIPQUAD_FMT, + NIPQUAD(res->obj->ip.addr.ipv4)); + else + snprintf(ip_str, sizeof(ip_str), NIP6_FMT, + NIP6(res->obj->ip.addr.ipv6)); + + if (strcmp(res->action, "del") == 0) { + SLIST_REMOVE(&global_obj_list, res->obj, object, next); + cmdline_printf(cl, "Object %s removed, ip=%s\n", + res->obj->name, ip_str); + free(res->obj); + } + else if (strcmp(res->action, "show") == 0) { + cmdline_printf(cl, "Object %s, ip=%s\n", + res->obj->name, ip_str); + } +} + +cmdline_parse_token_string_t cmd_obj_action = + TOKEN_STRING_INITIALIZER(struct cmd_obj_del_show_result, + action, "show#del"); +parse_token_obj_list_t cmd_obj_obj = + TOKEN_OBJ_LIST_INITIALIZER(struct cmd_obj_del_show_result, obj, + &global_obj_list); + +cmdline_parse_inst_t cmd_obj_del_show = { + .f = cmd_obj_del_show_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "Show/del an object", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_obj_action, + (void *)&cmd_obj_obj, + NULL, + }, +}; + +/**********************************************************/ + +struct cmd_obj_add_result { + cmdline_fixed_string_t action; + cmdline_fixed_string_t name; + cmdline_ipaddr_t ip; +}; + 
+static void cmd_obj_add_parsed(void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_obj_add_result *res = parsed_result; + struct object *o; + char ip_str[INET6_ADDRSTRLEN]; + + SLIST_FOREACH(o, &global_obj_list, next) { + if (!strcmp(res->name, o->name)) { + cmdline_printf(cl, "Object %s already exist\n", res->name); + return; + } + break; + } + + o = malloc(sizeof(*o)); + if (!o) { + cmdline_printf(cl, "mem error\n"); + return; + } + snprintf(o->name, sizeof(o->name), "%s", res->name); + o->ip = res->ip; + SLIST_INSERT_HEAD(&global_obj_list, o, next); + + if (o->ip.family == AF_INET) + snprintf(ip_str, sizeof(ip_str), NIPQUAD_FMT, + NIPQUAD(o->ip.addr.ipv4)); + else + snprintf(ip_str, sizeof(ip_str), NIP6_FMT, + NIP6(o->ip.addr.ipv6)); + + cmdline_printf(cl, "Object %s added, ip=%s\n", + o->name, ip_str); +} + +cmdline_parse_token_string_t cmd_obj_action_add = + TOKEN_STRING_INITIALIZER(struct cmd_obj_add_result, action, "add"); +cmdline_parse_token_string_t cmd_obj_name = + TOKEN_STRING_INITIALIZER(struct cmd_obj_add_result, name, NULL); +cmdline_parse_token_ipaddr_t cmd_obj_ip = + TOKEN_IPADDR_INITIALIZER(struct cmd_obj_add_result, ip); + +cmdline_parse_inst_t cmd_obj_add = { + .f = cmd_obj_add_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "Add an object (name, val)", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_obj_action_add, + (void *)&cmd_obj_name, + (void *)&cmd_obj_ip, + NULL, + }, +}; + +/**********************************************************/ + +struct cmd_help_result { + cmdline_fixed_string_t help; +}; + +static void cmd_help_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + cmdline_printf(cl, + "Demo example of command line interface in RTE\n\n" + "This is a readline-like interface that can be used to\n" + "debug your RTE application. 
It supports some features\n" + "of GNU readline like completion, cut/paste, and some\n" + "other special bindings.\n\n" + "This demo shows how rte_cmdline library can be\n" + "extended to handle a list of objects. There are\n" + "3 commands:\n" + "- add obj_name IP\n" + "- del obj_name\n" + "- show obj_name\n\n"); +} + +cmdline_parse_token_string_t cmd_help_help = + TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help"); + +cmdline_parse_inst_t cmd_help = { + .f = cmd_help_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "show help", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_help_help, + NULL, + }, +}; + + +/**********************************************************/ +/**********************************************************/ +/****** CONTEXT (list of instruction) */ + +cmdline_parse_ctx_t main_ctx[] = { + (cmdline_parse_inst_t *)&cmd_obj_del_show, + (cmdline_parse_inst_t *)&cmd_obj_add, + (cmdline_parse_inst_t *)&cmd_help, + NULL, +}; diff --git a/examples/cmdline/commands.h b/examples/cmdline/commands.h new file mode 100644 index 00000000..712894b1 --- /dev/null +++ b/examples/cmdline/commands.h @@ -0,0 +1,39 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _COMMANDS_H_ +#define _COMMANDS_H_ + +extern cmdline_parse_ctx_t main_ctx[]; + +#endif /* _COMMANDS_H_ */ diff --git a/examples/cmdline/main.c b/examples/cmdline/main.c new file mode 100644 index 00000000..c966df03 --- /dev/null +++ b/examples/cmdline/main.c @@ -0,0 +1,96 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org> + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <termios.h> +#include <sys/queue.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_socket.h> +#include <cmdline.h> + +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_debug.h> + +#include "commands.h" + +int main(int argc, char **argv) +{ + int ret; + struct cmdline *cl; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_panic("Cannot init EAL\n"); + + cl = cmdline_stdin_new(main_ctx, "example> "); + if (cl == NULL) + rte_panic("Cannot create cmdline instance\n"); + cmdline_interact(cl); + cmdline_stdin_exit(cl); + + return 0; +} diff --git a/examples/cmdline/parse_obj_list.c b/examples/cmdline/parse_obj_list.c new file mode 100644 index 00000000..cdbaf2fe --- /dev/null +++ b/examples/cmdline/parse_obj_list.c @@ -0,0 +1,166 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org> + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <inttypes.h> +#include <stdarg.h> +#include <errno.h> +#include <ctype.h> +#include <string.h> +#include <netinet/in.h> + +#include <cmdline_parse.h> +#include <cmdline_parse_ipaddr.h> + +#include <rte_string_fns.h> + +#include "parse_obj_list.h" + +/* This file is an example of extension of libcmdline. It provides an + * example of objects stored in a list. 
*/ + +struct cmdline_token_ops token_obj_list_ops = { + .parse = parse_obj_list, + .complete_get_nb = complete_get_nb_obj_list, + .complete_get_elt = complete_get_elt_obj_list, + .get_help = get_help_obj_list, +}; + +int +parse_obj_list(cmdline_parse_token_hdr_t *tk, const char *buf, void *res, + unsigned ressize) +{ + struct token_obj_list *tk2 = (struct token_obj_list *)tk; + struct token_obj_list_data *tkd = &tk2->obj_list_data; + struct object *o; + unsigned int token_len = 0; + + if (*buf == 0) + return -1; + + if (res && ressize < sizeof(struct object *)) + return -1; + + while(!cmdline_isendoftoken(buf[token_len])) + token_len++; + + SLIST_FOREACH(o, tkd->list, next) { + if (token_len != strnlen(o->name, OBJ_NAME_LEN_MAX)) + continue; + if (strncmp(buf, o->name, token_len)) + continue; + break; + } + if (!o) /* not found */ + return -1; + + /* store the address of object in structure */ + if (res) + *(struct object **)res = o; + + return token_len; +} + +int complete_get_nb_obj_list(cmdline_parse_token_hdr_t *tk) +{ + struct token_obj_list *tk2 = (struct token_obj_list *)tk; + struct token_obj_list_data *tkd = &tk2->obj_list_data; + struct object *o; + int ret = 0; + + SLIST_FOREACH(o, tkd->list, next) { + ret ++; + } + return ret; +} + +int complete_get_elt_obj_list(cmdline_parse_token_hdr_t *tk, + int idx, char *dstbuf, unsigned int size) +{ + struct token_obj_list *tk2 = (struct token_obj_list *)tk; + struct token_obj_list_data *tkd = &tk2->obj_list_data; + struct object *o; + int i = 0; + unsigned len; + + SLIST_FOREACH(o, tkd->list, next) { + if (i++ == idx) + break; + } + if (!o) + return -1; + + len = strnlen(o->name, OBJ_NAME_LEN_MAX); + if ((len + 1) > size) + return -1; + + if (dstbuf) + snprintf(dstbuf, size, "%s", o->name); + + return 0; +} + + +int get_help_obj_list(__attribute__((unused)) cmdline_parse_token_hdr_t *tk, + char *dstbuf, unsigned int size) +{ + snprintf(dstbuf, size, "Obj-List"); + return 0; +} diff --git 
a/examples/cmdline/parse_obj_list.h b/examples/cmdline/parse_obj_list.h new file mode 100644 index 00000000..871c53a4 --- /dev/null +++ b/examples/cmdline/parse_obj_list.h @@ -0,0 +1,112 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org> + * All rights reserved. 
+ * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PARSE_OBJ_LIST_H_ +#define _PARSE_OBJ_LIST_H_ + +/* This file is an example of extension of libcmdline. It provides an + * example of objects stored in a list. 
*/ + +#include <sys/queue.h> +#include <cmdline_parse.h> + +#define OBJ_NAME_LEN_MAX 64 + +struct object { + SLIST_ENTRY(object) next; + char name[OBJ_NAME_LEN_MAX]; + cmdline_ipaddr_t ip; +}; + +/* define struct object_list */ +SLIST_HEAD(object_list, object); + +/* data is a pointer to a list */ +struct token_obj_list_data { + struct object_list *list; +}; + +struct token_obj_list { + struct cmdline_token_hdr hdr; + struct token_obj_list_data obj_list_data; +}; +typedef struct token_obj_list parse_token_obj_list_t; + +extern struct cmdline_token_ops token_obj_list_ops; + +int parse_obj_list(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, + unsigned ressize); +int complete_get_nb_obj_list(cmdline_parse_token_hdr_t *tk); +int complete_get_elt_obj_list(cmdline_parse_token_hdr_t *tk, int idx, + char *dstbuf, unsigned int size); +int get_help_obj_list(cmdline_parse_token_hdr_t *tk, char *dstbuf, unsigned int size); + +#define TOKEN_OBJ_LIST_INITIALIZER(structure, field, obj_list_ptr) \ +{ \ + .hdr = { \ + .ops = &token_obj_list_ops, \ + .offset = offsetof(structure, field), \ + }, \ + .obj_list_data = { \ + .list = obj_list_ptr, \ + }, \ +} + +#endif /* _PARSE_OBJ_LIST_H_ */ diff --git a/examples/distributor/Makefile b/examples/distributor/Makefile new file mode 100644 index 00000000..6a5badaa --- /dev/null +++ b/examples/distributor/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = distributor_app + +# all sources are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +EXTRA_CFLAGS += -O3 -Wfatal-errors + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/distributor/main.c b/examples/distributor/main.c new file mode 100644 index 00000000..c0201a9e --- /dev/null +++ b/examples/distributor/main.c @@ -0,0 +1,600 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <inttypes.h> +#include <unistd.h> +#include <signal.h> +#include <getopt.h> + +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_cycles.h> +#include <rte_malloc.h> +#include <rte_debug.h> +#include <rte_prefetch.h> +#include <rte_distributor.h> + +#define RX_RING_SIZE 256 +#define TX_RING_SIZE 512 +#define NUM_MBUFS ((64*1024)-1) +#define MBUF_CACHE_SIZE 250 +#define BURST_SIZE 32 +#define RTE_RING_SZ 1024 + +/* uncomment the line below to enable debug logs */ +/* #define DEBUG */ + +#ifdef DEBUG +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) do { \ + RTE_LOG(DEBUG, log_type, fmt, ##args); \ +} while (0) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...)
do {} while (0) +#endif + +#define RTE_LOGTYPE_DISTRAPP RTE_LOGTYPE_USER1 + +/* mask of enabled ports */ +static uint32_t enabled_port_mask; +volatile uint8_t quit_signal; +volatile uint8_t quit_signal_rx; + +static volatile struct app_stats { + struct { + uint64_t rx_pkts; + uint64_t returned_pkts; + uint64_t enqueued_pkts; + } rx __rte_cache_aligned; + + struct { + uint64_t dequeue_pkts; + uint64_t tx_pkts; + } tx __rte_cache_aligned; +} app_stats; + +static const struct rte_eth_conf port_conf_default = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = ETHER_MAX_LEN, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, + .rx_adv_conf = { + .rss_conf = { + .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | + ETH_RSS_TCP | ETH_RSS_SCTP, + } + }, +}; + +struct output_buffer { + unsigned count; + struct rte_mbuf *mbufs[BURST_SIZE]; +}; + +/* + * Initialises a given port using global settings and with the rx buffers + * coming from the mbuf_pool passed as parameter + */ +static inline int +port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + struct rte_eth_conf port_conf = port_conf_default; + const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1; + int retval; + uint16_t q; + + if (port >= rte_eth_dev_count()) + return -1; + + retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf); + if (retval != 0) + return retval; + + for (q = 0; q < rxRings; q++) { + retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + rte_eth_dev_socket_id(port), + NULL, mbuf_pool); + if (retval < 0) + return retval; + } + + for (q = 0; q < txRings; q++) { + retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + rte_eth_dev_socket_id(port), + NULL); + if (retval < 0) + return retval; + } + + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + struct rte_eth_link link; + rte_eth_link_get_nowait(port, &link); + if (!link.link_status) { + sleep(1); + rte_eth_link_get_nowait(port, &link); + } + + if (!link.link_status) { + printf("Link down on port 
%"PRIu8"\n", port); + return 0; + } + + struct ether_addr addr; + rte_eth_macaddr_get(port, &addr); + printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port, + addr.addr_bytes[0], addr.addr_bytes[1], + addr.addr_bytes[2], addr.addr_bytes[3], + addr.addr_bytes[4], addr.addr_bytes[5]); + + rte_eth_promiscuous_enable(port); + + return 0; +} + +struct lcore_params { + unsigned worker_id; + struct rte_distributor *d; + struct rte_ring *r; + struct rte_mempool *mem_pool; +}; + +static void +quit_workers(struct rte_distributor *d, struct rte_mempool *p) +{ + const unsigned num_workers = rte_lcore_count() - 2; + unsigned i; + struct rte_mbuf *bufs[num_workers]; + rte_mempool_get_bulk(p, (void *)bufs, num_workers); + + for (i = 0; i < num_workers; i++) + bufs[i]->hash.rss = i << 1; + + rte_distributor_process(d, bufs, num_workers); + rte_mempool_put_bulk(p, (void *)bufs, num_workers); +} + +static int +lcore_rx(struct lcore_params *p) +{ + struct rte_distributor *d = p->d; + struct rte_mempool *mem_pool = p->mem_pool; + struct rte_ring *r = p->r; + const uint8_t nb_ports = rte_eth_dev_count(); + const int socket_id = rte_socket_id(); + uint8_t port; + + for (port = 0; port < nb_ports; port++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << port)) == 0) + continue; + + if (rte_eth_dev_socket_id(port) > 0 && + rte_eth_dev_socket_id(port) != socket_id) + printf("WARNING, port %u is on remote NUMA node to " + "RX thread.\n\tPerformance will not " + "be optimal.\n", port); + } + + printf("\nCore %u doing packet RX.\n", rte_lcore_id()); + port = 0; + while (!quit_signal_rx) { + + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << port)) == 0) { + if (++port == nb_ports) + port = 0; + continue; + } + struct rte_mbuf *bufs[BURST_SIZE*2]; + const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs, + BURST_SIZE); + app_stats.rx.rx_pkts += nb_rx; + + rte_distributor_process(d, bufs, 
nb_rx); + const uint16_t nb_ret = rte_distributor_returned_pkts(d, + bufs, BURST_SIZE*2); + app_stats.rx.returned_pkts += nb_ret; + if (unlikely(nb_ret == 0)) + continue; + + uint16_t sent = rte_ring_enqueue_burst(r, (void *)bufs, nb_ret); + app_stats.rx.enqueued_pkts += sent; + if (unlikely(sent < nb_ret)) { + LOG_DEBUG(DISTRAPP, "%s:Packet loss due to full ring\n", __func__); + while (sent < nb_ret) + rte_pktmbuf_free(bufs[sent++]); + } + if (++port == nb_ports) + port = 0; + } + rte_distributor_process(d, NULL, 0); + /* flush distributor to bring to known state */ + rte_distributor_flush(d); + /* set worker & tx threads quit flag */ + quit_signal = 1; + /* + * worker threads may hang in get packet as + * distributor process is not running, just make sure workers + * get packets till quit_signal is actually been + * received and they gracefully shutdown + */ + quit_workers(d, mem_pool); + /* rx thread should quit at last */ + return 0; +} + +static inline void +flush_one_port(struct output_buffer *outbuf, uint8_t outp) +{ + unsigned nb_tx = rte_eth_tx_burst(outp, 0, outbuf->mbufs, + outbuf->count); + app_stats.tx.tx_pkts += nb_tx; + + if (unlikely(nb_tx < outbuf->count)) { + LOG_DEBUG(DISTRAPP, "%s:Packet loss with tx_burst\n", __func__); + do { + rte_pktmbuf_free(outbuf->mbufs[nb_tx]); + } while (++nb_tx < outbuf->count); + } + outbuf->count = 0; +} + +static inline void +flush_all_ports(struct output_buffer *tx_buffers, uint8_t nb_ports) +{ + uint8_t outp; + for (outp = 0; outp < nb_ports; outp++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << outp)) == 0) + continue; + + if (tx_buffers[outp].count == 0) + continue; + + flush_one_port(&tx_buffers[outp], outp); + } +} + +static int +lcore_tx(struct rte_ring *in_r) +{ + static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS]; + const uint8_t nb_ports = rte_eth_dev_count(); + const int socket_id = rte_socket_id(); + uint8_t port; + + for (port = 0; port < nb_ports; port++) { + /* skip 
ports that are not enabled */ + if ((enabled_port_mask & (1 << port)) == 0) + continue; + + if (rte_eth_dev_socket_id(port) > 0 && + rte_eth_dev_socket_id(port) != socket_id) + printf("WARNING, port %u is on remote NUMA node to " + "TX thread.\n\tPerformance will not " + "be optimal.\n", port); + } + + printf("\nCore %u doing packet TX.\n", rte_lcore_id()); + while (!quit_signal) { + + for (port = 0; port < nb_ports; port++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << port)) == 0) + continue; + + struct rte_mbuf *bufs[BURST_SIZE]; + const uint16_t nb_rx = rte_ring_dequeue_burst(in_r, + (void *)bufs, BURST_SIZE); + app_stats.tx.dequeue_pkts += nb_rx; + + /* if we get no traffic, flush anything we have */ + if (unlikely(nb_rx == 0)) { + flush_all_ports(tx_buffers, nb_ports); + continue; + } + + /* for traffic we receive, queue it up for transmit */ + uint16_t i; + rte_prefetch_non_temporal((void *)bufs[0]); + rte_prefetch_non_temporal((void *)bufs[1]); + rte_prefetch_non_temporal((void *)bufs[2]); + for (i = 0; i < nb_rx; i++) { + struct output_buffer *outbuf; + uint8_t outp; + rte_prefetch_non_temporal((void *)bufs[i + 3]); + /* + * workers should update in_port to hold the + * output port value + */ + outp = bufs[i]->port; + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << outp)) == 0) + continue; + + outbuf = &tx_buffers[outp]; + outbuf->mbufs[outbuf->count++] = bufs[i]; + if (outbuf->count == BURST_SIZE) + flush_one_port(outbuf, outp); + } + } + } + return 0; +} + +static void +int_handler(int sig_num) +{ + printf("Exiting on signal %d\n", sig_num); + /* set quit flag for rx thread to exit */ + quit_signal_rx = 1; +} + +static void +print_stats(void) +{ + struct rte_eth_stats eth_stats; + unsigned i; + + printf("\nRX thread stats:\n"); + printf(" - Received: %"PRIu64"\n", app_stats.rx.rx_pkts); + printf(" - Processed: %"PRIu64"\n", app_stats.rx.returned_pkts); + printf(" - Enqueued: %"PRIu64"\n", 
app_stats.rx.enqueued_pkts); + + printf("\nTX thread stats:\n"); + printf(" - Dequeued: %"PRIu64"\n", app_stats.tx.dequeue_pkts); + printf(" - Transmitted: %"PRIu64"\n", app_stats.tx.tx_pkts); + + for (i = 0; i < rte_eth_dev_count(); i++) { + rte_eth_stats_get(i, &eth_stats); + printf("\nPort %u stats:\n", i); + printf(" - Pkts in: %"PRIu64"\n", eth_stats.ipackets); + printf(" - Pkts out: %"PRIu64"\n", eth_stats.opackets); + printf(" - In Errs: %"PRIu64"\n", eth_stats.ierrors); + printf(" - Out Errs: %"PRIu64"\n", eth_stats.oerrors); + printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf); + } +} + +static int +lcore_worker(struct lcore_params *p) +{ + struct rte_distributor *d = p->d; + const unsigned id = p->worker_id; + /* + * for single port, xor_val will be zero so we won't modify the output + * port, otherwise we send traffic from 0 to 1, 2 to 3, and vice versa + */ + const unsigned xor_val = (rte_eth_dev_count() > 1); + struct rte_mbuf *buf = NULL; + + printf("\nCore %u acting as worker core.\n", rte_lcore_id()); + while (!quit_signal) { + buf = rte_distributor_get_pkt(d, id, buf); + buf->port ^= xor_val; + } + return 0; +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n", + prgname); +} + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:", + lgopts, &option_index)) != EOF) 
{ + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + + default: + print_usage(prgname); + return -1; + } + } + + if (optind <= 1) { + print_usage(prgname); + return -1; + } + + argv[optind-1] = prgname; + + optind = 0; /* reset getopt lib */ + return 0; +} + +/* Main function, does initialization and calls the per-lcore functions */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool; + struct rte_distributor *d; + struct rte_ring *output_ring; + unsigned lcore_id, worker_id = 0; + unsigned nb_ports; + uint8_t portid; + uint8_t nb_ports_available; + + /* catch ctrl-c so we can print on exit */ + signal(SIGINT, int_handler); + + /* init EAL */ + int ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid distributor parameters\n"); + + if (rte_lcore_count() < 3) + rte_exit(EXIT_FAILURE, "Error, This application needs at " + "least 3 logical cores to run:\n" + "1 lcore for packet RX and distribution\n" + "1 lcore for packet TX\n" + "and at least 1 lcore for worker threads\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n"); + if (nb_ports != 1 && (nb_ports & 1)) + rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except " + "when using a single port\n"); + + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", + NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + nb_ports_available = nb_ports; + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* 
skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + nb_ports_available--; + continue; + } + /* init port */ + printf("Initializing port %u... done\n", (unsigned) portid); + + if (port_init(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n", + portid); + } + + if (!nb_ports_available) { + rte_exit(EXIT_FAILURE, + "All available ports are disabled. Please set portmask.\n"); + } + + d = rte_distributor_create("PKT_DIST", rte_socket_id(), + rte_lcore_count() - 2); + if (d == NULL) + rte_exit(EXIT_FAILURE, "Cannot create distributor\n"); + + /* + * scheduler ring is read only by the transmitter core, but written to + * by multiple threads + */ + output_ring = rte_ring_create("Output_ring", RTE_RING_SZ, + rte_socket_id(), RING_F_SC_DEQ); + if (output_ring == NULL) + rte_exit(EXIT_FAILURE, "Cannot create output ring\n"); + + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (worker_id == rte_lcore_count() - 2) + rte_eal_remote_launch((lcore_function_t *)lcore_tx, + output_ring, lcore_id); + else { + struct lcore_params *p = + rte_malloc(NULL, sizeof(*p), 0); + if (!p) + rte_panic("malloc failure\n"); + *p = (struct lcore_params){worker_id, d, output_ring, mbuf_pool}; + + rte_eal_remote_launch((lcore_function_t *)lcore_worker, + p, lcore_id); + } + worker_id++; + } + /* call lcore_main on master core only */ + struct lcore_params p = { 0, d, output_ring, mbuf_pool}; + lcore_rx(&p); + + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + print_stats(); + return 0; +} diff --git a/examples/dpdk_qat/Makefile b/examples/dpdk_qat/Makefile new file mode 100644 index 00000000..01d61bcf --- /dev/null +++ b/examples/dpdk_qat/Makefile @@ -0,0 +1,93 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2013 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +ifeq ($(ICP_ROOT),) +$(error "Please define ICP_ROOT environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(error This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +endif + +LBITS := $(shell uname -p) +ifeq ($(CROSS_COMPILE),) + ifneq ($(CONFIG_RTE_ARCH),"x86_64") + ifneq ($(LBITS),i686) + $(error The RTE_TARGET chosen is not compatible with this environment \ + (x86_64), for this application. Please change the definition of the \ + RTE_TARGET environment variable, or run the application on a i686 OS) + endif + endif +endif + +# binary name +APP = dpdk_qat + +# all source are stored in SRCS-y +SRCS-y := main.c crypto.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -I$(ICP_ROOT)/quickassist/include \ + -I$(ICP_ROOT)/quickassist/include/lac \ + -I$(ICP_ROOT)/quickassist/lookaside/access_layer/include + +# From CRF 1.2 driver, library was renamed to libicp_qa_al.a +ifneq ($(wildcard $(ICP_ROOT)/build/icp_qa_al.a),) +ICP_LIBRARY_PATH = $(ICP_ROOT)/build/icp_qa_al.a +else +ICP_LIBRARY_PATH = $(ICP_ROOT)/build/libicp_qa_al.a +endif + +LDLIBS += -L$(ICP_ROOT)/build +LDLIBS += $(ICP_LIBRARY_PATH) \ + -lz \ + -losal \ + -ladf_proxy \ + -lcrypto + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/dpdk_qat/config_files/coleto/dh895xcc_qa_dev0.conf b/examples/dpdk_qat/config_files/coleto/dh895xcc_qa_dev0.conf new file mode 100644 index 00000000..fd139e2f --- /dev/null +++ b/examples/dpdk_qat/config_files/coleto/dh895xcc_qa_dev0.conf @@ -0,0 
+1,65 @@ +[GENERAL] +ServicesEnabled = cy;dc +ConfigVersion = 2 +cyHmacAuthMode = 1 +dcTotalSRAMAvailable = 0 +Firmware_MofPath = dh895xcc/mof_firmware.bin +Firmware_MmpPath = dh895xcc/mmp_firmware.bin +statsGeneral = 1 +statsDc = 1 +statsDh = 1 +statsDrbg = 1 +statsDsa = 1 +statsEcc = 1 +statsKeyGen = 1 +statsLn = 1 +statsPrime = 1 +statsRsa = 1 +statsSym = 1 +SRIOV_Enabled = 0 +ProcDebug = 1 + +[KERNEL] +NumberCyInstances = 0 +NumberDcInstances = 0 + +[SSL] +NumberCyInstances = 8 +NumberDcInstances = 0 +NumProcesses = 1 +LimitDevAccess = 0 + +Cy0Name = "SSL0" +Cy0IsPolled = 1 +Cy0CoreAffinity = 0 + +Cy1Name = "SSL1" +Cy1IsPolled = 1 +Cy1CoreAffinity = 1 + +Cy2Name = "SSL2" +Cy2IsPolled = 1 +Cy2CoreAffinity = 2 + +Cy3Name = "SSL3" +Cy3IsPolled = 1 +Cy3CoreAffinity = 3 + + +Cy4Name = "SSL4" +Cy4IsPolled = 1 +Cy4CoreAffinity = 4 + + +Cy5Name = "SSL5" +Cy5IsPolled = 1 +Cy5CoreAffinity = 5 + +Cy6Name = "SSL6" +Cy6IsPolled = 1 +Cy6CoreAffinity = 6 + + +Cy7Name = "SSL7" +Cy7IsPolled = 1 +Cy7CoreAffinity = 7 diff --git a/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev0.conf b/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev0.conf new file mode 100644 index 00000000..9e1c1d11 --- /dev/null +++ b/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev0.conf @@ -0,0 +1,293 @@ +######################################################################### +# +# @par +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ######################################################################### +# ######################################################## +# +# This file is the configuration for a single dh89xxcc_qa +# device. +# +# Each device has up to two accelerators. +# - The client may load balance between these +# accelerators. +# Each accelerator has 8 independent ring banks. +# - The interrupt for each can be directed to a +# specific core. +# Each ring bank as 16 rings (hardware assisted queues). 
+# +######################################################### +# General Section +############################################## + +[GENERAL] +ServicesEnabled = cy0;cy1 + +# Use version 2 of the config file +ConfigVersion = 2 +# Look Aside Cryptographic Configuration +cyHmacAuthMode = 1 + +# Look Aside Compression Configuration +dcTotalSRAMAvailable = 0 + +# Firmware Location Configuration +Firmware_MofPath = mof_firmware.bin +Firmware_MmpPath = mmp_firmware.bin + +#Default values for number of concurrent requests +CyNumConcurrentSymRequests = 512 +CyNumConcurrentAsymRequests = 64 +DcNumConcurrentRequests = 512 + +#Statistics, valid values: 1,0 +statsGeneral = 1 +statsDc = 1 +statsDh = 1 +statsDrbg = 1 +statsDsa = 1 +statsEcc = 1 +statsKeyGen = 1 +statsLn = 1 +statsPrime = 1 +statsRsa = 1 +statsSym = 1 + +# Enables or disables Single Root Complex IO Virtualization. +# If this is enabled (1) then SRIOV and VT-d need to be enabled in +# BIOS and there can be no Cy or Dc instances created in PF (Dom0). +# If this is disabled (0) then SRIOV and VT-d need to be disabled +# in BIOS and Cy and/or Dc instances can be used in PF (Dom0) +SRIOV_Enabled = 0 + +#Debug feature, if set to 1 it enables additional entries in /proc filesystem +ProcDebug = 1 + +####################################################### +# +# Logical Instances Section +# A logical instance allows each address domain +# (kernel space and individual user space processes) +# to configure rings (i.e. hardware assisted queues) +# to be used by that address domain and to define the +# behavior of that ring. +# +# The address domains are in the following format +# - For kernel address domains +# [KERNEL] +# - For user process address domains +# [xxxxx] +# Where xxxxx may be any ascii value which uniquely identifies +# the user mode process. +# To allow the driver to correctly configure the +# logical instances associated with this user process, +# the process must call the icp_sal_userStartMultiProcess(...) 
+# passing the xxxxx string during process initialisation. +# When the user space process is finished it must call +# icp_sal_userStop(...) to free resources. +# NumProcesses will indicate the maximum number of processes +# that can call icp_sal_userStartMultiProcess on this instance. +# Warning: the resources are preallocated: if NumProcesses +# is too high, the driver will fail to load +# +# Items configurable by a logical instance are: +# - Name of the logical instance +# - The accelerator associated with this logical +# instance +# - The core the instance is affinitized to (optional) +# +# Note: Logical instances may not share the same ring, but +# may share a ring bank. +# +# The format of the logical instances are: +# - For crypto: +# Cy<n>Name = "xxxx" +# Cy<n>AcceleratorNumber = 0-3 +# Cy<n>CoreAffinity = 0-7 +# +# - For Data Compression +# Dc<n>Name = "xxxx" +# Dc<n>AcceleratorNumber = 0-1 +# Dc<n>CoreAffinity = 0-7 +# +# Where: +# - n is the number of this logical instance starting at 0. +# - xxxx may be any ascii value which identifies the logical instance. +# +# Note: for user space processes, a list of values can be specified for +# the accelerator number and the core affinity: for example +# Cy0AcceleratorNumber = 0,2 +# Cy0CoreAffinity = 0,2,4 +# These comma-separated lists will allow the multiple processes to use +# different accelerators and cores, and will wrap around the numbers +# in the list. 
In the above example, process 0 will use accelerator 0, +# and process 1 will use accelerator 2 +# +######################################################## + +############################################## +# Kernel Instances Section +############################################## +[KERNEL] +NumberCyInstances = 0 +NumberDcInstances = 0 + +############################################## +# User Process Instance Section +############################################## +[SSL] +NumberCyInstances = 16 +NumberDcInstances = 0 +NumProcesses = 1 +LimitDevAccess = 0 + +# Crypto - User instance #0 +Cy0Name = "SSL0" +Cy0IsPolled = 1 +Cy0AcceleratorNumber = 0 +# List of core affinities +Cy0CoreAffinity = 0 + +# Crypto - User instance #1 +Cy1Name = "SSL1" +Cy1IsPolled = 1 +Cy1AcceleratorNumber = 1 +# List of core affinities +Cy1CoreAffinity = 1 + +# Crypto - User instance #2 +Cy2Name = "SSL2" +Cy2IsPolled = 1 +Cy2AcceleratorNumber = 2 +# List of core affinities +Cy2CoreAffinity = 2 + +# Crypto - User instance #3 +Cy3Name = "SSL3" +Cy3IsPolled = 1 +Cy3AcceleratorNumber = 3 +# List of core affinities +Cy3CoreAffinity = 3 + +# Crypto - User instance #4 +Cy4Name = "SSL4" +Cy4IsPolled = 1 +Cy4AcceleratorNumber = 0 +# List of core affinities +Cy4CoreAffinity = 4 + +# Crypto - User instance #5 +Cy5Name = "SSL5" +Cy5IsPolled = 1 +Cy5AcceleratorNumber = 1 +# List of core affinities +Cy5CoreAffinity = 5 + +# Crypto - User instance #6 +Cy6Name = "SSL6" +Cy6IsPolled = 1 +Cy6AcceleratorNumber = 2 +# List of core affinities +Cy6CoreAffinity = 6 + +# Crypto - User instance #7 +Cy7Name = "SSL7" +Cy7IsPolled = 1 +Cy7AcceleratorNumber = 3 +# List of core affinities +Cy7CoreAffinity = 7 + +# Crypto - User instance #8 +Cy8Name = "SSL8" +Cy8IsPolled = 1 +Cy8AcceleratorNumber = 0 +# List of core affinities +Cy8CoreAffinity = 16 + +# Crypto - User instance #9 +Cy9Name = "SSL9" +Cy9IsPolled = 1 +Cy9AcceleratorNumber = 1 +# List of core affinities +Cy9CoreAffinity = 17 + +# Crypto - User instance #10 
+Cy10Name = "SSL10" +Cy10IsPolled = 1 +Cy10AcceleratorNumber = 2 +# List of core affinities +Cy10CoreAffinity = 18 + +# Crypto - User instance #11 +Cy11Name = "SSL11" +Cy11IsPolled = 1 +Cy11AcceleratorNumber = 3 +# List of core affinities +Cy11CoreAffinity = 19 + +# Crypto - User instance #12 +Cy12Name = "SSL12" +Cy12IsPolled = 1 +Cy12AcceleratorNumber = 0 +# List of core affinities +Cy12CoreAffinity = 20 + +# Crypto - User instance #13 +Cy13Name = "SSL13" +Cy13IsPolled = 1 +Cy13AcceleratorNumber = 1 +# List of core affinities +Cy13CoreAffinity = 21 + +# Crypto - User instance #14 +Cy14Name = "SSL14" +Cy14IsPolled = 1 +Cy14AcceleratorNumber = 2 +# List of core affinities +Cy14CoreAffinity = 22 + +# Crypto - User instance #15 +Cy15Name = "SSL15" +Cy15IsPolled = 1 +Cy15AcceleratorNumber = 3 +# List of core affinities +Cy15CoreAffinity = 23 + + + +############################################## +# Wireless Process Instance Section +############################################## +[WIRELESS] +NumberCyInstances = 0 +NumberDcInstances = 0 +NumProcesses = 0 diff --git a/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev1.conf b/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev1.conf new file mode 100644 index 00000000..3e8d8b6b --- /dev/null +++ b/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev1.conf @@ -0,0 +1,292 @@ +######################################################################### +# +# @par +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ######################################################################### +# ######################################################## +# +# This file is the configuration for a single dh89xxcc_qa +# device. +# +# Each device has up to two accelerators. +# - The client may load balance between these +# accelerators. +# Each accelerator has 8 independent ring banks. +# - The interrupt for each can be directed to a +# specific core. +# Each ring bank as 16 rings (hardware assisted queues). 
+# +######################################################### +# General Section +############################################## + +[GENERAL] +ServicesEnabled = cy0;cy1 + +# Use version 2 of the config file +ConfigVersion = 2 +# Look Aside Cryptographic Configuration +cyHmacAuthMode = 1 + +# Look Aside Compression Configuration +dcTotalSRAMAvailable = 0 + +# Firmware Location Configuration +Firmware_MofPath = mof_firmware.bin +Firmware_MmpPath = mmp_firmware.bin + +#Default values for number of concurrent requests +CyNumConcurrentSymRequests = 512 +CyNumConcurrentAsymRequests = 64 +DcNumConcurrentRequests = 512 + +#Statistics, valid values: 1,0 +statsGeneral = 1 +statsDc = 1 +statsDh = 1 +statsDrbg = 1 +statsDsa = 1 +statsEcc = 1 +statsKeyGen = 1 +statsLn = 1 +statsPrime = 1 +statsRsa = 1 +statsSym = 1 + +# Enables or disables Single Root Complex IO Virtualization. +# If this is enabled (1) then SRIOV and VT-d need to be enabled in +# BIOS and there can be no Cy or Dc instances created in PF (Dom0). +# If this is disabled (0) then SRIOV and VT-d need to be disabled +# in BIOS and Cy and/or Dc instances can be used in PF (Dom0) +SRIOV_Enabled = 0 + +#Debug feature, if set to 1 it enables additional entries in /proc filesystem +ProcDebug = 1 + +####################################################### +# +# Logical Instances Section +# A logical instance allows each address domain +# (kernel space and individual user space processes) +# to configure rings (i.e. hardware assisted queues) +# to be used by that address domain and to define the +# behavior of that ring. +# +# The address domains are in the following format +# - For kernel address domains +# [KERNEL] +# - For user process address domains +# [xxxxx] +# Where xxxxx may be any ascii value which uniquely identifies +# the user mode process. +# To allow the driver to correctly configure the +# logical instances associated with this user process, +# the process must call the icp_sal_userStartMultiProcess(...) 
+# passing the xxxxx string during process initialisation. +# When the user space process is finished it must call +# icp_sal_userStop(...) to free resources. +# NumProcesses will indicate the maximum number of processes +# that can call icp_sal_userStartMultiProcess on this instance. +# Warning: the resources are preallocated: if NumProcesses +# is too high, the driver will fail to load +# +# Items configurable by a logical instance are: +# - Name of the logical instance +# - The accelerator associated with this logical +# instance +# - The core the instance is affinitized to (optional) +# +# Note: Logical instances may not share the same ring, but +# may share a ring bank. +# +# The format of the logical instances are: +# - For crypto: +# Cy<n>Name = "xxxx" +# Cy<n>AcceleratorNumber = 0-3 +# Cy<n>CoreAffinity = 0-7 +# +# - For Data Compression +# Dc<n>Name = "xxxx" +# Dc<n>AcceleratorNumber = 0-1 +# Dc<n>CoreAffinity = 0-7 +# +# Where: +# - n is the number of this logical instance starting at 0. +# - xxxx may be any ascii value which identifies the logical instance. +# +# Note: for user space processes, a list of values can be specified for +# the accelerator number and the core affinity: for example +# Cy0AcceleratorNumber = 0,2 +# Cy0CoreAffinity = 0,2,4 +# These comma-separated lists will allow the multiple processes to use +# different accelerators and cores, and will wrap around the numbers +# in the list. 
In the above example, process 0 will use accelerator 0, +# and process 1 will use accelerator 2 +# +######################################################## + +############################################## +# Kernel Instances Section +############################################## +[KERNEL] +NumberCyInstances = 0 +NumberDcInstances = 0 + +############################################## +# User Process Instance Section +############################################## +[SSL] +NumberCyInstances = 16 +NumberDcInstances = 0 +NumProcesses = 1 +LimitDevAccess = 0 + +# Crypto - User instance #0 +Cy0Name = "SSL0" +Cy0IsPolled = 1 +Cy0AcceleratorNumber = 0 +# List of core affinities +Cy0CoreAffinity = 8 + +# Crypto - User instance #1 +Cy1Name = "SSL1" +Cy1IsPolled = 1 +Cy1AcceleratorNumber = 1 +# List of core affinities +Cy1CoreAffinity = 9 + +# Crypto - User instance #2 +Cy2Name = "SSL2" +Cy2IsPolled = 1 +Cy2AcceleratorNumber = 2 +# List of core affinities +Cy2CoreAffinity = 10 + +# Crypto - User instance #3 +Cy3Name = "SSL3" +Cy3IsPolled = 1 +Cy3AcceleratorNumber = 3 +# List of core affinities +Cy3CoreAffinity = 11 + +# Crypto - User instance #4 +Cy4Name = "SSL4" +Cy4IsPolled = 1 +Cy4AcceleratorNumber = 0 +# List of core affinities +Cy4CoreAffinity = 12 + +# Crypto - User instance #5 +Cy5Name = "SSL5" +Cy5IsPolled = 1 +Cy5AcceleratorNumber = 1 +# List of core affinities +Cy5CoreAffinity = 13 + +# Crypto - User instance #6 +Cy6Name = "SSL6" +Cy6IsPolled = 1 +Cy6AcceleratorNumber = 2 +# List of core affinities +Cy6CoreAffinity = 14 + +# Crypto - User instance #7 +Cy7Name = "SSL7" +Cy7IsPolled = 1 +Cy7AcceleratorNumber = 3 +# List of core affinities +Cy7CoreAffinity = 15 + +# Crypto - User instance #8 +Cy8Name = "SSL8" +Cy8IsPolled = 1 +Cy8AcceleratorNumber = 0 +# List of core affinities +Cy8CoreAffinity = 24 + +# Crypto - User instance #9 +Cy9Name = "SSL9" +Cy9IsPolled = 1 +Cy9AcceleratorNumber = 1 +# List of core affinities +Cy9CoreAffinity = 25 + +# Crypto - User instance 
#10 +Cy10Name = "SSL10" +Cy10IsPolled = 1 +Cy10AcceleratorNumber = 2 +# List of core affinities +Cy10CoreAffinity = 26 + +# Crypto - User instance #11 +Cy11Name = "SSL11" +Cy11IsPolled = 1 +Cy11AcceleratorNumber = 3 +# List of core affinities +Cy11CoreAffinity = 27 + +# Crypto - User instance #12 +Cy12Name = "SSL12" +Cy12IsPolled = 1 +Cy12AcceleratorNumber = 0 +# List of core affinities +Cy12CoreAffinity = 28 + +# Crypto - User instance #13 +Cy13Name = "SSL13" +Cy13IsPolled = 1 +Cy13AcceleratorNumber = 1 +# List of core affinities +Cy13CoreAffinity = 29 + +# Crypto - User instance #14 +Cy14Name = "SSL14" +Cy14IsPolled = 1 +Cy14AcceleratorNumber = 2 +# List of core affinities +Cy14CoreAffinity = 30 + +# Crypto - User instance #15 +Cy15Name = "SSL15" +Cy15IsPolled = 1 +Cy15AcceleratorNumber = 3 +# List of core affinities +Cy15CoreAffinity = 31 + + +############################################## +# Wireless Process Instance Section +############################################## +[WIRELESS] +NumberCyInstances = 0 +NumberDcInstances = 0 +NumProcesses = 0 diff --git a/examples/dpdk_qat/config_files/stargo/dh89xxcc_qa_dev0.conf b/examples/dpdk_qat/config_files/stargo/dh89xxcc_qa_dev0.conf new file mode 100644 index 00000000..c3a85dea --- /dev/null +++ b/examples/dpdk_qat/config_files/stargo/dh89xxcc_qa_dev0.conf @@ -0,0 +1,235 @@ +######################################################################### +# +# @par +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ######################################################################### +# ######################################################## +# +# This file is the configuration for a single dh89xxcc_qa +# device. +# +# Each device has up to two accelerators. +# - The client may load balance between these +# accelerators. +# Each accelerator has 8 independent ring banks. +# - The interrupt for each can be directed to a +# specific core. +# Each ring bank has 16 rings (hardware assisted queues).
+# +######################################################### +# General Section +############################################## + +[GENERAL] +ServicesEnabled = cy0;cy1 + +# Use version 2 of the config file +ConfigVersion = 2 +# Look Aside Cryptographic Configuration +cyHmacAuthMode = 1 + +# Look Aside Compression Configuration +dcTotalSRAMAvailable = 0 + +# Firmware Location Configuration +Firmware_MofPath = mof_firmware.bin +Firmware_MmpPath = mmp_firmware.bin + +#Default values for number of concurrent requests +CyNumConcurrentSymRequests = 512 +CyNumConcurrentAsymRequests = 64 +DcNumConcurrentRequests = 512 + +#Statistics, valid values: 1,0 +statsGeneral = 1 +statsDc = 1 +statsDh = 1 +statsDrbg = 1 +statsDsa = 1 +statsEcc = 1 +statsKeyGen = 1 +statsLn = 1 +statsPrime = 1 +statsRsa = 1 +statsSym = 1 + +# Enables or disables Single Root Complex IO Virtualization. +# If this is enabled (1) then SRIOV and VT-d need to be enabled in +# BIOS and there can be no Cy or Dc instances created in PF (Dom0). +# If this is disabled (0) then SRIOV and VT-d need to be disabled +# in BIOS and Cy and/or Dc instances can be used in PF (Dom0) +SRIOV_Enabled = 0 + +#Debug feature, if set to 1 it enables additional entries in /proc filesystem +ProcDebug = 1 + +####################################################### +# +# Logical Instances Section +# A logical instance allows each address domain +# (kernel space and individual user space processes) +# to configure rings (i.e. hardware assisted queues) +# to be used by that address domain and to define the +# behavior of that ring. +# +# The address domains are in the following format +# - For kernel address domains +# [KERNEL] +# - For user process address domains +# [xxxxx] +# Where xxxxx may be any ascii value which uniquely identifies +# the user mode process. +# To allow the driver to correctly configure the +# logical instances associated with this user process, +# the process must call the icp_sal_userStartMultiProcess(...)
+# passing the xxxxx string during process initialisation. +# When the user space process is finished it must call +# icp_sal_userStop(...) to free resources. +# NumProcesses will indicate the maximum number of processes +# that can call icp_sal_userStartMultiProcess on this instance. +# Warning: the resources are preallocated: if NumProcesses +# is too high, the driver will fail to load +# +# Items configurable by a logical instance are: +# - Name of the logical instance +# - The accelerator associated with this logical +# instance +# - The core the instance is affinitized to (optional) +# +# Note: Logical instances may not share the same ring, but +# may share a ring bank. +# +# The format of the logical instances are: +# - For crypto: +# Cy<n>Name = "xxxx" +# Cy<n>AcceleratorNumber = 0-3 +# Cy<n>CoreAffinity = 0-7 +# +# - For Data Compression +# Dc<n>Name = "xxxx" +# Dc<n>AcceleratorNumber = 0-1 +# Dc<n>CoreAffinity = 0-7 +# +# Where: +# - n is the number of this logical instance starting at 0. +# - xxxx may be any ascii value which identifies the logical instance. +# +# Note: for user space processes, a list of values can be specified for +# the accelerator number and the core affinity: for example +# Cy0AcceleratorNumber = 0,2 +# Cy0CoreAffinity = 0,2,4 +# These comma-separated lists will allow the multiple processes to use +# different accelerators and cores, and will wrap around the numbers +# in the list. 
In the above example, process 0 will use accelerator 0, +# and process 1 will use accelerator 2 +# +######################################################## + +############################################## +# Kernel Instances Section +############################################## +[KERNEL] +NumberCyInstances = 0 +NumberDcInstances = 0 + +############################################## +# User Process Instance Section +############################################## +[SSL] +NumberCyInstances = 8 +NumberDcInstances = 0 +NumProcesses = 1 +LimitDevAccess = 0 + +# Crypto - User instance #0 +Cy0Name = "SSL0" +Cy0IsPolled = 1 +Cy0AcceleratorNumber = 0 +# List of core affinities +Cy0CoreAffinity = 0 + +# Crypto - User instance #1 +Cy1Name = "SSL1" +Cy1IsPolled = 1 +Cy1AcceleratorNumber = 1 +# List of core affinities +Cy1CoreAffinity = 1 + +# Crypto - User instance #2 +Cy2Name = "SSL2" +Cy2IsPolled = 1 +Cy2AcceleratorNumber = 2 +# List of core affinities +Cy2CoreAffinity = 2 + +# Crypto - User instance #3 +Cy3Name = "SSL3" +Cy3IsPolled = 1 +Cy3AcceleratorNumber = 3 +# List of core affinities +Cy3CoreAffinity = 3 + +# Crypto - User instance #4 +Cy4Name = "SSL4" +Cy4IsPolled = 1 +Cy4AcceleratorNumber = 0 +# List of core affinities +Cy4CoreAffinity = 4 + +# Crypto - User instance #5 +Cy5Name = "SSL5" +Cy5IsPolled = 1 +Cy5AcceleratorNumber = 1 +# List of core affinities +Cy5CoreAffinity = 5 + +# Crypto - User instance #6 +Cy6Name = "SSL6" +Cy6IsPolled = 1 +Cy6AcceleratorNumber = 2 +# List of core affinities +Cy6CoreAffinity = 6 + +# Crypto - User instance #7 +Cy7Name = "SSL7" +Cy7IsPolled = 1 +Cy7AcceleratorNumber = 3 +# List of core affinities +Cy7CoreAffinity = 7 + +############################################## +# Wireless Process Instance Section +############################################## +[WIRELESS] +NumberCyInstances = 0 +NumberDcInstances = 0 +NumProcesses = 0 diff --git a/examples/dpdk_qat/crypto.c b/examples/dpdk_qat/crypto.c new file mode 100644 index 
00000000..8954bf87 --- /dev/null +++ b/examples/dpdk_qat/crypto.c @@ -0,0 +1,944 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <string.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/queue.h> +#include <stdarg.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_ether.h> +#include <rte_malloc.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> + +#define CPA_CY_SYM_DP_TMP_WORKAROUND 1 + +#include "cpa.h" +#include "cpa_types.h" +#include "cpa_cy_sym_dp.h" +#include "cpa_cy_common.h" +#include "cpa_cy_im.h" +#include "icp_sal_user.h" +#include "icp_sal_poll.h" + +#include "crypto.h" + +/* CIPHER KEY LENGTHS */ +#define KEY_SIZE_64_IN_BYTES (64 / 8) +#define KEY_SIZE_56_IN_BYTES (56 / 8) +#define KEY_SIZE_128_IN_BYTES (128 / 8) +#define KEY_SIZE_168_IN_BYTES (168 / 8) +#define KEY_SIZE_192_IN_BYTES (192 / 8) +#define KEY_SIZE_256_IN_BYTES (256 / 8) + +/* HMAC AUTH KEY LENGTHS */ +#define AES_XCBC_AUTH_KEY_LENGTH_IN_BYTES (128 / 8) +#define SHA1_AUTH_KEY_LENGTH_IN_BYTES (160 / 8) +#define SHA224_AUTH_KEY_LENGTH_IN_BYTES (224 / 8) +#define SHA256_AUTH_KEY_LENGTH_IN_BYTES (256 / 8) +#define SHA384_AUTH_KEY_LENGTH_IN_BYTES (384 / 8) +#define SHA512_AUTH_KEY_LENGTH_IN_BYTES (512 / 8) +#define MD5_AUTH_KEY_LENGTH_IN_BYTES (128 / 8) +#define KASUMI_AUTH_KEY_LENGTH_IN_BYTES (128 / 8) + +/* HASH DIGEST LENGTHS */ +#define AES_XCBC_DIGEST_LENGTH_IN_BYTES (128 / 8) +#define AES_XCBC_96_DIGEST_LENGTH_IN_BYTES (96 / 8) +#define MD5_DIGEST_LENGTH_IN_BYTES (128 / 8) +#define SHA1_DIGEST_LENGTH_IN_BYTES (160 / 8) +#define SHA1_96_DIGEST_LENGTH_IN_BYTES (96 / 8) +#define SHA224_DIGEST_LENGTH_IN_BYTES (224 / 8) +#define SHA256_DIGEST_LENGTH_IN_BYTES (256 / 8) +#define SHA384_DIGEST_LENGTH_IN_BYTES (384 / 8) +#define
SHA512_DIGEST_LENGTH_IN_BYTES (512 / 8) +#define KASUMI_DIGEST_LENGTH_IN_BYTES (32 / 8) + +#define IV_LENGTH_16_BYTES (16) +#define IV_LENGTH_8_BYTES (8) + + +/* + * rte_memzone is used to allocate physically contiguous virtual memory. + * In this application we allocate a single block and divide between variables + * which require a virtual to physical mapping for use by the QAT driver. + * Virt2phys is only performed during initialisation and not on the data-path. + */ + +#define LCORE_MEMZONE_SIZE (1 << 22) + +struct lcore_memzone +{ + const struct rte_memzone *memzone; + void *next_free_address; +}; + +/* + * Size the qa software response queue. + * Note: Head and Tail are 8 bit, therefore, the queue is + * fixed to 256 entries. + */ +#define CRYPTO_SOFTWARE_QUEUE_SIZE 256 + +struct qa_callbackQueue { + uint8_t head; + uint8_t tail; + uint16_t numEntries; + struct rte_mbuf *qaCallbackRing[CRYPTO_SOFTWARE_QUEUE_SIZE]; +}; + +struct qa_core_conf { + CpaCySymDpSessionCtx *encryptSessionHandleTbl[NUM_CRYPTO][NUM_HMAC]; + CpaCySymDpSessionCtx *decryptSessionHandleTbl[NUM_CRYPTO][NUM_HMAC]; + CpaInstanceHandle instanceHandle; + struct qa_callbackQueue callbackQueue; + uint64_t qaOutstandingRequests; + uint64_t numResponseAttempts; + uint8_t kickFreq; + void *pPacketIV; + CpaPhysicalAddr packetIVPhy; + struct lcore_memzone lcoreMemzone; +} __rte_cache_aligned; + +#define MAX_CORES (RTE_MAX_LCORE) + +static struct qa_core_conf qaCoreConf[MAX_CORES]; + +/* + *Create maximum possible key size, + *One for cipher and one for hash + */ +struct glob_keys { + uint8_t cipher_key[32]; + uint8_t hash_key[64]; + uint8_t iv[16]; +}; + +struct glob_keys g_crypto_hash_keys = { + .cipher_key = {0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08, + 0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10, + 0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18, + 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20}, + .hash_key = {0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08, + 0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10, + 
0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18, + 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20, + 0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30, + 0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38, + 0x39,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50}, + .iv = {0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08, + 0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10} +}; + +/* + * Offsets from the start of the packet. + * + */ +#define PACKET_DATA_START_PHYS(p) \ + ((p)->buf_physaddr + (p)->data_off) + +/* + * A fixed offset to where the crypto is to be performed, which is the first + * byte after the Ethernet(14 bytes) and IPv4 headers(20 bytes) + */ +#define CRYPTO_START_OFFSET (14+20) +#define HASH_START_OFFSET (14+20) +#define CIPHER_BLOCK_DEFAULT_SIZE (16) +#define HASH_BLOCK_DEFAULT_SIZE (16) + +/* + * Offset to the opdata from the start of the data portion of packet. + * Assumption: The buffer is physically contiguous. + * +18 takes this to the next cache line. + */ + +#define CRYPTO_OFFSET_TO_OPDATA (ETHER_MAX_LEN+18) + +/* + * Default number of requests to place on the hardware ring before kicking the + * ring pointers. + */ +#define CRYPTO_BURST_TX (16) + +/* + * Only call the qa poll function when the number responses in the software + * queue drops below this number. + */ +#define CRYPTO_QUEUED_RESP_POLL_THRESHOLD (32) + +/* + * Limit the number of polls per call to get_next_response. + */ +#define GET_NEXT_RESPONSE_FREQ (32) + +/* + * Max number of responses to pull from the qa in one poll. + */ +#define CRYPTO_MAX_RESPONSE_QUOTA \ + (CRYPTO_SOFTWARE_QUEUE_SIZE-CRYPTO_QUEUED_RESP_POLL_THRESHOLD-1) + +#if (CRYPTO_QUEUED_RESP_POLL_THRESHOLD + CRYPTO_MAX_RESPONSE_QUOTA >= \ + CRYPTO_SOFTWARE_QUEUE_SIZE) +#error Its possible to overflow the qa response Q with current poll and \ + response quota. 
+#endif + +static void +crypto_callback(CpaCySymDpOpData *pOpData, + __rte_unused CpaStatus status, + __rte_unused CpaBoolean verifyResult) +{ + uint32_t lcore_id; + lcore_id = rte_lcore_id(); + struct qa_callbackQueue *callbackQ = &(qaCoreConf[lcore_id].callbackQueue); + + /* + * Received a completion from the QA hardware. + * Place the response on the return queue. + */ + callbackQ->qaCallbackRing[callbackQ->head] = pOpData->pCallbackTag; + callbackQ->head++; + callbackQ->numEntries++; + qaCoreConf[lcore_id].qaOutstandingRequests--; +} + +static void +qa_crypto_callback(CpaCySymDpOpData *pOpData, CpaStatus status, + CpaBoolean verifyResult) +{ + crypto_callback(pOpData, status, verifyResult); +} + +/* + * Each allocation from a particular memzone lasts for the life-time of + * the application. No freeing of previous allocations will occur. + */ +static void * +alloc_memzone_region(uint32_t length, uint32_t lcore_id) +{ + char *current_free_addr_ptr = NULL; + struct lcore_memzone *lcore_memzone = &(qaCoreConf[lcore_id].lcoreMemzone); + + current_free_addr_ptr = lcore_memzone->next_free_address; + + if (current_free_addr_ptr + length >= + (char *)lcore_memzone->memzone->addr + lcore_memzone->memzone->len) { + printf("Crypto: No memory available in memzone\n"); + return NULL; + } + lcore_memzone->next_free_address = current_free_addr_ptr + length; + + return (void *)current_free_addr_ptr; +} + +/* + * Virtual to Physical Address translation is only executed during initialization + * and not on the data-path. 
+ */
+static CpaPhysicalAddr
+qa_v2p(void *ptr)
+{
+	const struct rte_memzone *memzone = NULL;
+	uint32_t lcore_id = 0;
+
+	RTE_LCORE_FOREACH(lcore_id) {
+		memzone = qaCoreConf[lcore_id].lcoreMemzone.memzone;
+
+		/*
+		 * A core's memzone pointer stays NULL until that core has reserved
+		 * its zone in per_core_crypto_init(); skip such cores instead of
+		 * dereferencing NULL.
+		 */
+		if (NULL == memzone)
+			continue;
+
+		if ((char*) ptr >= (char *) memzone->addr &&
+				(char*) ptr < ((char*) memzone->addr + memzone->len)) {
+			return (CpaPhysicalAddr)
+					(memzone->phys_addr + ((char *) ptr - (char*) memzone->addr));
+		}
+	}
+	printf("Crypto: Corresponding physical address not found in memzone\n");
+	return (CpaPhysicalAddr) 0;
+}
+
+/*
+ * Report, through *coreAffinity, the first core whose bit is set in the
+ * affinity bitmap of the given crypto instance.
+ * Returns CPA_STATUS_FAIL if the instance info cannot be read or if no bit
+ * below MAX_CORES is set.
+ */
+static CpaStatus
+getCoreAffinity(Cpa32U *coreAffinity, const CpaInstanceHandle instanceHandle)
+{
+	CpaInstanceInfo2 info;
+	Cpa16U i = 0;
+	CpaStatus status = CPA_STATUS_SUCCESS;
+
+	memset(&info, 0, sizeof(CpaInstanceInfo2));
+
+	status = cpaCyInstanceGetInfo2(instanceHandle, &info);
+	if (CPA_STATUS_SUCCESS != status) {
+		printf("Crypto: Error getting instance info\n");
+		return CPA_STATUS_FAIL;
+	}
+	for (i = 0; i < MAX_CORES; i++) {
+		if (CPA_BITMAP_BIT_TEST(info.coreAffinity, i)) {
+			*coreAffinity = i;
+			return CPA_STATUS_SUCCESS;
+		}
+	}
+	return CPA_STATUS_FAIL;
+}
+
+/*
+ * Find the crypto instance whose core affinity equals lcore_id and store its
+ * handle in *pInstanceHandle.
+ * The temporary array of instance handles is released on every path; only the
+ * selected handle value is handed back to the caller.
+ * Returns CPA_STATUS_SUCCESS when an instance was found, CPA_STATUS_FAIL
+ * otherwise.
+ */
+static CpaStatus
+get_crypto_instance_on_core(CpaInstanceHandle *pInstanceHandle,
+		uint32_t lcore_id)
+{
+	Cpa16U numInstances = 0, i = 0;
+	CpaStatus status = CPA_STATUS_FAIL;
+	CpaInstanceHandle *pLocalInstanceHandles = NULL;
+	Cpa32U coreAffinity = 0;
+
+	status = cpaCyGetNumInstances(&numInstances);
+	if (CPA_STATUS_SUCCESS != status || numInstances == 0) {
+		return CPA_STATUS_FAIL;
+	}
+
+	pLocalInstanceHandles = rte_malloc("pLocalInstanceHandles",
+			sizeof(CpaInstanceHandle) * numInstances, RTE_CACHE_LINE_SIZE);
+
+	if (NULL == pLocalInstanceHandles) {
+		return CPA_STATUS_FAIL;
+	}
+	status = cpaCyGetInstances(numInstances, pLocalInstanceHandles);
+	if (CPA_STATUS_SUCCESS != status) {
+		printf("Crypto: cpaCyGetInstances failed with status: %"PRId32"\n", status);
+		rte_free((void *) pLocalInstanceHandles);
+		return CPA_STATUS_FAIL;
+	}
+
+	for (i = 0; i < numInstances; i++) {
+		status = getCoreAffinity(&coreAffinity, pLocalInstanceHandles[i]);
+		if (CPA_STATUS_SUCCESS != status) {
+			rte_free((void *) pLocalInstanceHandles);
+			return CPA_STATUS_FAIL;
+		}
+		if (coreAffinity == lcore_id) {
+			/* Report the core id, not the index of the instance */
+			printf("Crypto: instance found on core %"PRIu32"\n", lcore_id);
+			*pInstanceHandle = pLocalInstanceHandles[i];
+			/* The handle has been copied out; the array is no longer needed */
+			rte_free((void *) pLocalInstanceHandles);
+			return CPA_STATUS_SUCCESS;
+		}
+	}
+	/* core affinity not found */
+	rte_free((void *) pLocalInstanceHandles);
+	return CPA_STATUS_FAIL;
+}
+
+static CpaStatus
+initCySymSession(const int pkt_cipher_alg,
+		const int pkt_hash_alg, const CpaCySymHashMode hashMode,
+		const CpaCySymCipherDirection crypto_direction,
+		CpaCySymSessionCtx **ppSessionCtx,
+		const CpaInstanceHandle cyInstanceHandle,
+		const uint32_t lcore_id)
+{
+	Cpa32U sessionCtxSizeInBytes = 0;
+	CpaStatus status = CPA_STATUS_FAIL;
+	CpaBoolean isCrypto = CPA_TRUE, isHmac = CPA_TRUE;
+	CpaCySymSessionSetupData sessionSetupData;
+
+	memset(&sessionSetupData, 0, sizeof(CpaCySymSessionSetupData));
+
+	/* Assumption: key length is set to each algorithm's max length */
+	switch (pkt_cipher_alg) {
+	case NO_CIPHER:
+		isCrypto = CPA_FALSE;
+		break;
+	case CIPHER_DES:
+		sessionSetupData.cipherSetupData.cipherAlgorithm =
+				CPA_CY_SYM_CIPHER_DES_ECB;
+		sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+				KEY_SIZE_64_IN_BYTES;
+		break;
+	case CIPHER_DES_CBC:
+		sessionSetupData.cipherSetupData.cipherAlgorithm =
+				CPA_CY_SYM_CIPHER_DES_CBC;
+		sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+				KEY_SIZE_64_IN_BYTES;
+		break;
+	case CIPHER_DES3:
+		sessionSetupData.cipherSetupData.cipherAlgorithm =
+				CPA_CY_SYM_CIPHER_3DES_ECB;
+		sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+				KEY_SIZE_192_IN_BYTES;
+		break;
+	case CIPHER_DES3_CBC:
+		sessionSetupData.cipherSetupData.cipherAlgorithm =
+				CPA_CY_SYM_CIPHER_3DES_CBC;
+		sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+				KEY_SIZE_192_IN_BYTES;
+		break;
+	case CIPHER_AES:
+		sessionSetupData.cipherSetupData.cipherAlgorithm =
CPA_CY_SYM_CIPHER_AES_ECB; + sessionSetupData.cipherSetupData.cipherKeyLenInBytes = + KEY_SIZE_128_IN_BYTES; + break; + case CIPHER_AES_CBC_128: + sessionSetupData.cipherSetupData.cipherAlgorithm = + CPA_CY_SYM_CIPHER_AES_CBC; + sessionSetupData.cipherSetupData.cipherKeyLenInBytes = + KEY_SIZE_128_IN_BYTES; + break; + case CIPHER_KASUMI_F8: + sessionSetupData.cipherSetupData.cipherAlgorithm = + CPA_CY_SYM_CIPHER_KASUMI_F8; + sessionSetupData.cipherSetupData.cipherKeyLenInBytes = + KEY_SIZE_128_IN_BYTES; + break; + default: + printf("Crypto: Undefined Cipher specified\n"); + break; + } + /* Set the cipher direction */ + if (isCrypto) { + sessionSetupData.cipherSetupData.cipherDirection = crypto_direction; + sessionSetupData.cipherSetupData.pCipherKey = + g_crypto_hash_keys.cipher_key; + sessionSetupData.symOperation = CPA_CY_SYM_OP_CIPHER; + } + + /* Setup Hash common fields */ + switch (pkt_hash_alg) { + case NO_HASH: + isHmac = CPA_FALSE; + break; + case HASH_AES_XCBC: + sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_XCBC; + sessionSetupData.hashSetupData.digestResultLenInBytes = + AES_XCBC_DIGEST_LENGTH_IN_BYTES; + break; + case HASH_AES_XCBC_96: + sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_XCBC; + sessionSetupData.hashSetupData.digestResultLenInBytes = + AES_XCBC_96_DIGEST_LENGTH_IN_BYTES; + break; + case HASH_MD5: + sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_MD5; + sessionSetupData.hashSetupData.digestResultLenInBytes = + MD5_DIGEST_LENGTH_IN_BYTES; + break; + case HASH_SHA1: + sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA1; + sessionSetupData.hashSetupData.digestResultLenInBytes = + SHA1_DIGEST_LENGTH_IN_BYTES; + break; + case HASH_SHA1_96: + sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA1; + sessionSetupData.hashSetupData.digestResultLenInBytes = + SHA1_96_DIGEST_LENGTH_IN_BYTES; + break; + case HASH_SHA224: + sessionSetupData.hashSetupData.hashAlgorithm = 
CPA_CY_SYM_HASH_SHA224;
+		sessionSetupData.hashSetupData.digestResultLenInBytes =
+				SHA224_DIGEST_LENGTH_IN_BYTES;
+		break;
+	case HASH_SHA256:
+		sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA256;
+		sessionSetupData.hashSetupData.digestResultLenInBytes =
+				SHA256_DIGEST_LENGTH_IN_BYTES;
+		break;
+	case HASH_SHA384:
+		sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA384;
+		sessionSetupData.hashSetupData.digestResultLenInBytes =
+				SHA384_DIGEST_LENGTH_IN_BYTES;
+		break;
+	case HASH_SHA512:
+		sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA512;
+		sessionSetupData.hashSetupData.digestResultLenInBytes =
+				SHA512_DIGEST_LENGTH_IN_BYTES;
+		break;
+	case HASH_KASUMI_F9:
+		sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_KASUMI_F9;
+		sessionSetupData.hashSetupData.digestResultLenInBytes =
+				KASUMI_DIGEST_LENGTH_IN_BYTES;
+		break;
+	default:
+		printf("Crypto: Undefined Hash specified\n");
+		break;
+	}
+	if (isHmac) {
+		sessionSetupData.hashSetupData.hashMode = hashMode;
+		/* Hash-only for now; switched to algorithm chaining below if a cipher is set too */
+		sessionSetupData.symOperation = CPA_CY_SYM_OP_HASH;
+		/* If using authenticated hash setup key lengths */
+		if (CPA_CY_SYM_HASH_MODE_AUTH == hashMode) {
+			/* Use a common max length key */
+			sessionSetupData.hashSetupData.authModeSetupData.authKey =
+					g_crypto_hash_keys.hash_key;
+			switch (pkt_hash_alg) {
+			case HASH_AES_XCBC:
+			case HASH_AES_XCBC_96:
+				sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+						AES_XCBC_AUTH_KEY_LENGTH_IN_BYTES;
+				break;
+			case HASH_MD5:
+				/* MD5 has its own key length constant; do not borrow SHA1's */
+				sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+						MD5_AUTH_KEY_LENGTH_IN_BYTES;
+				break;
+			case HASH_SHA1:
+			case HASH_SHA1_96:
+				sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+						SHA1_AUTH_KEY_LENGTH_IN_BYTES;
+				break;
+			case HASH_SHA224:
+				sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+						SHA224_AUTH_KEY_LENGTH_IN_BYTES;
+				break;
+			case HASH_SHA256:
+				sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+						SHA256_AUTH_KEY_LENGTH_IN_BYTES;
+				break;
+			case HASH_SHA384:
+				sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+						SHA384_AUTH_KEY_LENGTH_IN_BYTES;
+				break;
+			case HASH_SHA512:
+				sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+						SHA512_AUTH_KEY_LENGTH_IN_BYTES;
+				break;
+			case HASH_KASUMI_F9:
+				sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+						KASUMI_AUTH_KEY_LENGTH_IN_BYTES;
+				break;
+			default:
+				printf("Crypto: Undefined Hash specified\n");
+				return CPA_STATUS_FAIL;
+			}
+		}
+	}
+
+	/* Only high priority supported */
+	sessionSetupData.sessionPriority = CPA_CY_PRIORITY_HIGH;
+
+	/* If chaining algorithms */
+	if (isCrypto && isHmac) {
+		sessionSetupData.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING;
+		/* @assumption Alg Chain order is cipher then hash for encrypt
+		 * and hash then cipher for decrypt */
+		if (CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT == crypto_direction) {
+			sessionSetupData.algChainOrder =
+					CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH;
+		} else {
+			sessionSetupData.algChainOrder =
+					CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER;
+		}
+	}
+	/* Neither a cipher nor a hash requested: there is no session to create */
+	if (!isCrypto && !isHmac) {
+		*ppSessionCtx = NULL;
+		return CPA_STATUS_SUCCESS;
+	}
+
+	/* Set flags for digest operations */
+	sessionSetupData.digestIsAppended = CPA_FALSE;
+	sessionSetupData.verifyDigest = CPA_TRUE;
+
+	/* Get the session context size based on the crypto and/or hash operations */
+	status = cpaCySymDpSessionCtxGetSize(cyInstanceHandle, &sessionSetupData,
+			&sessionCtxSizeInBytes);
+	if (CPA_STATUS_SUCCESS != status) {
+		printf("Crypto: cpaCySymDpSessionCtxGetSize error, status: %"PRId32"\n",
+				status);
+		return CPA_STATUS_FAIL;
+	}
+
+	/* Session contexts are carved out of the per-lcore memzone and never freed */
+	*ppSessionCtx = alloc_memzone_region(sessionCtxSizeInBytes, lcore_id);
+	if (NULL == *ppSessionCtx) {
+		printf("Crypto: Failed to allocate memory for Session Context\n");
+		return CPA_STATUS_FAIL;
+	}
+
+	status =
cpaCySymDpInitSession(cyInstanceHandle, &sessionSetupData, + *ppSessionCtx); + if (CPA_STATUS_SUCCESS != status) { + printf("Crypto: cpaCySymDpInitSession failed with status %"PRId32"\n", status); + return CPA_STATUS_FAIL; + } + return CPA_STATUS_SUCCESS; +} + +static CpaStatus +initSessionDataTables(struct qa_core_conf *qaCoreConf,uint32_t lcore_id) +{ + Cpa32U i = 0, j = 0; + CpaStatus status = CPA_STATUS_FAIL; + for (i = 0; i < NUM_CRYPTO; i++) { + for (j = 0; j < NUM_HMAC; j++) { + if (((i == CIPHER_KASUMI_F8) && (j != NO_HASH) && (j != HASH_KASUMI_F9)) || + ((i != NO_CIPHER) && (i != CIPHER_KASUMI_F8) && (j == HASH_KASUMI_F9))) + continue; + status = initCySymSession(i, j, CPA_CY_SYM_HASH_MODE_AUTH, + CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT, + &qaCoreConf->encryptSessionHandleTbl[i][j], + qaCoreConf->instanceHandle, + lcore_id); + if (CPA_STATUS_SUCCESS != status) { + printf("Crypto: Failed to initialize Encrypt sessions\n"); + return CPA_STATUS_FAIL; + } + status = initCySymSession(i, j, CPA_CY_SYM_HASH_MODE_AUTH, + CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT, + &qaCoreConf->decryptSessionHandleTbl[i][j], + qaCoreConf->instanceHandle, + lcore_id); + if (CPA_STATUS_SUCCESS != status) { + printf("Crypto: Failed to initialize Decrypt sessions\n"); + return CPA_STATUS_FAIL; + } + } + } + return CPA_STATUS_SUCCESS; +} + +int +crypto_init(void) +{ + if (CPA_STATUS_SUCCESS != icp_sal_userStartMultiProcess("SSL",CPA_FALSE)) { + printf("Crypto: Could not start sal for user space\n"); + return CPA_STATUS_FAIL; + } + printf("Crypto: icp_sal_userStartMultiProcess(\"SSL\",CPA_FALSE)\n"); + return 0; +} + +/* + * Per core initialisation + */ +int +per_core_crypto_init(uint32_t lcore_id) +{ + CpaStatus status = CPA_STATUS_FAIL; + char memzone_name[RTE_MEMZONE_NAMESIZE]; + + int socketID = rte_lcore_to_socket_id(lcore_id); + + /* Allocate software ring for response messages. 
*/ + + qaCoreConf[lcore_id].callbackQueue.head = 0; + qaCoreConf[lcore_id].callbackQueue.tail = 0; + qaCoreConf[lcore_id].callbackQueue.numEntries = 0; + qaCoreConf[lcore_id].kickFreq = 0; + qaCoreConf[lcore_id].qaOutstandingRequests = 0; + qaCoreConf[lcore_id].numResponseAttempts = 0; + + /* Initialise and reserve lcore memzone for virt2phys translation */ + snprintf(memzone_name, + RTE_MEMZONE_NAMESIZE, + "lcore_%u", + lcore_id); + + qaCoreConf[lcore_id].lcoreMemzone.memzone = rte_memzone_reserve( + memzone_name, + LCORE_MEMZONE_SIZE, + socketID, + 0); + if (NULL == qaCoreConf[lcore_id].lcoreMemzone.memzone) { + printf("Crypto: Error allocating memzone on lcore %u\n",lcore_id); + return -1; + } + qaCoreConf[lcore_id].lcoreMemzone.next_free_address = + qaCoreConf[lcore_id].lcoreMemzone.memzone->addr; + + qaCoreConf[lcore_id].pPacketIV = alloc_memzone_region(IV_LENGTH_16_BYTES, + lcore_id); + + if (NULL == qaCoreConf[lcore_id].pPacketIV ) { + printf("Crypto: Failed to allocate memory for Initialization Vector\n"); + return -1; + } + + memcpy(qaCoreConf[lcore_id].pPacketIV, &g_crypto_hash_keys.iv, + IV_LENGTH_16_BYTES); + + qaCoreConf[lcore_id].packetIVPhy = qa_v2p(qaCoreConf[lcore_id].pPacketIV); + if (0 == qaCoreConf[lcore_id].packetIVPhy) { + printf("Crypto: Invalid physical address for Initialization Vector\n"); + return -1; + } + + /* + * Obtain the instance handle that is mapped to the current lcore. + * This can fail if an instance is not mapped to a bank which has been + * affinitized to the current lcore. 
+	 */
+	status = get_crypto_instance_on_core(&(qaCoreConf[lcore_id].instanceHandle),
+			lcore_id);
+	if (CPA_STATUS_SUCCESS != status) {
+		printf("Crypto: get_crypto_instance_on_core failed with status: %"PRId32"\n",
+				status);
+		return -1;
+	}
+
+	/* Completions for this instance are delivered through qa_crypto_callback */
+	status = cpaCySymDpRegCbFunc(qaCoreConf[lcore_id].instanceHandle,
+			(CpaCySymDpCbFunc) qa_crypto_callback);
+	if (CPA_STATUS_SUCCESS != status) {
+		printf("Crypto: cpaCySymDpRegCbFunc failed with status: %"PRId32"\n", status);
+		return -1;
+	}
+
+	/*
+	 * Set the address translation callback for virtual to physical address
+	 * mapping. This will be called by the QAT driver during initialisation only.
+	 */
+	status = cpaCySetAddressTranslation(qaCoreConf[lcore_id].instanceHandle,
+			(CpaVirtualToPhysical) qa_v2p);
+	if (CPA_STATUS_SUCCESS != status) {
+		printf("Crypto: cpaCySetAddressTranslation failed with status: %"PRId32"\n",
+				status);
+		return -1;
+	}
+
+	status = initSessionDataTables(&qaCoreConf[lcore_id],lcore_id);
+	if (CPA_STATUS_SUCCESS != status) {
+		/* Terminate the line like every other error message in this file */
+		printf("Crypto: Failed to allocate all session tables.\n");
+		return -1;
+	}
+	return 0;
+}
+
+static CpaStatus
+enqueueOp(CpaCySymDpOpData *opData, uint32_t lcore_id)
+{
+
+	CpaStatus status;
+
+	/*
+	 * Assumption is there is no requirement to do load balancing between
+	 * acceleration units - that is one acceleration unit is tied to a core.
+ */ + opData->instanceHandle = qaCoreConf[lcore_id].instanceHandle; + + if ((++qaCoreConf[lcore_id].kickFreq) % CRYPTO_BURST_TX == 0) { + status = cpaCySymDpEnqueueOp(opData, CPA_TRUE); + } else { + status = cpaCySymDpEnqueueOp(opData, CPA_FALSE); + } + + qaCoreConf[lcore_id].qaOutstandingRequests++; + + return status; +} + +void +crypto_flush_tx_queue(uint32_t lcore_id) +{ + + cpaCySymDpPerformOpNow(qaCoreConf[lcore_id].instanceHandle); +} + +enum crypto_result +crypto_encrypt(struct rte_mbuf *rte_buff, enum cipher_alg c, enum hash_alg h) +{ + CpaCySymDpOpData *opData = + rte_pktmbuf_mtod_offset(rte_buff, CpaCySymDpOpData *, + CRYPTO_OFFSET_TO_OPDATA); + uint32_t lcore_id; + + if (unlikely(c >= NUM_CRYPTO || h >= NUM_HMAC)) + return CRYPTO_RESULT_FAIL; + + lcore_id = rte_lcore_id(); + + memset(opData, 0, sizeof(CpaCySymDpOpData)); + + opData->srcBuffer = opData->dstBuffer = PACKET_DATA_START_PHYS(rte_buff); + opData->srcBufferLen = opData->dstBufferLen = rte_buff->data_len; + opData->sessionCtx = qaCoreConf[lcore_id].encryptSessionHandleTbl[c][h]; + opData->thisPhys = PACKET_DATA_START_PHYS(rte_buff) + + CRYPTO_OFFSET_TO_OPDATA; + opData->pCallbackTag = rte_buff; + + /* if no crypto or hash operations are specified return fail */ + if (NO_CIPHER == c && NO_HASH == h) + return CRYPTO_RESULT_FAIL; + + if (NO_CIPHER != c) { + opData->pIv = qaCoreConf[lcore_id].pPacketIV; + opData->iv = qaCoreConf[lcore_id].packetIVPhy; + + if (CIPHER_AES_CBC_128 == c) + opData->ivLenInBytes = IV_LENGTH_16_BYTES; + else + opData->ivLenInBytes = IV_LENGTH_8_BYTES; + + opData->cryptoStartSrcOffsetInBytes = CRYPTO_START_OFFSET; + opData->messageLenToCipherInBytes = rte_buff->data_len + - CRYPTO_START_OFFSET; + /* + * Work around for padding, message length has to be a multiple of + * block size. 
+ */ + opData->messageLenToCipherInBytes -= opData->messageLenToCipherInBytes + % CIPHER_BLOCK_DEFAULT_SIZE; + } + + if (NO_HASH != h) { + + opData->hashStartSrcOffsetInBytes = HASH_START_OFFSET; + opData->messageLenToHashInBytes = rte_buff->data_len + - HASH_START_OFFSET; + /* + * Work around for padding, message length has to be a multiple of block + * size. + */ + opData->messageLenToHashInBytes -= opData->messageLenToHashInBytes + % HASH_BLOCK_DEFAULT_SIZE; + + /* + * Assumption: Ok ignore the passed digest pointer and place HMAC at end + * of packet. + */ + opData->digestResult = rte_buff->buf_physaddr + rte_buff->data_len; + } + + if (CPA_STATUS_SUCCESS != enqueueOp(opData, lcore_id)) { + /* + * Failed to place a packet on the hardware queue. + * Most likely because the QA hardware is busy. + */ + return CRYPTO_RESULT_FAIL; + } + return CRYPTO_RESULT_IN_PROGRESS; +} + +enum crypto_result +crypto_decrypt(struct rte_mbuf *rte_buff, enum cipher_alg c, enum hash_alg h) +{ + + CpaCySymDpOpData *opData = rte_pktmbuf_mtod_offset(rte_buff, void *, + CRYPTO_OFFSET_TO_OPDATA); + uint32_t lcore_id; + + if (unlikely(c >= NUM_CRYPTO || h >= NUM_HMAC)) + return CRYPTO_RESULT_FAIL; + + lcore_id = rte_lcore_id(); + + memset(opData, 0, sizeof(CpaCySymDpOpData)); + + opData->dstBuffer = opData->srcBuffer = PACKET_DATA_START_PHYS(rte_buff); + opData->dstBufferLen = opData->srcBufferLen = rte_buff->data_len; + opData->thisPhys = PACKET_DATA_START_PHYS(rte_buff) + + CRYPTO_OFFSET_TO_OPDATA; + opData->sessionCtx = qaCoreConf[lcore_id].decryptSessionHandleTbl[c][h]; + opData->pCallbackTag = rte_buff; + + /* if no crypto or hmac operations are specified return fail */ + if (NO_CIPHER == c && NO_HASH == h) + return CRYPTO_RESULT_FAIL; + + if (NO_CIPHER != c) { + opData->pIv = qaCoreConf[lcore_id].pPacketIV; + opData->iv = qaCoreConf[lcore_id].packetIVPhy; + + if (CIPHER_AES_CBC_128 == c) + opData->ivLenInBytes = IV_LENGTH_16_BYTES; + else + opData->ivLenInBytes = IV_LENGTH_8_BYTES; + 
+ opData->cryptoStartSrcOffsetInBytes = CRYPTO_START_OFFSET; + opData->messageLenToCipherInBytes = rte_buff->data_len + - CRYPTO_START_OFFSET; + + /* + * Work around for padding, message length has to be a multiple of block + * size. + */ + opData->messageLenToCipherInBytes -= opData->messageLenToCipherInBytes + % CIPHER_BLOCK_DEFAULT_SIZE; + } + if (NO_HASH != h) { + opData->hashStartSrcOffsetInBytes = HASH_START_OFFSET; + opData->messageLenToHashInBytes = rte_buff->data_len + - HASH_START_OFFSET; + /* + * Work around for padding, message length has to be a multiple of block + * size. + */ + opData->messageLenToHashInBytes -= opData->messageLenToHashInBytes + % HASH_BLOCK_DEFAULT_SIZE; + opData->digestResult = rte_buff->buf_physaddr + rte_buff->data_len; + } + + if (CPA_STATUS_SUCCESS != enqueueOp(opData, lcore_id)) { + /* + * Failed to place a packet on the hardware queue. + * Most likely because the QA hardware is busy. + */ + return CRYPTO_RESULT_FAIL; + } + return CRYPTO_RESULT_IN_PROGRESS; +} + +void * +crypto_get_next_response(void) +{ + uint32_t lcore_id; + lcore_id = rte_lcore_id(); + struct qa_callbackQueue *callbackQ = &(qaCoreConf[lcore_id].callbackQueue); + void *entry = NULL; + + if (callbackQ->numEntries) { + entry = callbackQ->qaCallbackRing[callbackQ->tail]; + callbackQ->tail++; + callbackQ->numEntries--; + } + + /* If there are no outstanding requests no need to poll, return entry */ + if (qaCoreConf[lcore_id].qaOutstandingRequests == 0) + return entry; + + if (callbackQ->numEntries < CRYPTO_QUEUED_RESP_POLL_THRESHOLD + && qaCoreConf[lcore_id].numResponseAttempts++ + % GET_NEXT_RESPONSE_FREQ == 0) { + /* + * Only poll the hardware when there is less than + * CRYPTO_QUEUED_RESP_POLL_THRESHOLD elements in the software queue + */ + icp_sal_CyPollDpInstance(qaCoreConf[lcore_id].instanceHandle, + CRYPTO_MAX_RESPONSE_QUOTA); + } + return entry; +} diff --git a/examples/dpdk_qat/crypto.h b/examples/dpdk_qat/crypto.h new file mode 100644 index 
00000000..f68b0b65 --- /dev/null +++ b/examples/dpdk_qat/crypto.h @@ -0,0 +1,90 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CRYPTO_H_ +#define CRYPTO_H_ + +/* Pass Labels/Values to crypto units */ +enum cipher_alg { + /* Option to not do any cryptography */ + NO_CIPHER, + CIPHER_DES, + CIPHER_DES_CBC, + CIPHER_DES3, + CIPHER_DES3_CBC, + CIPHER_AES, + CIPHER_AES_CBC_128, + CIPHER_KASUMI_F8, + NUM_CRYPTO, +}; + +enum hash_alg { + /* Option to not do any hash */ + NO_HASH, + HASH_MD5, + HASH_SHA1, + HASH_SHA1_96, + HASH_SHA224, + HASH_SHA256, + HASH_SHA384, + HASH_SHA512, + HASH_AES_XCBC, + HASH_AES_XCBC_96, + HASH_KASUMI_F9, + NUM_HMAC, +}; + +/* Return value from crypto_{encrypt/decrypt} */ +enum crypto_result { + /* Packet was successfully put into crypto queue */ + CRYPTO_RESULT_IN_PROGRESS, + /* Cryptography has failed in some way */ + CRYPTO_RESULT_FAIL, +}; + +extern enum crypto_result crypto_encrypt(struct rte_mbuf *pkt, enum cipher_alg c, + enum hash_alg h); +extern enum crypto_result crypto_decrypt(struct rte_mbuf *pkt, enum cipher_alg c, + enum hash_alg h); + +extern int crypto_init(void); + +extern int per_core_crypto_init(uint32_t lcore_id); + +extern void crypto_exit(void); + +extern void *crypto_get_next_response(void); + +extern void crypto_flush_tx_queue(uint32_t lcore_id); + +#endif /* CRYPTO_H_ */ diff --git a/examples/dpdk_qat/main.c b/examples/dpdk_qat/main.c new file mode 100644 index 00000000..dc68989a --- /dev/null +++ b/examples/dpdk_qat/main.c @@ -0,0 +1,824 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_string_fns.h> + +#include "crypto.h" + +#define NB_MBUF (32 * 1024) + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +#define TX_QUEUE_FLUSH_MASK 0xFFFFFFFF +#define TSC_COUNT_LIMIT 1000 + +#define ACTION_ENCRYPT 1 +#define ACTION_DECRYPT 2 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static unsigned enabled_port_mask = 0; +static int promiscuous_on = 1; /**< Ports set in promiscuous mode on by default. 
*/ + +/* destination (TX) port for packets received on each port */ +static uint32_t dst_ports[RTE_MAX_ETHPORTS]; + +struct mbuf_table { + uint16_t len; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +struct lcore_rx_queue { + uint8_t port_id; + uint8_t queue_id; +}; + +#define MAX_RX_QUEUE_PER_LCORE 16 + +#define MAX_LCORE_PARAMS 1024 +struct lcore_params { + uint8_t port_id; + uint8_t queue_id; + uint8_t lcore_id; +}; + +static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; +static struct lcore_params lcore_params_array_default[] = { + {0, 0, 2}, + {0, 1, 2}, + {0, 2, 2}, + {1, 0, 2}, + {1, 1, 2}, + {1, 2, 2}, + {2, 0, 2}, + {3, 0, 3}, + {3, 1, 3}, +}; + +static struct lcore_params * lcore_params = lcore_params_array_default; +static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / + sizeof(lcore_params_array_default[0]); + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static struct rte_mempool * pktmbuf_pool[RTE_MAX_NUMA_NODES]; + +struct lcore_conf { + uint64_t tsc; + uint64_t tsc_count; + uint32_t tx_mask; + uint16_t n_rx_queue; + uint16_t rx_queue_list_pos; + struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; + struct mbuf_table rx_mbuf; + uint32_t rx_mbuf_pos; + uint32_t rx_curr_queue; + struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; +} __rte_cache_aligned; + +static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; + +static inline struct rte_mbuf * +nic_rx_get_packet(struct lcore_conf *qconf) +{ + struct rte_mbuf *pkt; + + if 
(unlikely(qconf->n_rx_queue == 0)) + return NULL; + + /* Look for the next queue with packets; return if none */ + if (unlikely(qconf->rx_mbuf_pos == qconf->rx_mbuf.len)) { + uint32_t i; + + qconf->rx_mbuf_pos = 0; + for (i = 0; i < qconf->n_rx_queue; i++) { + qconf->rx_mbuf.len = rte_eth_rx_burst( + qconf->rx_queue_list[qconf->rx_curr_queue].port_id, + qconf->rx_queue_list[qconf->rx_curr_queue].queue_id, + qconf->rx_mbuf.m_table, MAX_PKT_BURST); + + qconf->rx_curr_queue++; + if (unlikely(qconf->rx_curr_queue == qconf->n_rx_queue)) + qconf->rx_curr_queue = 0; + if (likely(qconf->rx_mbuf.len > 0)) + break; + } + if (unlikely(i == qconf->n_rx_queue)) + return NULL; + } + + /* Get the next packet from the current queue; if last packet, go to next queue */ + pkt = qconf->rx_mbuf.m_table[qconf->rx_mbuf_pos]; + qconf->rx_mbuf_pos++; + + return pkt; +} + +static inline void +nic_tx_flush_queues(struct lcore_conf *qconf) +{ + uint8_t portid; + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + struct rte_mbuf **m_table = NULL; + uint16_t queueid, len; + uint32_t n, i; + + if (likely((qconf->tx_mask & (1 << portid)) == 0)) + continue; + + len = qconf->tx_mbufs[portid].len; + if (likely(len == 0)) + continue; + + queueid = qconf->tx_queue_id[portid]; + m_table = qconf->tx_mbufs[portid].m_table; + + n = rte_eth_tx_burst(portid, queueid, m_table, len); + for (i = n; i < len; i++){ + rte_pktmbuf_free(m_table[i]); + } + + qconf->tx_mbufs[portid].len = 0; + } + + qconf->tx_mask = TX_QUEUE_FLUSH_MASK; +} + +static inline void +nic_tx_send_packet(struct rte_mbuf *pkt, uint8_t port) +{ + struct lcore_conf *qconf; + uint32_t lcoreid; + uint16_t len; + + if (unlikely(pkt == NULL)) { + return; + } + + lcoreid = rte_lcore_id(); + qconf = &lcore_conf[lcoreid]; + + len = qconf->tx_mbufs[port].len; + qconf->tx_mbufs[port].m_table[len] = pkt; + len++; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + uint32_t n, i; + uint16_t queueid; + + queueid = 
qconf->tx_queue_id[port]; + n = rte_eth_tx_burst(port, queueid, qconf->tx_mbufs[port].m_table, MAX_PKT_BURST); + for (i = n; i < MAX_PKT_BURST; i++){ + rte_pktmbuf_free(qconf->tx_mbufs[port].m_table[i]); + } + + qconf->tx_mask &= ~(1 << port); + len = 0; + } + + qconf->tx_mbufs[port].len = len; +} + +/* main processing loop */ +static __attribute__((noreturn)) int +main_loop(__attribute__((unused)) void *dummy) +{ + uint32_t lcoreid; + struct lcore_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + lcoreid = rte_lcore_id(); + qconf = &lcore_conf[lcoreid]; + + printf("Thread %u starting...\n", lcoreid); + + for (;;) { + struct rte_mbuf *pkt; + uint32_t pkt_from_nic_rx = 0; + uint8_t port; + + /* Flush TX queues */ + qconf->tsc_count++; + if (unlikely(qconf->tsc_count == TSC_COUNT_LIMIT)) { + uint64_t tsc, diff_tsc; + + tsc = rte_rdtsc(); + + diff_tsc = tsc - qconf->tsc; + if (unlikely(diff_tsc > drain_tsc)) { + nic_tx_flush_queues(qconf); + crypto_flush_tx_queue(lcoreid); + qconf->tsc = tsc; + } + + qconf->tsc_count = 0; + } + + /* + * Check the Intel QuickAssist queues first + * + ***/ + pkt = (struct rte_mbuf *) crypto_get_next_response(); + if (pkt == NULL) { + pkt = nic_rx_get_packet(qconf); + pkt_from_nic_rx = 1; + } + if (pkt == NULL) + continue; + /* Send packet to either QAT encrypt, QAT decrypt or NIC TX */ + if (pkt_from_nic_rx) { + struct ipv4_hdr *ip = rte_pktmbuf_mtod_offset(pkt, + struct ipv4_hdr *, + sizeof(struct ether_hdr)); + if (ip->src_addr & rte_cpu_to_be_32(ACTION_ENCRYPT)) { + if (CRYPTO_RESULT_FAIL == crypto_encrypt(pkt, + (enum cipher_alg)((ip->src_addr >> 16) & 0xFF), + (enum hash_alg)((ip->src_addr >> 8) & 0xFF))) + rte_pktmbuf_free(pkt); + continue; + } + + if (ip->src_addr & rte_cpu_to_be_32(ACTION_DECRYPT)) { + if(CRYPTO_RESULT_FAIL == crypto_decrypt(pkt, + (enum cipher_alg)((ip->src_addr >> 16) & 0xFF), + (enum hash_alg)((ip->src_addr >> 8) & 0xFF))) + rte_pktmbuf_free(pkt); 
+ continue; + } + } + + port = dst_ports[pkt->port]; + + /* Transmit the packet */ + nic_tx_send_packet(pkt, (uint8_t)port); + } +} + +static inline unsigned +get_port_max_rx_queues(uint8_t port_id) +{ + struct rte_eth_dev_info dev_info; + + rte_eth_dev_info_get(port_id, &dev_info); + return dev_info.max_rx_queues; +} + +static inline unsigned +get_port_max_tx_queues(uint8_t port_id) +{ + struct rte_eth_dev_info dev_info; + + rte_eth_dev_info_get(port_id, &dev_info); + return dev_info.max_tx_queues; +} + +static int +check_lcore_params(void) +{ + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + if (lcore_params[i].queue_id >= get_port_max_rx_queues(lcore_params[i].port_id)) { + printf("invalid queue number: %hhu\n", lcore_params[i].queue_id); + return -1; + } + if (!rte_lcore_is_enabled(lcore_params[i].lcore_id)) { + printf("error: lcore %hhu is not enabled in lcore mask\n", + lcore_params[i].lcore_id); + return -1; + } + } + return 0; +} + +static int +check_port_config(const unsigned nb_ports) +{ + unsigned portid; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + portid = lcore_params[i].port_id; + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("port %u is not enabled in port mask\n", portid); + return -1; + } + if (portid >= nb_ports) { + printf("port %u is not present on the board\n", portid); + return -1; + } + } + return 0; +} + +static uint8_t +get_port_n_rx_queues(const uint8_t port) +{ + int queue = -1; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue) + queue = lcore_params[i].queue_id; + } + return (uint8_t)(++queue); +} + +static int +init_lcore_rx_queues(void) +{ + uint16_t i, nb_rx_queue; + uint8_t lcore; + + for (i = 0; i < nb_lcore_params; ++i) { + lcore = lcore_params[i].lcore_id; + nb_rx_queue = lcore_conf[lcore].n_rx_queue; + if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { + printf("error: too many queues (%u) for lcore: %u\n", + 
(unsigned)nb_rx_queue + 1, (unsigned)lcore); + return -1; + } + lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = + lcore_params[i].port_id; + lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = + lcore_params[i].queue_id; + lcore_conf[lcore].n_rx_queue++; + } + return 0; +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf ("%s [EAL options] -- -p PORTMASK [--no-promisc]" + " [--config '(port,queue,lcore)[,(port,queue,lcore)]']\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " --no-promisc: disable promiscuous mode (default is ON)\n" + " --config '(port,queue,lcore)': rx queues configuration\n", + prgname); +} + +/* + * Parse a hexadecimal port mask. Returns the mask, or 0 when the string is + * empty, has trailing characters, is out of range for strtoul, or does not + * fit in an unsigned int. + */ +static unsigned +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + errno = 0; + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + /* reject values that would be silently truncated by the return type */ + if (errno != 0 || pm != (unsigned)pm) + return 0; + + return (unsigned)pm; +} + +static int +parse_config(const char *q_arg) +{ + char s[256]; + const char *p, *p_end = q_arg; + char *end; + enum fieldnames { + FLD_PORT = 0, + FLD_QUEUE, + FLD_LCORE, + _NUM_FLD + }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int i; + unsigned size; + + nb_lcore_params = 0; + + while ((p = strchr(p_end,'(')) != NULL) { + if (nb_lcore_params >= MAX_LCORE_PARAMS) { + printf("exceeded max number of lcore params: %hu\n", + nb_lcore_params); + return -1; + } + ++p; + if((p_end = strchr(p,')')) == NULL) + return -1; + + size = p_end - p; + if(size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++) { + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT]; + lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE]; + 
lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE]; + ++nb_lcore_params; + } + lcore_params = lcore_params_array; + return 0; +} + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {"config", 1, 0, 0}, + {"no-promisc", 0, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + + /* long options */ + case 0: + if (strcmp(lgopts[option_index].name, "config") == 0) { + ret = parse_config(optarg); + if (ret) { + printf("invalid config\n"); + print_usage(prgname); + return -1; + } + } + if (strcmp(lgopts[option_index].name, "no-promisc") == 0) { + printf("Promiscuous mode disabled\n"); + promiscuous_on = 0; + } + break; + default: + print_usage(prgname); + return -1; + } + } + + if (enabled_port_mask == 0) { + printf("portmask not specified\n"); + print_usage(prgname); + return -1; + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, const struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +static int +init_mem(void) +{ + int socketid; + unsigned lcoreid; + char s[64]; + + RTE_LCORE_FOREACH(lcoreid) { + socketid = rte_lcore_to_socket_id(lcoreid); + if (socketid >= RTE_MAX_NUMA_NODES) { + printf("Socket %d of lcore %u is out of range %d\n", + socketid, lcoreid, RTE_MAX_NUMA_NODES); + return -1; + } + if (pktmbuf_pool[socketid] == NULL) { + snprintf(s, 
sizeof(s), "mbuf_pool_%d", socketid); + pktmbuf_pool[socketid] = + rte_pktmbuf_pool_create(s, NB_MBUF, 32, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, socketid); + if (pktmbuf_pool[socketid] == NULL) { + printf("Cannot init mbuf pool on socket %d\n", socketid); + return -1; + } + printf("Allocated mbuf pool on socket %d\n", socketid); + } + } + return 0; +} + +int +main(int argc, char **argv) +{ + struct lcore_conf *qconf; + struct rte_eth_link link; + int ret; + unsigned nb_ports; + uint16_t queueid; + unsigned lcoreid; + uint32_t nb_tx_queue; + uint8_t portid, nb_rx_queue, queue, socketid, last_port; + unsigned nb_ports_in_mask = 0; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + return -1; + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + return -1; + + if (check_lcore_params() < 0) + rte_panic("check_lcore_params failed\n"); + + ret = init_lcore_rx_queues(); + if (ret < 0) + return -1; + + ret = init_mem(); + if (ret < 0) + return -1; + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + if (check_port_config(nb_ports) < 0) + rte_panic("check_port_config failed\n"); + + /* reset dst_ports */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) + dst_ports[portid] = 0; + last_port = 0; + + /* + * Each logical core is assigned a dedicated TX queue on each port. 
+ */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + + if (nb_ports_in_mask % 2) { + dst_ports[portid] = last_port; + dst_ports[last_port] = portid; + } + else + last_port = portid; + + nb_ports_in_mask++; + } + if (nb_ports_in_mask % 2) { + printf("Notice: odd number of ports in portmask.\n"); + dst_ports[last_port] = last_port; + } + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + + /* init port */ + printf("Initializing port %d ... ", portid ); + fflush(stdout); + + nb_rx_queue = get_port_n_rx_queues(portid); + if (nb_rx_queue > get_port_max_rx_queues(portid)) + rte_panic("Number of rx queues %d exceeds max number of rx queues %u" + " for port %d\n", nb_rx_queue, get_port_max_rx_queues(portid), + portid); + nb_tx_queue = rte_lcore_count(); + if (nb_tx_queue > get_port_max_tx_queues(portid)) + rte_panic("Number of lcores %u exceeds max number of tx queues %u" + " for port %d\n", nb_tx_queue, get_port_max_tx_queues(portid), + portid); + printf("Creating queues: nb_rxq=%d nb_txq=%u... 
", + nb_rx_queue, (unsigned)nb_tx_queue ); + ret = rte_eth_dev_configure(portid, nb_rx_queue, + (uint16_t)nb_tx_queue, &port_conf); + if (ret < 0) + rte_panic("Cannot configure device: err=%d, port=%d\n", + ret, portid); + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + printf(", "); + + /* init one TX queue per couple (lcore,port) */ + queueid = 0; + RTE_LCORE_FOREACH(lcoreid) { + socketid = (uint8_t)rte_lcore_to_socket_id(lcoreid); + printf("txq=%u,%d,%d ", lcoreid, queueid, socketid); + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, + socketid, + NULL); + if (ret < 0) + rte_panic("rte_eth_tx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + + qconf = &lcore_conf[lcoreid]; + qconf->tx_queue_id[portid] = queueid; + queueid++; + } + printf("\n"); + } + + RTE_LCORE_FOREACH(lcoreid) { + qconf = &lcore_conf[lcoreid]; + printf("\nInitializing rx queues on lcore %u ... ", lcoreid ); + fflush(stdout); + /* init RX queues */ + for(queue = 0; queue < qconf->n_rx_queue; ++queue) { + portid = qconf->rx_queue_list[queue].port_id; + queueid = qconf->rx_queue_list[queue].queue_id; + socketid = (uint8_t)rte_lcore_to_socket_id(lcoreid); + printf("rxq=%d,%d,%d ", portid, queueid, socketid); + fflush(stdout); + + ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, + socketid, + NULL, + pktmbuf_pool[socketid]); + if (ret < 0) + rte_panic("rte_eth_rx_queue_setup: err=%d," + "port=%d\n", ret, portid); + } + } + + printf("\n"); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_panic("rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + + printf("done: Port %d ", portid); + + /* get link status */ + rte_eth_link_get(portid, &link); + if (link.link_status) + printf(" Link Up - speed %u Mbps - %s\n", + (unsigned) link.link_speed, + 
(link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex")); /* the format string already ends the line */ + else + printf(" Link Down\n"); + /* + * If enabled, put device in promiscuous mode. + * This allows IO forwarding mode to forward packets + * to itself through 2 cross-connected ports of the + * target machine. + */ + if (promiscuous_on) + rte_eth_promiscuous_enable(portid); + } + printf("Crypto: Initializing Crypto...\n"); + if (crypto_init() != 0) + return -1; + + RTE_LCORE_FOREACH(lcoreid) { + if (per_core_crypto_init(lcoreid) != 0) { + printf("Crypto: Cannot init lcore crypto on lcore %u\n", (unsigned)lcoreid); + return -1; + } + } + printf("Crypto: Initialization complete\n"); + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcoreid) { + if (rte_eal_wait_lcore(lcoreid) < 0) + return -1; + } + + return 0; +} diff --git a/examples/ethtool/Makefile b/examples/ethtool/Makefile new file mode 100644 index 00000000..995cd25b --- /dev/null +++ b/examples/ethtool/Makefile @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overwritten by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(info This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +else + +DIRS-y += lib ethtool-app +endif + +include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/ethtool/ethtool-app/Makefile b/examples/ethtool/ethtool-app/Makefile new file mode 100644 index 00000000..09c66ad1 --- /dev/null +++ b/examples/ethtool/ethtool-app/Makefile @@ -0,0 +1,54 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = ethtool + +# all source are stored in SRCS-y +SRCS-y := main.c ethapp.c + +CFLAGS += -O3 -D_GNU_SOURCE -pthread -I$(SRCDIR)/../lib +CFLAGS += $(WERROR_FLAGS) + +LDLIBS += -L$(subst ethtool-app,lib,$(RTE_OUTPUT))/lib +LDLIBS += -lrte_ethtool + + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/ethtool/ethtool-app/ethapp.c b/examples/ethtool/ethtool-app/ethapp.c new file mode 100644 index 00000000..2ed4796d --- /dev/null +++ b/examples/ethtool/ethtool-app/ethapp.c @@ -0,0 +1,873 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_etheraddr.h> +#include <cmdline_socket.h> +#include <cmdline.h> + +#include "rte_ethtool.h" +#include "ethapp.h" + +#define EEPROM_DUMP_CHUNKSIZE 1024 + + +struct pcmd_get_params { + cmdline_fixed_string_t cmd; +}; +struct pcmd_int_params { + cmdline_fixed_string_t cmd; + uint16_t port; +}; +struct pcmd_intstr_params { + cmdline_fixed_string_t cmd; + uint16_t port; + cmdline_fixed_string_t opt; +}; +struct pcmd_intmac_params { + cmdline_fixed_string_t cmd; + uint16_t port; + struct ether_addr mac; +}; +struct pcmd_str_params { + cmdline_fixed_string_t cmd; + cmdline_fixed_string_t opt; +}; +struct pcmd_vlan_params { + cmdline_fixed_string_t cmd; + uint16_t port; + cmdline_fixed_string_t mode; + uint16_t vid; +}; +struct pcmd_intintint_params { + cmdline_fixed_string_t cmd; + uint16_t port; + uint16_t tx; + uint16_t rx; +}; + + +/* Parameter-less commands */ +cmdline_parse_token_string_t pcmd_quit_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_get_params, cmd, "quit"); +cmdline_parse_token_string_t pcmd_stats_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_get_params, cmd, "stats"); +cmdline_parse_token_string_t pcmd_drvinfo_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_get_params, cmd, "drvinfo"); +cmdline_parse_token_string_t pcmd_link_token_cmd = + TOKEN_STRING_INITIALIZER(struct 
pcmd_get_params, cmd, "link"); + +/* Commands taking just port id */ +cmdline_parse_token_string_t pcmd_open_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_int_params, cmd, "open"); +cmdline_parse_token_string_t pcmd_stop_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_int_params, cmd, "stop"); +cmdline_parse_token_string_t pcmd_rxmode_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_int_params, cmd, "rxmode"); +cmdline_parse_token_string_t pcmd_portstats_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_int_params, cmd, "portstats"); +cmdline_parse_token_num_t pcmd_int_token_port = + TOKEN_NUM_INITIALIZER(struct pcmd_int_params, port, UINT16); + +/* Commands taking port id and string */ +cmdline_parse_token_string_t pcmd_eeprom_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, cmd, "eeprom"); +cmdline_parse_token_string_t pcmd_mtu_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, cmd, "mtu"); +cmdline_parse_token_string_t pcmd_regs_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, cmd, "regs"); + +cmdline_parse_token_num_t pcmd_intstr_token_port = + TOKEN_NUM_INITIALIZER(struct pcmd_intstr_params, port, UINT16); +cmdline_parse_token_string_t pcmd_intstr_token_opt = + TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, opt, NULL); + +/* Commands taking port id and a MAC address string */ +cmdline_parse_token_string_t pcmd_macaddr_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_intmac_params, cmd, "macaddr"); +cmdline_parse_token_num_t pcmd_intmac_token_port = + TOKEN_NUM_INITIALIZER(struct pcmd_intmac_params, port, UINT16); +cmdline_parse_token_etheraddr_t pcmd_intmac_token_mac = + TOKEN_ETHERADDR_INITIALIZER(struct pcmd_intmac_params, mac); + +/* Command taking just a MAC address */ +cmdline_parse_token_string_t pcmd_validate_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_intmac_params, cmd, "validate"); + + +/* Commands taking port id and two integers */ +cmdline_parse_token_string_t 
pcmd_ringparam_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_intintint_params, cmd, + "ringparam"); +cmdline_parse_token_num_t pcmd_intintint_token_port = + TOKEN_NUM_INITIALIZER(struct pcmd_intintint_params, port, UINT16); +cmdline_parse_token_num_t pcmd_intintint_token_tx = + TOKEN_NUM_INITIALIZER(struct pcmd_intintint_params, tx, UINT16); +cmdline_parse_token_num_t pcmd_intintint_token_rx = + TOKEN_NUM_INITIALIZER(struct pcmd_intintint_params, rx, UINT16); + + +/* Pause commands */ +cmdline_parse_token_string_t pcmd_pause_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, cmd, "pause"); +cmdline_parse_token_num_t pcmd_pause_token_port = + TOKEN_NUM_INITIALIZER(struct pcmd_intstr_params, port, UINT16); +cmdline_parse_token_string_t pcmd_pause_token_opt = + TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, + opt, "all#tx#rx#none"); + +/* VLAN commands */ +cmdline_parse_token_string_t pcmd_vlan_token_cmd = + TOKEN_STRING_INITIALIZER(struct pcmd_vlan_params, cmd, "vlan"); +cmdline_parse_token_num_t pcmd_vlan_token_port = + TOKEN_NUM_INITIALIZER(struct pcmd_vlan_params, port, UINT16); +cmdline_parse_token_string_t pcmd_vlan_token_mode = + TOKEN_STRING_INITIALIZER(struct pcmd_vlan_params, mode, "add#del"); +cmdline_parse_token_num_t pcmd_vlan_token_vid = + TOKEN_NUM_INITIALIZER(struct pcmd_vlan_params, vid, UINT16); + + +static void +pcmd_quit_callback(__rte_unused void *ptr_params, + struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + cmdline_quit(ctx); +} + + +static void +pcmd_drvinfo_callback(__rte_unused void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct ethtool_drvinfo info; + int id_port; + + for (id_port = 0; id_port < rte_eth_dev_count(); id_port++) { + if (rte_ethtool_get_drvinfo(id_port, &info)) { + printf("Error getting info for port %i\n", id_port); + return; + } + printf("Port %i driver: %s (ver: %s)\n", + id_port, info.driver, info.version + ); + } +} + + +static void 
+pcmd_link_callback(__rte_unused void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + int num_ports = rte_eth_dev_count(); + int id_port, stat_port; + + for (id_port = 0; id_port < num_ports; id_port++) { + if (!rte_eth_dev_is_valid_port(id_port)) + continue; + stat_port = rte_ethtool_get_link(id_port); + switch (stat_port) { + case 0: + printf("Port %i: Down\n", id_port); + break; + case 1: + printf("Port %i: Up\n", id_port); + break; + default: + printf("Port %i: Error getting link status\n", + id_port + ); + break; + } + } + printf("\n"); +} + + +static void +pcmd_regs_callback(void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_intstr_params *params = ptr_params; + int len_regs; + struct ethtool_regs regs; + unsigned char *buf_data; + FILE *fp_regs; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + len_regs = rte_ethtool_get_regs_len(params->port); + if (len_regs > 0) { + printf("Port %i: %i bytes\n", params->port, len_regs); + buf_data = malloc(len_regs); + if (buf_data == NULL) { + printf("Error allocating %i bytes for buffer\n", + len_regs); + return; + } + if (!rte_ethtool_get_regs(params->port, &regs, buf_data)) { + fp_regs = fopen(params->opt, "wb"); + if (fp_regs == NULL) { + printf("Error opening '%s' for writing\n", + params->opt); + } else { + if ((int)fwrite(buf_data, + 1, len_regs, + fp_regs) != len_regs) + printf("Error writing '%s'\n", + params->opt); + fclose(fp_regs); + } + } + free(buf_data); + } else if (len_regs == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else + printf("Port %i: Error getting registers\n", params->port); +} + + +static void +pcmd_eeprom_callback(void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_intstr_params *params = ptr_params; + struct ethtool_eeprom info_eeprom; + int len_eeprom;
+ int pos_eeprom; + int stat; + unsigned char bytes_eeprom[EEPROM_DUMP_CHUNKSIZE]; + FILE *fp_eeprom; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + len_eeprom = rte_ethtool_get_eeprom_len(params->port); + if (len_eeprom > 0) { + fp_eeprom = fopen(params->opt, "wb"); + if (fp_eeprom == NULL) { + printf("Error opening '%s' for writing\n", + params->opt); + return; + } + printf("Total EEPROM length: %i bytes\n", len_eeprom); + info_eeprom.len = EEPROM_DUMP_CHUNKSIZE; + for (pos_eeprom = 0; + pos_eeprom < len_eeprom; + pos_eeprom += EEPROM_DUMP_CHUNKSIZE) { + info_eeprom.offset = pos_eeprom; + if (pos_eeprom + EEPROM_DUMP_CHUNKSIZE > len_eeprom) + info_eeprom.len = len_eeprom - pos_eeprom; + else + info_eeprom.len = EEPROM_DUMP_CHUNKSIZE; + stat = rte_ethtool_get_eeprom( + params->port, &info_eeprom, bytes_eeprom + ); + if (stat != 0) { + printf("EEPROM read error %i\n", stat); + break; + } + if (fwrite(bytes_eeprom, + 1, info_eeprom.len, + fp_eeprom) != info_eeprom.len) { + printf("Error writing '%s'\n", params->opt); + break; + } + } + fclose(fp_eeprom); + } else if (len_eeprom == 0) + printf("Port %i: Device does not have EEPROM\n", params->port); + else if (len_eeprom == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else + printf("Port %i: Error getting EEPROM\n", params->port); +} + + +static void +pcmd_pause_callback(void *ptr_params, + __rte_unused struct cmdline *ctx, + void *ptr_data) +{ + struct pcmd_intstr_params *params = ptr_params; + struct ethtool_pauseparam info; + int stat; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + if (ptr_data != NULL) { + stat = rte_ethtool_get_pauseparam(params->port, &info); + } else { + memset(&info, 0, sizeof(info)); + if (strcasecmp("all", params->opt) == 0) { + info.tx_pause = 1; + info.rx_pause = 1; + } else if (strcasecmp("tx", 
params->opt) == 0) { + info.tx_pause = 1; + info.rx_pause = 0; + } else if (strcasecmp("rx", params->opt) == 0) { + info.tx_pause = 0; + info.rx_pause = 1; + } else { + info.tx_pause = 0; + info.rx_pause = 0; + } + /* Assume auto-negotiation wanted */ + info.autoneg = 1; + stat = rte_ethtool_set_pauseparam(params->port, &info); + } + if (stat == 0) { + if (info.rx_pause && info.tx_pause) + printf("Port %i: Tx & Rx Paused\n", params->port); + else if (info.rx_pause) + printf("Port %i: Rx Paused\n", params->port); + else if (info.tx_pause) + printf("Port %i: Tx Paused\n", params->port); + else + printf("Port %i: Tx & Rx not paused\n", params->port); + } else if (stat == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else + printf("Port %i: Error %i\n", params->port, stat); +} + + +static void +pcmd_open_callback(__rte_unused void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_int_params *params = ptr_params; + int stat; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + lock_port(params->port); + stat = rte_ethtool_net_open(params->port); + mark_port_active(params->port); + unlock_port(params->port); + if (stat == 0) + return; + else if (stat == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else + printf("Port %i: Error opening device\n", params->port); +} + +static void +pcmd_stop_callback(__rte_unused void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_int_params *params = ptr_params; + int stat; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + lock_port(params->port); + stat = rte_ethtool_net_stop(params->port); + mark_port_inactive(params->port); + unlock_port(params->port); + if (stat == 0) + return; + else if (stat == -ENOTSUP) + printf("Port %i: Operation 
not supported\n", params->port); + else + printf("Port %i: Error stopping device\n", params->port); +} + + +static void +pcmd_rxmode_callback(void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_intstr_params *params = ptr_params; + int stat; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + stat = rte_ethtool_net_set_rx_mode(params->port); + if (stat == 0) + return; + else if (stat == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else + printf("Port %i: Error setting rx mode\n", params->port); +} + + +static void +pcmd_macaddr_callback(void *ptr_params, + __rte_unused struct cmdline *ctx, + void *ptr_data) +{ + struct pcmd_intmac_params *params = ptr_params; + struct ether_addr mac_addr; + int stat; + + stat = 0; + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + if (ptr_data != NULL) { + lock_port(params->port); + stat = rte_ethtool_net_set_mac_addr(params->port, + &params->mac); + mark_port_newmac(params->port); + unlock_port(params->port); + if (stat == 0) { + printf("MAC address changed\n"); + return; + } + } else { + stat = rte_ethtool_net_get_mac_addr(params->port, &mac_addr); + if (stat == 0) { + printf( + "Port %i MAC Address: %02x:%02x:%02x:%02x:%02x:%02x\n", + params->port, + mac_addr.addr_bytes[0], + mac_addr.addr_bytes[1], + mac_addr.addr_bytes[2], + mac_addr.addr_bytes[3], + mac_addr.addr_bytes[4], + mac_addr.addr_bytes[5]); + return; + } + } + + printf("Port %i: Error %s\n", params->port, + strerror(-stat)); +} + +static void +pcmd_mtu_callback(void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_intstr_params *params = ptr_params; + int stat; + int new_mtu; + char *ptr_parse_end; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n",
params->port); + return; + } + new_mtu = atoi(params->opt); + new_mtu = strtoul(params->opt, &ptr_parse_end, 10); + if (*ptr_parse_end != '\0' || + new_mtu < ETHER_MIN_MTU || + new_mtu > ETHER_MAX_JUMBO_FRAME_LEN) { + printf("Port %i: Invalid MTU value\n", params->port); + return; + } + stat = rte_ethtool_net_change_mtu(params->port, new_mtu); + if (stat == 0) + printf("Port %i: MTU set to %i\n", params->port, new_mtu); + else if (stat == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else + printf("Port %i: Error setting MTU\n", params->port); +} + + + +static void pcmd_portstats_callback(__rte_unused void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_int_params *params = ptr_params; + struct rte_eth_stats stat_info; + int stat; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + stat = rte_ethtool_net_get_stats64(params->port, &stat_info); + if (stat == 0) { + /* Most of rte_eth_stats is deprecated.. 
*/ + printf("Port %i stats\n", params->port); + printf(" In: %" PRIu64 " (%" PRIu64 " bytes)\n" + " Out: %"PRIu64" (%"PRIu64 " bytes)\n" + " Err: %"PRIu64"\n", + stat_info.ipackets, + stat_info.ibytes, + stat_info.opackets, + stat_info.obytes, + stat_info.ierrors+stat_info.oerrors + ); + } else if (stat == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else + printf("Port %i: Error fetching statistics\n", params->port); +} + +static void pcmd_ringparam_callback(__rte_unused void *ptr_params, + __rte_unused struct cmdline *ctx, + void *ptr_data) +{ + struct pcmd_intintint_params *params = ptr_params; + struct ethtool_ringparam ring_data; + struct ethtool_ringparam ring_params; + int stat; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + if (ptr_data == NULL) { + stat = rte_ethtool_get_ringparam(params->port, &ring_data); + if (stat == 0) { + printf("Port %i ring parameters\n" + " Rx Pending: %i (%i max)\n" + " Tx Pending: %i (%i max)\n", + params->port, + ring_data.rx_pending, + ring_data.rx_max_pending, + ring_data.tx_pending, + ring_data.tx_max_pending); + } + } else { + if (params->tx < 1 || params->rx < 1) { + printf("Error: Invalid parameters\n"); + return; + } + memset(&ring_params, 0, sizeof(struct ethtool_ringparam)); + ring_params.tx_pending = params->tx; + ring_params.rx_pending = params->rx; + lock_port(params->port); + stat = rte_ethtool_set_ringparam(params->port, &ring_params); + unlock_port(params->port); + } + if (stat == 0) + return; + else if (stat == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else + printf("Port %i: Error fetching statistics\n", params->port); +} + +static void pcmd_validate_callback(void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_intmac_params *params = ptr_params; + + if (rte_ethtool_net_validate_addr(0, &params->mac)) + printf("Address is
unicast\n"); + else + printf("Address is not unicast\n"); +} + + +static void pcmd_vlan_callback(__rte_unused void *ptr_params, + __rte_unused struct cmdline *ctx, + __rte_unused void *ptr_data) +{ + struct pcmd_vlan_params *params = ptr_params; + int stat; + + if (!rte_eth_dev_is_valid_port(params->port)) { + printf("Error: Invalid port number %i\n", params->port); + return; + } + stat = 0; + + if (strcasecmp("add", params->mode) == 0) { + stat = rte_ethtool_net_vlan_rx_add_vid( + params->port, params->vid + ); + if (stat == 0) + printf("VLAN vid %i added\n", params->vid); + + } else if (strcasecmp("del", params->mode) == 0) { + stat = rte_ethtool_net_vlan_rx_kill_vid( + params->port, params->vid + ); + if (stat == 0) + printf("VLAN vid %i removed\n", params->vid); + } else { + /* Should not happen! */ + printf("Error: Bad mode %s\n", params->mode); + } + if (stat == -ENOTSUP) + printf("Port %i: Operation not supported\n", params->port); + else if (stat == -ENOSYS) + printf("Port %i: VLAN filtering disabled\n", params->port); + else if (stat != 0) + printf("Port %i: Error changing VLAN setup (code %i)\n", + params->port, -stat); +} + + +cmdline_parse_inst_t pcmd_quit = { + .f = pcmd_quit_callback, + .data = NULL, + .help_str = "quit\n Exit program", + .tokens = {(void *)&pcmd_quit_token_cmd, NULL}, +}; +cmdline_parse_inst_t pcmd_drvinfo = { + .f = pcmd_drvinfo_callback, + .data = NULL, + .help_str = "drvinfo\n Print driver info", + .tokens = {(void *)&pcmd_drvinfo_token_cmd, NULL}, +}; +cmdline_parse_inst_t pcmd_link = { + .f = pcmd_link_callback, + .data = NULL, + .help_str = "link\n Print port link states", + .tokens = {(void *)&pcmd_link_token_cmd, NULL}, +}; +cmdline_parse_inst_t pcmd_regs = { + .f = pcmd_regs_callback, + .data = NULL, + .help_str = "regs <port_id> <filename>\n" + " Dump port register(s) to file", + .tokens = { + (void *)&pcmd_regs_token_cmd, + (void *)&pcmd_intstr_token_port, + (void *)&pcmd_intstr_token_opt, + NULL + }, +}; 
+cmdline_parse_inst_t pcmd_eeprom = { + .f = pcmd_eeprom_callback, + .data = NULL, + .help_str = "eeprom <port_id> <filename>\n Dump EEPROM to file", + .tokens = { + (void *)&pcmd_eeprom_token_cmd, + (void *)&pcmd_intstr_token_port, + (void *)&pcmd_intstr_token_opt, + NULL + }, +}; +cmdline_parse_inst_t pcmd_pause_noopt = { + .f = pcmd_pause_callback, + .data = (void *)0x01, + .help_str = "pause <port_id>\n Print port pause state", + .tokens = { + (void *)&pcmd_pause_token_cmd, + (void *)&pcmd_pause_token_port, + NULL + }, +}; +cmdline_parse_inst_t pcmd_pause = { + .f = pcmd_pause_callback, + .data = NULL, + .help_str = + "pause <port_id> <all|tx|rx|none>\n Pause/unpause port", + .tokens = { + (void *)&pcmd_pause_token_cmd, + (void *)&pcmd_pause_token_port, + (void *)&pcmd_pause_token_opt, + NULL + }, +}; +cmdline_parse_inst_t pcmd_open = { + .f = pcmd_open_callback, + .data = NULL, + .help_str = "open <port_id>\n Open port", + .tokens = { + (void *)&pcmd_open_token_cmd, + (void *)&pcmd_int_token_port, + NULL + }, +}; +cmdline_parse_inst_t pcmd_stop = { + .f = pcmd_stop_callback, + .data = NULL, + .help_str = "stop <port_id>\n Stop port", + .tokens = { + (void *)&pcmd_stop_token_cmd, + (void *)&pcmd_int_token_port, + NULL + }, +}; +cmdline_parse_inst_t pcmd_rxmode = { + .f = pcmd_rxmode_callback, + .data = NULL, + .help_str = "rxmode <port_id>\n Toggle port Rx mode", + .tokens = { + (void *)&pcmd_rxmode_token_cmd, + (void *)&pcmd_int_token_port, + NULL + }, +}; +cmdline_parse_inst_t pcmd_macaddr_get = { + .f = pcmd_macaddr_callback, + .data = NULL, + .help_str = "macaddr <port_id>\n" + " Get MAC address", + .tokens = { + (void *)&pcmd_macaddr_token_cmd, + (void *)&pcmd_intstr_token_port, + NULL + }, +}; +cmdline_parse_inst_t pcmd_macaddr = { + .f = pcmd_macaddr_callback, + .data = (void *)0x01, + .help_str = + "macaddr <port_id> <mac_addr>\n" + " Set MAC address", + .tokens = { + (void *)&pcmd_macaddr_token_cmd, + (void *)&pcmd_intmac_token_port, + (void 
*)&pcmd_intmac_token_mac, + NULL + }, +}; +cmdline_parse_inst_t pcmd_mtu = { + .f = pcmd_mtu_callback, + .data = NULL, + .help_str = "mtu <port_id> <mtu_value>\n" + " Change MTU", + .tokens = { + (void *)&pcmd_mtu_token_cmd, + (void *)&pcmd_intstr_token_port, + (void *)&pcmd_intstr_token_opt, + NULL + }, +}; +cmdline_parse_inst_t pcmd_portstats = { + .f = pcmd_portstats_callback, + .data = NULL, + .help_str = "portstats <port_id>\n" + " Print port eth statistics", + .tokens = { + (void *)&pcmd_portstats_token_cmd, + (void *)&pcmd_int_token_port, + NULL + }, +}; +cmdline_parse_inst_t pcmd_ringparam = { + .f = pcmd_ringparam_callback, + .data = NULL, + .help_str = "ringparam <port_id>\n" + " Print ring parameters", + .tokens = { + (void *)&pcmd_ringparam_token_cmd, + (void *)&pcmd_intintint_token_port, + NULL + }, +}; +cmdline_parse_inst_t pcmd_ringparam_set = { + .f = pcmd_ringparam_callback, + .data = (void *)1, + .help_str = "ringparam <port_id> <tx_param> <rx_param>\n" + " Set ring parameters", + .tokens = { + (void *)&pcmd_ringparam_token_cmd, + (void *)&pcmd_intintint_token_port, + (void *)&pcmd_intintint_token_tx, + (void *)&pcmd_intintint_token_rx, + NULL + }, +}; +cmdline_parse_inst_t pcmd_validate = { + .f = pcmd_validate_callback, + .data = NULL, + .help_str = "validate <mac_addr>\n" + " Check that MAC address is valid unicast address", + .tokens = { + (void *)&pcmd_validate_token_cmd, + (void *)&pcmd_intmac_token_mac, + NULL + }, +}; +cmdline_parse_inst_t pcmd_vlan = { + .f = pcmd_vlan_callback, + .data = NULL, + .help_str = "vlan <port_id> <add|del> <vlan_id>\n" + " Add/remove VLAN id", + .tokens = { + (void *)&pcmd_vlan_token_cmd, + (void *)&pcmd_vlan_token_port, + (void *)&pcmd_vlan_token_mode, + (void *)&pcmd_vlan_token_vid, + NULL + }, +}; + + +cmdline_parse_ctx_t list_prompt_commands[] = { + (cmdline_parse_inst_t *)&pcmd_drvinfo, + (cmdline_parse_inst_t *)&pcmd_eeprom, + (cmdline_parse_inst_t *)&pcmd_link, + (cmdline_parse_inst_t 
*)&pcmd_macaddr_get, + (cmdline_parse_inst_t *)&pcmd_macaddr, + (cmdline_parse_inst_t *)&pcmd_mtu, + (cmdline_parse_inst_t *)&pcmd_open, + (cmdline_parse_inst_t *)&pcmd_pause_noopt, + (cmdline_parse_inst_t *)&pcmd_pause, + (cmdline_parse_inst_t *)&pcmd_portstats, + (cmdline_parse_inst_t *)&pcmd_regs, + (cmdline_parse_inst_t *)&pcmd_ringparam, + (cmdline_parse_inst_t *)&pcmd_ringparam_set, + (cmdline_parse_inst_t *)&pcmd_rxmode, + (cmdline_parse_inst_t *)&pcmd_stop, + (cmdline_parse_inst_t *)&pcmd_validate, + (cmdline_parse_inst_t *)&pcmd_vlan, + (cmdline_parse_inst_t *)&pcmd_quit, + NULL +}; + + +void ethapp_main(void) +{ + struct cmdline *ctx_cmdline; + + ctx_cmdline = cmdline_stdin_new(list_prompt_commands, "EthApp> "); + cmdline_interact(ctx_cmdline); + cmdline_stdin_exit(ctx_cmdline); +} diff --git a/examples/ethtool/ethtool-app/ethapp.h b/examples/ethtool/ethtool-app/ethapp.h new file mode 100644 index 00000000..ba438eea --- /dev/null +++ b/examples/ethtool/ethtool-app/ethapp.h @@ -0,0 +1,41 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +void ethapp_main(void); +void print_stats(void); +void lock_port(int idx_port); +void unlock_port(int idx_port); +void mark_port_inactive(int idx_port); +void mark_port_active(int idx_port); +void mark_port_newmac(int idx_port); diff --git a/examples/ethtool/ethtool-app/main.c b/examples/ethtool/ethtool-app/main.c new file mode 100644 index 00000000..2c655d83 --- /dev/null +++ b/examples/ethtool/ethtool-app/main.c @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_spinlock.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#include "ethapp.h"
+
+#define MAX_PORTS RTE_MAX_ETHPORTS
+#define MAX_BURST_LENGTH 32
+#define PORT_RX_QUEUE_SIZE 128
+#define PORT_TX_QUEUE_SIZE 256
+#define PKTPOOL_EXTRA_SIZE 512
+#define PKTPOOL_CACHE 32
+
+
+/* Per-port buffer of frames that were received but not yet transmitted */
+struct txq_port {
+	uint16_t cnt_unsent;
+	struct rte_mbuf *buf_frames[MAX_BURST_LENGTH];
+};
+
+/* Per-port application state; 'lock' is taken around all port processing */
+struct app_port {
+	struct ether_addr mac_addr;
+	struct txq_port txq;
+	rte_spinlock_t lock;
+	int port_active;
+	int port_dirty;
+	int idx_port;
+	struct rte_mempool *pkt_pool;
+};
+
+/* Global application state: one entry per port plus the shutdown flag */
+struct app_config {
+	struct app_port ports[MAX_PORTS];
+	int cnt_ports;
+	int exit_now;
+};
+
+
+struct app_config app_cfg;
+
+
+/* Take the spinlock of port idx_port (spins until it is available) */
+void lock_port(int idx_port)
+{
+	struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+	rte_spinlock_lock(&ptr_port->lock);
+}
+
+/* Release the spinlock of port idx_port */
+void unlock_port(int idx_port)
+{
+	struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+	rte_spinlock_unlock(&ptr_port->lock);
+}
+
+/* Flag port idx_port as active so that slave_main() processes it */
+void mark_port_active(int idx_port)
+{
+	struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+	ptr_port->port_active = 1;
+}
+
+/* Flag port idx_port as inactive so that slave_main() skips it */
+void mark_port_inactive(int idx_port)
+{
+	struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+	ptr_port->port_active = 0;
+}
+
+/* Flag port idx_port so that slave_main() re-reads its MAC address */
+void mark_port_newmac(int idx_port)
+{
+	struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+	ptr_port->port_dirty = 1;
+}
+
+/*
+ * For each of the first cnt_ports ports: create a packet pool, configure the
+ * port with one RX and one TX queue, start it, enable promiscuous mode and
+ * cache its MAC address. Any failure terminates the process via rte_exit().
+ */
+static void setup_ports(struct app_config *app_cfg, int cnt_ports)
+{
+	int idx_port;
+	int size_pktpool;
+	struct rte_eth_conf cfg_port;
+	struct rte_eth_dev_info dev_info;
+	char str_name[16];
+
+	memset(&cfg_port, 0, sizeof(cfg_port));
+	cfg_port.txmode.mq_mode = ETH_MQ_TX_NONE;
+
+	for (idx_port = 0; idx_port < cnt_ports; idx_port++) {
+		struct app_port *ptr_port = &app_cfg->ports[idx_port];
+
+		/* Pool size: maximum RX + TX descriptors plus some headroom */
+		rte_eth_dev_info_get(idx_port, &dev_info);
+		size_pktpool = dev_info.rx_desc_lim.nb_max +
+			dev_info.tx_desc_lim.nb_max + PKTPOOL_EXTRA_SIZE;
+
+		/* Bound the write by the buffer itself, not a magic number */
+		snprintf(str_name, sizeof(str_name), "pkt_pool%i", idx_port);
+		ptr_port->pkt_pool = rte_pktmbuf_pool_create(
+			str_name,
+			size_pktpool, PKTPOOL_CACHE,
+			0,
+			RTE_MBUF_DEFAULT_BUF_SIZE,
+			rte_socket_id()
+			);
+		if (ptr_port->pkt_pool == NULL)
+			rte_exit(EXIT_FAILURE,
+				"rte_pktmbuf_pool_create failed"
+				);
+
+		printf("Init port %i..\n", idx_port);
+		ptr_port->port_active = 1;
+		ptr_port->port_dirty = 0;
+		ptr_port->idx_port = idx_port;
+
+		if (rte_eth_dev_configure(idx_port, 1, 1, &cfg_port) < 0)
+			rte_exit(EXIT_FAILURE,
+				 "rte_eth_dev_configure failed");
+		if (rte_eth_rx_queue_setup(
+			    idx_port, 0, PORT_RX_QUEUE_SIZE,
+			    rte_eth_dev_socket_id(idx_port), NULL,
+			    ptr_port->pkt_pool) < 0)
+			rte_exit(EXIT_FAILURE,
+				 "rte_eth_rx_queue_setup failed"
+				);
+		if (rte_eth_tx_queue_setup(
+			    idx_port, 0, PORT_TX_QUEUE_SIZE,
+			    rte_eth_dev_socket_id(idx_port), NULL) < 0)
+			rte_exit(EXIT_FAILURE,
+				 "rte_eth_tx_queue_setup failed"
+				);
+		if (rte_eth_dev_start(idx_port) < 0)
+			rte_exit(EXIT_FAILURE,
+				 "%s:%i: rte_eth_dev_start failed",
+				 __FILE__, __LINE__
+				);
+		rte_eth_promiscuous_enable(idx_port);
+		rte_eth_macaddr_get(idx_port, &ptr_port->mac_addr);
+		rte_spinlock_init(&ptr_port->lock);
+	}
+}
+
+/*
+ * Turn a received frame into a reply: the old source MAC becomes the
+ * destination and the port's cached MAC becomes the source.
+ */
+static void process_frame(struct app_port *ptr_port,
+	struct rte_mbuf *ptr_frame)
+{
+	struct ether_hdr *ptr_mac_hdr;
+
+	ptr_mac_hdr = rte_pktmbuf_mtod(ptr_frame, struct ether_hdr *);
+	ether_addr_copy(&ptr_mac_hdr->s_addr, &ptr_mac_hdr->d_addr);
+	ether_addr_copy(&ptr_port->mac_addr, &ptr_mac_hdr->s_addr);
+}
+
+/*
+ * Forwarding loop run on the slave lcore until app_cfg.exit_now is set.
+ * Each port is handled under its spinlock; a port whose lock is currently
+ * held elsewhere, or that is marked inactive, is skipped for this round.
+ */
+static int slave_main(__attribute__((unused)) void *ptr_data)
+{
+	struct app_port *ptr_port;
+	struct rte_mbuf *ptr_frame;
+	struct txq_port *txq;
+
+	uint16_t cnt_recv_frames;
+	uint16_t idx_frame;
+	uint16_t cnt_sent;
+	uint16_t idx_port;
+	int lock_result;	/* rte_spinlock_trylock() returns int */
+
+	while (app_cfg.exit_now == 0) {
+		for (idx_port = 0; idx_port < app_cfg.cnt_ports; idx_port++) {
+			/* Check that port is active and unlocked */
+			ptr_port = &app_cfg.ports[idx_port];
+			lock_result = rte_spinlock_trylock(&ptr_port->lock);
+			if (lock_result == 0)
+				continue;
+			if (ptr_port->port_active == 0) {
+				rte_spinlock_unlock(&ptr_port->lock);
+				continue;
+			}
+			txq = &ptr_port->txq;
+
+			/* MAC address was updated */
+			if (ptr_port->port_dirty == 1) {
+				rte_eth_macaddr_get(ptr_port->idx_port,
+					&ptr_port->mac_addr);
+				ptr_port->port_dirty = 0;
+			}
+
+			/* Incoming frames go into the free tail of the buffer */
+			cnt_recv_frames = rte_eth_rx_burst(
+				ptr_port->idx_port, 0,
+				&txq->buf_frames[txq->cnt_unsent],
+				RTE_DIM(txq->buf_frames) - txq->cnt_unsent
+				);
+			if (cnt_recv_frames > 0) {
+				for (idx_frame = 0;
+					idx_frame < cnt_recv_frames;
+					idx_frame++) {
+					ptr_frame = txq->buf_frames[
+						idx_frame + txq->cnt_unsent];
+					process_frame(ptr_port, ptr_frame);
+				}
+				txq->cnt_unsent += cnt_recv_frames;
+			}
+
+			/* Outgoing frames */
+			if (txq->cnt_unsent > 0) {
+				cnt_sent = rte_eth_tx_burst(
+					ptr_port->idx_port, 0,
+					txq->buf_frames,
+					txq->cnt_unsent
+					);
+				/* Shuffle up unsent frame pointers */
+				for (idx_frame = cnt_sent;
+					idx_frame < txq->cnt_unsent;
+					idx_frame++)
+					txq->buf_frames[idx_frame - cnt_sent] =
+						txq->buf_frames[idx_frame];
+				txq->cnt_unsent -= cnt_sent;
+			}
+			rte_spinlock_unlock(&ptr_port->lock);
+		} /* end for( idx_port ) */
+	} /* end while( exit_now == 0 ) */
+
+	return 0;
+}
+
+/*
+ * Initialise the EAL and the ports, launch slave_main() on the next
+ * available lcore, run the interactive ethapp_main() on the master lcore
+ * and, once it returns, signal the slave to exit and wait for it.
+ */
+int main(int argc, char **argv)
+{
+	int cnt_args_parsed;
+	uint32_t id_core;
+	uint32_t cnt_ports;
+
+	/* Init runtime environment */
+	cnt_args_parsed = rte_eal_init(argc, argv);
+	if (cnt_args_parsed < 0)
+		rte_exit(EXIT_FAILURE, "rte_eal_init(): Failed");
+
+	cnt_ports = rte_eth_dev_count();
+	printf("Number of NICs: %i\n", cnt_ports);
+	if (cnt_ports == 0)
+		rte_exit(EXIT_FAILURE, "No available NIC ports!\n");
+	if (cnt_ports > MAX_PORTS) {
+		/* Report "<used> of <available>": MAX_PORTS of cnt_ports */
+		printf("Info: Using only %i of %i ports\n",
+			MAX_PORTS, cnt_ports
+			);
+		cnt_ports = MAX_PORTS;
+	}
+
+	setup_ports(&app_cfg, cnt_ports);
+
+	app_cfg.exit_now = 0;
+	app_cfg.cnt_ports = cnt_ports;
+
+	if (rte_lcore_count() < 2)
+		rte_exit(EXIT_FAILURE, "No available slave core!\n");
+	/* Assume there is an available slave.. */
+	id_core = rte_lcore_id();
+	id_core = rte_get_next_lcore(id_core, 1, 1);
+	rte_eal_remote_launch(slave_main, NULL, id_core);
+
+	ethapp_main();
+
+	/* Ask the forwarding loop to stop, then wait for every slave */
+	app_cfg.exit_now = 1;
+	RTE_LCORE_FOREACH_SLAVE(id_core) {
+		if (rte_eal_wait_lcore(id_core) < 0)
+			return -1;
+	}
+
+	return 0;
+}
diff --git a/examples/ethtool/lib/Makefile b/examples/ethtool/lib/Makefile
new file mode 100644
index 00000000..d7ee9555
--- /dev/null
+++ b/examples/ethtool/lib/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Abort early when the SDK location is not set
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overwritten by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Stop the build with an error on any execution environment but linuxapp
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(error This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+endif
+
+# library name
+LIB = librte_ethtool.a
+
+LIBABIVER := 1
+
+# all sources are stored in SRCS-y
+SRCS-y := rte_ethtool.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extlib.mk
diff --git a/examples/ethtool/lib/rte_ethtool.c b/examples/ethtool/lib/rte_ethtool.c
new file mode 100644
index 00000000..42e05f1f
--- /dev/null
+++ b/examples/ethtool/lib/rte_ethtool.c
@@ -0,0 +1,423 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <rte_version.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include "rte_ethtool.h"
+
+#define PKTPOOL_SIZE 512
+#define PKTPOOL_CACHE 32
+
+
+/*
+ * Fill *drvinfo with the driver name, the DPDK version string, the PCI
+ * address and the register/EEPROM dump lengths of the port.
+ * Returns 0, -EINVAL if drvinfo is NULL or -ENODEV for an invalid port.
+ */
+int
+rte_ethtool_get_drvinfo(uint8_t port_id, struct ethtool_drvinfo *drvinfo)
+{
+	struct rte_eth_dev_info dev_info;
+	int n;
+
+	if (drvinfo == NULL)
+		return -EINVAL;
+
+	if (!rte_eth_dev_is_valid_port(port_id))
+		return -ENODEV;
+
+	memset(&dev_info, 0, sizeof(dev_info));
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	snprintf(drvinfo->driver, sizeof(drvinfo->driver), "%s",
+		dev_info.driver_name);
+	snprintf(drvinfo->version, sizeof(drvinfo->version), "%s",
+		rte_version());
+	/* pci_dev is NULL for ports that are not backed by a PCI device */
+	if (dev_info.pci_dev != NULL)
+		snprintf(drvinfo->bus_info, sizeof(drvinfo->bus_info),
+			"%04x:%02x:%02x.%x",
+			dev_info.pci_dev->addr.domain,
+			dev_info.pci_dev->addr.bus,
+			dev_info.pci_dev->addr.devid,
+			dev_info.pci_dev->addr.function);
+	else
+		snprintf(drvinfo->bus_info, sizeof(drvinfo->bus_info), "N/A");
+
+	/* A negative length is an error code: report it as "nothing to dump" */
+	n = rte_eth_dev_get_reg_length(port_id);
+	if (n > 0)
+		drvinfo->regdump_len = n;
+	else
+		drvinfo->regdump_len = 0;
+
+	n = rte_eth_dev_get_eeprom_length(port_id);
+	if (n > 0)
+		drvinfo->eedump_len = n;
+	else
+		drvinfo->eedump_len = 0;
+
+	drvinfo->n_stats = sizeof(struct rte_eth_stats) / sizeof(uint64_t);
+	drvinfo->testinfo_len = 0;
+
+	return 0;
+}
+
+/*
+ * Return the register dump size in bytes (register count times
+ * sizeof(uint32_t)); zero or a negative value from the ethdev layer is
+ * passed through unchanged.
+ */
+int
+rte_ethtool_get_regs_len(uint8_t port_id)
+{
+	int count_regs;
+
+	count_regs = rte_eth_dev_get_reg_length(port_id);
+	if (count_regs > 0)
+		return count_regs * sizeof(uint32_t);
+	return count_regs;
+}
+
+/*
+ * Dump the device registers into 'data' and store the register layout
+ * version in regs->version.
+ * Returns 0, -EINVAL on a NULL argument, or the ethdev error code.
+ */
+int
+rte_ethtool_get_regs(uint8_t port_id, struct ethtool_regs *regs, void *data)
+{
+	struct rte_dev_reg_info reg_info;
+	int status;
+
+	if (regs == NULL || data == NULL)
+		return -EINVAL;
+
+	/* Zero the request so no field reaches the driver uninitialized */
+	memset(&reg_info, 0, sizeof(reg_info));
+	reg_info.data = data;
+	reg_info.length = 0;
+
+	status = rte_eth_dev_get_reg_info(port_id, &reg_info);
+	if (status)
+		return status;
+	regs->version = reg_info.version;
+
+	return 0;
+}
+
+/* Return the link status of the port, or -ENODEV for an invalid port */
+int
+rte_ethtool_get_link(uint8_t port_id)
+{
+	struct rte_eth_link link;
+
+	if (!rte_eth_dev_is_valid_port(port_id))
+		return -ENODEV;
+	rte_eth_link_get(port_id, &link);
+	return link.link_status;
+}
+
+/* Return whatever rte_eth_dev_get_eeprom_length() reports for the port */
+int
+rte_ethtool_get_eeprom_len(uint8_t port_id)
+{
+	return rte_eth_dev_get_eeprom_length(port_id);
+}
+
+/*
+ * Read the EEPROM range described by eeprom->offset / eeprom->len into
+ * 'words' and copy the magic value reported by the driver to eeprom->magic.
+ * Returns 0, -EINVAL on a NULL argument, or the ethdev error code.
+ */
+int
+rte_ethtool_get_eeprom(uint8_t port_id, struct ethtool_eeprom *eeprom,
+	void *words)
+{
+	struct rte_dev_eeprom_info eeprom_info;
+	int status;
+
+	if (eeprom == NULL || words == NULL)
+		return -EINVAL;
+
+	/* Zero first: 'magic' is read back below even if the driver skips it */
+	memset(&eeprom_info, 0, sizeof(eeprom_info));
+	eeprom_info.offset = eeprom->offset;
+	eeprom_info.length = eeprom->len;
+	eeprom_info.data = words;
+
+	status = rte_eth_dev_get_eeprom(port_id, &eeprom_info);
+	if (status)
+		return status;
+
+	eeprom->magic = eeprom_info.magic;
+
+	return 0;
+}
+
+/*
+ * Write 'words' to the EEPROM range described by eeprom->offset /
+ * eeprom->len and copy the resulting magic value to eeprom->magic.
+ * Returns 0, -EINVAL on a NULL argument or when offset >= len, or the
+ * ethdev error code.
+ */
+int
+rte_ethtool_set_eeprom(uint8_t port_id, struct ethtool_eeprom *eeprom,
+	void *words)
+{
+	struct rte_dev_eeprom_info eeprom_info;
+	int status;
+
+	if (eeprom == NULL || words == NULL || eeprom->offset >= eeprom->len)
+		return -EINVAL;
+
+	/* Zero first so that 'magic' is never passed on uninitialized */
+	memset(&eeprom_info, 0, sizeof(eeprom_info));
+	eeprom_info.offset = eeprom->offset;
+	eeprom_info.length = eeprom->len;
+	eeprom_info.data = words;
+
+	status = rte_eth_dev_set_eeprom(port_id, &eeprom_info);
+	if (status)
+		return status;
+
+	eeprom->magic = eeprom_info.magic;
+
+	return 0;
+}
+
+/*
+ * Translate the device flow control mode into the rx_pause / tx_pause /
+ * autoneg fields of *pause_param.
+ * Returns 0, -EINVAL if pause_param is NULL, or the ethdev error code.
+ */
+int
+rte_ethtool_get_pauseparam(uint8_t port_id,
+	struct ethtool_pauseparam *pause_param)
+{
+	struct rte_eth_fc_conf fc_conf;
+	int status;
+
+	if (pause_param == NULL)
+		return -EINVAL;
+
+	status = rte_eth_dev_flow_ctrl_get(port_id, &fc_conf);
+	if (status)
+		return status;
+
+	pause_param->tx_pause = 0;
+	pause_param->rx_pause = 0;
+	switch (fc_conf.mode) {
+	case RTE_FC_RX_PAUSE:
+		pause_param->rx_pause = 1;
+		break;
+	case RTE_FC_TX_PAUSE:
+		pause_param->tx_pause = 1;
+		break;
+	case RTE_FC_FULL:
+		pause_param->rx_pause = 1;
+		pause_param->tx_pause = 1;
+		break;
+	default:
+		/* any other mode: both pause flags stay cleared */
+		break;
+	}
+	pause_param->autoneg = (uint32_t)fc_conf.autoneg;
+
+	return 0;
+}
+
+/*
+ * Apply the rx_pause / tx_pause / autoneg fields of *pause_param to the
+ * device flow control configuration.
+ * Returns 0, -EINVAL if pause_param is NULL, or the ethdev error code.
+ */
+int
+rte_ethtool_set_pauseparam(uint8_t port_id,
+	struct ethtool_pauseparam *pause_param)
+{
+	struct rte_eth_fc_conf fc_conf;
+	int status;
+
+	if (pause_param == NULL)
+		return -EINVAL;
+
+	/*
+	 * Read device flow control parameter first since
+	 * ethtool set_pauseparam op doesn't have all the information.
+	 * as defined in struct rte_eth_fc_conf.
+	 * This API requires the device to support both
+	 * rte_eth_dev_flow_ctrl_get and rte_eth_dev_flow_ctrl_set, otherwise
+	 * return -ENOTSUP
+	 */
+	status = rte_eth_dev_flow_ctrl_get(port_id, &fc_conf);
+	if (status)
+		return status;
+
+	fc_conf.autoneg = (uint8_t)pause_param->autoneg;
+
+	if (pause_param->tx_pause) {
+		if (pause_param->rx_pause)
+			fc_conf.mode = RTE_FC_FULL;
+		else
+			fc_conf.mode = RTE_FC_TX_PAUSE;
+	} else {
+		if (pause_param->rx_pause)
+			fc_conf.mode = RTE_FC_RX_PAUSE;
+		else
+			fc_conf.mode = RTE_FC_NONE;
+	}
+
+	status = rte_eth_dev_flow_ctrl_set(port_id, &fc_conf);
+	if (status)
+		return status;
+
+	return 0;
+}
+
+/*
+ * (Re)start the port: it is stopped first and then started again.
+ * Returns -ENODEV for an invalid port, otherwise the result of
+ * rte_eth_dev_start().
+ */
+int
+rte_ethtool_net_open(uint8_t port_id)
+{
+	if (!rte_eth_dev_is_valid_port(port_id))
+		return -ENODEV;
+	rte_eth_dev_stop(port_id);
+
+	return rte_eth_dev_start(port_id);
+}
+
+/* Stop the port. Returns 0, or -ENODEV for an invalid port */
+int
+rte_ethtool_net_stop(uint8_t port_id)
+{
+	if (!rte_eth_dev_is_valid_port(port_id))
+		return -ENODEV;
+	rte_eth_dev_stop(port_id);
+
+	return 0;
+}
+
+/*
+ * Copy the port MAC address into *addr.
+ * Returns 0, -ENODEV for an invalid port or -EINVAL if addr is NULL.
+ */
+int
+rte_ethtool_net_get_mac_addr(uint8_t port_id, struct ether_addr *addr)
+{
+	if (!rte_eth_dev_is_valid_port(port_id))
+		return -ENODEV;
+	if (addr == NULL)
+		return -EINVAL;
+	rte_eth_macaddr_get(port_id, addr);
+
+	return 0;
+}
+
+/*
+ * Set *addr as the default MAC address of the port.
+ * Returns -EINVAL if addr is NULL, otherwise the ethdev result.
+ */
+int
+rte_ethtool_net_set_mac_addr(uint8_t port_id, struct ether_addr *addr)
+{
+	if (addr == NULL)
+		return -EINVAL;
+	return rte_eth_dev_default_mac_addr_set(port_id, addr);
+}
+
+/*
+ * Check *addr with is_valid_assigned_ether_addr() and return its result;
+ * port_id is not used. Returns -EINVAL if addr is NULL.
+ */
+int
+rte_ethtool_net_validate_addr(uint8_t port_id __rte_unused,
+	struct ether_addr *addr)
+{
+	if (addr == NULL)
+		return -EINVAL;
+	return is_valid_assigned_ether_addr(addr);
+}
+
+/*
+ * Set the MTU of the port.
+ * Returns -EINVAL when mtu does not fit in uint16_t, otherwise the
+ * ethdev result.
+ */
+int
+rte_ethtool_net_change_mtu(uint8_t port_id, int mtu)
+{
+	if (mtu < 0 || mtu > UINT16_MAX)
+		return -EINVAL;
+	return rte_eth_dev_set_mtu(port_id, (uint16_t)mtu);
+}
+
+/*
+ * Read the port statistics into *stats.
+ * Returns -EINVAL if stats is NULL, otherwise the ethdev result.
+ */
+int
+rte_ethtool_net_get_stats64(uint8_t port_id, struct rte_eth_stats *stats)
+{
+	if (stats == NULL)
+		return -EINVAL;
+	return rte_eth_stats_get(port_id, stats);
+}
+
+/* Enable VLAN id 'vid' in the port VLAN filter */
+int
+rte_ethtool_net_vlan_rx_add_vid(uint8_t port_id, uint16_t vid)
+{
+	return rte_eth_dev_vlan_filter(port_id, vid, 1);
+}
+
+/* Disable VLAN id 'vid' in the port VLAN filter */
+int
+rte_ethtool_net_vlan_rx_kill_vid(uint8_t port_id, uint16_t vid)
+{
+	return rte_eth_dev_vlan_filter(port_id, vid, 0);
+}
+
+/*
+ * The set_rx_mode provides driver-specific rx mode setting.
+ * This implementation implements rx mode setting based upon
+ * ixgbe/igb drivers. Further improvement is to provide a
+ * callback op field over struct rte_eth_dev::dev_ops so each
+ * driver can register device-specific implementation
+ */
+int
+rte_ethtool_net_set_rx_mode(uint8_t port_id)
+{
+	uint16_t num_vfs;
+	struct rte_eth_dev_info dev_info;
+	uint16_t vf;
+
+	memset(&dev_info, 0, sizeof(dev_info));
+	rte_eth_dev_info_get(port_id, &dev_info);
+	num_vfs = dev_info.max_vfs;
+
+	/* Set VF vf_rx_mode; an "unsupported" status for a VF is discarded */
+	for (vf = 0; vf < num_vfs; vf++)
+		rte_eth_dev_set_vf_rxmode(port_id, vf,
+			ETH_VMDQ_ACCEPT_UNTAG, 0);
+
+	/* Enable Rx vlan filter; an "unsupported" status is discarded */
+	rte_eth_dev_set_vlan_offload(port_id, ETH_VLAN_FILTER_MASK);
+
+	return 0;
+}
+
+
+/*
+ * Report the current and maximum descriptor counts of RX/TX queue 0 in
+ * *ring_param; all other fields are zeroed.
+ * Returns 0, -EINVAL if ring_param is NULL, or the ethdev error code.
+ */
+int
+rte_ethtool_get_ringparam(uint8_t port_id,
+	struct ethtool_ringparam *ring_param)
+{
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_rxq_info rx_qinfo;
+	struct rte_eth_txq_info tx_qinfo;
+	int stat;
+
+	if (ring_param == NULL)
+		return -EINVAL;
+
+	memset(&dev_info, 0, sizeof(dev_info));
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	stat = rte_eth_rx_queue_info_get(port_id, 0, &rx_qinfo);
+	if (stat != 0)
+		return stat;
+
+	stat = rte_eth_tx_queue_info_get(port_id, 0, &tx_qinfo);
+	if (stat != 0)
+		return stat;
+
+	memset(ring_param, 0, sizeof(*ring_param));
+	ring_param->rx_pending = rx_qinfo.nb_desc;
+	ring_param->rx_max_pending = dev_info.rx_desc_lim.nb_max;
+	ring_param->tx_pending = tx_qinfo.nb_desc;
+	ring_param->tx_max_pending = dev_info.tx_desc_lim.nb_max;
+
+	return 0;
+}
+
+
+/*
+ * Re-create RX/TX queue 0 with the descriptor counts given in
+ * ring_param->rx_pending / tx_pending, reusing the current RX mempool.
+ * The port is stopped for the change and restarted on success; when a
+ * queue setup call fails its error is returned and the port stays stopped.
+ */
+int
+rte_ethtool_set_ringparam(uint8_t port_id,
+	struct ethtool_ringparam *ring_param)
+{
+	struct rte_eth_rxq_info rx_qinfo;
+	int stat;
+
+	if (ring_param == NULL)
+		return -EINVAL;
+
+	/* Needed for rx_qinfo.mp, the mempool the RX queue currently uses */
+	stat = rte_eth_rx_queue_info_get(port_id, 0, &rx_qinfo);
+	if (stat != 0)
+		return stat;
+
+	rte_eth_dev_stop(port_id);
+
+	stat = rte_eth_tx_queue_setup(port_id, 0, ring_param->tx_pending,
+		rte_socket_id(), NULL);
+	if (stat != 0)
+		return stat;
+
+	stat = rte_eth_rx_queue_setup(port_id, 0, ring_param->rx_pending,
+		rte_socket_id(), NULL, rx_qinfo.mp);
+	if (stat != 0)
+		return stat;
+
+	return rte_eth_dev_start(port_id);
+}
diff --git a/examples/ethtool/lib/rte_ethtool.h b/examples/ethtool/lib/rte_ethtool.h
new file mode 100644
index 00000000..2e79d453
--- /dev/null
+++ b/examples/ethtool/lib/rte_ethtool.h
@@ -0,0 +1,410 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETHTOOL_H_
+#define _RTE_ETHTOOL_H_
+
+/*
+ * This new interface is designed to provide a user-space shim layer for
+ * Ethtool and Netdevice op API.
+ *
+ * rte_ethtool_get_drvinfo: ethtool_ops::get_drvinfo
+ * rte_ethtool_get_link: ethtool_ops::get_link
+ * rte_ethtool_get_regs_len: ethtool_ops::get_regs_len
+ * rte_ethtool_get_regs: ethtool_ops::get_regs
+ * rte_ethtool_get_eeprom_len: ethtool_ops::get_eeprom_len
+ * rte_ethtool_get_eeprom: ethtool_ops::get_eeprom
+ * rte_ethtool_set_eeprom: ethtool_ops::set_eeprom
+ * rte_ethtool_get_pauseparam: ethtool_ops::get_pauseparam
+ * rte_ethtool_set_pauseparam: ethtool_ops::set_pauseparam
+ * rte_ethtool_get_ringparam: ethtool_ops::get_ringparam
+ * rte_ethtool_set_ringparam: ethtool_ops::set_ringparam
+ *
+ * rte_ethtool_net_open: net_device_ops::ndo_open
+ * rte_ethtool_net_stop: net_device_ops::ndo_stop
+ * rte_ethtool_net_set_mac_addr: net_device_ops::ndo_set_mac_address
+ * rte_ethtool_net_validate_addr: net_device_ops::ndo_validate_addr
+ * rte_ethtool_net_change_mtu: net_device_ops::ndo_change_mtu
+ * rte_ethtool_net_get_stats64: net_device_ops::ndo_get_stats64
+ * rte_ethtool_net_vlan_rx_add_vid: net_device_ops::ndo_vlan_rx_add_vid
+ * rte_ethtool_net_vlan_rx_kill_vid: net_device_ops::ndo_vlan_rx_kill_vid
+ * rte_ethtool_net_set_rx_mode: net_device_ops::ndo_set_rx_mode
+ *
+ */
+
+/* Headers are pulled in before the extern "C" block, not inside it */
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <linux/ethtool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Retrieve the Ethernet device driver information according to
+ * attributes described by ethtool data structure, ethtool_drvinfo.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param drvinfo
+ *   A pointer to get driver information
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *drvinfo* is NULL.
+ */
+int rte_ethtool_get_drvinfo(uint8_t port_id, struct ethtool_drvinfo *drvinfo);
+
+/**
+ * Retrieve the Ethernet device register length in bytes.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (> 0) total size in bytes of the device registers available for dump
+ *   - (0) no registers available for dump.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_get_regs_len(uint8_t port_id);
+
+/**
+ * Retrieve the Ethernet device register information according to
+ * attributes described by ethtool data structure, ethtool_regs
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param regs
+ *   A pointer to ethtool_regs that has register information
+ * @param data
+ *   A pointer to a buffer that is used to retrieve device register content
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *regs* or *data* is NULL.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_get_regs(uint8_t port_id, struct ethtool_regs *regs,
+	void *data);
+
+/**
+ * Retrieve the Ethernet device link status
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (1) if link up.
+ *   - (0) if link down.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if parameters invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_get_link(uint8_t port_id);
+
+/**
+ * Retrieve the Ethernet device EEPROM size
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (> 0) device EEPROM size in bytes
+ *   - (0) device has NO EEPROM
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_get_eeprom_len(uint8_t port_id);
+
+/**
+ * Retrieve EEPROM content based upon eeprom range described in ethtool
+ * data structure, ethtool_eeprom
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param eeprom
+ *   The pointer of ethtool_eeprom that provides eeprom range
+ * @param words
+ *   A buffer that holds data read from eeprom
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *eeprom* or *words* is NULL.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_get_eeprom(uint8_t port_id, struct ethtool_eeprom *eeprom,
+	void *words);
+
+/**
+ * Setting EEPROM content based upon eeprom range described in ethtool
+ * data structure, ethtool_eeprom
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param eeprom
+ *   The pointer of ethtool_eeprom that provides eeprom range
+ * @param words
+ *   A buffer that holds data to be written into eeprom
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if parameters invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_set_eeprom(uint8_t port_id, struct ethtool_eeprom *eeprom,
+	void *words);
+
+/**
+ * Retrieve the Ethernet device pause frame configuration according to
+ * parameter attributes described by ethtool data structure,
+ * ethtool_pauseparam.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param pause_param
+ *   The pointer of ethtool_pauseparam that gets pause frame
+ *   configuration parameters
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if parameters invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_get_pauseparam(uint8_t port_id,
+	struct ethtool_pauseparam *pause_param);
+
+/**
+ * Setting the Ethernet device pause frame configuration according to
+ * parameter attributes described by ethtool data structure,
+ * ethtool_pauseparam.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param pause_param
+ *   The pointer of ethtool_pauseparam that provides the pause frame
+ *   configuration parameters to apply
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if parameters invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_set_pauseparam(uint8_t port_id,
+	struct ethtool_pauseparam *pause_param);
+
+/**
+ * Start the Ethernet device. The device is stopped first and then
+ * started again.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_net_open(uint8_t port_id);
+
+/**
+ * Stop the Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_ethtool_net_stop(uint8_t port_id);
+
+/**
+ * Get the Ethernet device MAC address.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param addr
+ *   MAC address of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *addr* is NULL.
+ */
+int rte_ethtool_net_get_mac_addr(uint8_t port_id, struct ether_addr *addr);
+
+/**
+ * Setting the Ethernet device MAC address.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param addr
+ *   The new MAC addr.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if parameters invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_net_set_mac_addr(uint8_t port_id, struct ether_addr *addr);
+
+/**
+ * Validate if the provided MAC address is valid unicast address
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device (not used by the check).
+ * @param addr
+ *   A pointer to a buffer (6-byte, 48bit) for the target MAC address
+ * @return
+ *   - the result of is_valid_assigned_ether_addr(): non-zero if the
+ *     address is a valid assigned address, (0) if it is not.
+ *   - (-EINVAL) if *addr* is NULL.
+ */
+int rte_ethtool_net_validate_addr(uint8_t port_id, struct ether_addr *addr);
+
+/**
+ * Setting the Ethernet device maximum transmission unit (MTU).
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   New MTU
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if parameters invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_net_change_mtu(uint8_t port_id, int mtu);
+
+/**
+ * Retrieve the Ethernet device traffic statistics
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param stats
+ *   A pointer to struct rte_eth_stats for statistics parameters
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if parameters invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_net_get_stats64(uint8_t port_id, struct rte_eth_stats *stats);
+
+/**
+ * Update the Ethernet device VLAN filter with new vid
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param vid
+ *   A new VLAN id
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_net_vlan_rx_add_vid(uint8_t port_id, uint16_t vid);
+
+/**
+ * Remove VLAN id from Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param vid
+ *   The VLAN id to remove
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_net_vlan_rx_kill_vid(uint8_t port_id, uint16_t vid);
+
+/**
+ * Setting the Ethernet device rx mode.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_ethtool_net_set_rx_mode(uint8_t port_id);
+
+/**
+ * Getting ring parameters for Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param ring_param
+ *   Pointer to struct ethtool_ringparam to receive parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *ring_param* is NULL.
+ *   - others depends on the specific operations implementation.
+ * @note
+ *   Only the rx_pending, rx_max_pending, tx_pending and tx_max_pending
+ *   fields of struct ethtool_ringparam are filled in (all other fields are
+ *   zeroed), and the function only gets parameters for queue 0.
+ */
+int rte_ethtool_get_ringparam(uint8_t port_id,
+	struct ethtool_ringparam *ring_param);
+
+/**
+ * Setting ring parameters for Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param ring_param
+ *   Pointer to struct ethtool_ringparam with parameters to set.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *ring_param* is NULL.
+ *   - others depends on the specific operations implementation.
+ * @note
+ *   Only the tx_pending and rx_pending fields of struct ethtool_ringparam
+ *   are used, and the function only sets parameters for queue 0.
+ */
+int rte_ethtool_set_ringparam(uint8_t port_id,
+	struct ethtool_ringparam *ring_param);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHTOOL_H_ */
diff --git a/examples/exception_path/Makefile b/examples/exception_path/Makefile
new file mode 100644
index 00000000..959914a2
--- /dev/null
+++ b/examples/exception_path/Makefile
@@ -0,0 +1,58 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(info This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +all: +else + +# binary name +APP = exception_path + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif diff --git a/examples/exception_path/main.c b/examples/exception_path/main.c new file mode 100644 index 00000000..bec98040 --- /dev/null +++ b/examples/exception_path/main.c @@ -0,0 +1,571 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <netinet/in.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <unistd.h> +#include <signal.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_log.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> +#include <rte_cycles.h> + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 +#define FATAL_ERROR(fmt, args...) rte_exit(EXIT_FAILURE, fmt "\n", ##args) +#define PRINT_INFO(fmt, args...) RTE_LOG(INFO, APP, fmt "\n", ##args) + +/* Max ports that can be used (each port is associated with two lcores) */ +#define MAX_PORTS (RTE_MAX_LCORE / 2) + +/* Max size of a single packet */ +#define MAX_PACKET_SZ (2048) + +/* Size of the data buffer in each mbuf */ +#define MBUF_DATA_SZ (MAX_PACKET_SZ + RTE_PKTMBUF_HEADROOM) + +/* Number of mbufs in mempool that is created */ +#define NB_MBUF 8192 + +/* How many packets to attempt to read from NIC in one go */ +#define PKT_BURST_SZ 32 + +/* How many objects (mbufs) to keep in per-lcore mempool cache */ +#define MEMPOOL_CACHE_SZ PKT_BURST_SZ + +/* Number of RX ring descriptors */ +#define NB_RXD 128 + +/* Number of TX ring descriptors */ +#define NB_TXD 512 + +/* + * RX and TX Prefetch, Host, and Write-back threshold values should be + * carefully set for optimal performance. 
Consult the network + * controller's datasheet and supporting DPDK documentation for guidance + * on how these parameters should be set. + */ + +/* Options for configuring ethernet port */ +static const struct rte_eth_conf port_conf = { + .rxmode = { + .header_split = 0, /* Header Split disabled */ + .hw_ip_checksum = 0, /* IP checksum offload disabled */ + .hw_vlan_filter = 0, /* VLAN filtering disabled */ + .jumbo_frame = 0, /* Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /* CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +/* Mempool for mbufs */ +static struct rte_mempool * pktmbuf_pool = NULL; + +/* Mask of enabled ports */ +static uint32_t ports_mask = 0; + +/* Mask of cores that read from NIC and write to tap */ +static uint64_t input_cores_mask = 0; + +/* Mask of cores that read from tap and write to NIC */ +static uint64_t output_cores_mask = 0; + +/* Array storing port_id that is associated with each lcore */ +static uint8_t port_ids[RTE_MAX_LCORE]; + +/* Structure type for recording lcore-specific stats */ +struct stats { + uint64_t rx; + uint64_t tx; + uint64_t dropped; +}; + +/* Array of lcore-specific stats */ +static struct stats lcore_stats[RTE_MAX_LCORE]; + +/* Print out statistics on packets handled */ +static void +print_stats(void) +{ + unsigned i; + + printf("\n**Exception-Path example application statistics**\n" + "======= ====== ============ ============ ===============\n" + " Lcore Port RX TX Dropped on TX\n" + "------- ------ ------------ ------------ ---------------\n"); + RTE_LCORE_FOREACH(i) { + printf("%6u %7u %13"PRIu64" %13"PRIu64" %16"PRIu64"\n", + i, (unsigned)port_ids[i], + lcore_stats[i].rx, lcore_stats[i].tx, + lcore_stats[i].dropped); + } + printf("======= ====== ============ ============ ===============\n"); +} + +/* Custom handling of signals to handle stats */ +static void +signal_handler(int signum) +{ + /* When we receive a USR1 signal, print stats */ + if (signum == SIGUSR1) { 
+ print_stats(); + } + + /* When we receive a USR2 signal, reset stats */ + if (signum == SIGUSR2) { + memset(&lcore_stats, 0, sizeof(lcore_stats)); + printf("\n**Statistics have been reset**\n"); + return; + } +} + +/* + * Create a tap network interface, or use existing one with same name. + * If name[0]='\0' then a name is automatically assigned and returned in name. + */ +static int tap_create(char *name) +{ + struct ifreq ifr; + int fd, ret; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) + return fd; + + memset(&ifr, 0, sizeof(ifr)); + + /* TAP device without packet information */ + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + + if (name && *name) + snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name); + + ret = ioctl(fd, TUNSETIFF, (void *) &ifr); + if (ret < 0) { + close(fd); + return ret; + } + + if (name) + snprintf(name, IFNAMSIZ, "%s", ifr.ifr_name); + + return fd; +} + +/* Main processing loop */ +static int +main_loop(__attribute__((unused)) void *arg) +{ + const unsigned lcore_id = rte_lcore_id(); + char tap_name[IFNAMSIZ]; + int tap_fd; + + if ((1ULL << lcore_id) & input_cores_mask) { + /* Create new tap interface */ + snprintf(tap_name, IFNAMSIZ, "tap_dpdk_%.2u", lcore_id); + tap_fd = tap_create(tap_name); + if (tap_fd < 0) + FATAL_ERROR("Could not create tap interface \"%s\" (%d)", + tap_name, tap_fd); + + PRINT_INFO("Lcore %u is reading from port %u and writing to %s", + lcore_id, (unsigned)port_ids[lcore_id], tap_name); + fflush(stdout); + /* Loop forever reading from NIC and writing to tap */ + for (;;) { + struct rte_mbuf *pkts_burst[PKT_BURST_SZ]; + unsigned i; + const unsigned nb_rx = + rte_eth_rx_burst(port_ids[lcore_id], 0, + pkts_burst, PKT_BURST_SZ); + lcore_stats[lcore_id].rx += nb_rx; + for (i = 0; likely(i < nb_rx); i++) { + struct rte_mbuf *m = pkts_burst[i]; + /* Ignore return val from write() */ + int ret = write(tap_fd, + rte_pktmbuf_mtod(m, void*), + rte_pktmbuf_data_len(m)); + rte_pktmbuf_free(m); + if (unlikely(ret < 0)) + 
lcore_stats[lcore_id].dropped++; + else + lcore_stats[lcore_id].tx++; + } + } + } + else if ((1ULL << lcore_id) & output_cores_mask) { + /* Create new tap interface */ + snprintf(tap_name, IFNAMSIZ, "tap_dpdk_%.2u", lcore_id); + tap_fd = tap_create(tap_name); + if (tap_fd < 0) + FATAL_ERROR("Could not create tap interface \"%s\" (%d)", + tap_name, tap_fd); + + PRINT_INFO("Lcore %u is reading from %s and writing to port %u", + lcore_id, tap_name, (unsigned)port_ids[lcore_id]); + fflush(stdout); + /* Loop forever reading from tap and writing to NIC */ + for (;;) { + int ret; + struct rte_mbuf *m = rte_pktmbuf_alloc(pktmbuf_pool); + if (m == NULL) + continue; + + ret = read(tap_fd, rte_pktmbuf_mtod(m, void *), + MAX_PACKET_SZ); + lcore_stats[lcore_id].rx++; + if (unlikely(ret < 0)) { + FATAL_ERROR("Reading from %s interface failed", + tap_name); + } + m->nb_segs = 1; + m->next = NULL; + m->pkt_len = (uint16_t)ret; + m->data_len = (uint16_t)ret; + ret = rte_eth_tx_burst(port_ids[lcore_id], 0, &m, 1); + if (unlikely(ret < 1)) { + rte_pktmbuf_free(m); + lcore_stats[lcore_id].dropped++; + } + else { + lcore_stats[lcore_id].tx++; + } + } + } + else { + PRINT_INFO("Lcore %u has nothing to do", lcore_id); + return 0; + } + /* + * Tap file is closed automatically when program exits. Putting close() + * here will cause the compiler to give an error about unreachable code. + */ +} + +/* Display usage instructions */ +static void +print_usage(const char *prgname) +{ + PRINT_INFO("\nUsage: %s [EAL options] -- -p PORTMASK -i IN_CORES -o OUT_CORES\n" + " -p PORTMASK: hex bitmask of ports to use\n" + " -i IN_CORES: hex bitmask of cores which read from NIC\n" + " -o OUT_CORES: hex bitmask of cores which write to NIC", + prgname); +} + +/* Convert string to unsigned number. 
0 is returned if error occurs */ +static uint64_t +parse_unsigned(const char *portmask) +{ + char *end = NULL; + uint64_t num; + + num = strtoull(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + + return (uint64_t)num; +} + +/* Record affinities between ports and lcores in global port_ids[] array */ +static void +setup_port_lcore_affinities(void) +{ + unsigned long i; + uint8_t tx_port = 0; + uint8_t rx_port = 0; + + /* Setup port_ids[] array, and check masks were ok */ + RTE_LCORE_FOREACH(i) { + if (input_cores_mask & (1ULL << i)) { + /* Skip ports that are not enabled */ + while ((ports_mask & (1 << rx_port)) == 0) { + rx_port++; + if (rx_port > (sizeof(ports_mask) * 8)) + goto fail; /* not enough ports */ + } + + port_ids[i] = rx_port++; + } + else if (output_cores_mask & (1ULL << i)) { + /* Skip ports that are not enabled */ + while ((ports_mask & (1 << tx_port)) == 0) { + tx_port++; + if (tx_port > (sizeof(ports_mask) * 8)) + goto fail; /* not enough ports */ + } + + port_ids[i] = tx_port++; + } + } + + if (rx_port != tx_port) + goto fail; /* uneven number of cores in masks */ + + if (ports_mask & (~((1 << rx_port) - 1))) + goto fail; /* unused ports */ + + return; +fail: + FATAL_ERROR("Invalid core/port masks specified on command line"); +} + +/* Parse the arguments given in the command line of the application */ +static void +parse_args(int argc, char **argv) +{ + int opt; + const char *prgname = argv[0]; + + /* Disable printing messages within getopt() */ + opterr = 0; + + /* Parse command line */ + while ((opt = getopt(argc, argv, "i:o:p:")) != EOF) { + switch (opt) { + case 'i': + input_cores_mask = parse_unsigned(optarg); + break; + case 'o': + output_cores_mask = parse_unsigned(optarg); + break; + case 'p': + ports_mask = parse_unsigned(optarg); + break; + default: + print_usage(prgname); + FATAL_ERROR("Invalid option specified"); + } + } + + /* Check that options were parsed ok */ + if (input_cores_mask 
== 0) { + print_usage(prgname); + FATAL_ERROR("IN_CORES not specified correctly"); + } + if (output_cores_mask == 0) { + print_usage(prgname); + FATAL_ERROR("OUT_CORES not specified correctly"); + } + if (ports_mask == 0) { + print_usage(prgname); + FATAL_ERROR("PORTMASK not specified correctly"); + } + + setup_port_lcore_affinities(); +} + +/* Initialise a single port on an Ethernet device */ +static void +init_port(uint8_t port) +{ + int ret; + + /* Initialise device and RX/TX queues */ + PRINT_INFO("Initialising port %u ...", (unsigned)port); + fflush(stdout); + ret = rte_eth_dev_configure(port, 1, 1, &port_conf); + if (ret < 0) + FATAL_ERROR("Could not configure port%u (%d)", + (unsigned)port, ret); + + ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, rte_eth_dev_socket_id(port), + NULL, + pktmbuf_pool); + if (ret < 0) + FATAL_ERROR("Could not setup up RX queue for port%u (%d)", + (unsigned)port, ret); + + ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, rte_eth_dev_socket_id(port), + NULL); + if (ret < 0) + FATAL_ERROR("Could not setup up TX queue for port%u (%d)", + (unsigned)port, ret); + + ret = rte_eth_dev_start(port); + if (ret < 0) + FATAL_ERROR("Could not start port%u (%d)", (unsigned)port, ret); + + rte_eth_promiscuous_enable(port); +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if 
(link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +/* Initialise ports/queues etc. and start main loop on each core */ +int +main(int argc, char** argv) +{ + int ret; + unsigned i,high_port; + uint8_t nb_sys_ports, port; + + /* Associate signal_handler function with USR signals */ + signal(SIGUSR1, signal_handler); + signal(SIGUSR2, signal_handler); + + /* Initialise EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + FATAL_ERROR("Could not initialise EAL (%d)", ret); + argc -= ret; + argv += ret; + + /* Parse application arguments (after the EAL ones) */ + parse_args(argc, argv); + + /* Create the mbuf pool */ + pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, + MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id()); + if (pktmbuf_pool == NULL) { + FATAL_ERROR("Could not initialise mbuf pool"); + return -1; + } + + /* Get number of ports found in scan */ + nb_sys_ports = rte_eth_dev_count(); + if (nb_sys_ports == 0) + FATAL_ERROR("No supported Ethernet device found"); + /* Find highest port set in portmask */ + for (high_port = (sizeof(ports_mask) * 8) - 1; + (high_port != 0) && !(ports_mask & (1 << high_port)); + high_port--) + ; /* empty body */ + if (high_port > nb_sys_ports) + FATAL_ERROR("Port mask requires more ports than available"); + + /* 
Initialise each port */ + for (port = 0; port < nb_sys_ports; port++) { + /* Skip ports that are not enabled */ + if ((ports_mask & (1 << port)) == 0) { + continue; + } + init_port(port); + } + check_all_ports_link_status(nb_sys_ports, ports_mask); + + /* Launch per-lcore function on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(i) { + if (rte_eal_wait_lcore(i) < 0) + return -1; + } + + return 0; +} diff --git a/examples/helloworld/Makefile b/examples/helloworld/Makefile new file mode 100644 index 00000000..d2cca7a7 --- /dev/null +++ b/examples/helloworld/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = helloworld + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/helloworld/main.c b/examples/helloworld/main.c new file mode 100644 index 00000000..8b7a2de0 --- /dev/null +++ b/examples/helloworld/main.c @@ -0,0 +1,77 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <sys/queue.h> + +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_debug.h> + +static int +lcore_hello(__attribute__((unused)) void *arg) +{ + unsigned lcore_id; + lcore_id = rte_lcore_id(); + printf("hello from core %u\n", lcore_id); + return 0; +} + +int +main(int argc, char **argv) +{ + int ret; + unsigned lcore_id; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_panic("Cannot init EAL\n"); + + /* call lcore_hello() on every slave lcore */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(lcore_hello, NULL, lcore_id); + } + + /* call it on master lcore too */ + lcore_hello(NULL); + + rte_eal_mp_wait_lcore(); + return 0; +} diff --git a/examples/ip_fragmentation/Makefile b/examples/ip_fragmentation/Makefile new file mode 100644 index 00000000..c321e6a1 --- /dev/null +++ b/examples/ip_fragmentation/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = ip_fragmentation + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/ip_fragmentation/main.c b/examples/ip_fragmentation/main.c new file mode 100644 index 00000000..81a49187 --- /dev/null +++ b/examples/ip_fragmentation/main.c @@ -0,0 +1,965 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/param.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_lpm.h> +#include <rte_lpm6.h> +#include <rte_ip.h> +#include <rte_string_fns.h> + +#include <rte_ip_frag.h> + +#define RTE_LOGTYPE_IP_FRAG RTE_LOGTYPE_USER1 + +/* allow max jumbo frame 9.5 KB */ +#define JUMBO_FRAME_MAX_SIZE 0x2600 + +#define ROUNDUP_DIV(a, b) (((a) + (b) - 1) / (b)) + +/* + * Default byte size for the IPv6 Maximum Transfer Unit (MTU). + * This value includes the size of IPv6 header. + */ +#define IPV4_MTU_DEFAULT ETHER_MTU +#define IPV6_MTU_DEFAULT ETHER_MTU + +/* + * Default payload in bytes for the IPv6 packet. 
+ */ +#define IPV4_DEFAULT_PAYLOAD (IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr)) +#define IPV6_DEFAULT_PAYLOAD (IPV6_MTU_DEFAULT - sizeof(struct ipv6_hdr)) + +/* + * Max number of fragments per packet expected - defined by config file. + */ +#define MAX_PACKET_FRAG RTE_LIBRTE_IP_FRAG_MAX_FRAG + +#define NB_MBUF 8192 + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define PREFETCH_OFFSET 3 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +#ifndef IPv4_BYTES +#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8 +#define IPv4_BYTES(addr) \ + (uint8_t) (((addr) >> 24) & 0xFF),\ + (uint8_t) (((addr) >> 16) & 0xFF),\ + (uint8_t) (((addr) >> 8) & 0xFF),\ + (uint8_t) ((addr) & 0xFF) +#endif + +#ifndef IPv6_BYTES +#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ + "%02x%02x:%02x%02x:%02x%02x:%02x%02x" +#define IPv6_BYTES(addr) \ + addr[0], addr[1], addr[2], addr[3], \ + addr[4], addr[5], addr[6], addr[7], \ + addr[8], addr[9], addr[10], addr[11],\ + addr[12], addr[13],addr[14], addr[15] +#endif + +#define IPV6_ADDR_LEN 16 + +/* mask of enabled ports */ +static int enabled_port_mask = 0; + +static int rx_queue_per_lcore = 1; + +#define MBUF_TABLE_SIZE (2 * MAX(MAX_PKT_BURST, MAX_PACKET_FRAG)) + +struct mbuf_table { + uint16_t len; + struct rte_mbuf *m_table[MBUF_TABLE_SIZE]; +}; + +struct rx_queue { + struct rte_mempool *direct_pool; + struct rte_mempool *indirect_pool; + struct rte_lpm *lpm; + struct rte_lpm6 *lpm6; + uint8_t portid; +}; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + uint16_t 
n_rx_queue; + uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; + struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; +} __rte_cache_aligned; +struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 1, /**< Jumbo Frame Support enabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +/* + * IPv4 forwarding table + */ +struct l3fwd_ipv4_route { + uint32_t ip; + uint8_t depth; + uint8_t if_out; +}; + +struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = { + {IPv4(100,10,0,0), 16, 0}, + {IPv4(100,20,0,0), 16, 1}, + {IPv4(100,30,0,0), 16, 2}, + {IPv4(100,40,0,0), 16, 3}, + {IPv4(100,50,0,0), 16, 4}, + {IPv4(100,60,0,0), 16, 5}, + {IPv4(100,70,0,0), 16, 6}, + {IPv4(100,80,0,0), 16, 7}, +}; + +/* + * IPv6 forwarding table + */ + +struct l3fwd_ipv6_route { + uint8_t ip[IPV6_ADDR_LEN]; + uint8_t depth; + uint8_t if_out; +}; + +static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = { + {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0}, + {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1}, + {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2}, + {{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3}, + {{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4}, + {{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5}, + {{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6}, + {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7}, +}; + +#define LPM_MAX_RULES 1024 +#define LPM6_MAX_RULES 1024 +#define LPM6_NUMBER_TBL8S (1 << 16) + +struct rte_lpm6_config lpm6_config = { + .max_rules = LPM6_MAX_RULES, + .number_tbl8s = LPM6_NUMBER_TBL8S, + .flags = 0 +}; + +static struct rte_mempool *socket_direct_pool[RTE_MAX_NUMA_NODES]; +static struct 
rte_mempool *socket_indirect_pool[RTE_MAX_NUMA_NODES]; +static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES]; +static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES]; + +/* Send burst of packets on an output interface */ +static inline int +send_burst(struct lcore_queue_conf *qconf, uint16_t n, uint8_t port) +{ + struct rte_mbuf **m_table; + int ret; + uint16_t queueid; + + queueid = qconf->tx_queue_id[port]; + m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; + + ret = rte_eth_tx_burst(port, queueid, m_table, n); + if (unlikely(ret < n)) { + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < n); + } + + return 0; +} + +static inline void +l3fwd_simple_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf, + uint8_t queueid, uint8_t port_in) +{ + struct rx_queue *rxq; + uint32_t i, len, next_hop_ipv4; + uint8_t next_hop_ipv6, port_out, ipv6; + int32_t len2; + + ipv6 = 0; + rxq = &qconf->rx_queue_list[queueid]; + + /* by default, send everything back to the source port */ + port_out = port_in; + + /* Remove the Ethernet header and trailer from the input packet */ + rte_pktmbuf_adj(m, (uint16_t)sizeof(struct ether_hdr)); + + /* Build transmission burst */ + len = qconf->tx_mbufs[port_out].len; + + /* if this is an IPv4 packet */ + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + struct ipv4_hdr *ip_hdr; + uint32_t ip_dst; + /* Read the lookup key (i.e. 
ip_dst) from the input packet */ + ip_hdr = rte_pktmbuf_mtod(m, struct ipv4_hdr *); + ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr); + + /* Find destination port */ + if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop_ipv4) == 0 && + (enabled_port_mask & 1 << next_hop_ipv4) != 0) { + port_out = next_hop_ipv4; + + /* Build transmission burst for new port */ + len = qconf->tx_mbufs[port_out].len; + } + + /* if we don't need to do any fragmentation */ + if (likely (IPV4_MTU_DEFAULT >= m->pkt_len)) { + qconf->tx_mbufs[port_out].m_table[len] = m; + len2 = 1; + } else { + len2 = rte_ipv4_fragment_packet(m, + &qconf->tx_mbufs[port_out].m_table[len], + (uint16_t)(MBUF_TABLE_SIZE - len), + IPV4_MTU_DEFAULT, + rxq->direct_pool, rxq->indirect_pool); + + /* Free input packet */ + rte_pktmbuf_free(m); + + /* If we fail to fragment the packet */ + if (unlikely (len2 < 0)) + return; + } + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + /* if this is an IPv6 packet */ + struct ipv6_hdr *ip_hdr; + + ipv6 = 1; + + /* Read the lookup key (i.e. 
ip_dst) from the input packet */ + ip_hdr = rte_pktmbuf_mtod(m, struct ipv6_hdr *); + + /* Find destination port */ + if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr, &next_hop_ipv6) == 0 && + (enabled_port_mask & 1 << next_hop_ipv6) != 0) { + port_out = next_hop_ipv6; + + /* Build transmission burst for new port */ + len = qconf->tx_mbufs[port_out].len; + } + + /* if we don't need to do any fragmentation */ + if (likely (IPV6_MTU_DEFAULT >= m->pkt_len)) { + qconf->tx_mbufs[port_out].m_table[len] = m; + len2 = 1; + } else { + len2 = rte_ipv6_fragment_packet(m, + &qconf->tx_mbufs[port_out].m_table[len], + (uint16_t)(MBUF_TABLE_SIZE - len), + IPV6_MTU_DEFAULT, + rxq->direct_pool, rxq->indirect_pool); + + /* Free input packet */ + rte_pktmbuf_free(m); + + /* If we fail to fragment the packet */ + if (unlikely (len2 < 0)) + return; + } + } + /* else, just forward the packet */ + else { + qconf->tx_mbufs[port_out].m_table[len] = m; + len2 = 1; + } + + /* Re-add an Ethernet header to each of the len2 packets just queued */ + for (i = len; i < len + len2; i ++) { + void *d_addr_bytes; + + m = qconf->tx_mbufs[port_out].m_table[i]; + struct ether_hdr *eth_hdr = (struct ether_hdr *) + rte_pktmbuf_prepend(m, (uint16_t)sizeof(struct ether_hdr)); + if (eth_hdr == NULL) { + rte_panic("No headroom in mbuf.\n"); + } + + m->l2_len = sizeof(struct ether_hdr); + + /* 02:00:00:00:00:xx */ + /* NOTE(review): the 8-byte store below presumably spills 2 bytes past the 6-byte d_addr into s_addr, which the copy that follows rewrites - confirm against struct ether_hdr */ + d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0]; + *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)port_out << 40); + + /* src addr */ + ether_addr_copy(&ports_eth_addr[port_out], &eth_hdr->s_addr); + /* ether_type is stored in network byte order */ + if (ipv6) + eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6); + else + eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + } + + len += len2; + + /* Keep accumulating until a full burst is queued for this port */ + if (likely(len < MAX_PKT_BURST)) { + qconf->tx_mbufs[port_out].len = (uint16_t)len; + return; + } + + /* Transmit packets */ + send_burst(qconf, (uint16_t)len, port_out); + qconf->tx_mbufs[port_out].len = 0; +} + +/* main processing loop */ +static int +main_loop(__attribute__((unused)) void *dummy) +{ + struct
rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + int i, j, nb_rx; + uint8_t portid; + struct lcore_queue_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + if (qconf->n_rx_queue == 0) { + RTE_LOG(INFO, IP_FRAG, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, IP_FRAG, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i].portid; + RTE_LOG(INFO, IP_FRAG, " -- lcoreid=%u portid=%d\n", lcore_id, + (int) portid); + } + + while (1) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + /* + * This could be optimized (use queueid instead of + * portid), but it is not called so often + */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + if (qconf->tx_mbufs[portid].len == 0) + continue; + send_burst(&lcore_queue_conf[lcore_id], + qconf->tx_mbufs[portid].len, + portid); + qconf->tx_mbufs[portid].len = 0; + } + + prev_tsc = cur_tsc; + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i].portid; + nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst, + MAX_PKT_BURST); + + /* Prefetch first packets */ + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { + rte_prefetch0(rte_pktmbuf_mtod( + pkts_burst[j], void *)); + } + + /* Prefetch and forward already prefetched packets */ + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ + j + PREFETCH_OFFSET], void *)); + l3fwd_simple_forward(pkts_burst[j], qconf, i, portid); + } + + /* Forward remaining prefetched packets */ + for (; j < nb_rx; j++) { + l3fwd_simple_forward(pkts_burst[j], qconf, i, portid); + } + } + } +} + +/* 
display usage */ +static void +print_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -q NQ: number of queue (=ports) per lcore (default is 1)\n", + prgname); +} + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string; an empty string, trailing characters or a zero mask are rejected with -1 below */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +static int +parse_nqueue(const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse decimal string; an empty string, trailing characters, 0, or a value >= MAX_RX_QUEUE_PER_LCORE are rejected with -1 below */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n == 0) + return -1; + if (n >= MAX_RX_QUEUE_PER_LCORE) + return -1; + + return n; +} + +/* Parse the argument given in the command line of the application; returns -1 on any invalid or missing option, otherwise optind-1 */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:q:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask < 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + + /* nqueue */ + case 'q': + rx_queue_per_lcore = parse_nqueue(optarg); + if (rx_queue_per_lcore < 0) { + printf("invalid queue number\n"); + print_usage(prgname); + return -1; + } + break; + + /* long options: none are defined in lgopts, so treat as a usage error */ + case 0: + print_usage(prgname); + return -1; + + default: + print_usage(prgname); + return -1; + } + } + + if (enabled_port_mask == 0) { + printf("portmask not specified\n"); + print_usage(prgname); + return -1; + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ +
return ret; +} + +static void +print_ethaddr(const char *name, struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("\ndone\n"); + } + } +} + +static int +init_routing_table(void) +{ + struct rte_lpm *lpm; + struct rte_lpm6 *lpm6; + int socket, ret; + unsigned i; + + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + if (socket_lpm[socket]) { + lpm = socket_lpm[socket]; + /* populate the LPM table */ + for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) { + ret = rte_lpm_add(lpm, + l3fwd_ipv4_route_array[i].ip, + l3fwd_ipv4_route_array[i].depth, + l3fwd_ipv4_route_array[i].if_out); + + if (ret < 0) { + RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd " + "LPM table\n", i); + return -1; + } + + RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv4_BYTES_FMT + "/%d (port %d)\n", + socket, + IPv4_BYTES(l3fwd_ipv4_route_array[i].ip), + l3fwd_ipv4_route_array[i].depth, + l3fwd_ipv4_route_array[i].if_out); + } + } + + if (socket_lpm6[socket]) { + lpm6 = socket_lpm6[socket]; + /* populate the LPM6 table */ + for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) { + ret = rte_lpm6_add(lpm6, + l3fwd_ipv6_route_array[i].ip, + l3fwd_ipv6_route_array[i].depth, + l3fwd_ipv6_route_array[i].if_out); + + if (ret < 0) { + RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd " + "LPM6 table\n", i); + return -1; + } + + RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv6_BYTES_FMT + "/%d (port %d)\n", + socket, + IPv6_BYTES(l3fwd_ipv6_route_array[i].ip), + l3fwd_ipv6_route_array[i].depth, + 
l3fwd_ipv6_route_array[i].if_out); + } + } + } + return 0; +} + +static int +init_mem(void) +{ + char buf[PATH_MAX]; + struct rte_mempool *mp; + struct rte_lpm *lpm; + struct rte_lpm6 *lpm6; + struct rte_lpm_config lpm_config; + int socket; + unsigned lcore_id; + + /* traverse through lcores and initialize structures on each socket */ + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + socket = rte_lcore_to_socket_id(lcore_id); + + if (socket == SOCKET_ID_ANY) + socket = 0; + + if (socket_direct_pool[socket] == NULL) { + RTE_LOG(INFO, IP_FRAG, "Creating direct mempool on socket %i\n", + socket); + snprintf(buf, sizeof(buf), "pool_direct_%i", socket); + + mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32, + 0, RTE_MBUF_DEFAULT_BUF_SIZE, socket); + if (mp == NULL) { + RTE_LOG(ERR, IP_FRAG, "Cannot create direct mempool\n"); + return -1; + } + socket_direct_pool[socket] = mp; + } + + if (socket_indirect_pool[socket] == NULL) { + RTE_LOG(INFO, IP_FRAG, "Creating indirect mempool on socket %i\n", + socket); + snprintf(buf, sizeof(buf), "pool_indirect_%i", socket); + + mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32, 0, 0, + socket); + if (mp == NULL) { + RTE_LOG(ERR, IP_FRAG, "Cannot create indirect mempool\n"); + return -1; + } + socket_indirect_pool[socket] = mp; + } + + if (socket_lpm[socket] == NULL) { + RTE_LOG(INFO, IP_FRAG, "Creating LPM table on socket %i\n", socket); + snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket); + + lpm_config.max_rules = LPM_MAX_RULES; + lpm_config.number_tbl8s = 256; + lpm_config.flags = 0; + + lpm = rte_lpm_create(buf, socket, &lpm_config); + if (lpm == NULL) { + RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n"); + return -1; + } + socket_lpm[socket] = lpm; + } + + if (socket_lpm6[socket] == NULL) { + RTE_LOG(INFO, IP_FRAG, "Creating LPM6 table on socket %i\n", socket); + snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket); + + lpm6 = 
rte_lpm6_create("IP_FRAG_LPM6", socket, &lpm6_config); + if (lpm6 == NULL) { + RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n"); + return -1; + } + socket_lpm6[socket] = lpm6; + } + } + + return 0; +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + struct rx_queue *rxq; + int socket, ret; + unsigned nb_ports; + uint16_t queueid = 0; + unsigned lcore_id = 0, rx_lcore_id = 0; + uint32_t n_tx_queue, nb_lcores; + uint8_t portid; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eal_init failed"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid arguments"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + else if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No ports found!\n"); + + nb_lcores = rte_lcore_count(); + + /* initialize structures (mempools, lpm etc.) 
*/ + if (init_mem() < 0) + rte_panic("Cannot initialize memory structures!\n"); + + /* check if portmask has non-existent ports */ + if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned))) + rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n"); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("Skipping disabled port %d\n", portid); + continue; + } + + qconf = &lcore_queue_conf[rx_lcore_id]; + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) { + + rx_lcore_id ++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + + qconf = &lcore_queue_conf[rx_lcore_id]; + } + + socket = (int) rte_lcore_to_socket_id(rx_lcore_id); + if (socket == SOCKET_ID_ANY) + socket = 0; + + rxq = &qconf->rx_queue_list[qconf->n_rx_queue]; + rxq->portid = portid; + rxq->direct_pool = socket_direct_pool[socket]; + rxq->indirect_pool = socket_indirect_pool[socket]; + rxq->lpm = socket_lpm[socket]; + rxq->lpm6 = socket_lpm6[socket]; + qconf->n_rx_queue++; + + /* init port */ + printf("Initializing port %d on lcore %u...", portid, + rx_lcore_id); + fflush(stdout); + + n_tx_queue = nb_lcores; + if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) + n_tx_queue = MAX_TX_QUEUE_PER_PORT; + ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue, + &port_conf); + if (ret < 0) { + printf("\n"); + rte_exit(EXIT_FAILURE, "Cannot configure device: " + "err=%d, port=%d\n", + ret, portid); + } + + /* init one RX queue */ + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + socket, NULL, + socket_direct_pool[socket]); + if (ret < 0) { + printf("\n"); + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: " + "err=%d, port=%d\n", + ret, portid); + } + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + 
printf("\n"); + + /* init one TX queue per couple (lcore,port) */ + queueid = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + socket = (int) rte_lcore_to_socket_id(lcore_id); + printf("txq=%u,%d ", lcore_id, queueid); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + txconf->txq_flags = 0; + ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, + socket, txconf); + if (ret < 0) { + printf("\n"); + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: " + "err=%d, port=%d\n", ret, portid); + } + + qconf = &lcore_queue_conf[lcore_id]; + qconf->tx_queue_id[portid] = queueid; + queueid++; + } + + printf("\n"); + } + + printf("\n"); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) { + continue; + } + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + + rte_eth_promiscuous_enable(portid); + } + + if (init_routing_table() < 0) + rte_exit(EXIT_FAILURE, "Cannot init routing table\n"); + + check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/ip_pipeline/Makefile b/examples/ip_pipeline/Makefile new file mode 100644 index 00000000..10fe1ba9 --- /dev/null +++ b/examples/ip_pipeline/Makefile @@ -0,0 +1,79 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +DIRS-(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = ip_pipeline + +VPATH += $(SRCDIR)/pipeline + +INC += $(wildcard *.h) $(wildcard pipeline/*.h) + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) := main.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += config_parse.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += config_parse_tm.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += config_check.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += init.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += thread.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += thread_fe.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += cpu_core_map.c + +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_common_be.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_common_fe.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_master_be.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_master.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_passthrough_be.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_passthrough.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_firewall_be.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_firewall.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_flow_classification_be.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_flow_classification.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_flow_actions_be.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_flow_actions.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_routing_be.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_routing.c + +CFLAGS += -I$(SRCDIR) -I$(SRCDIR)/pipeline +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -Wno-error=unused-function -Wno-error=unused-variable + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/ip_pipeline/app.h b/examples/ip_pipeline/app.h new file mode 100644 
index 00000000..55a98417 --- /dev/null +++ b/examples/ip_pipeline/app.h @@ -0,0 +1,949 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_APP_H__ +#define __INCLUDE_APP_H__ + +#include <stdint.h> +#include <string.h> + +#include <rte_common.h> +#include <rte_mempool.h> +#include <rte_ring.h> +#include <rte_sched.h> +#include <cmdline_parse.h> + +#include <rte_ethdev.h> + +#include "cpu_core_map.h" +#include "pipeline.h" + +#define APP_PARAM_NAME_SIZE PIPELINE_NAME_SIZE +#define APP_LINK_PCI_BDF_SIZE 16 +struct app_mempool_params { + char *name; + uint32_t parsed; + uint32_t buffer_size; + uint32_t pool_size; + uint32_t cache_size; + uint32_t cpu_socket_id; +}; + +struct app_link_params { + char *name; + uint32_t parsed; + uint32_t pmd_id; /* Generated based on port mask */ + uint32_t arp_q; /* 0 = Disabled (packets go to default queue 0) */ + uint32_t tcp_syn_q; /* 0 = Disabled (pkts go to default queue) */ + uint32_t ip_local_q; /* 0 = Disabled (pkts go to default queue 0) */ + uint32_t tcp_local_q; /* 0 = Disabled (pkts go to default queue 0) */ + uint32_t udp_local_q; /* 0 = Disabled (pkts go to default queue 0) */ + uint32_t sctp_local_q; /* 0 = Disabled (pkts go to default queue 0) */ + uint32_t state; /* DOWN = 0, UP = 1 */ + uint32_t ip; /* 0 = Invalid */ + uint32_t depth; /* Valid only when IP is valid */ + uint64_t mac_addr; /* Read from HW */ + char pci_bdf[APP_LINK_PCI_BDF_SIZE]; + + struct rte_eth_conf conf; + uint8_t promisc; +}; + +struct app_pktq_hwq_in_params { + char *name; + uint32_t parsed; + uint32_t mempool_id; /* Position in the app->mempool_params */ + uint32_t size; + uint32_t burst; + + struct rte_eth_rxconf conf; +}; + +struct app_pktq_hwq_out_params { + char *name; + uint32_t parsed; + uint32_t size; + uint32_t burst; + uint32_t dropless; + uint64_t n_retries; + struct rte_eth_txconf conf; +}; + +struct app_pktq_swq_params { + char *name; + uint32_t parsed; + uint32_t size; + uint32_t burst_read; + uint32_t burst_write; + uint32_t dropless; + uint64_t n_retries; + uint32_t cpu_socket_id; + uint32_t ipv4_frag; + uint32_t ipv6_frag; + uint32_t 
ipv4_ras; + uint32_t ipv6_ras; + uint32_t mtu; + uint32_t metadata_size; + uint32_t mempool_direct_id; + uint32_t mempool_indirect_id; +}; + +#ifndef APP_FILE_NAME_SIZE +#define APP_FILE_NAME_SIZE 256 +#endif + +#ifndef APP_MAX_SCHED_SUBPORTS +#define APP_MAX_SCHED_SUBPORTS 8 +#endif + +#ifndef APP_MAX_SCHED_PIPES +#define APP_MAX_SCHED_PIPES 4096 +#endif + +struct app_pktq_tm_params { + char *name; + uint32_t parsed; + const char *file_name; + struct rte_sched_port_params sched_port_params; + struct rte_sched_subport_params + sched_subport_params[APP_MAX_SCHED_SUBPORTS]; + struct rte_sched_pipe_params + sched_pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT]; + int sched_pipe_to_profile[APP_MAX_SCHED_SUBPORTS * APP_MAX_SCHED_PIPES]; + uint32_t burst_read; + uint32_t burst_write; +}; + +struct app_pktq_source_params { + char *name; + uint32_t parsed; + uint32_t mempool_id; /* Position in the app->mempool_params array */ + uint32_t burst; + char *file_name; /* Full path of PCAP file to be copied to mbufs */ + uint32_t n_bytes_per_pkt; +}; + +struct app_pktq_sink_params { + char *name; + uint8_t parsed; + char *file_name; /* Full path of PCAP file to be copied to mbufs */ + uint32_t n_pkts_to_dump; +}; + +struct app_msgq_params { + char *name; + uint32_t parsed; + uint32_t size; + uint32_t cpu_socket_id; +}; + +enum app_pktq_in_type { + APP_PKTQ_IN_HWQ, + APP_PKTQ_IN_SWQ, + APP_PKTQ_IN_TM, + APP_PKTQ_IN_SOURCE, +}; + +struct app_pktq_in_params { + enum app_pktq_in_type type; + uint32_t id; /* Position in the appropriate app array */ +}; + +enum app_pktq_out_type { + APP_PKTQ_OUT_HWQ, + APP_PKTQ_OUT_SWQ, + APP_PKTQ_OUT_TM, + APP_PKTQ_OUT_SINK, +}; + +struct app_pktq_out_params { + enum app_pktq_out_type type; + uint32_t id; /* Position in the appropriate app array */ +}; + +#ifndef APP_PIPELINE_TYPE_SIZE +#define APP_PIPELINE_TYPE_SIZE 64 +#endif + +#define APP_MAX_PIPELINE_PKTQ_IN PIPELINE_MAX_PORT_IN +#define APP_MAX_PIPELINE_PKTQ_OUT PIPELINE_MAX_PORT_OUT +#define 
APP_MAX_PIPELINE_MSGQ_IN PIPELINE_MAX_MSGQ_IN +#define APP_MAX_PIPELINE_MSGQ_OUT PIPELINE_MAX_MSGQ_OUT + +#define APP_MAX_PIPELINE_ARGS PIPELINE_MAX_ARGS + +struct app_pipeline_params { + char *name; + uint8_t parsed; + + char type[APP_PIPELINE_TYPE_SIZE]; + + uint32_t socket_id; + uint32_t core_id; + uint32_t hyper_th_id; + + struct app_pktq_in_params pktq_in[APP_MAX_PIPELINE_PKTQ_IN]; + struct app_pktq_out_params pktq_out[APP_MAX_PIPELINE_PKTQ_OUT]; + uint32_t msgq_in[APP_MAX_PIPELINE_MSGQ_IN]; + uint32_t msgq_out[APP_MAX_PIPELINE_MSGQ_OUT]; + + uint32_t n_pktq_in; + uint32_t n_pktq_out; + uint32_t n_msgq_in; + uint32_t n_msgq_out; + + uint32_t timer_period; + + char *args_name[APP_MAX_PIPELINE_ARGS]; + char *args_value[APP_MAX_PIPELINE_ARGS]; + uint32_t n_args; +}; + +struct app_pipeline_data { + void *be; + void *fe; + struct pipeline_type *ptype; + uint64_t timer_period; + uint32_t enabled; +}; + +struct app_thread_pipeline_data { + uint32_t pipeline_id; + void *be; + pipeline_be_op_run f_run; + pipeline_be_op_timer f_timer; + uint64_t timer_period; + uint64_t deadline; +}; + +#ifndef APP_MAX_THREAD_PIPELINES +#define APP_MAX_THREAD_PIPELINES 16 +#endif + +#ifndef APP_THREAD_TIMER_PERIOD +#define APP_THREAD_TIMER_PERIOD 1 +#endif + +struct app_thread_data { + struct app_thread_pipeline_data regular[APP_MAX_THREAD_PIPELINES]; + struct app_thread_pipeline_data custom[APP_MAX_THREAD_PIPELINES]; + + uint32_t n_regular; + uint32_t n_custom; + + uint64_t timer_period; + uint64_t thread_req_deadline; + + uint64_t deadline; + + struct rte_ring *msgq_in; + struct rte_ring *msgq_out; + + uint64_t headroom_time; + uint64_t headroom_cycles; + double headroom_ratio; +}; + +#ifndef APP_MAX_LINKS +#define APP_MAX_LINKS 16 +#endif + +struct app_eal_params { + /* Map lcore set to physical cpu set */ + char *coremap; + + /* Core ID that is used as master */ + uint32_t master_lcore_present; + uint32_t master_lcore; + + /* Number of memory channels */ + uint32_t channels_present; 
+ uint32_t channels; + + /* Memory to allocate (see also --socket-mem) */ + uint32_t memory_present; + uint32_t memory; + + /* Force number of memory ranks (don't detect) */ + uint32_t ranks_present; + uint32_t ranks; + + /* Add a PCI device in black list. */ + char *pci_blacklist[APP_MAX_LINKS]; + + /* Add a PCI device in white list. */ + char *pci_whitelist[APP_MAX_LINKS]; + + /* Add a virtual device. */ + char *vdev[APP_MAX_LINKS]; + + /* Use VMware TSC map instead of native RDTSC */ + uint32_t vmware_tsc_map_present; + int vmware_tsc_map; + + /* Type of this process (primary|secondary|auto) */ + char *proc_type; + + /* Set syslog facility */ + char *syslog; + + /* Set default log level */ + uint32_t log_level_present; + uint32_t log_level; + + /* Display version information on startup */ + uint32_t version_present; + int version; + + /* This help */ + uint32_t help_present; + int help; + + /* Use malloc instead of hugetlbfs */ + uint32_t no_huge_present; + int no_huge; + + /* Disable PCI */ + uint32_t no_pci_present; + int no_pci; + + /* Disable HPET */ + uint32_t no_hpet_present; + int no_hpet; + + /* No shared config (mmap'd files) */ + uint32_t no_shconf_present; + int no_shconf; + + /* Add driver */ + char *add_driver; + + /* Memory to allocate on sockets (comma separated values)*/ + char *socket_mem; + + /* Directory where hugetlbfs is mounted */ + char *huge_dir; + + /* Prefix for hugepage filenames */ + char *file_prefix; + + /* Base virtual address */ + char *base_virtaddr; + + /* Create /dev/uioX (usually done by hotplug) */ + uint32_t create_uio_dev_present; + int create_uio_dev; + + /* Interrupt mode for VFIO (legacy|msi|msix) */ + char *vfio_intr; + + /* Support running on Xen dom0 without hugetlbfs */ + uint32_t xen_dom0_present; + int xen_dom0; +}; + +#ifndef APP_APPNAME_SIZE +#define APP_APPNAME_SIZE 256 +#endif + +#ifndef APP_MAX_MEMPOOLS +#define APP_MAX_MEMPOOLS 8 +#endif + +#ifndef APP_LINK_MAX_HWQ_IN +#define APP_LINK_MAX_HWQ_IN 64 +#endif + 
+#ifndef APP_LINK_MAX_HWQ_OUT +#define APP_LINK_MAX_HWQ_OUT 64 +#endif + +#define APP_MAX_HWQ_IN (APP_MAX_LINKS * APP_LINK_MAX_HWQ_IN) + +#define APP_MAX_HWQ_OUT (APP_MAX_LINKS * APP_LINK_MAX_HWQ_OUT) + +#ifndef APP_MAX_PKTQ_SWQ +#define APP_MAX_PKTQ_SWQ 256 +#endif + +#define APP_MAX_PKTQ_TM APP_MAX_LINKS + +#ifndef APP_MAX_PKTQ_SOURCE +#define APP_MAX_PKTQ_SOURCE 16 +#endif + +#ifndef APP_MAX_PKTQ_SINK +#define APP_MAX_PKTQ_SINK 16 +#endif + +#ifndef APP_MAX_MSGQ +#define APP_MAX_MSGQ 64 +#endif + +#ifndef APP_MAX_PIPELINES +#define APP_MAX_PIPELINES 64 +#endif + +#ifndef APP_EAL_ARGC +#define APP_EAL_ARGC 32 +#endif + +#ifndef APP_MAX_PIPELINE_TYPES +#define APP_MAX_PIPELINE_TYPES 64 +#endif + +#ifndef APP_MAX_THREADS +#define APP_MAX_THREADS RTE_MAX_LCORE +#endif + +#ifndef APP_MAX_CMDS +#define APP_MAX_CMDS 64 +#endif + +#ifndef APP_THREAD_HEADROOM_STATS_COLLECT +#define APP_THREAD_HEADROOM_STATS_COLLECT 1 +#endif + +struct app_params { + /* Config */ + char app_name[APP_APPNAME_SIZE]; + const char *config_file; + const char *script_file; + const char *parser_file; + const char *output_file; + const char *preproc; + const char *preproc_args; + uint64_t port_mask; + uint32_t log_level; + + struct app_eal_params eal_params; + struct app_mempool_params mempool_params[APP_MAX_MEMPOOLS]; + struct app_link_params link_params[APP_MAX_LINKS]; + struct app_pktq_hwq_in_params hwq_in_params[APP_MAX_HWQ_IN]; + struct app_pktq_hwq_out_params hwq_out_params[APP_MAX_HWQ_OUT]; + struct app_pktq_swq_params swq_params[APP_MAX_PKTQ_SWQ]; + struct app_pktq_tm_params tm_params[APP_MAX_PKTQ_TM]; + struct app_pktq_source_params source_params[APP_MAX_PKTQ_SOURCE]; + struct app_pktq_sink_params sink_params[APP_MAX_PKTQ_SINK]; + struct app_msgq_params msgq_params[APP_MAX_MSGQ]; + struct app_pipeline_params pipeline_params[APP_MAX_PIPELINES]; + + uint32_t n_mempools; + uint32_t n_links; + uint32_t n_pktq_hwq_in; + uint32_t n_pktq_hwq_out; + uint32_t n_pktq_swq; + uint32_t n_pktq_tm; + 
/*
 * Look up the entry of obj_array whose name equals key.
 *
 * obj_array must be a real array (its element count is taken with RTE_DIM)
 * of objects that have a "name" string member.  Entries for which
 * APP_PARAM_VALID() is false (name == NULL) are skipped.
 *
 * Evaluates to the index of the first matching entry, or to -ENOENT when no
 * valid entry carries that name.
 *
 * key is evaluated exactly once, so an argument with side effects is safe.
 * The locals carry a "find_" prefix and trailing underscore so they do not
 * shadow the obj_idx/obj_count locals of APP_PARAM_ADD, which expands this
 * macro.  Implemented as a GNU statement expression.
 */
#define APP_PARAM_FIND(obj_array, key)					\
({									\
	const char *find_key_ = (key);					\
	const ssize_t find_cnt_ = RTE_DIM(obj_array);			\
	ssize_t find_idx_;						\
									\
	for (find_idx_ = 0; find_idx_ < find_cnt_; find_idx_++) {	\
		if (!APP_PARAM_VALID(&((obj_array)[find_idx_])))	\
			continue;					\
									\
		if (strcmp(find_key_, (obj_array)[find_idx_].name) == 0) \
			break;						\
	}								\
	find_idx_ < find_cnt_ ? find_idx_ : -ENOENT;			\
})
/*
 * Parse the numeric identifier that follows prefix in obj->name and store it
 * in id; e.g. a name of "LINK3" with prefix "LINK" stores 3.
 *
 * obj    - pointer expression to an object with a "name" string member
 * prefix - string literal (it is concatenated with the scanf conversion)
 * id     - uint32_t lvalue receiving the value; it is left untouched when
 *          the name does not start with prefix followed by a number
 *
 * Both obj and id are parenthesized so that compound expressions such as
 * &array[i] can be passed.
 */
#define APP_PARAM_GET_ID(obj, prefix, id)				\
do {									\
	sscanf((obj)->name, prefix "%" SCNu32, &(id));			\
} while (0)
*p = &app->hwq_out_params[i]; + uint32_t txq_link_id, txq_queue_id; + + sscanf(p->name, "TXQ%" SCNu32 ".%" SCNu32, + &txq_link_id, &txq_queue_id); + if (txq_link_id == link_id) + n_txq++; + } + + return n_txq; +} + +static inline uint32_t +app_rxq_get_readers(struct app_params *app, struct app_pktq_hwq_in_params *rxq) +{ + uint32_t pos = rxq - app->hwq_in_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_readers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_pktq_in = RTE_MIN(p->n_pktq_in, RTE_DIM(p->pktq_in)); + uint32_t j; + + for (j = 0; j < n_pktq_in; j++) { + struct app_pktq_in_params *pktq = &p->pktq_in[j]; + + if ((pktq->type == APP_PKTQ_IN_HWQ) && + (pktq->id == pos)) + n_readers++; + } + } + + return n_readers; +} + +static inline uint32_t +app_swq_get_readers(struct app_params *app, struct app_pktq_swq_params *swq) +{ + uint32_t pos = swq - app->swq_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_readers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_pktq_in = RTE_MIN(p->n_pktq_in, RTE_DIM(p->pktq_in)); + uint32_t j; + + for (j = 0; j < n_pktq_in; j++) { + struct app_pktq_in_params *pktq = &p->pktq_in[j]; + + if ((pktq->type == APP_PKTQ_IN_SWQ) && + (pktq->id == pos)) + n_readers++; + } + } + + return n_readers; +} + +static inline uint32_t +app_tm_get_readers(struct app_params *app, struct app_pktq_tm_params *tm) +{ + uint32_t pos = tm - app->tm_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_readers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_pktq_in = RTE_MIN(p->n_pktq_in, RTE_DIM(p->pktq_in)); + uint32_t j; + + for (j = 0; j < n_pktq_in; j++) { + struct 
app_pktq_in_params *pktq = &p->pktq_in[j]; + + if ((pktq->type == APP_PKTQ_IN_TM) && + (pktq->id == pos)) + n_readers++; + } + } + + return n_readers; +} + +static inline uint32_t +app_source_get_readers(struct app_params *app, +struct app_pktq_source_params *source) +{ + uint32_t pos = source - app->source_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_readers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_pktq_in = RTE_MIN(p->n_pktq_in, RTE_DIM(p->pktq_in)); + uint32_t j; + + for (j = 0; j < n_pktq_in; j++) { + struct app_pktq_in_params *pktq = &p->pktq_in[j]; + + if ((pktq->type == APP_PKTQ_IN_SOURCE) && + (pktq->id == pos)) + n_readers++; + } + } + + return n_readers; +} + +static inline uint32_t +app_msgq_get_readers(struct app_params *app, struct app_msgq_params *msgq) +{ + uint32_t pos = msgq - app->msgq_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_readers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_msgq_in = RTE_MIN(p->n_msgq_in, RTE_DIM(p->msgq_in)); + uint32_t j; + + for (j = 0; j < n_msgq_in; j++) + if (p->msgq_in[j] == pos) + n_readers++; + } + + return n_readers; +} + +static inline uint32_t +app_txq_get_writers(struct app_params *app, struct app_pktq_hwq_out_params *txq) +{ + uint32_t pos = txq - app->hwq_out_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_writers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_pktq_out = RTE_MIN(p->n_pktq_out, + RTE_DIM(p->pktq_out)); + uint32_t j; + + for (j = 0; j < n_pktq_out; j++) { + struct app_pktq_out_params *pktq = &p->pktq_out[j]; + + if ((pktq->type == APP_PKTQ_OUT_HWQ) && + (pktq->id == pos)) + n_writers++; + } 
+ } + + return n_writers; +} + +static inline uint32_t +app_swq_get_writers(struct app_params *app, struct app_pktq_swq_params *swq) +{ + uint32_t pos = swq - app->swq_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_writers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_pktq_out = RTE_MIN(p->n_pktq_out, + RTE_DIM(p->pktq_out)); + uint32_t j; + + for (j = 0; j < n_pktq_out; j++) { + struct app_pktq_out_params *pktq = &p->pktq_out[j]; + + if ((pktq->type == APP_PKTQ_OUT_SWQ) && + (pktq->id == pos)) + n_writers++; + } + } + + return n_writers; +} + +static inline uint32_t +app_tm_get_writers(struct app_params *app, struct app_pktq_tm_params *tm) +{ + uint32_t pos = tm - app->tm_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_writers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_pktq_out = RTE_MIN(p->n_pktq_out, + RTE_DIM(p->pktq_out)); + uint32_t j; + + for (j = 0; j < n_pktq_out; j++) { + struct app_pktq_out_params *pktq = &p->pktq_out[j]; + + if ((pktq->type == APP_PKTQ_OUT_TM) && + (pktq->id == pos)) + n_writers++; + } + } + + return n_writers; +} + +static inline uint32_t +app_sink_get_writers(struct app_params *app, struct app_pktq_sink_params *sink) +{ + uint32_t pos = sink - app->sink_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_writers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_pktq_out = RTE_MIN(p->n_pktq_out, + RTE_DIM(p->pktq_out)); + uint32_t j; + + for (j = 0; j < n_pktq_out; j++) { + struct app_pktq_out_params *pktq = &p->pktq_out[j]; + + if ((pktq->type == APP_PKTQ_OUT_SINK) && + (pktq->id == pos)) + n_writers++; + } + } + + return n_writers; +} + +static 
inline uint32_t +app_msgq_get_writers(struct app_params *app, struct app_msgq_params *msgq) +{ + uint32_t pos = msgq - app->msgq_params; + uint32_t n_pipelines = RTE_MIN(app->n_pipelines, + RTE_DIM(app->pipeline_params)); + uint32_t n_writers = 0, i; + + for (i = 0; i < n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + uint32_t n_msgq_out = RTE_MIN(p->n_msgq_out, + RTE_DIM(p->msgq_out)); + uint32_t j; + + for (j = 0; j < n_msgq_out; j++) + if (p->msgq_out[j] == pos) + n_writers++; + } + + return n_writers; +} + +static inline struct app_link_params * +app_get_link_for_rxq(struct app_params *app, struct app_pktq_hwq_in_params *p) +{ + char link_name[APP_PARAM_NAME_SIZE]; + ssize_t link_param_idx; + uint32_t rxq_link_id, rxq_queue_id; + + sscanf(p->name, "RXQ%" SCNu32 ".%" SCNu32, + &rxq_link_id, &rxq_queue_id); + sprintf(link_name, "LINK%" PRIu32, rxq_link_id); + link_param_idx = APP_PARAM_FIND(app->link_params, link_name); + APP_CHECK((link_param_idx >= 0), + "Cannot find %s for %s", link_name, p->name); + + return &app->link_params[link_param_idx]; +} + +static inline struct app_link_params * +app_get_link_for_txq(struct app_params *app, struct app_pktq_hwq_out_params *p) +{ + char link_name[APP_PARAM_NAME_SIZE]; + ssize_t link_param_idx; + uint32_t txq_link_id, txq_queue_id; + + sscanf(p->name, "TXQ%" SCNu32 ".%" SCNu32, + &txq_link_id, &txq_queue_id); + sprintf(link_name, "LINK%" PRIu32, txq_link_id); + link_param_idx = APP_PARAM_FIND(app->link_params, link_name); + APP_CHECK((link_param_idx >= 0), + "Cannot find %s for %s", link_name, p->name); + + return &app->link_params[link_param_idx]; +} + +static inline struct app_link_params * +app_get_link_for_tm(struct app_params *app, struct app_pktq_tm_params *p_tm) +{ + char link_name[APP_PARAM_NAME_SIZE]; + uint32_t link_id; + ssize_t link_param_idx; + + sscanf(p_tm->name, "TM%" PRIu32, &link_id); + sprintf(link_name, "LINK%" PRIu32, link_id); + link_param_idx = 
APP_PARAM_FIND(app->link_params, link_name); + APP_CHECK((link_param_idx >= 0), + "Cannot find %s for %s", link_name, p_tm->name); + + return &app->link_params[link_param_idx]; +} + +int app_config_init(struct app_params *app); + +int app_config_args(struct app_params *app, + int argc, char **argv); + +int app_config_preproc(struct app_params *app); + +int app_config_parse(struct app_params *app, + const char *file_name); + +int app_config_parse_tm(struct app_params *app); + +void app_config_save(struct app_params *app, + const char *file_name); + +int app_config_check(struct app_params *app); + +int app_init(struct app_params *app); + +int app_thread(void *arg); + +int app_pipeline_type_register(struct app_params *app, + struct pipeline_type *ptype); + +struct pipeline_type *app_pipeline_type_find(struct app_params *app, + char *name); + +void app_link_up_internal(struct app_params *app, + struct app_link_params *cp); + +void app_link_down_internal(struct app_params *app, + struct app_link_params *cp); + +#endif diff --git a/examples/ip_pipeline/config/edge_router_downstream.cfg b/examples/ip_pipeline/config/edge_router_downstream.cfg new file mode 100644 index 00000000..85bbab8f --- /dev/null +++ b/examples/ip_pipeline/config/edge_router_downstream.cfg @@ -0,0 +1,85 @@ +; BSD LICENSE +; +; Copyright(c) 2015 Intel Corporation. All rights reserved. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. 
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +; An edge router typically sits between two networks such as the provider +; core network and the provider access network. A typical packet processing +; pipeline for the downstream traffic (i.e. traffic from core to access +; network) contains the following functional blocks: Packet RX & Routing, +; Traffic management and Packet TX. The input packets are assumed to be +; IPv4, while the output packets are Q-in-Q IPv4. + +; A simple implementation for this functional pipeline is presented below. 
+ +; Packet Rx & Traffic Management Packet Tx +; Routing (Pass-Through) (Pass-Through) +; _____________________ SWQ0 ______________________ SWQ4 _____________________ +; RXQ0.0 --->| |----->| |----->| |---> TXQ0.0 +; | | SWQ1 | | SWQ5 | | +; RXQ1.0 --->| |----->| |----->| |---> TXQ1.0 +; | (P1) | SWQ2 | (P2) | SWQ6 | (P3) | +; RXQ2.0 --->| |----->| |----->| |---> TXQ2.0 +; | | SWQ3 | | SWQ7 | | +; RXQ3.0 --->| |----->| |----->| |---> TXQ3.0 +; |_____________________| |______________________| |_____________________| +; | _|_ ^ _|_ ^ _|_ ^ _|_ ^ +; | |___|||___|||___|||___|| +; +--> SINK0 |___|||___|||___|||___|| +; (route miss) |__| |__| |__| |__| +; TM0 TM1 TM2 TM3 + +[PIPELINE0] +type = MASTER +core = 0 + +[PIPELINE1] +type = ROUTING +core = 1 +pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0 +pktq_out = SWQ0 SWQ1 SWQ2 SWQ3 SINK0 +encap = ethernet_qinq +qinq_sched = test +ip_hdr_offset = 270; mbuf (128) + headroom (128) + ethernet header (14) = 270 + +[PIPELINE2] +type = PASS-THROUGH +core = 2 +pktq_in = SWQ0 SWQ1 SWQ2 SWQ3 TM0 TM1 TM2 TM3 +pktq_out = TM0 TM1 TM2 TM3 SWQ4 SWQ5 SWQ6 SWQ7 + +[PIPELINE3] +type = PASS-THROUGH +core = 3 +pktq_in = SWQ4 SWQ5 SWQ6 SWQ7 +pktq_out = TXQ0.0 TXQ1.0 TXQ2.0 TXQ3.0 + +[MEMPOOL0] +pool_size = 2M diff --git a/examples/ip_pipeline/config/edge_router_downstream.sh b/examples/ip_pipeline/config/edge_router_downstream.sh new file mode 100644 index 00000000..ce46beb5 --- /dev/null +++ b/examples/ip_pipeline/config/edge_router_downstream.sh @@ -0,0 +1,10 @@ +################################################################################ +# Routing: Ether QinQ, ARP off +################################################################################ +p 1 route add default 4 #SINK0 +p 1 route add 0.0.0.0 10 port 0 ether a0:b0:c0:d0:e0:f0 qinq 256 257 +p 1 route add 0.64.0.0 10 port 1 ether a1:b1:c1:d1:e1:f1 qinq 258 259 +p 1 route add 0.128.0.0 10 port 2 ether a2:b2:c2:d2:e2:f2 qinq 260 261 +p 1 route add 0.192.0.0 10 port 3 ether 
a3:b3:c3:d3:e3:f3 qinq 262 263 + +p 1 route ls diff --git a/examples/ip_pipeline/config/edge_router_upstream.cfg b/examples/ip_pipeline/config/edge_router_upstream.cfg new file mode 100644 index 00000000..a08c5cce --- /dev/null +++ b/examples/ip_pipeline/config/edge_router_upstream.cfg @@ -0,0 +1,110 @@ +; BSD LICENSE +; +; Copyright(c) 2015 Intel Corporation. All rights reserved. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +; An edge router typically sits between two networks such as the provider +; core network and the provider access network. A typical packet processing +; pipeline for the upstream traffic (i.e. traffic from access to core +; network) contains the following functional blocks: Packet RX & Firewall, +; Flow classification, Metering, Routing and Packet TX. The input packets +; are assumed to be Q-in-Q IPv4, while the output packets are MPLS IPv4 +; (with variable number of labels per route). + +; A simple implementation for this functional pipeline is presented below. + +; Packet Rx & Pass-Through Flow-Classification Flow-Actions Routing +; Firewall +; __________ SWQ0 __________ SWQ4 __________ SWQ8 __________ SWQ12 __________ +; RXQ0.0 --->| |------>| |------>| |------>| |------>| |------> TXQ0.0 +; | | SWQ1 | | SWQ5 | | SWQ9 | | SWQ13 | | +; RXQ1.0 --->| |------>| |------>| |------>| |------>| |------> TXQ1.0 +; | (P1) | SWQ2 | (P2) | SWQ6 | (P3) | SWQ10 | (P4) | SWQ14 | (P5) | +; RXQ2.0 --->| |------>| |------>| |------>| |------>| |------> TXQ2.0 +; | | SWQ3 | | SWQ7 | | SWQ11 | | SWQ15 | | +; RXQ3.0 --->| |------>| |------>| |------>| |------>| |------> TXQ3.0 +; |__________| |__________| |__________| |__________| |__________| +; | | | +; +--> SINK0 (Default) +--> SINK1 (Default) +--> SINK2 (Route Miss) + + +[PIPELINE0] +type = MASTER +core = 0 + +[PIPELINE1] +type = FIREWALL +core = 1 +pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0 +pktq_out = SWQ0 SWQ1 SWQ2 SWQ3 SINK0 +n_rules = 4096 +pkt_type = qinq_ipv4 + +[PIPELINE2] +type = PASS-THROUGH +core = 2 +pktq_in = SWQ0 SWQ1 SWQ2 SWQ3 +pktq_out = SWQ4 SWQ5 SWQ6 SWQ7 +dma_size = 8 +dma_dst_offset = 128; mbuf (128) +dma_src_offset = 268; mbuf (128) + headroom (128) + 1st ethertype offset (12) = 268 +dma_src_mask = 00000FFF00000FFF; qinq +dma_hash_offset = 136; dma_dst_offset + dma_size = 136 + +[PIPELINE3] +type = FLOW_CLASSIFICATION +core = 2 +pktq_in = SWQ4 SWQ5 SWQ6 SWQ7 +pktq_out = SWQ8 SWQ9 SWQ10 SWQ11 SINK1 +n_flows =
65536 +key_size = 8; dma_size +key_offset = 128; dma_dst_offset +hash_offset = 136; dma_hash_offset +flowid_offset = 192; mbuf (128) + 64 + +[PIPELINE4] +type = FLOW_ACTIONS +core = 3 +pktq_in = SWQ8 SWQ9 SWQ10 SWQ11 +pktq_out = SWQ12 SWQ13 SWQ14 SWQ15 +n_flows = 65536 +n_meters_per_flow = 1 +flow_id_offset = 192; flowid_offset +ip_hdr_offset = 278; mbuf (128) + headroom (128) + ethernet (14) + qinq (8) = 278 +color_offset = 196; flowid_offset + sizeof(flow_id) + +[PIPELINE5] +type = ROUTING +core = 4 +pktq_in = SWQ12 SWQ13 SWQ14 SWQ15 +pktq_out = TXQ0.0 TXQ1.0 TXQ2.0 TXQ3.0 SINK2 +encap = ethernet_mpls +mpls_color_mark = yes +ip_hdr_offset = 278; mbuf (128) + headroom (128) + ethernet (14) + qinq (8) = 278 +color_offset = 196; flowid_offset + sizeof(flow_id) diff --git a/examples/ip_pipeline/config/edge_router_upstream.sh b/examples/ip_pipeline/config/edge_router_upstream.sh new file mode 100644 index 00000000..eeba600c --- /dev/null +++ b/examples/ip_pipeline/config/edge_router_upstream.sh @@ -0,0 +1,38 @@ +################################################ +# Firewall Rules:4 for 4 ports +################################################ +p 1 firewall add ipv4 1 0.0.0.0 8 0.0.0.0 10 0 0 0 0 6 1 0 +p 1 firewall add ipv4 1 0.0.0.0 8 0.64.0.0 10 0 0 0 0 6 1 1 +p 1 firewall add ipv4 1 0.0.0.0 8 0.128.0.0 10 0 0 0 0 6 1 2 +p 1 firewall add ipv4 1 0.0.0.0 8 0.192.0.0 10 0 0 0 0 6 1 3 +p 1 firewall add default 4 #SINK0 + + +################################################################################ +# Flow classification +################################################################################ +p 3 flow add default 4 #SINK1 +p 3 flow add qinq all 65536 4 + +################################################################################ +# Flow Actions - Metering +################################################################################ +p 4 flows 65536 meter 0 trtcm 1250000000 1250000000 100000000 100000000 +p 4 flows 65536 ports 4 + 
+################################################################################ +# Routing: Ether MPLS, ARP off +################################################################################ +p 5 route add default 4 #SINK2 +p 5 route add 0.0.0.0 10 port 0 ether a0:b0:c0:d0:e0:f0 mpls 0:1 +p 5 route add 0.64.0.0 10 port 1 ether a1:b1:c1:d1:e1:f1 mpls 10:11 +p 5 route add 0.128.0.0 10 port 2 ether a2:b2:c2:d2:e2:f2 mpls 20:21 +p 5 route add 0.192.0.0 10 port 3 ether a3:b3:c3:d3:e3:f3 mpls 30:31 + +################################################################################ +# List all configurations +################################################################################ +p 1 firewall ls +#p 3 flow ls +#p 4 flow actions ls +p 5 route ls diff --git a/examples/ip_pipeline/config/ip_pipeline.cfg b/examples/ip_pipeline/config/ip_pipeline.cfg new file mode 100644 index 00000000..095ed25e --- /dev/null +++ b/examples/ip_pipeline/config/ip_pipeline.cfg @@ -0,0 +1,9 @@ +[PIPELINE0] +type = MASTER +core = 0 + +[PIPELINE1] +type = PASS-THROUGH +core = 1 +pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0 +pktq_out = TXQ0.0 TXQ1.0 TXQ2.0 TXQ3.0 diff --git a/examples/ip_pipeline/config/ip_pipeline.sh b/examples/ip_pipeline/config/ip_pipeline.sh new file mode 100644 index 00000000..4fca2597 --- /dev/null +++ b/examples/ip_pipeline/config/ip_pipeline.sh @@ -0,0 +1,5 @@ +# +#run config/ip_pipeline.sh +# + +p 1 ping diff --git a/examples/ip_pipeline/config/l2fwd.cfg b/examples/ip_pipeline/config/l2fwd.cfg new file mode 100644 index 00000000..c743a143 --- /dev/null +++ b/examples/ip_pipeline/config/l2fwd.cfg @@ -0,0 +1,55 @@ +; BSD LICENSE +; +; Copyright(c) 2015 Intel Corporation. All rights reserved. +; All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +; +; The pass-through pipeline below connects the input ports to the output ports +; as follows: RXQ0.0 -> TXQ1.0, RXQ1.0 -> TXQ0.0, RXQ2.0 -> TXQ3.0 and +; RXQ3.0 -> TXQ2.0. 
+; ________________ +; RXQ0.0 --->|................|---> TXQ1.0 +; | | +; RXQ1.0 --->|................|---> TXQ0.0 +; | Pass-through | +; RXQ2.0 --->|................|---> TXQ3.0 +; | | +; RXQ3.0 --->|................|---> TXQ2.0 +; |________________| +; + +[PIPELINE0] +type = MASTER +core = 0 + +[PIPELINE1] +type = PASS-THROUGH +core = 1 +pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0 +pktq_out = TXQ1.0 TXQ0.0 TXQ3.0 TXQ2.0 diff --git a/examples/ip_pipeline/config/l3fwd.cfg b/examples/ip_pipeline/config/l3fwd.cfg new file mode 100644 index 00000000..5449dc32 --- /dev/null +++ b/examples/ip_pipeline/config/l3fwd.cfg @@ -0,0 +1,63 @@ +; BSD LICENSE +; +; Copyright(c) 2015 Intel Corporation. All rights reserved. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +; _______________ +; RXQ0.0 --->| |---> TXQ0.0 +; | | +; RXQ1.0 --->| |---> TXQ1.0 +; | Routing | +; RXQ2.0 --->| |---> TXQ2.0 +; | | +; RXQ3.0 --->| |---> TXQ3.0 +; |_______________| +; | +; +-----------> SINK0 (route miss) +; +; Input packet: Ethernet/IPv4 +; +; Packet buffer layout: +; # Field Name Offset (Bytes) Size (Bytes) +; 0 Mbuf 0 128 +; 1 Headroom 128 128 +; 2 Ethernet header 256 14 +; 3 IPv4 header 270 20 + +[PIPELINE0] +type = MASTER +core = 0 + +[PIPELINE1] +type = ROUTING +core = 1 +pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0 +pktq_out = TXQ0.0 TXQ1.0 TXQ2.0 TXQ3.0 SINK0 +encap = ethernet; encap = ethernet / ethernet_qinq / ethernet_mpls +ip_hdr_offset = 270 diff --git a/examples/ip_pipeline/config/l3fwd.sh b/examples/ip_pipeline/config/l3fwd.sh new file mode 100644 index 00000000..27740103 --- /dev/null +++ b/examples/ip_pipeline/config/l3fwd.sh @@ -0,0 +1,9 @@ +################################################################################ +# Routing: encap = ethernet, arp = off +################################################################################ +p 1 route add default 4 #SINK0 +p 1 route add 0.0.0.0 10 port 0 ether a0:b0:c0:d0:e0:f0 +p 1 route add 0.64.0.0 10 port 1 ether a1:b1:c1:d1:e1:f1 +p 1 route add 0.128.0.0 10 port 2 ether a2:b2:c2:d2:e2:f2 +p 1 route add 0.192.0.0 10 port 3 ether a3:b3:c3:d3:e3:f3 +p 1 route ls diff --git a/examples/ip_pipeline/config/tm_profile.cfg 
b/examples/ip_pipeline/config/tm_profile.cfg new file mode 100644 index 00000000..2dfb215e --- /dev/null +++ b/examples/ip_pipeline/config/tm_profile.cfg @@ -0,0 +1,105 @@ +; BSD LICENSE +; +; Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +; This file enables the following hierarchical scheduler configuration for each +; 10GbE output port: +; * Single subport (subport 0): +; - Subport rate set to 100% of port rate +; - Each of the 4 traffic classes has rate set to 100% of port rate +; * 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration: +; - Pipe rate set to 1/4K of port rate +; - Each of the 4 traffic classes has rate set to 100% of pipe rate +; - Within each traffic class, the byte-level WRR weights for the 4 queues +; are set to 1:1:1:1 +; +; For more details, please refer to chapter "Quality of Service (QoS) Framework" +; of Data Plane Development Kit (DPDK) Programmer's Guide. + +; Port configuration +[port] +frame overhead = 24 ; frame overhead = Preamble (7) + SFD (1) + FCS (4) + IFG (12) +mtu = 1522; mtu = Q-in-Q MTU (FCS not included) +number of subports per port = 1 +number of pipes per subport = 4096 +queue sizes = 64 64 64 64 + +; Subport configuration +[subport 0] +tb rate = 1250000000 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 1250000000 ; Bytes per second +tc 1 rate = 1250000000 ; Bytes per second +tc 2 rate = 1250000000 ; Bytes per second +tc 3 rate = 1250000000 ; Bytes per second +tc period = 10 ; Milliseconds + +pipe 0-4095 = 0 ; These pipes are configured with pipe profile 0 + +; Pipe configuration +[pipe profile 0] +tb rate = 305175 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 305175 ; Bytes per second +tc 1 rate = 305175 ; Bytes per second +tc 2 rate = 305175 ; Bytes per second +tc 3 rate = 305175 ; Bytes per second +tc period = 40 ; Milliseconds + +tc 3 oversubscription weight = 1 + +tc 0 wrr weights = 1 1 1 1 +tc 1 wrr weights = 1 1 1 1 +tc 2 wrr weights = 1 1 1 1 +tc 3 wrr weights = 1 1 1 1 + +; RED params per traffic class and color (Green / Yellow / Red) +[red] +tc 0 wred min = 48 40 32 +tc 0 wred max = 64 64 64 +tc 0 wred inv prob = 10 10 10 +tc 0 wred weight = 9 9 9 + +tc 1 wred min = 48 40 32 +tc 1 wred max = 64 64 
64 +tc 1 wred inv prob = 10 10 10 +tc 1 wred weight = 9 9 9 + +tc 2 wred min = 48 40 32 +tc 2 wred max = 64 64 64 +tc 2 wred inv prob = 10 10 10 +tc 2 wred weight = 9 9 9 + +tc 3 wred min = 48 40 32 +tc 3 wred max = 64 64 64 +tc 3 wred inv prob = 10 10 10 +tc 3 wred weight = 9 9 9 diff --git a/examples/ip_pipeline/config_check.c b/examples/ip_pipeline/config_check.c new file mode 100644 index 00000000..fd9ff495 --- /dev/null +++ b/examples/ip_pipeline/config_check.c @@ -0,0 +1,444 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> + +#include <rte_ip.h> + +#include "app.h" + +static void +check_mempools(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_mempools; i++) { + struct app_mempool_params *p = &app->mempool_params[i]; + + APP_CHECK((p->pool_size > 0), + "Mempool %s size is 0\n", p->name); + + APP_CHECK((p->cache_size > 0), + "Mempool %s cache size is 0\n", p->name); + + APP_CHECK(rte_is_power_of_2(p->cache_size), + "Mempool %s cache size not a power of 2\n", p->name); + } +} + +static void +check_links(struct app_params *app) +{ + uint32_t i; + + /* Check that number of links matches the port mask */ + if (app->port_mask) { + uint32_t n_links_port_mask = + __builtin_popcountll(app->port_mask); + + APP_CHECK((app->n_links == n_links_port_mask), + "Not enough links provided in the PORT_MASK\n"); + } + + for (i = 0; i < app->n_links; i++) { + struct app_link_params *link = &app->link_params[i]; + uint32_t rxq_max, n_rxq, n_txq, link_id, i; + + APP_PARAM_GET_ID(link, "LINK", link_id); + + /* Check that link RXQs are contiguous */ + rxq_max = 0; + if (link->arp_q > rxq_max) + rxq_max = link->arp_q; + if (link->tcp_syn_q > rxq_max) + rxq_max = link->tcp_syn_q; + if (link->ip_local_q > rxq_max) + rxq_max = link->ip_local_q; + if (link->tcp_local_q > rxq_max) + rxq_max = link->tcp_local_q; + if (link->udp_local_q > rxq_max) + rxq_max = link->udp_local_q; + if (link->sctp_local_q > rxq_max) + rxq_max = 
link->sctp_local_q; + + for (i = 1; i <= rxq_max; i++) + APP_CHECK(((link->arp_q == i) || + (link->tcp_syn_q == i) || + (link->ip_local_q == i) || + (link->tcp_local_q == i) || + (link->udp_local_q == i) || + (link->sctp_local_q == i)), + "%s RXQs are not contiguous (A)\n", link->name); + + n_rxq = app_link_get_n_rxq(app, link); + + APP_CHECK((n_rxq), "%s does not have any RXQ\n", link->name); + + APP_CHECK((n_rxq == rxq_max + 1), + "%s RXQs are not contiguous (B)\n", link->name); + + for (i = 0; i < n_rxq; i++) { + char name[APP_PARAM_NAME_SIZE]; + int pos; + + sprintf(name, "RXQ%" PRIu32 ".%" PRIu32, + link_id, i); + pos = APP_PARAM_FIND(app->hwq_in_params, name); + APP_CHECK((pos >= 0), + "%s RXQs are not contiguous (C)\n", link->name); + } + + /* Check that link RXQs are contiguous */ + n_txq = app_link_get_n_txq(app, link); + + APP_CHECK((n_txq), "%s does not have any TXQ\n", link->name); + + for (i = 0; i < n_txq; i++) { + char name[APP_PARAM_NAME_SIZE]; + int pos; + + sprintf(name, "TXQ%" PRIu32 ".%" PRIu32, + link_id, i); + pos = APP_PARAM_FIND(app->hwq_out_params, name); + APP_CHECK((pos >= 0), + "%s TXQs are not contiguous\n", link->name); + } + } +} + +static void +check_rxqs(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_hwq_in; i++) { + struct app_pktq_hwq_in_params *p = &app->hwq_in_params[i]; + uint32_t n_readers = app_rxq_get_readers(app, p); + + APP_CHECK((p->size > 0), + "%s size is 0\n", p->name); + + APP_CHECK((rte_is_power_of_2(p->size)), + "%s size is not a power of 2\n", p->name); + + APP_CHECK((p->burst > 0), + "%s burst size is 0\n", p->name); + + APP_CHECK((p->burst <= p->size), + "%s burst size is bigger than its size\n", p->name); + + APP_CHECK((n_readers != 0), + "%s has no reader\n", p->name); + + APP_CHECK((n_readers == 1), + "%s has more than one reader\n", p->name); + } +} + +static void +check_txqs(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_hwq_out; i++) { + struct 
app_pktq_hwq_out_params *p = &app->hwq_out_params[i]; + uint32_t n_writers = app_txq_get_writers(app, p); + + APP_CHECK((p->size > 0), + "%s size is 0\n", p->name); + + APP_CHECK((rte_is_power_of_2(p->size)), + "%s size is not a power of 2\n", p->name); + + APP_CHECK((p->burst > 0), + "%s burst size is 0\n", p->name); + + APP_CHECK((p->burst <= p->size), + "%s burst size is bigger than its size\n", p->name); + + APP_CHECK((n_writers != 0), + "%s has no writer\n", p->name); + + APP_CHECK((n_writers == 1), + "%s has more than one writer\n", p->name); + } +} + +static void +check_swqs(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_swq; i++) { + struct app_pktq_swq_params *p = &app->swq_params[i]; + uint32_t n_readers = app_swq_get_readers(app, p); + uint32_t n_writers = app_swq_get_writers(app, p); + uint32_t n_flags; + + APP_CHECK((p->size > 0), + "%s size is 0\n", p->name); + + APP_CHECK((rte_is_power_of_2(p->size)), + "%s size is not a power of 2\n", p->name); + + APP_CHECK((p->burst_read > 0), + "%s read burst size is 0\n", p->name); + + APP_CHECK((p->burst_read <= p->size), + "%s read burst size is bigger than its size\n", + p->name); + + APP_CHECK((p->burst_write > 0), + "%s write burst size is 0\n", p->name); + + APP_CHECK((p->burst_write <= p->size), + "%s write burst size is bigger than its size\n", + p->name); + + APP_CHECK((n_readers != 0), + "%s has no reader\n", p->name); + + if (n_readers > 1) + APP_LOG(app, LOW, "%s has more than one reader", p->name); + + APP_CHECK((n_writers != 0), + "%s has no writer\n", p->name); + + if (n_writers > 1) + APP_LOG(app, LOW, "%s has more than one writer", p->name); + + n_flags = p->ipv4_frag + p->ipv6_frag + p->ipv4_ras + p->ipv6_ras; + + APP_CHECK((n_flags < 2), + "%s has more than one fragmentation or reassembly mode enabled\n", + p->name); + + APP_CHECK((!((n_readers > 1) && (n_flags == 1))), + "%s has more than one reader when fragmentation or reassembly" + " mode enabled\n", + p->name); + 
+ APP_CHECK((!((n_writers > 1) && (n_flags == 1))), + "%s has more than one writer when fragmentation or reassembly" + " mode enabled\n", + p->name); + + n_flags = p->ipv4_ras + p->ipv6_ras; + + APP_CHECK((!((p->dropless == 1) && (n_flags == 1))), + "%s has dropless when reassembly mode enabled\n", p->name); + + n_flags = p->ipv4_frag + p->ipv6_frag; + + if (n_flags == 1) { + uint16_t ip_hdr_size = (p->ipv4_frag) ? sizeof(struct ipv4_hdr) : + sizeof(struct ipv6_hdr); + + APP_CHECK((p->mtu > ip_hdr_size), + "%s mtu size is smaller than ip header\n", p->name); + + APP_CHECK((!((p->mtu - ip_hdr_size) % 8)), + "%s mtu size is incorrect\n", p->name); + } + } +} + +static void +check_tms(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_tm; i++) { + struct app_pktq_tm_params *p = &app->tm_params[i]; + uint32_t n_readers = app_tm_get_readers(app, p); + uint32_t n_writers = app_tm_get_writers(app, p); + + APP_CHECK((n_readers != 0), + "%s has no reader\n", p->name); + + APP_CHECK((n_readers == 1), + "%s has more than one reader\n", p->name); + + APP_CHECK((n_writers != 0), + "%s has no writer\n", p->name); + + APP_CHECK((n_writers == 1), + "%s has more than one writer\n", p->name); + } +} + +static void +check_sources(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_source; i++) { + struct app_pktq_source_params *p = &app->source_params[i]; + uint32_t n_readers = app_source_get_readers(app, p); + + APP_CHECK((n_readers != 0), + "%s has no reader\n", p->name); + + APP_CHECK((n_readers == 1), + "%s has more than one reader\n", p->name); + } +} + +static void +check_sinks(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_sink; i++) { + struct app_pktq_sink_params *p = &app->sink_params[i]; + uint32_t n_writers = app_sink_get_writers(app, p); + + APP_CHECK((n_writers != 0), + "%s has no writer\n", p->name); + + APP_CHECK((n_writers == 1), + "%s has more than one writer\n", p->name); + } +} + +static void 
+check_msgqs(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_msgq; i++) { + struct app_msgq_params *p = &app->msgq_params[i]; + uint32_t n_readers = app_msgq_get_readers(app, p); + uint32_t n_writers = app_msgq_get_writers(app, p); + uint32_t msgq_req_pipeline, msgq_rsp_pipeline; + uint32_t msgq_req_core, msgq_rsp_core; + + APP_CHECK((p->size > 0), + "%s size is 0\n", p->name); + + APP_CHECK((rte_is_power_of_2(p->size)), + "%s size is not a power of 2\n", p->name); + + msgq_req_pipeline = (strncmp(p->name, "MSGQ-REQ-PIPELINE", + strlen("MSGQ-REQ-PIPELINE")) == 0); + + msgq_rsp_pipeline = (strncmp(p->name, "MSGQ-RSP-PIPELINE", + strlen("MSGQ-RSP-PIPELINE")) == 0); + + msgq_req_core = (strncmp(p->name, "MSGQ-REQ-CORE", + strlen("MSGQ-REQ-CORE")) == 0); + + msgq_rsp_core = (strncmp(p->name, "MSGQ-RSP-CORE", + strlen("MSGQ-RSP-CORE")) == 0); + + if ((msgq_req_pipeline == 0) && + (msgq_rsp_pipeline == 0) && + (msgq_req_core == 0) && + (msgq_rsp_core == 0)) { + APP_CHECK((n_readers != 0), + "%s has no reader\n", p->name); + + APP_CHECK((n_readers == 1), + "%s has more than one reader\n", p->name); + + APP_CHECK((n_writers != 0), + "%s has no writer\n", p->name); + + APP_CHECK((n_writers == 1), + "%s has more than one writer\n", p->name); + } + + if (msgq_req_pipeline) { + struct app_pipeline_params *pipeline; + uint32_t pipeline_id; + + APP_PARAM_GET_ID(p, "MSGQ-REQ-PIPELINE", pipeline_id); + + APP_PARAM_FIND_BY_ID(app->pipeline_params, + "PIPELINE", + pipeline_id, + pipeline); + + APP_CHECK((pipeline != NULL), + "%s is not associated with a valid pipeline\n", + p->name); + } + + if (msgq_rsp_pipeline) { + struct app_pipeline_params *pipeline; + uint32_t pipeline_id; + + APP_PARAM_GET_ID(p, "MSGQ-RSP-PIPELINE", pipeline_id); + + APP_PARAM_FIND_BY_ID(app->pipeline_params, + "PIPELINE", + pipeline_id, + pipeline); + + APP_CHECK((pipeline != NULL), + "%s is not associated with a valid pipeline\n", + p->name); + } + } +} + +static void 
+check_pipelines(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + + APP_CHECK((p->n_msgq_in == p->n_msgq_out), + "%s number of input MSGQs does not match " + "the number of output MSGQs\n", p->name); + } +} + +int +app_config_check(struct app_params *app) +{ + check_mempools(app); + check_links(app); + check_rxqs(app); + check_txqs(app); + check_swqs(app); + check_tms(app); + check_sources(app); + check_sinks(app); + check_msgqs(app); + check_pipelines(app); + + return 0; +} diff --git a/examples/ip_pipeline/config_parse.c b/examples/ip_pipeline/config_parse.c new file mode 100644 index 00000000..e5efd03e --- /dev/null +++ b/examples/ip_pipeline/config_parse.c @@ -0,0 +1,3383 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <getopt.h> +#include <errno.h> +#include <stdarg.h> +#include <string.h> +#include <libgen.h> +#include <unistd.h> +#include <sys/wait.h> + +#include <rte_errno.h> +#include <rte_cfgfile.h> +#include <rte_string_fns.h> + +#include "app.h" +#include "parser.h" + +/** + * Default config values + **/ + +static struct app_params app_params_default = { + .config_file = "./config/ip_pipeline.cfg", + .log_level = APP_LOG_LEVEL_HIGH, + .port_mask = 0, + + .eal_params = { + .channels = 4, + }, +}; + +static const struct app_mempool_params mempool_params_default = { + .parsed = 0, + .buffer_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM, + .pool_size = 32 * 1024, + .cache_size = 256, + .cpu_socket_id = 0, +}; + +static const struct app_link_params link_params_default = { + .parsed = 0, + .pmd_id = 0, + .arp_q = 0, + .tcp_syn_q = 0, + .ip_local_q = 0, + .tcp_local_q = 0, + .udp_local_q = 0, + .sctp_local_q = 0, + .state = 0, + .ip = 0, + .depth = 0, + .mac_addr = 0, + .pci_bdf = {0}, + + .conf = { + .link_speeds = 0, + .rxmode = { + .mq_mode = ETH_MQ_RX_NONE, + + .header_split = 0, /* Header split */ + .hw_ip_checksum = 0, /* IP checksum offload */ + .hw_vlan_filter = 0, /* VLAN filtering */ + .hw_vlan_strip = 0, /* VLAN strip */ + .hw_vlan_extend = 0, /* Extended VLAN */ + .jumbo_frame = 0, /* Jumbo frame support */ + .hw_strip_crc = 
0, /* CRC strip by HW */ + .enable_scatter = 0, /* Scattered packets RX handler */ + + .max_rx_pkt_len = 9000, /* Jumbo frame max packet len */ + .split_hdr_size = 0, /* Header split buffer size */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, + .lpbk_mode = 0, + }, + + .promisc = 1, +}; + +static const struct app_pktq_hwq_in_params default_hwq_in_params = { + .parsed = 0, + .mempool_id = 0, + .size = 128, + .burst = 32, + + .conf = { + .rx_thresh = { + .pthresh = 8, + .hthresh = 8, + .wthresh = 4, + }, + .rx_free_thresh = 64, + .rx_drop_en = 0, + .rx_deferred_start = 0, + } +}; + +static const struct app_pktq_hwq_out_params default_hwq_out_params = { + .parsed = 0, + .size = 512, + .burst = 32, + .dropless = 0, + .n_retries = 0, + + .conf = { + .tx_thresh = { + .pthresh = 36, + .hthresh = 0, + .wthresh = 0, + }, + .tx_rs_thresh = 0, + .tx_free_thresh = 0, + .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | + ETH_TXQ_FLAGS_NOOFFLOADS, + .tx_deferred_start = 0, + } +}; + +static const struct app_pktq_swq_params default_swq_params = { + .parsed = 0, + .size = 256, + .burst_read = 32, + .burst_write = 32, + .dropless = 0, + .n_retries = 0, + .cpu_socket_id = 0, + .ipv4_frag = 0, + .ipv6_frag = 0, + .ipv4_ras = 0, + .ipv6_ras = 0, + .mtu = 0, + .metadata_size = 0, + .mempool_direct_id = 0, + .mempool_indirect_id = 0, +}; + +struct app_pktq_tm_params default_tm_params = { + .parsed = 0, + .file_name = "./config/tm_profile.cfg", + .burst_read = 64, + .burst_write = 32, +}; + +struct app_pktq_source_params default_source_params = { + .parsed = 0, + .mempool_id = 0, + .burst = 32, + .file_name = NULL, + .n_bytes_per_pkt = 0, +}; + +struct app_pktq_sink_params default_sink_params = { + .parsed = 0, + .file_name = NULL, + .n_pkts_to_dump = 0, +}; + +struct app_msgq_params default_msgq_params = { + .parsed = 0, + .size = 64, + .cpu_socket_id = 0, +}; + +struct app_pipeline_params default_pipeline_params = { + .parsed = 0, + .socket_id = 0, + .core_id = 0, + .hyper_th_id = 0, + 
.n_pktq_in = 0, + .n_pktq_out = 0, + .n_msgq_in = 0, + .n_msgq_out = 0, + .timer_period = 1, + .n_args = 0, +}; + +static const char app_usage[] = + "Usage: %s [-f CONFIG_FILE] [-s SCRIPT_FILE] [-p PORT_MASK] " + "[-l LOG_LEVEL] [--preproc PREPROCESSOR] [--preproc-args ARGS]\n" + "\n" + "Arguments:\n" + "\t-f CONFIG_FILE: Default config file is %s\n" + "\t-p PORT_MASK: Mask of NIC port IDs in hex format (generated from " + "config file when not provided)\n" + "\t-s SCRIPT_FILE: No CLI script file is run when not specified\n" + "\t-l LOG_LEVEL: 0 = NONE, 1 = HIGH PRIO (default), 2 = LOW PRIO\n" + "\t--preproc PREPROCESSOR: Configuration file pre-processor\n" + "\t--preproc-args ARGS: Arguments to be passed to pre-processor\n" + "\n"; + +static void +app_print_usage(char *prgname) +{ + rte_exit(0, app_usage, prgname, app_params_default.config_file); +} + +#define skip_white_spaces(pos) \ +({ \ + __typeof__(pos) _p = (pos); \ + for ( ; isspace(*_p); _p++); \ + _p; \ +}) + +#define PARSER_PARAM_ADD_CHECK(result, params_array, section_name) \ +do { \ + APP_CHECK((result != -EINVAL), \ + "Parse error: no free memory"); \ + APP_CHECK((result != -ENOMEM), \ + "Parse error: too many \"%s\" sections", section_name); \ + APP_CHECK(((result >= 0) && (params_array)[result].parsed == 0),\ + "Parse error: duplicate \"%s\" section", section_name); \ + APP_CHECK((result >= 0), \ + "Parse error in section \"%s\"", section_name); \ +} while (0) + +int +parser_read_arg_bool(const char *p) +{ + p = skip_white_spaces(p); + int result = -EINVAL; + + if (((p[0] == 'y') && (p[1] == 'e') && (p[2] == 's')) || + ((p[0] == 'Y') && (p[1] == 'E') && (p[2] == 'S'))) { + p += 3; + result = 1; + } + + if (((p[0] == 'o') && (p[1] == 'n')) || + ((p[0] == 'O') && (p[1] == 'N'))) { + p += 2; + result = 1; + } + + if (((p[0] == 'n') && (p[1] == 'o')) || + ((p[0] == 'N') && (p[1] == 'O'))) { + p += 2; + result = 0; + } + + if (((p[0] == 'o') && (p[1] == 'f') && (p[2] == 'f')) || + ((p[0] == 'O') && (p[1] 
/**
 * Parse an unsigned 64-bit decimal integer with an optional binary suffix.
 *
 * Accepted syntax: optional leading white space, one or more decimal
 * digits, an optional single suffix character ('k' or 'K' = 2^10,
 * 'M' = 2^20, 'G' = 2^30, 'T' = 2^40), optional trailing white space.
 *
 * @param value
 *   Output; written only on success.
 * @param p
 *   NUL-terminated input string.
 * @return
 *   0 on success, -EINVAL on malformed input, -ERANGE when the number (or
 *   the number scaled by its suffix) does not fit in 64 bits.
 */
int
parser_read_uint64(uint64_t *value, const char *p)
{
	char *next;
	unsigned long long val;
	unsigned int shift = 0;

	/* <ctype.h> functions require an unsigned char value */
	while (isspace((unsigned char)*p))
		p++;

	/* Reject signs and empty input up front; strtoull would accept '-' */
	if (!isdigit((unsigned char)*p))
		return -EINVAL;

	/*
	 * strtoull, not strtoul: unsigned long is only 32 bits wide on ILP32
	 * targets, which would silently clamp large values.
	 */
	errno = 0;
	val = strtoull(p, &next, 10);
	if (p == next)
		return -EINVAL;
	if ((errno == ERANGE) || (val > UINT64_MAX))
		return -ERANGE;

	p = next;
	switch (*p) {
	case 'T':
		shift = 40;
		break;
	case 'G':
		shift = 30;
		break;
	case 'M':
		shift = 20;
		break;
	case 'k':
	case 'K':
		shift = 10;
		break;
	default:
		break;
	}

	if (shift != 0) {
		/* Refuse to wrap around when applying the multiplier */
		if (val > (UINT64_MAX >> shift))
			return -ERANGE;
		val <<= shift;
		p++;
	}

	while (isspace((unsigned char)*p))
		p++;
	if (*p != '\0')
		return -EINVAL;

	*value = val;
	return 0;
}
/**
 * Parse a pipeline core specification.
 *
 * Accepted forms are a bare decimal core ID ("5"), or a combination of
 * "s<socket>", "c<core>" and "h" given in that order, each at most once
 * ("s1c2h", "c3", "s0c1"). Letters may be upper or lower case. Leading
 * white space is skipped; any other unexpected character is an error.
 * Each number may have at most 7 digits. An empty entry is accepted and
 * yields socket 0, core 0, no hyper-thread.
 *
 * @param socket
 *   Output: CPU socket ID (0 when not specified).
 * @param core
 *   Output: core ID (0 when not specified).
 * @param ht
 *   Output: 1 when the "h" flag is present, 0 otherwise.
 * @param entry
 *   NUL-terminated input string.
 * @return
 *   0 on success (outputs written), -EINVAL on malformed input (outputs
 *   untouched).
 */
int
parse_pipeline_core(uint32_t *socket,
	uint32_t *core,
	uint32_t *ht,
	const char *entry)
{
	size_t num_len;
	char num[8];

	uint32_t s = 0, c = 0, h = 0, val;
	uint8_t s_parsed = 0, c_parsed = 0, h_parsed = 0;
	const char *next = entry;
	char type;

	/* Skip leading white space */
	while (isspace((unsigned char)*next))
		next++;

	/* Expect <CORE> or [sX][cY][h]. */
	while (*next != '\0') {
		/* If everything has been parsed, nothing should be left */
		if (s_parsed && c_parsed && h_parsed)
			return -EINVAL;

		type = *next;
		switch (type) {
		case 's':
		case 'S':
			if (s_parsed || c_parsed || h_parsed)
				return -EINVAL;
			s_parsed = 1;
			next++;
			break;
		case 'c':
		case 'C':
			if (c_parsed || h_parsed)
				return -EINVAL;
			c_parsed = 1;
			next++;
			break;
		case 'h':
		case 'H':
			if (h_parsed)
				return -EINVAL;
			h_parsed = 1;
			next++;
			break;
		default:
			/* If it starts with a digit, it must be a bare core ID */
			if (!isdigit((unsigned char)*next) ||
				s_parsed || c_parsed || h_parsed)
				return -EINVAL;

			type = 'C';
		}

		/*
		 * Collect the digits that follow. One byte of num[] is kept
		 * for the terminator, so an over-long number is rejected
		 * before it can be stored instead of writing past the buffer.
		 */
		for (num_len = 0; isdigit((unsigned char)*next);
			next++, num_len++) {
			if (num_len == sizeof(num) - 1)
				return -EINVAL;

			num[num_len] = *next;
		}

		/* Socket and core need a number; the HT flag must not have one */
		if (num_len == 0 && type != 'h' && type != 'H')
			return -EINVAL;

		if (num_len != 0 && (type == 'h' || type == 'H'))
			return -EINVAL;

		num[num_len] = '\0';
		val = strtoul(num, NULL, 10);

		switch (type) {
		case 's':
		case 'S':
			s = val;
			break;
		case 'c':
		case 'C':
			c = val;
			break;
		case 'h':
		case 'H':
			h = 1;
			break;
		default:
			break;
		}
	}

	*socket = s;
	*core = c;
	*ht = h;
	return 0;
}
/* Return the number of leading decimal digits in src. */
static size_t
skip_digits(const char *src)
{
	size_t i;

	for (i = 0; isdigit((unsigned char)src[i]); i++)
		;

	return i;
}

/*
 * Check that name has the form <prefix> followed by num numeric fields.
 *
 *   num == 0: name must be exactly prefix;
 *   num == 1: name must be prefix followed by one or more digits;
 *   num == 2: name must be prefix, digits, '.', digits.
 *
 * Any other value of num only checks the prefix. Nothing may follow the
 * last numeric field. Returns 0 when name matches, -1 otherwise.
 */
static int
validate_name(const char *name, const char *prefix, int num)
{
	size_t i, j;

	for (i = 0; (name[i] != '\0') && (prefix[i] != '\0'); i++) {
		if (name[i] != prefix[i])
			return -1;
	}

	/* name ended before the whole prefix was matched */
	if (prefix[i] != '\0')
		return -1;

	if (!num) {
		if (name[i] != '\0')
			return -1;
		else
			return 0;
	}

	if (num == 2) {
		/* First field, terminated by the '.' separator */
		j = skip_digits(&name[i]);
		i += j;
		if ((j == 0) || (name[i] != '.'))
			return -1;
		i++;
	}

	if ((num == 1) || (num == 2)) {
		/*
		 * Last field: must be present and must end the string. This
		 * now also runs for num == 2, so "RXQ0." and "RXQ0.junk" are
		 * rejected instead of being silently accepted.
		 */
		j = skip_digits(&name[i]);
		i += j;
		if ((j == 0) || (name[i] != '\0'))
			return -1;
	}

	return 0;
}
&entries[i]; + + /* coremask */ + if (strcmp(entry->name, "c") == 0) { + PARSE_WARNING_IGNORED(0, section_name, entry->name); + continue; + } + + /* corelist */ + if (strcmp(entry->name, "l") == 0) { + PARSE_WARNING_IGNORED(0, section_name, entry->name); + continue; + } + + /* coremap */ + if (strcmp(entry->name, "lcores") == 0) { + PARSE_ERROR_DUPLICATE((p->coremap == NULL), + section_name, + entry->name); + p->coremap = strdup(entry->value); + continue; + } + + /* master_lcore */ + if (strcmp(entry->name, "master_lcore") == 0) { + int status; + + PARSE_ERROR_DUPLICATE((p->master_lcore_present == 0), + section_name, + entry->name); + p->master_lcore_present = 1; + + status = parser_read_uint32(&p->master_lcore, + entry->value); + PARSE_ERROR((status == 0), section_name, entry->name); + continue; + } + + /* channels */ + if (strcmp(entry->name, "n") == 0) { + int status; + + PARSE_ERROR_DUPLICATE((p->channels_present == 0), + section_name, + entry->name); + p->channels_present = 1; + + status = parser_read_uint32(&p->channels, entry->value); + PARSE_ERROR((status == 0), section_name, entry->name); + continue; + } + + /* memory */ + if (strcmp(entry->name, "m") == 0) { + int status; + + PARSE_ERROR_DUPLICATE((p->memory_present == 0), + section_name, + entry->name); + p->memory_present = 1; + + status = parser_read_uint32(&p->memory, entry->value); + PARSE_ERROR((status == 0), section_name, entry->name); + continue; + } + + /* ranks */ + if (strcmp(entry->name, "r") == 0) { + int status; + + PARSE_ERROR_DUPLICATE((p->ranks_present == 0), + section_name, + entry->name); + p->ranks_present = 1; + + status = parser_read_uint32(&p->ranks, entry->value); + PARSE_ERROR((status == 0), section_name, entry->name); + continue; + } + + /* pci_blacklist */ + if ((strcmp(entry->name, "pci_blacklist") == 0) || + (strcmp(entry->name, "b") == 0)) { + uint32_t i; + + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->pci_blacklist[i]) + continue; + + p->pci_blacklist[i] = + 
strdup(entry->value); + PARSE_ERROR_MALLOC(p->pci_blacklist[i]); + + break; + } + + PARSE_ERROR_MESSAGE((i < APP_MAX_LINKS), + section_name, entry->name, + "too many elements"); + continue; + } + + /* pci_whitelist */ + if ((strcmp(entry->name, "pci_whitelist") == 0) || + (strcmp(entry->name, "w") == 0)) { + uint32_t i; + + PARSE_ERROR_MESSAGE((app->port_mask != 0), + section_name, entry->name, "entry to be " + "generated by the application (port_mask " + "not provided)"); + + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->pci_whitelist[i]) + continue; + + p->pci_whitelist[i] = strdup(entry->value); + PARSE_ERROR_MALLOC(p->pci_whitelist[i]); + + break; + } + + PARSE_ERROR_MESSAGE((i < APP_MAX_LINKS), + section_name, entry->name, + "too many elements"); + continue; + } + + /* vdev */ + if (strcmp(entry->name, "vdev") == 0) { + uint32_t i; + + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->vdev[i]) + continue; + + p->vdev[i] = strdup(entry->value); + PARSE_ERROR_MALLOC(p->vdev[i]); + + break; + } + + PARSE_ERROR_MESSAGE((i < APP_MAX_LINKS), + section_name, entry->name, + "too many elements"); + continue; + } + + /* vmware_tsc_map */ + if (strcmp(entry->name, "vmware_tsc_map") == 0) { + int val; + + PARSE_ERROR_DUPLICATE((p->vmware_tsc_map_present == 0), + section_name, + entry->name); + p->vmware_tsc_map_present = 1; + + val = parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->vmware_tsc_map = val; + continue; + } + + /* proc_type */ + if (strcmp(entry->name, "proc_type") == 0) { + PARSE_ERROR_DUPLICATE((p->proc_type == NULL), + section_name, + entry->name); + p->proc_type = strdup(entry->value); + continue; + } + + /* syslog */ + if (strcmp(entry->name, "syslog") == 0) { + PARSE_ERROR_DUPLICATE((p->syslog == NULL), + section_name, + entry->name); + p->syslog = strdup(entry->value); + continue; + } + + /* log_level */ + if (strcmp(entry->name, "log_level") == 0) { + int status; + + 
PARSE_ERROR_DUPLICATE((p->log_level_present == 0), + section_name, + entry->name); + p->log_level_present = 1; + + status = parser_read_uint32(&p->log_level, + entry->value); + PARSE_ERROR((status == 0), section_name, entry->name); + continue; + } + + /* version */ + if (strcmp(entry->name, "v") == 0) { + int val; + + PARSE_ERROR_DUPLICATE((p->version_present == 0), + section_name, + entry->name); + p->version_present = 1; + + val = parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->version = val; + continue; + } + + /* help */ + if ((strcmp(entry->name, "help") == 0) || + (strcmp(entry->name, "h") == 0)) { + int val; + + PARSE_ERROR_DUPLICATE((p->help_present == 0), + section_name, + entry->name); + p->help_present = 1; + + val = parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->help = val; + continue; + } + + /* no_huge */ + if (strcmp(entry->name, "no_huge") == 0) { + int val; + + PARSE_ERROR_DUPLICATE((p->no_huge_present == 0), + section_name, + entry->name); + p->no_huge_present = 1; + + val = parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->no_huge = val; + continue; + } + + /* no_pci */ + if (strcmp(entry->name, "no_pci") == 0) { + int val; + + PARSE_ERROR_DUPLICATE((p->no_pci_present == 0), + section_name, + entry->name); + p->no_pci_present = 1; + + val = parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->no_pci = val; + continue; + } + + /* no_hpet */ + if (strcmp(entry->name, "no_hpet") == 0) { + int val; + + PARSE_ERROR_DUPLICATE((p->no_hpet_present == 0), + section_name, + entry->name); + p->no_hpet_present = 1; + + val = parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->no_hpet = val; + continue; + } + + /* no_shconf */ + if (strcmp(entry->name, "no_shconf") == 0) { + int val; + + PARSE_ERROR_DUPLICATE((p->no_shconf_present == 0), 
+ section_name, + entry->name); + p->no_shconf_present = 1; + + val = parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->no_shconf = val; + continue; + } + + /* add_driver */ + if (strcmp(entry->name, "d") == 0) { + PARSE_ERROR_DUPLICATE((p->add_driver == NULL), + section_name, + entry->name); + p->add_driver = strdup(entry->value); + continue; + } + + /* socket_mem */ + if (strcmp(entry->name, "socket_mem") == 0) { + PARSE_ERROR_DUPLICATE((p->socket_mem == NULL), + section_name, + entry->name); + p->socket_mem = strdup(entry->value); + continue; + } + + /* huge_dir */ + if (strcmp(entry->name, "huge_dir") == 0) { + PARSE_ERROR_DUPLICATE((p->huge_dir == NULL), + section_name, + entry->name); + p->huge_dir = strdup(entry->value); + continue; + } + + /* file_prefix */ + if (strcmp(entry->name, "file_prefix") == 0) { + PARSE_ERROR_DUPLICATE((p->file_prefix == NULL), + section_name, + entry->name); + p->file_prefix = strdup(entry->value); + continue; + } + + /* base_virtaddr */ + if (strcmp(entry->name, "base_virtaddr") == 0) { + PARSE_ERROR_DUPLICATE((p->base_virtaddr == NULL), + section_name, + entry->name); + p->base_virtaddr = strdup(entry->value); + continue; + } + + /* create_uio_dev */ + if (strcmp(entry->name, "create_uio_dev") == 0) { + int val; + + PARSE_ERROR_DUPLICATE((p->create_uio_dev_present == 0), + section_name, + entry->name); + p->create_uio_dev_present = 1; + + val = parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->create_uio_dev = val; + continue; + } + + /* vfio_intr */ + if (strcmp(entry->name, "vfio_intr") == 0) { + PARSE_ERROR_DUPLICATE((p->vfio_intr == NULL), + section_name, + entry->name); + p->vfio_intr = strdup(entry->value); + continue; + } + + /* xen_dom0 */ + if (strcmp(entry->name, "xen_dom0") == 0) { + int val; + + PARSE_ERROR_DUPLICATE((p->xen_dom0_present == 0), + section_name, + entry->name); + p->xen_dom0_present = 1; + + val = 
parser_read_arg_bool(entry->value); + PARSE_ERROR((val >= 0), section_name, entry->name); + p->xen_dom0 = val; + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, entry->name); + } + + free(entries); +} + +static int +parse_pipeline_pcap_source(struct app_params *app, + struct app_pipeline_params *p, + const char *file_name, const char *cp_size) +{ + const char *next = NULL; + char *end; + uint32_t i; + int parse_file = 0; + + if (file_name && !cp_size) { + next = file_name; + parse_file = 1; /* parse file path */ + } else if (cp_size && !file_name) { + next = cp_size; + parse_file = 0; /* parse copy size */ + } else + return -EINVAL; + + char name[APP_PARAM_NAME_SIZE]; + size_t name_len; + + if (p->n_pktq_in == 0) + return -EINVAL; + + i = 0; + while (*next != '\0') { + uint32_t id; + + if (i >= p->n_pktq_in) + return -EINVAL; + + id = p->pktq_in[i].id; + + end = strchr(next, ' '); + if (!end) + name_len = strlen(next); + else + name_len = end - next; + + if (name_len == 0 || name_len == sizeof(name)) + return -EINVAL; + + strncpy(name, next, name_len); + name[name_len] = '\0'; + next += name_len; + if (*next != '\0') + next++; + + if (parse_file) { + app->source_params[id].file_name = strdup(name); + if (app->source_params[id].file_name == NULL) + return -ENOMEM; + } else { + if (parser_read_uint32( + &app->source_params[id].n_bytes_per_pkt, + name) != 0) { + if (app->source_params[id]. + file_name != NULL) + free(app->source_params[id]. 
+ file_name); + return -EINVAL; + } + } + + i++; + + if (i == p->n_pktq_in) + return 0; + } + + return -EINVAL; +} + +static int +parse_pipeline_pcap_sink(struct app_params *app, + struct app_pipeline_params *p, + const char *file_name, const char *n_pkts_to_dump) +{ + const char *next = NULL; + char *end; + uint32_t i; + int parse_file = 0; + + if (file_name && !n_pkts_to_dump) { + next = file_name; + parse_file = 1; /* parse file path */ + } else if (n_pkts_to_dump && !file_name) { + next = n_pkts_to_dump; + parse_file = 0; /* parse copy size */ + } else + return -EINVAL; + + char name[APP_PARAM_NAME_SIZE]; + size_t name_len; + + if (p->n_pktq_out == 0) + return -EINVAL; + + i = 0; + while (*next != '\0') { + uint32_t id; + + if (i >= p->n_pktq_out) + return -EINVAL; + + id = p->pktq_out[i].id; + + end = strchr(next, ' '); + if (!end) + name_len = strlen(next); + else + name_len = end - next; + + if (name_len == 0 || name_len == sizeof(name)) + return -EINVAL; + + strncpy(name, next, name_len); + name[name_len] = '\0'; + next += name_len; + if (*next != '\0') + next++; + + if (parse_file) { + app->sink_params[id].file_name = strdup(name); + if (app->sink_params[id].file_name == NULL) + return -ENOMEM; + } else { + if (parser_read_uint32( + &app->sink_params[id].n_pkts_to_dump, + name) != 0) { + if (app->sink_params[id].file_name != + NULL) + free(app->sink_params[id]. 
+ file_name); + return -EINVAL; + } + } + + i++; + + if (i == p->n_pktq_out) + return 0; + } + + return -EINVAL; +} + +static int +parse_pipeline_pktq_in(struct app_params *app, + struct app_pipeline_params *p, + const char *value) +{ + const char *next = value; + char *end; + char name[APP_PARAM_NAME_SIZE]; + size_t name_len; + + while (*next != '\0') { + enum app_pktq_in_type type; + int id; + char *end_space; + char *end_tab; + + next = skip_white_spaces(next); + if (!next) + break; + + end_space = strchr(next, ' '); + end_tab = strchr(next, ' '); + + if (end_space && (!end_tab)) + end = end_space; + else if ((!end_space) && end_tab) + end = end_tab; + else if (end_space && end_tab) + end = RTE_MIN(end_space, end_tab); + else + end = NULL; + + if (!end) + name_len = strlen(next); + else + name_len = end - next; + + if (name_len == 0 || name_len == sizeof(name)) + return -EINVAL; + + strncpy(name, next, name_len); + name[name_len] = '\0'; + next += name_len; + if (*next != '\0') + next++; + + if (validate_name(name, "RXQ", 2) == 0) { + type = APP_PKTQ_IN_HWQ; + id = APP_PARAM_ADD(app->hwq_in_params, name); + } else if (validate_name(name, "SWQ", 1) == 0) { + type = APP_PKTQ_IN_SWQ; + id = APP_PARAM_ADD(app->swq_params, name); + } else if (validate_name(name, "TM", 1) == 0) { + type = APP_PKTQ_IN_TM; + id = APP_PARAM_ADD(app->tm_params, name); + } else if (validate_name(name, "SOURCE", 1) == 0) { + type = APP_PKTQ_IN_SOURCE; + id = APP_PARAM_ADD(app->source_params, name); + } else + return -EINVAL; + + if (id < 0) + return id; + + p->pktq_in[p->n_pktq_in].type = type; + p->pktq_in[p->n_pktq_in].id = (uint32_t) id; + p->n_pktq_in++; + } + + return 0; +} + +static int +parse_pipeline_pktq_out(struct app_params *app, + struct app_pipeline_params *p, + const char *value) +{ + const char *next = value; + char *end; + char name[APP_PARAM_NAME_SIZE]; + size_t name_len; + + while (*next != '\0') { + enum app_pktq_out_type type; + int id; + char *end_space; + char 
*end_tab; + + next = skip_white_spaces(next); + if (!next) + break; + + end_space = strchr(next, ' '); + end_tab = strchr(next, ' '); + + if (end_space && (!end_tab)) + end = end_space; + else if ((!end_space) && end_tab) + end = end_tab; + else if (end_space && end_tab) + end = RTE_MIN(end_space, end_tab); + else + end = NULL; + + if (!end) + name_len = strlen(next); + else + name_len = end - next; + + if (name_len == 0 || name_len == sizeof(name)) + return -EINVAL; + + strncpy(name, next, name_len); + name[name_len] = '\0'; + next += name_len; + if (*next != '\0') + next++; + if (validate_name(name, "TXQ", 2) == 0) { + type = APP_PKTQ_OUT_HWQ; + id = APP_PARAM_ADD(app->hwq_out_params, name); + } else if (validate_name(name, "SWQ", 1) == 0) { + type = APP_PKTQ_OUT_SWQ; + id = APP_PARAM_ADD(app->swq_params, name); + } else if (validate_name(name, "TM", 1) == 0) { + type = APP_PKTQ_OUT_TM; + id = APP_PARAM_ADD(app->tm_params, name); + } else if (validate_name(name, "SINK", 1) == 0) { + type = APP_PKTQ_OUT_SINK; + id = APP_PARAM_ADD(app->sink_params, name); + } else + return -EINVAL; + + if (id < 0) + return id; + + p->pktq_out[p->n_pktq_out].type = type; + p->pktq_out[p->n_pktq_out].id = id; + p->n_pktq_out++; + } + + return 0; +} + +static int +parse_pipeline_msgq_in(struct app_params *app, + struct app_pipeline_params *p, + const char *value) +{ + const char *next = value; + char *end; + char name[APP_PARAM_NAME_SIZE]; + size_t name_len; + ssize_t idx; + + while (*next != '\0') { + char *end_space; + char *end_tab; + + next = skip_white_spaces(next); + if (!next) + break; + + end_space = strchr(next, ' '); + end_tab = strchr(next, ' '); + + if (end_space && (!end_tab)) + end = end_space; + else if ((!end_space) && end_tab) + end = end_tab; + else if (end_space && end_tab) + end = RTE_MIN(end_space, end_tab); + else + end = NULL; + + if (!end) + name_len = strlen(next); + else + name_len = end - next; + + if (name_len == 0 || name_len == sizeof(name)) + return 
-EINVAL; + + strncpy(name, next, name_len); + name[name_len] = '\0'; + next += name_len; + if (*next != '\0') + next++; + + if (validate_name(name, "MSGQ", 1) != 0) + return -EINVAL; + + idx = APP_PARAM_ADD(app->msgq_params, name); + if (idx < 0) + return idx; + + p->msgq_in[p->n_msgq_in] = idx; + p->n_msgq_in++; + } + + return 0; +} + +static int +parse_pipeline_msgq_out(struct app_params *app, + struct app_pipeline_params *p, + const char *value) +{ + const char *next = value; + char *end; + char name[APP_PARAM_NAME_SIZE]; + size_t name_len; + ssize_t idx; + + while (*next != '\0') { + char *end_space; + char *end_tab; + + next = skip_white_spaces(next); + if (!next) + break; + + end_space = strchr(next, ' '); + end_tab = strchr(next, ' '); + + if (end_space && (!end_tab)) + end = end_space; + else if ((!end_space) && end_tab) + end = end_tab; + else if (end_space && end_tab) + end = RTE_MIN(end_space, end_tab); + else + end = NULL; + + if (!end) + name_len = strlen(next); + else + name_len = end - next; + + if (name_len == 0 || name_len == sizeof(name)) + return -EINVAL; + + strncpy(name, next, name_len); + name[name_len] = '\0'; + next += name_len; + if (*next != '\0') + next++; + + if (validate_name(name, "MSGQ", 1) != 0) + return -EINVAL; + + idx = APP_PARAM_ADD(app->msgq_params, name); + if (idx < 0) + return idx; + + p->msgq_out[p->n_msgq_out] = idx; + p->n_msgq_out++; + } + + return 0; +} + +static void +parse_pipeline(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + char name[CFG_NAME_LEN]; + struct app_pipeline_params *param; + struct rte_cfgfile_entry *entries; + ssize_t param_idx; + int n_entries, i; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + 
param_idx = APP_PARAM_ADD(app->pipeline_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->pipeline_params, section_name); + + param = &app->pipeline_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "type") == 0) { + int w_size = snprintf(param->type, RTE_DIM(param->type), + "%s", ent->value); + + PARSE_ERROR(((w_size > 0) && + (w_size < (int)RTE_DIM(param->type))), + section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "core") == 0) { + int status = parse_pipeline_core( + ¶m->socket_id, ¶m->core_id, + ¶m->hyper_th_id, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "pktq_in") == 0) { + int status = parse_pipeline_pktq_in(app, param, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "pktq_out") == 0) { + int status = parse_pipeline_pktq_out(app, param, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "msgq_in") == 0) { + int status = parse_pipeline_msgq_in(app, param, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "msgq_out") == 0) { + int status = parse_pipeline_msgq_out(app, param, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "timer_period") == 0) { + int status = parser_read_uint32( + ¶m->timer_period, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "pcap_file_rd") == 0) { + int status; + +#ifndef RTE_PORT_PCAP + PARSE_ERROR_INVALID(0, section_name, ent->name); +#endif + + status = parse_pipeline_pcap_source(app, + param, ent->value, NULL); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, 
"pcap_bytes_rd_per_pkt") == 0) { + int status; + +#ifndef RTE_PORT_PCAP + PARSE_ERROR_INVALID(0, section_name, ent->name); +#endif + + status = parse_pipeline_pcap_source(app, + param, NULL, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "pcap_file_wr") == 0) { + int status; + +#ifndef RTE_PORT_PCAP + PARSE_ERROR_INVALID(0, section_name, ent->name); +#endif + + status = parse_pipeline_pcap_sink(app, param, + ent->value, NULL); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "pcap_n_pkt_wr") == 0) { + int status; + +#ifndef RTE_PORT_PCAP + PARSE_ERROR_INVALID(0, section_name, ent->name); +#endif + + status = parse_pipeline_pcap_sink(app, param, + NULL, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + /* pipeline type specific items */ + APP_CHECK((param->n_args < APP_MAX_PIPELINE_ARGS), + "Parse error in section \"%s\": too many " + "pipeline specified parameters", section_name); + + param->args_name[param->n_args] = strdup(ent->name); + param->args_value[param->n_args] = strdup(ent->value); + + APP_CHECK((param->args_name[param->n_args] != NULL) && + (param->args_value[param->n_args] != NULL), + "Parse error: no free memory"); + + param->n_args++; + } + + param->parsed = 1; + + snprintf(name, sizeof(name), "MSGQ-REQ-%s", section_name); + param_idx = APP_PARAM_ADD(app->msgq_params, name); + PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, name); + app->msgq_params[param_idx].cpu_socket_id = param->socket_id; + param->msgq_in[param->n_msgq_in++] = param_idx; + + snprintf(name, sizeof(name), "MSGQ-RSP-%s", section_name); + param_idx = APP_PARAM_ADD(app->msgq_params, name); + PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, name); + app->msgq_params[param_idx].cpu_socket_id = param->socket_id; + param->msgq_out[param->n_msgq_out++] = param_idx; + + snprintf(name, sizeof(name), "MSGQ-REQ-CORE-s%" PRIu32 
"c%" PRIu32 "%s", + param->socket_id, + param->core_id, + (param->hyper_th_id) ? "h" : ""); + param_idx = APP_PARAM_ADD(app->msgq_params, name); + PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, name); + app->msgq_params[param_idx].cpu_socket_id = param->socket_id; + + snprintf(name, sizeof(name), "MSGQ-RSP-CORE-s%" PRIu32 "c%" PRIu32 "%s", + param->socket_id, + param->core_id, + (param->hyper_th_id) ? "h" : ""); + param_idx = APP_PARAM_ADD(app->msgq_params, name); + PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, name); + app->msgq_params[param_idx].cpu_socket_id = param->socket_id; + + free(entries); +} + +static void +parse_mempool(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_mempool_params *param; + struct rte_cfgfile_entry *entries; + ssize_t param_idx; + int n_entries, i; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->mempool_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->mempool_params, section_name); + + param = &app->mempool_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "buffer_size") == 0) { + int status = parser_read_uint32( + ¶m->buffer_size, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "pool_size") == 0) { + int status = parser_read_uint32( + ¶m->pool_size, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "cache_size") == 0) { + int status = parser_read_uint32( + ¶m->cache_size, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + 
if (strcmp(ent->name, "cpu") == 0) { + int status = parser_read_uint32( + ¶m->cpu_socket_id, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + + free(entries); +} + +static void +parse_link(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_link_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + int pci_bdf_present = 0; + ssize_t param_idx; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->link_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->link_params, section_name); + + param = &app->link_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "promisc") == 0) { + int status = parser_read_arg_bool(ent->value); + + PARSE_ERROR((status != -EINVAL), section_name, + ent->name); + param->promisc = status; + continue; + } + + if (strcmp(ent->name, "arp_q") == 0) { + int status = parser_read_uint32(¶m->arp_q, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "tcp_syn_q") == 0) { + int status = parser_read_uint32( + ¶m->tcp_syn_q, ent->value); + + PARSE_ERROR((status == 0), section_name, ent->name); + continue; + } + + if (strcmp(ent->name, "ip_local_q") == 0) { + int status = parser_read_uint32( + ¶m->ip_local_q, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + + if (strcmp(ent->name, "tcp_local_q") == 0) { + int status = parser_read_uint32( + ¶m->tcp_local_q, ent->value); + + 
PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "udp_local_q") == 0) { + int status = parser_read_uint32( + ¶m->udp_local_q, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "sctp_local_q") == 0) { + int status = parser_read_uint32( + ¶m->sctp_local_q, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "pci_bdf") == 0) { + PARSE_ERROR_DUPLICATE((pci_bdf_present == 0), + section_name, ent->name); + + snprintf(param->pci_bdf, APP_LINK_PCI_BDF_SIZE, + "%s", ent->value); + pci_bdf_present = 1; + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + /* Check for mandatory fields */ + if (app->port_mask) + PARSE_ERROR_MESSAGE((pci_bdf_present == 0), + section_name, "pci_bdf", + "entry not allowed (port_mask is provided)"); + else + PARSE_ERROR_MESSAGE((pci_bdf_present), + section_name, "pci_bdf", + "this entry is mandatory (port_mask is not " + "provided)"); + + param->parsed = 1; + + free(entries); +} + +static void +parse_rxq(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_pktq_hwq_in_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + ssize_t param_idx; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->hwq_in_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->hwq_in_params, section_name); + + param = &app->hwq_in_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "mempool") == 0) { + int status = 
validate_name(ent->value, + "MEMPOOL", 1); + ssize_t idx; + + PARSE_ERROR((status == 0), section_name, + ent->name); + idx = APP_PARAM_ADD(app->mempool_params, + ent->value); + PARSER_PARAM_ADD_CHECK(idx, app->mempool_params, + section_name); + param->mempool_id = idx; + continue; + } + + if (strcmp(ent->name, "size") == 0) { + int status = parser_read_uint32(¶m->size, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "burst") == 0) { + int status = parser_read_uint32(¶m->burst, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + + free(entries); +} + +static void +parse_txq(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_pktq_hwq_out_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + ssize_t param_idx; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->hwq_out_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->hwq_out_params, section_name); + + param = &app->hwq_out_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "size") == 0) { + int status = parser_read_uint32(¶m->size, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "burst") == 0) { + int status = parser_read_uint32(¶m->burst, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "dropless") == 0) { + int status = 
parser_read_arg_bool(ent->value); + + + PARSE_ERROR((status != -EINVAL), section_name, + ent->name); + param->dropless = status; + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + + free(entries); +} + +static void +parse_swq(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_pktq_swq_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + uint32_t mtu_present = 0; + uint32_t metadata_size_present = 0; + uint32_t mempool_direct_present = 0; + uint32_t mempool_indirect_present = 0; + + ssize_t param_idx; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->swq_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->swq_params, section_name); + + param = &app->swq_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "size") == 0) { + int status = parser_read_uint32(¶m->size, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "burst_read") == 0) { + int status = parser_read_uint32(& + param->burst_read, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "burst_write") == 0) { + int status = parser_read_uint32( + ¶m->burst_write, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "dropless") == 0) { + int status = parser_read_arg_bool(ent->value); + + PARSE_ERROR((status != -EINVAL), section_name, + ent->name); + param->dropless = status; + continue; + } + + if (strcmp(ent->name, 
"n_retries") == 0) { + int status = parser_read_uint64(¶m->n_retries, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "cpu") == 0) { + int status = parser_read_uint32( + ¶m->cpu_socket_id, ent->value); + + PARSE_ERROR((status == 0), section_name, ent->name); + continue; + } + + if (strcmp(ent->name, "ipv4_frag") == 0) { + int status = parser_read_arg_bool(ent->value); + + PARSE_ERROR((status != -EINVAL), section_name, + ent->name); + + param->ipv4_frag = status; + if (param->mtu == 0) + param->mtu = 1500; + + continue; + } + + if (strcmp(ent->name, "ipv6_frag") == 0) { + int status = parser_read_arg_bool(ent->value); + + PARSE_ERROR((status != -EINVAL), section_name, + ent->name); + param->ipv6_frag = status; + if (param->mtu == 0) + param->mtu = 1320; + continue; + } + + if (strcmp(ent->name, "ipv4_ras") == 0) { + int status = parser_read_arg_bool(ent->value); + + PARSE_ERROR((status != -EINVAL), section_name, + ent->name); + param->ipv4_ras = status; + continue; + } + + if (strcmp(ent->name, "ipv6_ras") == 0) { + int status = parser_read_arg_bool(ent->value); + + PARSE_ERROR((status != -EINVAL), section_name, + ent->name); + param->ipv6_ras = status; + continue; + } + + if (strcmp(ent->name, "mtu") == 0) { + int status = parser_read_uint32(¶m->mtu, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + mtu_present = 1; + continue; + } + + if (strcmp(ent->name, "metadata_size") == 0) { + int status = parser_read_uint32( + ¶m->metadata_size, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + metadata_size_present = 1; + continue; + } + + if (strcmp(ent->name, "mempool_direct") == 0) { + int status = validate_name(ent->value, + "MEMPOOL", 1); + ssize_t idx; + + PARSE_ERROR((status == 0), section_name, + ent->name); + + idx = APP_PARAM_ADD(app->mempool_params, + ent->value); + PARSER_PARAM_ADD_CHECK(idx, app->mempool_params, + section_name); + 
param->mempool_direct_id = idx; + mempool_direct_present = 1; + continue; + } + + if (strcmp(ent->name, "mempool_indirect") == 0) { + int status = validate_name(ent->value, + "MEMPOOL", 1); + ssize_t idx; + + PARSE_ERROR((status == 0), section_name, + ent->name); + idx = APP_PARAM_ADD(app->mempool_params, + ent->value); + PARSER_PARAM_ADD_CHECK(idx, app->mempool_params, + section_name); + param->mempool_indirect_id = idx; + mempool_indirect_present = 1; + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + APP_CHECK(((mtu_present) && + ((param->ipv4_frag == 1) || (param->ipv6_frag == 1))), + "Parse error in section \"%s\": IPv4/IPv6 fragmentation " + "is off, therefore entry \"mtu\" is not allowed", + section_name); + + APP_CHECK(((metadata_size_present) && + ((param->ipv4_frag == 1) || (param->ipv6_frag == 1))), + "Parse error in section \"%s\": IPv4/IPv6 fragmentation " + "is off, therefore entry \"metadata_size\" is " + "not allowed", section_name); + + APP_CHECK(((mempool_direct_present) && + ((param->ipv4_frag == 1) || (param->ipv6_frag == 1))), + "Parse error in section \"%s\": IPv4/IPv6 fragmentation " + "is off, therefore entry \"mempool_direct\" is " + "not allowed", section_name); + + APP_CHECK(((mempool_indirect_present) && + ((param->ipv4_frag == 1) || (param->ipv6_frag == 1))), + "Parse error in section \"%s\": IPv4/IPv6 fragmentation " + "is off, therefore entry \"mempool_indirect\" is " + "not allowed", section_name); + + param->parsed = 1; + + free(entries); +} + +static void +parse_tm(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_pktq_tm_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + ssize_t param_idx; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != 
NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->tm_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->tm_params, section_name); + + param = &app->tm_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "cfg") == 0) { + param->file_name = strdup(ent->value); + PARSE_ERROR_MALLOC(param->file_name != NULL); + continue; + } + + if (strcmp(ent->name, "burst_read") == 0) { + int status = parser_read_uint32( + ¶m->burst_read, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "burst_write") == 0) { + int status = parser_read_uint32( + ¶m->burst_write, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + + free(entries); +} + +static void +parse_source(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_pktq_source_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + ssize_t param_idx; + uint32_t pcap_file_present = 0; + uint32_t pcap_size_present = 0; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->source_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->source_params, section_name); + + param = &app->source_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "mempool") == 0) { + int status = validate_name(ent->value, + "MEMPOOL", 1); + ssize_t idx; + + PARSE_ERROR((status == 
0), section_name, + ent->name); + idx = APP_PARAM_ADD(app->mempool_params, + ent->value); + PARSER_PARAM_ADD_CHECK(idx, app->mempool_params, + section_name); + param->mempool_id = idx; + continue; + } + + if (strcmp(ent->name, "burst") == 0) { + int status = parser_read_uint32(¶m->burst, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "pcap_file_rd")) { + PARSE_ERROR_DUPLICATE((pcap_file_present == 0), + section_name, ent->name); + + param->file_name = strdup(ent->value); + + PARSE_ERROR_MALLOC(param->file_name != NULL); + pcap_file_present = 1; + + continue; + } + + if (strcmp(ent->name, "pcap_bytes_rd_per_pkt") == 0) { + int status; + + PARSE_ERROR_DUPLICATE((pcap_size_present == 0), + section_name, ent->name); + + status = parser_read_uint32( + ¶m->n_bytes_per_pkt, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + pcap_size_present = 1; + + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + + free(entries); +} + +static void +parse_sink(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_pktq_sink_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + ssize_t param_idx; + uint32_t pcap_file_present = 0; + uint32_t pcap_n_pkt_present = 0; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->sink_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->sink_params, section_name); + + param = &app->sink_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "pcap_file_wr")) { + 
PARSE_ERROR_DUPLICATE((pcap_file_present == 0), + section_name, ent->name); + + param->file_name = strdup(ent->value); + + PARSE_ERROR_MALLOC((param->file_name != NULL)); + + continue; + } + + if (strcmp(ent->name, "pcap_n_pkt_wr")) { + int status; + + PARSE_ERROR_DUPLICATE((pcap_n_pkt_present == 0), + section_name, ent->name); + + status = parser_read_uint32( + ¶m->n_pkts_to_dump, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + + free(entries); +} + +static void +parse_msgq_req_pipeline(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_msgq_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + ssize_t param_idx; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->msgq_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, section_name); + + param = &app->msgq_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "size") == 0) { + int status = parser_read_uint32(¶m->size, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + free(entries); +} + +static void +parse_msgq_rsp_pipeline(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_msgq_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + ssize_t param_idx; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); 
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->msgq_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, section_name); + + param = &app->msgq_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "size") == 0) { + int status = parser_read_uint32(¶m->size, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + /* unrecognized */ + PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + + free(entries); +} + +static void +parse_msgq(struct app_params *app, + const char *section_name, + struct rte_cfgfile *cfg) +{ + struct app_msgq_params *param; + struct rte_cfgfile_entry *entries; + int n_entries, i; + ssize_t param_idx; + + n_entries = rte_cfgfile_section_num_entries(cfg, section_name); + PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name); + + entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry)); + PARSE_ERROR_MALLOC(entries != NULL); + + rte_cfgfile_section_entries(cfg, section_name, entries, n_entries); + + param_idx = APP_PARAM_ADD(app->msgq_params, section_name); + PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, section_name); + + param = &app->msgq_params[param_idx]; + + for (i = 0; i < n_entries; i++) { + struct rte_cfgfile_entry *ent = &entries[i]; + + if (strcmp(ent->name, "size") == 0) { + int status = parser_read_uint32(¶m->size, + ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + if (strcmp(ent->name, "cpu") == 0) { + int status = parser_read_uint32( + ¶m->cpu_socket_id, ent->value); + + PARSE_ERROR((status == 0), section_name, + ent->name); + continue; + } + + /* unrecognized */ + 
PARSE_ERROR_INVALID(0, section_name, ent->name); + } + + param->parsed = 1; + + free(entries); +} + +typedef void (*config_section_load)(struct app_params *p, + const char *section_name, + struct rte_cfgfile *cfg); + +struct config_section { + const char prefix[CFG_NAME_LEN]; + int numbers; + config_section_load load; +}; + +static const struct config_section cfg_file_scheme[] = { + {"EAL", 0, parse_eal}, + {"PIPELINE", 1, parse_pipeline}, + {"MEMPOOL", 1, parse_mempool}, + {"LINK", 1, parse_link}, + {"RXQ", 2, parse_rxq}, + {"TXQ", 2, parse_txq}, + {"SWQ", 1, parse_swq}, + {"TM", 1, parse_tm}, + {"SOURCE", 1, parse_source}, + {"SINK", 1, parse_sink}, + {"MSGQ-REQ-PIPELINE", 1, parse_msgq_req_pipeline}, + {"MSGQ-RSP-PIPELINE", 1, parse_msgq_rsp_pipeline}, + {"MSGQ", 1, parse_msgq}, +}; + +static void +create_implicit_mempools(struct app_params *app) +{ + ssize_t idx; + + idx = APP_PARAM_ADD(app->mempool_params, "MEMPOOL0"); + PARSER_PARAM_ADD_CHECK(idx, app->mempool_params, "start-up"); +} + +static void +create_implicit_links_from_port_mask(struct app_params *app, + uint64_t port_mask) +{ + uint32_t pmd_id, link_id; + + link_id = 0; + for (pmd_id = 0; pmd_id < RTE_MAX_ETHPORTS; pmd_id++) { + char name[APP_PARAM_NAME_SIZE]; + ssize_t idx; + + if ((port_mask & (1LLU << pmd_id)) == 0) + continue; + + snprintf(name, sizeof(name), "LINK%" PRIu32, link_id); + idx = APP_PARAM_ADD(app->link_params, name); + PARSER_PARAM_ADD_CHECK(idx, app->link_params, name); + + app->link_params[idx].pmd_id = pmd_id; + link_id++; + } +} + +static void +assign_link_pmd_id_from_pci_bdf(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_links; i++) { + struct app_link_params *link = &app->link_params[i]; + + link->pmd_id = i; + } +} + +int +app_config_parse(struct app_params *app, const char *file_name) +{ + struct rte_cfgfile *cfg; + char **section_names; + int i, j, sect_count; + + /* Implicit mempools */ + create_implicit_mempools(app); + + /* Port mask */ + if 
(app->port_mask) + create_implicit_links_from_port_mask(app, app->port_mask); + + /* Load application configuration file */ + cfg = rte_cfgfile_load(file_name, 0); + APP_CHECK((cfg != NULL), "Parse error: Unable to load config " + "file %s", file_name); + + sect_count = rte_cfgfile_num_sections(cfg, NULL, 0); + APP_CHECK((sect_count > 0), "Parse error: number of sections " + "in file \"%s\" return %d", file_name, + sect_count); + + section_names = malloc(sect_count * sizeof(char *)); + PARSE_ERROR_MALLOC(section_names != NULL); + + for (i = 0; i < sect_count; i++) + section_names[i] = malloc(CFG_NAME_LEN); + + rte_cfgfile_sections(cfg, section_names, sect_count); + + for (i = 0; i < sect_count; i++) { + const struct config_section *sch_s; + int len, cfg_name_len; + + cfg_name_len = strlen(section_names[i]); + + /* Find section type */ + for (j = 0; j < (int)RTE_DIM(cfg_file_scheme); j++) { + sch_s = &cfg_file_scheme[j]; + len = strlen(sch_s->prefix); + + if (cfg_name_len < len) + continue; + + /* After section name we expect only '\0' or digit or + * digit dot digit, so protect against false matching, + * for example: "ABC" should match section name + * "ABC0.0", but it should not match section_name + * "ABCDEF". 
+ */ + if ((section_names[i][len] != '\0') && + !isdigit(section_names[i][len])) + continue; + + if (strncmp(sch_s->prefix, section_names[i], len) == 0) + break; + } + + APP_CHECK(j < (int)RTE_DIM(cfg_file_scheme), + "Parse error: unknown section %s", + section_names[i]); + + APP_CHECK(validate_name(section_names[i], + sch_s->prefix, + sch_s->numbers) == 0, + "Parse error: invalid section name \"%s\"", + section_names[i]); + + sch_s->load(app, section_names[i], cfg); + } + + for (i = 0; i < sect_count; i++) + free(section_names[i]); + + free(section_names); + + rte_cfgfile_close(cfg); + + APP_PARAM_COUNT(app->mempool_params, app->n_mempools); + APP_PARAM_COUNT(app->link_params, app->n_links); + APP_PARAM_COUNT(app->hwq_in_params, app->n_pktq_hwq_in); + APP_PARAM_COUNT(app->hwq_out_params, app->n_pktq_hwq_out); + APP_PARAM_COUNT(app->swq_params, app->n_pktq_swq); + APP_PARAM_COUNT(app->tm_params, app->n_pktq_tm); + APP_PARAM_COUNT(app->source_params, app->n_pktq_source); + APP_PARAM_COUNT(app->sink_params, app->n_pktq_sink); + APP_PARAM_COUNT(app->msgq_params, app->n_msgq); + APP_PARAM_COUNT(app->pipeline_params, app->n_pipelines); + +#ifdef RTE_PORT_PCAP + for (i = 0; i < (int)app->n_pktq_source; i++) { + struct app_pktq_source_params *p = &app->source_params[i]; + + APP_CHECK((p->file_name), "Parse error: missing " + "mandatory field \"pcap_file_rd\" for \"%s\"", + p->name); + } +#else + for (i = 0; i < (int)app->n_pktq_source; i++) { + struct app_pktq_source_params *p = &app->source_params[i]; + + APP_CHECK((!p->file_name), "Parse error: invalid field " + "\"pcap_file_rd\" for \"%s\"", p->name); + } +#endif + + if (app->port_mask == 0) + assign_link_pmd_id_from_pci_bdf(app); + + /* Save configuration to output file */ + app_config_save(app, app->output_file); + + /* Load TM configuration files */ + app_config_parse_tm(app); + + return 0; +} + +static void +save_eal_params(struct app_params *app, FILE *f) +{ + struct app_eal_params *p = &app->eal_params; + 
uint32_t i; + + fprintf(f, "[EAL]\n"); + + if (p->coremap) + fprintf(f, "%s = %s\n", "lcores", p->coremap); + + if (p->master_lcore_present) + fprintf(f, "%s = %" PRIu32 "\n", + "master_lcore", p->master_lcore); + + fprintf(f, "%s = %" PRIu32 "\n", "n", p->channels); + + if (p->memory_present) + fprintf(f, "%s = %" PRIu32 "\n", "m", p->memory); + + if (p->ranks_present) + fprintf(f, "%s = %" PRIu32 "\n", "r", p->ranks); + + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->pci_blacklist[i] == NULL) + break; + + fprintf(f, "%s = %s\n", "pci_blacklist", + p->pci_blacklist[i]); + } + + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->pci_whitelist[i] == NULL) + break; + + fprintf(f, "%s = %s\n", "pci_whitelist", + p->pci_whitelist[i]); + } + + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->vdev[i] == NULL) + break; + + fprintf(f, "%s = %s\n", "vdev", + p->vdev[i]); + } + + if (p->vmware_tsc_map_present) + fprintf(f, "%s = %s\n", "vmware_tsc_map", + (p->vmware_tsc_map) ? "yes" : "no"); + + if (p->proc_type) + fprintf(f, "%s = %s\n", "proc_type", p->proc_type); + + if (p->syslog) + fprintf(f, "%s = %s\n", "syslog", p->syslog); + + if (p->log_level_present) + fprintf(f, "%s = %" PRIu32 "\n", "log_level", p->log_level); + + if (p->version_present) + fprintf(f, "%s = %s\n", "v", (p->version) ? "yes" : "no"); + + if (p->help_present) + fprintf(f, "%s = %s\n", "help", (p->help) ? "yes" : "no"); + + if (p->no_huge_present) + fprintf(f, "%s = %s\n", "no_huge", (p->no_huge) ? "yes" : "no"); + + if (p->no_pci_present) + fprintf(f, "%s = %s\n", "no_pci", (p->no_pci) ? "yes" : "no"); + + if (p->no_hpet_present) + fprintf(f, "%s = %s\n", "no_hpet", (p->no_hpet) ? "yes" : "no"); + + if (p->no_shconf_present) + fprintf(f, "%s = %s\n", "no_shconf", + (p->no_shconf) ? 
"yes" : "no"); + + if (p->add_driver) + fprintf(f, "%s = %s\n", "d", p->add_driver); + + if (p->socket_mem) + fprintf(f, "%s = %s\n", "socket_mem", p->socket_mem); + + if (p->huge_dir) + fprintf(f, "%s = %s\n", "huge_dir", p->huge_dir); + + if (p->file_prefix) + fprintf(f, "%s = %s\n", "file_prefix", p->file_prefix); + + if (p->base_virtaddr) + fprintf(f, "%s = %s\n", "base_virtaddr", p->base_virtaddr); + + if (p->create_uio_dev_present) + fprintf(f, "%s = %s\n", "create_uio_dev", + (p->create_uio_dev) ? "yes" : "no"); + + if (p->vfio_intr) + fprintf(f, "%s = %s\n", "vfio_intr", p->vfio_intr); + + if (p->xen_dom0_present) + fprintf(f, "%s = %s\n", "xen_dom0", + (p->xen_dom0) ? "yes" : "no"); + + fputc('\n', f); +} + +static void +save_mempool_params(struct app_params *app, FILE *f) +{ + struct app_mempool_params *p; + size_t i, count; + + count = RTE_DIM(app->mempool_params); + for (i = 0; i < count; i++) { + p = &app->mempool_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "%s = %" PRIu32 "\n", "buffer_size", p->buffer_size); + fprintf(f, "%s = %" PRIu32 "\n", "pool_size", p->pool_size); + fprintf(f, "%s = %" PRIu32 "\n", "cache_size", p->cache_size); + fprintf(f, "%s = %" PRIu32 "\n", "cpu", p->cpu_socket_id); + + fputc('\n', f); + } +} + +static void +save_links_params(struct app_params *app, FILE *f) +{ + struct app_link_params *p; + size_t i, count; + + count = RTE_DIM(app->link_params); + for (i = 0; i < count; i++) { + p = &app->link_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "; %s = %" PRIu32 "\n", "pmd_id", p->pmd_id); + fprintf(f, "%s = %s\n", "promisc", p->promisc ? 
"yes" : "no"); + fprintf(f, "%s = %" PRIu32 "\n", "arp_q", p->arp_q); + fprintf(f, "%s = %" PRIu32 "\n", "tcp_syn_q", + p->tcp_syn_q); + fprintf(f, "%s = %" PRIu32 "\n", "ip_local_q", p->ip_local_q); + fprintf(f, "%s = %" PRIu32 "\n", "tcp_local_q", p->tcp_local_q); + fprintf(f, "%s = %" PRIu32 "\n", "udp_local_q", p->udp_local_q); + fprintf(f, "%s = %" PRIu32 "\n", "sctp_local_q", + p->sctp_local_q); + + if (strlen(p->pci_bdf)) + fprintf(f, "%s = %s\n", "pci_bdf", p->pci_bdf); + + fputc('\n', f); + } +} + +static void +save_rxq_params(struct app_params *app, FILE *f) +{ + struct app_pktq_hwq_in_params *p; + size_t i, count; + + count = RTE_DIM(app->hwq_in_params); + for (i = 0; i < count; i++) { + p = &app->hwq_in_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "%s = %s\n", + "mempool", + app->mempool_params[p->mempool_id].name); + fprintf(f, "%s = %" PRIu32 "\n", "size", p->size); + fprintf(f, "%s = %" PRIu32 "\n", "burst", p->burst); + + fputc('\n', f); + } +} + +static void +save_txq_params(struct app_params *app, FILE *f) +{ + struct app_pktq_hwq_out_params *p; + size_t i, count; + + count = RTE_DIM(app->hwq_out_params); + for (i = 0; i < count; i++) { + p = &app->hwq_out_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "%s = %" PRIu32 "\n", "size", p->size); + fprintf(f, "%s = %" PRIu32 "\n", "burst", p->burst); + fprintf(f, "%s = %s\n", + "dropless", + p->dropless ? 
"yes" : "no"); + + fputc('\n', f); + } +} + +static void +save_swq_params(struct app_params *app, FILE *f) +{ + struct app_pktq_swq_params *p; + size_t i, count; + + count = RTE_DIM(app->swq_params); + for (i = 0; i < count; i++) { + p = &app->swq_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "%s = %" PRIu32 "\n", "size", p->size); + fprintf(f, "%s = %" PRIu32 "\n", "burst_read", p->burst_read); + fprintf(f, "%s = %" PRIu32 "\n", "burst_write", p->burst_write); + fprintf(f, "%s = %s\n", "dropless", p->dropless ? "yes" : "no"); + fprintf(f, "%s = %" PRIu64 "\n", "n_retries", p->n_retries); + fprintf(f, "%s = %" PRIu32 "\n", "cpu", p->cpu_socket_id); + fprintf(f, "%s = %s\n", "ipv4_frag", p->ipv4_frag ? "yes" : "no"); + fprintf(f, "%s = %s\n", "ipv6_frag", p->ipv6_frag ? "yes" : "no"); + fprintf(f, "%s = %s\n", "ipv4_ras", p->ipv4_ras ? "yes" : "no"); + fprintf(f, "%s = %s\n", "ipv6_ras", p->ipv6_ras ? "yes" : "no"); + if ((p->ipv4_frag == 1) || (p->ipv6_frag == 1)) { + fprintf(f, "%s = %" PRIu32 "\n", "mtu", p->mtu); + fprintf(f, "%s = %" PRIu32 "\n", "metadata_size", p->metadata_size); + fprintf(f, "%s = %s\n", + "mempool_direct", + app->mempool_params[p->mempool_direct_id].name); + fprintf(f, "%s = %s\n", + "mempool_indirect", + app->mempool_params[p->mempool_indirect_id].name); + } + + fputc('\n', f); + } +} + +static void +save_tm_params(struct app_params *app, FILE *f) +{ + struct app_pktq_tm_params *p; + size_t i, count; + + count = RTE_DIM(app->tm_params); + for (i = 0; i < count; i++) { + p = &app->tm_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "%s = %s\n", "cfg", p->file_name); + fprintf(f, "%s = %" PRIu32 "\n", "burst_read", p->burst_read); + fprintf(f, "%s = %" PRIu32 "\n", "burst_write", p->burst_write); + + fputc('\n', f); + } +} + +static void +save_source_params(struct app_params *app, FILE *f) +{ + struct app_pktq_source_params *p; + size_t i, 
count; + + count = RTE_DIM(app->source_params); + for (i = 0; i < count; i++) { + p = &app->source_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "%s = %s\n", + "mempool", + app->mempool_params[p->mempool_id].name); + fprintf(f, "%s = %" PRIu32 "\n", "burst", p->burst); + fprintf(f, "%s = %s\n", "pcap_file_rd", p->file_name); + fprintf(f, "%s = %" PRIu32 "\n", "pcap_bytes_rd_per_pkt", + p->n_bytes_per_pkt); + fputc('\n', f); + } +} + +static void +save_sink_params(struct app_params *app, FILE *f) +{ + struct app_pktq_sink_params *p; + size_t i, count; + + count = RTE_DIM(app->sink_params); + for (i = 0; i < count; i++) { + p = &app->sink_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "%s = %s\n", "pcap_file_wr", p->file_name); + fprintf(f, "%s = %" PRIu32 "\n", + "pcap_n_pkt_wr", p->n_pkts_to_dump); + fputc('\n', f); + } +} + +static void +save_msgq_params(struct app_params *app, FILE *f) +{ + struct app_msgq_params *p; + size_t i, count; + + count = RTE_DIM(app->msgq_params); + for (i = 0; i < count; i++) { + p = &app->msgq_params[i]; + if (!APP_PARAM_VALID(p)) + continue; + + fprintf(f, "[%s]\n", p->name); + fprintf(f, "%s = %" PRIu32 "\n", "size", p->size); + fprintf(f, "%s = %" PRIu32 "\n", "cpu", p->cpu_socket_id); + + fputc('\n', f); + } +} + +static void +save_pipeline_params(struct app_params *app, FILE *f) +{ + size_t i, count; + + count = RTE_DIM(app->pipeline_params); + for (i = 0; i < count; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + + if (!APP_PARAM_VALID(p)) + continue; + + /* section name */ + fprintf(f, "[%s]\n", p->name); + + /* type */ + fprintf(f, "type = %s\n", p->type); + + /* core */ + fprintf(f, "core = s%" PRIu32 "c%" PRIu32 "%s\n", + p->socket_id, + p->core_id, + (p->hyper_th_id) ? 
"h" : ""); + + /* pktq_in */ + if (p->n_pktq_in) { + uint32_t j; + + fprintf(f, "pktq_in ="); + for (j = 0; j < p->n_pktq_in; j++) { + struct app_pktq_in_params *pp = &p->pktq_in[j]; + char *name; + + switch (pp->type) { + case APP_PKTQ_IN_HWQ: + name = app->hwq_in_params[pp->id].name; + break; + case APP_PKTQ_IN_SWQ: + name = app->swq_params[pp->id].name; + break; + case APP_PKTQ_IN_TM: + name = app->tm_params[pp->id].name; + break; + case APP_PKTQ_IN_SOURCE: + name = app->source_params[pp->id].name; + break; + default: + APP_CHECK(0, "System error " + "occurred while saving " + "parameter to file"); + } + + fprintf(f, " %s", name); + } + fprintf(f, "\n"); + } + + /* pktq_in */ + if (p->n_pktq_out) { + uint32_t j; + + fprintf(f, "pktq_out ="); + for (j = 0; j < p->n_pktq_out; j++) { + struct app_pktq_out_params *pp = + &p->pktq_out[j]; + char *name; + + switch (pp->type) { + case APP_PKTQ_OUT_HWQ: + name = app->hwq_out_params[pp->id].name; + break; + case APP_PKTQ_OUT_SWQ: + name = app->swq_params[pp->id].name; + break; + case APP_PKTQ_OUT_TM: + name = app->tm_params[pp->id].name; + break; + case APP_PKTQ_OUT_SINK: + name = app->sink_params[pp->id].name; + break; + default: + APP_CHECK(0, "System error " + "occurred while saving " + "parameter to file"); + } + + fprintf(f, " %s", name); + } + fprintf(f, "\n"); + } + + /* msgq_in */ + if (p->n_msgq_in) { + uint32_t j; + + fprintf(f, "msgq_in ="); + for (j = 0; j < p->n_msgq_in; j++) { + uint32_t id = p->msgq_in[j]; + char *name = app->msgq_params[id].name; + + fprintf(f, " %s", name); + } + fprintf(f, "\n"); + } + + /* msgq_out */ + if (p->n_msgq_out) { + uint32_t j; + + fprintf(f, "msgq_out ="); + for (j = 0; j < p->n_msgq_out; j++) { + uint32_t id = p->msgq_out[j]; + char *name = app->msgq_params[id].name; + + fprintf(f, " %s", name); + } + fprintf(f, "\n"); + } + + /* timer_period */ + fprintf(f, "timer_period = %" PRIu32 "\n", p->timer_period); + + /* args */ + if (p->n_args) { + uint32_t j; + + for (j = 0; j < 
p->n_args; j++) + fprintf(f, "%s = %s\n", p->args_name[j], + p->args_value[j]); + } + + fprintf(f, "\n"); + } +} + +void +app_config_save(struct app_params *app, const char *file_name) +{ + FILE *file; + char *name, *dir_name; + int status; + + name = strdup(file_name); + dir_name = dirname(name); + status = access(dir_name, W_OK); + APP_CHECK((status == 0), + "Error: need write access privilege to directory " + "\"%s\" to save configuration\n", dir_name); + + file = fopen(file_name, "w"); + APP_CHECK((file != NULL), + "Error: failed to save configuration to file \"%s\"", + file_name); + + save_eal_params(app, file); + save_pipeline_params(app, file); + save_mempool_params(app, file); + save_links_params(app, file); + save_rxq_params(app, file); + save_txq_params(app, file); + save_swq_params(app, file); + save_tm_params(app, file); + save_source_params(app, file); + save_sink_params(app, file); + save_msgq_params(app, file); + + fclose(file); + free(name); +} + +int +app_config_init(struct app_params *app) +{ + size_t i; + + memcpy(app, &app_params_default, sizeof(struct app_params)); + + for (i = 0; i < RTE_DIM(app->mempool_params); i++) + memcpy(&app->mempool_params[i], + &mempool_params_default, + sizeof(struct app_mempool_params)); + + for (i = 0; i < RTE_DIM(app->link_params); i++) + memcpy(&app->link_params[i], + &link_params_default, + sizeof(struct app_link_params)); + + for (i = 0; i < RTE_DIM(app->hwq_in_params); i++) + memcpy(&app->hwq_in_params[i], + &default_hwq_in_params, + sizeof(default_hwq_in_params)); + + for (i = 0; i < RTE_DIM(app->hwq_out_params); i++) + memcpy(&app->hwq_out_params[i], + &default_hwq_out_params, + sizeof(default_hwq_out_params)); + + for (i = 0; i < RTE_DIM(app->swq_params); i++) + memcpy(&app->swq_params[i], + &default_swq_params, + sizeof(default_swq_params)); + + for (i = 0; i < RTE_DIM(app->tm_params); i++) + memcpy(&app->tm_params[i], + &default_tm_params, + sizeof(default_tm_params)); + + for (i = 0; i < 
RTE_DIM(app->source_params); i++) + memcpy(&app->source_params[i], + &default_source_params, + sizeof(default_source_params)); + + for (i = 0; i < RTE_DIM(app->sink_params); i++) + memcpy(&app->sink_params[i], + &default_sink_params, + sizeof(default_sink_params)); + + for (i = 0; i < RTE_DIM(app->msgq_params); i++) + memcpy(&app->msgq_params[i], + &default_msgq_params, + sizeof(default_msgq_params)); + + for (i = 0; i < RTE_DIM(app->pipeline_params); i++) + memcpy(&app->pipeline_params[i], + &default_pipeline_params, + sizeof(default_pipeline_params)); + + return 0; +} + +static char * +filenamedup(const char *filename, const char *suffix) +{ + char *s = malloc(strlen(filename) + strlen(suffix) + 1); + + if (!s) + return NULL; + + sprintf(s, "%s%s", filename, suffix); + return s; +} + +int +app_config_args(struct app_params *app, int argc, char **argv) +{ + const char *optname; + int opt, option_index; + int f_present, s_present, p_present, l_present; + int preproc_present, preproc_params_present; + int scaned = 0; + + static struct option lgopts[] = { + { "preproc", 1, 0, 0 }, + { "preproc-args", 1, 0, 0 }, + { NULL, 0, 0, 0 } + }; + + /* Copy application name */ + strncpy(app->app_name, argv[0], APP_APPNAME_SIZE - 1); + + f_present = 0; + s_present = 0; + p_present = 0; + l_present = 0; + preproc_present = 0; + preproc_params_present = 0; + + while ((opt = getopt_long(argc, argv, "f:s:p:l:", lgopts, + &option_index)) != EOF) + switch (opt) { + case 'f': + if (f_present) + rte_panic("Error: Config file is provided " + "more than once\n"); + f_present = 1; + + if (!strlen(optarg)) + rte_panic("Error: Config file name is null\n"); + + app->config_file = strdup(optarg); + if (app->config_file == NULL) + rte_panic("Error: Memory allocation failure\n"); + + break; + + case 's': + if (s_present) + rte_panic("Error: Script file is provided " + "more than once\n"); + s_present = 1; + + if (!strlen(optarg)) + rte_panic("Error: Script file name is null\n"); + + 
app->script_file = strdup(optarg); + if (app->script_file == NULL) + rte_panic("Error: Memory allocation failure\n"); + + break; + + case 'p': + if (p_present) + rte_panic("Error: PORT_MASK is provided " + "more than once\n"); + p_present = 1; + + if ((sscanf(optarg, "%" SCNx64 "%n", &app->port_mask, + &scaned) != 1) || + ((size_t) scaned != strlen(optarg))) + rte_panic("Error: PORT_MASK is not " + "a hexadecimal integer\n"); + + if (app->port_mask == 0) + rte_panic("Error: PORT_MASK is null\n"); + + break; + + case 'l': + if (l_present) + rte_panic("Error: LOG_LEVEL is provided " + "more than once\n"); + l_present = 1; + + if ((sscanf(optarg, "%" SCNu32 "%n", &app->log_level, + &scaned) != 1) || + ((size_t) scaned != strlen(optarg)) || + (app->log_level >= APP_LOG_LEVELS)) + rte_panic("Error: LOG_LEVEL invalid value\n"); + + break; + + case 0: + optname = lgopts[option_index].name; + + if (strcmp(optname, "preproc") == 0) { + if (preproc_present) + rte_panic("Error: Preprocessor argument " + "is provided more than once\n"); + preproc_present = 1; + + app->preproc = strdup(optarg); + break; + } + + if (strcmp(optname, "preproc-args") == 0) { + if (preproc_params_present) + rte_panic("Error: Preprocessor args " + "are provided more than once\n"); + preproc_params_present = 1; + + app->preproc_args = strdup(optarg); + break; + } + + app_print_usage(argv[0]); + break; + + default: + app_print_usage(argv[0]); + } + + optind = 0; /* reset getopt lib */ + + /* Check dependencies between args */ + if (preproc_params_present && (preproc_present == 0)) + rte_panic("Error: Preprocessor args specified while " + "preprocessor is not defined\n"); + + app->parser_file = preproc_present ? 
+ filenamedup(app->config_file, ".preproc") : + strdup(app->config_file); + app->output_file = filenamedup(app->config_file, ".out"); + + return 0; +} + +int +app_config_preproc(struct app_params *app) +{ + char buffer[256]; + int status; + + if (app->preproc == NULL) + return 0; + + status = access(app->config_file, F_OK | R_OK); + APP_CHECK((status == 0), "Error: Unable to open file %s", + app->config_file); + + snprintf(buffer, sizeof(buffer), "%s %s %s > %s", + app->preproc, + app->preproc_args ? app->preproc_args : "", + app->config_file, + app->parser_file); + + status = system(buffer); + APP_CHECK((WIFEXITED(status) && (WEXITSTATUS(status) == 0)), + "Error occurred while pre-processing file \"%s\"\n", + app->config_file); + + return status; +} diff --git a/examples/ip_pipeline/config_parse_tm.c b/examples/ip_pipeline/config_parse_tm.c new file mode 100644 index 00000000..e75eed71 --- /dev/null +++ b/examples/ip_pipeline/config_parse_tm.c @@ -0,0 +1,448 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <getopt.h> +#include <errno.h> +#include <stdarg.h> +#include <string.h> +#include <libgen.h> +#include <unistd.h> + +#include <rte_errno.h> +#include <rte_cfgfile.h> +#include <rte_string_fns.h> + +#include "app.h" + +static int +tm_cfgfile_load_sched_port( + struct rte_cfgfile *file, + struct rte_sched_port_params *port_params) +{ + const char *entry; + int j; + + entry = rte_cfgfile_get_entry(file, "port", "frame overhead"); + if (entry) + port_params->frame_overhead = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(file, "port", "mtu"); + if (entry) + port_params->mtu = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(file, + "port", + "number of subports per port"); + if (entry) + port_params->n_subports_per_port = (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, + "port", + "number of pipes per subport"); + if (entry) + port_params->n_pipes_per_subport = (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, "port", "queue sizes"); + if (entry) { + char *next; + + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + 
port_params->qsize[j] = (uint16_t) + strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + +#ifdef RTE_SCHED_RED + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + char str[32]; + + /* Parse WRED min thresholds */ + snprintf(str, sizeof(str), "tc %" PRId32 " wred min", j); + entry = rte_cfgfile_get_entry(file, "red", str); + if (entry) { + char *next; + int k; + + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].min_th + = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED max thresholds */ + snprintf(str, sizeof(str), "tc %" PRId32 " wred max", j); + entry = rte_cfgfile_get_entry(file, "red", str); + if (entry) { + char *next; + int k; + + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].max_th + = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED inverse mark probabilities */ + snprintf(str, sizeof(str), "tc %" PRId32 " wred inv prob", j); + entry = rte_cfgfile_get_entry(file, "red", str); + if (entry) { + char *next; + int k; + + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].maxp_inv + = (uint8_t)strtol(entry, &next, 10); + + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED EWMA filter weights */ + snprintf(str, sizeof(str), "tc %" PRId32 " wred weight", j); + entry = rte_cfgfile_get_entry(file, "red", str); + if (entry) { + char *next; + int k; + + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].wq_log2 + = (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + } +#endif /* RTE_SCHED_RED */ + + return 0; +} + +static int 
+tm_cfgfile_load_sched_pipe( + struct rte_cfgfile *file, + struct rte_sched_port_params *port_params, + struct rte_sched_pipe_params *pipe_params) +{ + int i, j; + char *next; + const char *entry; + int profiles; + + profiles = rte_cfgfile_num_sections(file, + "pipe profile", sizeof("pipe profile") - 1); + port_params->n_pipe_profiles = profiles; + + for (j = 0; j < profiles; j++) { + char pipe_name[32]; + + snprintf(pipe_name, sizeof(pipe_name), + "pipe profile %" PRId32, j); + + entry = rte_cfgfile_get_entry(file, pipe_name, "tb rate"); + if (entry) + pipe_params[j].tb_rate = (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, pipe_name, "tb size"); + if (entry) + pipe_params[j].tb_size = (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, pipe_name, "tc period"); + if (entry) + pipe_params[j].tc_period = (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, pipe_name, "tc 0 rate"); + if (entry) + pipe_params[j].tc_rate[0] = (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, pipe_name, "tc 1 rate"); + if (entry) + pipe_params[j].tc_rate[1] = (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, pipe_name, "tc 2 rate"); + if (entry) + pipe_params[j].tc_rate[2] = (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, pipe_name, "tc 3 rate"); + if (entry) + pipe_params[j].tc_rate[3] = (uint32_t) atoi(entry); + +#ifdef RTE_SCHED_SUBPORT_TC_OV + entry = rte_cfgfile_get_entry(file, pipe_name, + "tc 3 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight = (uint8_t)atoi(entry); +#endif + + entry = rte_cfgfile_get_entry(file, + pipe_name, + "tc 0 wrr weights"); + if (entry) + for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*0 + i] = + (uint8_t) strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + + entry = rte_cfgfile_get_entry(file, pipe_name, "tc 1 wrr weights"); + if (entry) + for (i = 0; i 
< RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*1 + i] = + (uint8_t) strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + + entry = rte_cfgfile_get_entry(file, pipe_name, "tc 2 wrr weights"); + if (entry) + for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*2 + i] = + (uint8_t) strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + + entry = rte_cfgfile_get_entry(file, pipe_name, "tc 3 wrr weights"); + if (entry) + for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*3 + i] = + (uint8_t) strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + return 0; +} + +static int +tm_cfgfile_load_sched_subport( + struct rte_cfgfile *file, + struct rte_sched_subport_params *subport_params, + int *pipe_to_profile) +{ + const char *entry; + int i, j, k; + + for (i = 0; i < APP_MAX_SCHED_SUBPORTS; i++) { + char sec_name[CFG_NAME_LEN]; + + snprintf(sec_name, sizeof(sec_name), + "subport %" PRId32, i); + + if (rte_cfgfile_has_section(file, sec_name)) { + entry = rte_cfgfile_get_entry(file, + sec_name, + "tb rate"); + if (entry) + subport_params[i].tb_rate = + (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, + sec_name, + "tb size"); + if (entry) + subport_params[i].tb_size = + (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, + sec_name, + "tc period"); + if (entry) + subport_params[i].tc_period = + (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, + sec_name, + "tc 0 rate"); + if (entry) + subport_params[i].tc_rate[0] = + (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, + sec_name, + "tc 1 rate"); + if (entry) + subport_params[i].tc_rate[1] = + (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, + sec_name, + "tc 2 rate"); + if (entry) + 
subport_params[i].tc_rate[2] = + (uint32_t) atoi(entry); + + entry = rte_cfgfile_get_entry(file, + sec_name, + "tc 3 rate"); + if (entry) + subport_params[i].tc_rate[3] = + (uint32_t) atoi(entry); + + int n_entries = rte_cfgfile_section_num_entries(file, + sec_name); + struct rte_cfgfile_entry entries[n_entries]; + + rte_cfgfile_section_entries(file, + sec_name, + entries, + n_entries); + + for (j = 0; j < n_entries; j++) + if (strncmp("pipe", + entries[j].name, + sizeof("pipe") - 1) == 0) { + int profile; + char *tokens[2] = {NULL, NULL}; + int n_tokens; + int begin, end; + char name[CFG_NAME_LEN + 1]; + + profile = atoi(entries[j].value); + strncpy(name, + entries[j].name, + sizeof(name)); + n_tokens = rte_strsplit( + &name[sizeof("pipe")], + strnlen(name, CFG_NAME_LEN), + tokens, 2, '-'); + + begin = atoi(tokens[0]); + if (n_tokens == 2) + end = atoi(tokens[1]); + else + end = begin; + + if ((end >= APP_MAX_SCHED_PIPES) || + (begin > end)) + return -1; + + for (k = begin; k <= end; k++) { + char profile_name[CFG_NAME_LEN]; + + snprintf(profile_name, + sizeof(profile_name), + "pipe profile %" PRId32, + profile); + if (rte_cfgfile_has_section(file, profile_name)) + pipe_to_profile[i * APP_MAX_SCHED_PIPES + k] = profile; + else + rte_exit(EXIT_FAILURE, + "Wrong pipe profile %s\n", + entries[j].value); + } + } + } + } + + return 0; +} + +static int +tm_cfgfile_load(struct app_pktq_tm_params *tm) +{ + struct rte_cfgfile *file; + uint32_t i; + + memset(tm->sched_subport_params, 0, sizeof(tm->sched_subport_params)); + memset(tm->sched_pipe_profiles, 0, sizeof(tm->sched_pipe_profiles)); + memset(&tm->sched_port_params, 0, sizeof(tm->sched_port_params)); + for (i = 0; i < APP_MAX_SCHED_SUBPORTS * APP_MAX_SCHED_PIPES; i++) + tm->sched_pipe_to_profile[i] = -1; + + tm->sched_port_params.pipe_profiles = &tm->sched_pipe_profiles[0]; + + if (tm->file_name[0] == '\0') + return -1; + + file = rte_cfgfile_load(tm->file_name, 0); + if (file == NULL) + return -1; + + 
tm_cfgfile_load_sched_port(file, + &tm->sched_port_params); + tm_cfgfile_load_sched_subport(file, + tm->sched_subport_params, + tm->sched_pipe_to_profile); + tm_cfgfile_load_sched_pipe(file, + &tm->sched_port_params, + tm->sched_pipe_profiles); + + rte_cfgfile_close(file); + return 0; +} + +int +app_config_parse_tm(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < RTE_DIM(app->tm_params); i++) { + struct app_pktq_tm_params *p = &app->tm_params[i]; + int status; + + if (!APP_PARAM_VALID(p)) + break; + + status = tm_cfgfile_load(p); + APP_CHECK(status == 0, + "Parse error for %s configuration file \"%s\"\n", + p->name, + p->file_name); + } + + return 0; +} diff --git a/examples/ip_pipeline/cpu_core_map.c b/examples/ip_pipeline/cpu_core_map.c new file mode 100644 index 00000000..cb088b1c --- /dev/null +++ b/examples/ip_pipeline/cpu_core_map.c @@ -0,0 +1,492 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <inttypes.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <rte_lcore.h> + +#include "cpu_core_map.h" + +struct cpu_core_map { + uint32_t n_max_sockets; + uint32_t n_max_cores_per_socket; + uint32_t n_max_ht_per_core; + uint32_t n_sockets; + uint32_t n_cores_per_socket; + uint32_t n_ht_per_core; + int map[0]; +}; + +static inline uint32_t +cpu_core_map_pos(struct cpu_core_map *map, + uint32_t socket_id, + uint32_t core_id, + uint32_t ht_id) +{ + return (socket_id * map->n_max_cores_per_socket + core_id) * + map->n_max_ht_per_core + ht_id; +} + +static int +cpu_core_map_compute_eal(struct cpu_core_map *map); + +static int +cpu_core_map_compute_linux(struct cpu_core_map *map); + +static int +cpu_core_map_compute_and_check(struct cpu_core_map *map); + +struct cpu_core_map * +cpu_core_map_init(uint32_t n_max_sockets, + uint32_t n_max_cores_per_socket, + uint32_t n_max_ht_per_core, + uint32_t eal_initialized) +{ + uint32_t map_size, map_mem_size, i; + struct cpu_core_map *map; + int status; + + /* Check input arguments */ + if ((n_max_sockets == 0) || + (n_max_cores_per_socket == 0) || + (n_max_ht_per_core == 0)) + return NULL; + + /* Memory allocation */ + map_size = n_max_sockets * n_max_cores_per_socket * n_max_ht_per_core; + map_mem_size = sizeof(struct cpu_core_map) + map_size * sizeof(int); + map = (struct cpu_core_map *) malloc(map_mem_size); + if (map == NULL) + return NULL; + + /* 
Initialization */ + map->n_max_sockets = n_max_sockets; + map->n_max_cores_per_socket = n_max_cores_per_socket; + map->n_max_ht_per_core = n_max_ht_per_core; + map->n_sockets = 0; + map->n_cores_per_socket = 0; + map->n_ht_per_core = 0; + + for (i = 0; i < map_size; i++) + map->map[i] = -1; + + status = (eal_initialized) ? + cpu_core_map_compute_eal(map) : + cpu_core_map_compute_linux(map); + + if (status) { + free(map); + return NULL; + } + + status = cpu_core_map_compute_and_check(map); + if (status) { + free(map); + return NULL; + } + + return map; +} + +int +cpu_core_map_compute_eal(struct cpu_core_map *map) +{ + uint32_t socket_id, core_id, ht_id; + + /* Compute map */ + for (socket_id = 0; socket_id < map->n_max_sockets; socket_id++) { + uint32_t n_detected, core_id_contig; + int lcore_id; + + n_detected = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + struct lcore_config *p = &lcore_config[lcore_id]; + + if ((p->detected) && (p->socket_id == socket_id)) + n_detected++; + } + + core_id_contig = 0; + + for (core_id = 0; n_detected ; core_id++) { + ht_id = 0; + + for (lcore_id = 0; + lcore_id < RTE_MAX_LCORE; + lcore_id++) { + struct lcore_config *p = + &lcore_config[lcore_id]; + + if ((p->detected) && + (p->socket_id == socket_id) && + (p->core_id == core_id)) { + uint32_t pos = cpu_core_map_pos(map, + socket_id, + core_id_contig, + ht_id); + + map->map[pos] = lcore_id; + ht_id++; + n_detected--; + } + } + + if (ht_id) { + core_id_contig++; + if (core_id_contig == + map->n_max_cores_per_socket) + return -1; + } + } + } + + return 0; +} + +int +cpu_core_map_compute_and_check(struct cpu_core_map *map) +{ + uint32_t socket_id, core_id, ht_id; + + /* Compute n_ht_per_core, n_cores_per_socket, n_sockets */ + for (ht_id = 0; ht_id < map->n_max_ht_per_core; ht_id++) { + if (map->map[ht_id] == -1) + break; + + map->n_ht_per_core++; + } + + if (map->n_ht_per_core == 0) + return -1; + + for (core_id = 0; core_id < map->n_max_cores_per_socket; 
core_id++) { + uint32_t pos = core_id * map->n_max_ht_per_core; + + if (map->map[pos] == -1) + break; + + map->n_cores_per_socket++; + } + + if (map->n_cores_per_socket == 0) + return -1; + + for (socket_id = 0; socket_id < map->n_max_sockets; socket_id++) { + uint32_t pos = socket_id * map->n_max_cores_per_socket * + map->n_max_ht_per_core; + + if (map->map[pos] == -1) + break; + + map->n_sockets++; + } + + if (map->n_sockets == 0) + return -1; + + /* Check that each socket has exactly the same number of cores + and that each core has exactly the same number of hyper-threads */ + for (socket_id = 0; socket_id < map->n_sockets; socket_id++) { + for (core_id = 0; core_id < map->n_cores_per_socket; core_id++) + for (ht_id = 0; + ht_id < map->n_max_ht_per_core; + ht_id++) { + uint32_t pos = (socket_id * + map->n_max_cores_per_socket + core_id) * + map->n_max_ht_per_core + ht_id; + + if (((ht_id < map->n_ht_per_core) && + (map->map[pos] == -1)) || + ((ht_id >= map->n_ht_per_core) && + (map->map[pos] != -1))) + return -1; + } + + for ( ; core_id < map->n_max_cores_per_socket; core_id++) + for (ht_id = 0; + ht_id < map->n_max_ht_per_core; + ht_id++) { + uint32_t pos = cpu_core_map_pos(map, + socket_id, + core_id, + ht_id); + + if (map->map[pos] != -1) + return -1; + } + } + + return 0; +} + +#define FILE_LINUX_CPU_N_LCORES \ + "/sys/devices/system/cpu/present" + +static int +cpu_core_map_get_n_lcores_linux(void) +{ + char buffer[64], *string; + FILE *fd; + + fd = fopen(FILE_LINUX_CPU_N_LCORES, "r"); + if (fd == NULL) + return -1; + + if (fgets(buffer, sizeof(buffer), fd) == NULL) { + fclose(fd); + return -1; + } + + fclose(fd); + + string = index(buffer, '-'); + if (string == NULL) + return -1; + + return atoi(++string) + 1; +} + +#define FILE_LINUX_CPU_CORE_ID \ + "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_id" + +static int +cpu_core_map_get_core_id_linux(int lcore_id) +{ + char buffer[64]; + FILE *fd; + int core_id; + + snprintf(buffer, sizeof(buffer), 
FILE_LINUX_CPU_CORE_ID, lcore_id); + fd = fopen(buffer, "r"); + if (fd == NULL) + return -1; + + if (fgets(buffer, sizeof(buffer), fd) == NULL) { + fclose(fd); + return -1; + } + + fclose(fd); + + core_id = atoi(buffer); + return core_id; +} + +#define FILE_LINUX_CPU_SOCKET_ID \ + "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/physical_package_id" + +static int +cpu_core_map_get_socket_id_linux(int lcore_id) +{ + char buffer[64]; + FILE *fd; + int socket_id; + + snprintf(buffer, sizeof(buffer), FILE_LINUX_CPU_SOCKET_ID, lcore_id); + fd = fopen(buffer, "r"); + if (fd == NULL) + return -1; + + if (fgets(buffer, sizeof(buffer), fd) == NULL) { + fclose(fd); + return -1; + } + + fclose(fd); + + socket_id = atoi(buffer); + return socket_id; +} + +int +cpu_core_map_compute_linux(struct cpu_core_map *map) +{ + uint32_t socket_id, core_id, ht_id; + int n_lcores; + + n_lcores = cpu_core_map_get_n_lcores_linux(); + if (n_lcores <= 0) + return -1; + + /* Compute map */ + for (socket_id = 0; socket_id < map->n_max_sockets; socket_id++) { + uint32_t n_detected, core_id_contig; + int lcore_id; + + n_detected = 0; + for (lcore_id = 0; lcore_id < n_lcores; lcore_id++) { + int lcore_socket_id = + cpu_core_map_get_socket_id_linux(lcore_id); + + if (lcore_socket_id < 0) + return -1; + + if (((uint32_t) lcore_socket_id) == socket_id) + n_detected++; + } + + core_id_contig = 0; + + for (core_id = 0; n_detected ; core_id++) { + ht_id = 0; + + for (lcore_id = 0; lcore_id < n_lcores; lcore_id++) { + int lcore_socket_id = + cpu_core_map_get_socket_id_linux( + lcore_id); + + if (lcore_socket_id < 0) + return -1; + + int lcore_core_id = + cpu_core_map_get_core_id_linux( + lcore_id); + + if (lcore_core_id < 0) + return -1; + + if (((uint32_t) lcore_socket_id == socket_id) && + ((uint32_t) lcore_core_id == core_id)) { + uint32_t pos = cpu_core_map_pos(map, + socket_id, + core_id_contig, + ht_id); + + map->map[pos] = lcore_id; + ht_id++; + n_detected--; + } + } + + if (ht_id) { + 
core_id_contig++; + if (core_id_contig == + map->n_max_cores_per_socket) + return -1; + } + } + } + + return 0; +} + +void +cpu_core_map_print(struct cpu_core_map *map) +{ + uint32_t socket_id, core_id, ht_id; + + if (map == NULL) + return; + + for (socket_id = 0; socket_id < map->n_sockets; socket_id++) { + printf("Socket %" PRIu32 ":\n", socket_id); + + for (core_id = 0; + core_id < map->n_cores_per_socket; + core_id++) { + printf("[%" PRIu32 "] = [", core_id); + + for (ht_id = 0; ht_id < map->n_ht_per_core; ht_id++) { + int lcore_id = cpu_core_map_get_lcore_id(map, + socket_id, + core_id, + ht_id); + + uint32_t core_id_noncontig = + cpu_core_map_get_core_id_linux( + lcore_id); + + printf(" %" PRId32 " (%" PRIu32 ") ", + lcore_id, + core_id_noncontig); + } + + printf("]\n"); + } + } +} + +uint32_t +cpu_core_map_get_n_sockets(struct cpu_core_map *map) +{ + if (map == NULL) + return 0; + + return map->n_sockets; +} + +uint32_t +cpu_core_map_get_n_cores_per_socket(struct cpu_core_map *map) +{ + if (map == NULL) + return 0; + + return map->n_cores_per_socket; +} + +uint32_t +cpu_core_map_get_n_ht_per_core(struct cpu_core_map *map) +{ + if (map == NULL) + return 0; + + return map->n_ht_per_core; +} + +int +cpu_core_map_get_lcore_id(struct cpu_core_map *map, + uint32_t socket_id, + uint32_t core_id, + uint32_t ht_id) +{ + uint32_t pos; + + if ((map == NULL) || + (socket_id >= map->n_sockets) || + (core_id >= map->n_cores_per_socket) || + (ht_id >= map->n_ht_per_core)) + return -1; + + pos = cpu_core_map_pos(map, socket_id, core_id, ht_id); + + return map->map[pos]; +} + +void +cpu_core_map_free(struct cpu_core_map *map) +{ + free(map); +} diff --git a/examples/ip_pipeline/cpu_core_map.h b/examples/ip_pipeline/cpu_core_map.h new file mode 100644 index 00000000..5c2ec729 --- /dev/null +++ b/examples/ip_pipeline/cpu_core_map.h @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_CPU_CORE_MAP_H__ +#define __INCLUDE_CPU_CORE_MAP_H__ + +#include <stdio.h> + +#include <rte_lcore.h> + +struct cpu_core_map; + +struct cpu_core_map * +cpu_core_map_init(uint32_t n_max_sockets, + uint32_t n_max_cores_per_socket, + uint32_t n_max_ht_per_core, + uint32_t eal_initialized); + +uint32_t +cpu_core_map_get_n_sockets(struct cpu_core_map *map); + +uint32_t +cpu_core_map_get_n_cores_per_socket(struct cpu_core_map *map); + +uint32_t +cpu_core_map_get_n_ht_per_core(struct cpu_core_map *map); + +int +cpu_core_map_get_lcore_id(struct cpu_core_map *map, + uint32_t socket_id, + uint32_t core_id, + uint32_t ht_id); + +void cpu_core_map_print(struct cpu_core_map *map); + +void +cpu_core_map_free(struct cpu_core_map *map); + +#endif diff --git a/examples/ip_pipeline/init.c b/examples/ip_pipeline/init.c new file mode 100644 index 00000000..83422e88 --- /dev/null +++ b/examples/ip_pipeline/init.c @@ -0,0 +1,1637 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <inttypes.h> +#include <stdio.h> +#include <string.h> + +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_ether.h> +#include <rte_ip.h> +#include <rte_eal.h> +#include <rte_malloc.h> + +#include "app.h" +#include "pipeline.h" +#include "pipeline_common_fe.h" +#include "pipeline_master.h" +#include "pipeline_passthrough.h" +#include "pipeline_firewall.h" +#include "pipeline_flow_classification.h" +#include "pipeline_flow_actions.h" +#include "pipeline_routing.h" +#include "thread_fe.h" + +#define APP_NAME_SIZE 32 + +static void +app_init_core_map(struct app_params *app) +{ + APP_LOG(app, HIGH, "Initializing CPU core map ..."); + app->core_map = cpu_core_map_init(4, 32, 4, 0); + + if (app->core_map == NULL) + rte_panic("Cannot create CPU core map\n"); + + if (app->log_level >= APP_LOG_LEVEL_LOW) + cpu_core_map_print(app->core_map); +} + +static void +app_init_core_mask(struct app_params *app) +{ + uint64_t mask = 0; + uint32_t i; + + for (i = 0; i < app->n_pipelines; i++) { + struct app_pipeline_params *p = &app->pipeline_params[i]; + int lcore_id; + + lcore_id = cpu_core_map_get_lcore_id(app->core_map, + p->socket_id, + p->core_id, + 
p->hyper_th_id); + + if (lcore_id < 0) + rte_panic("Cannot create CPU core mask\n"); + + mask |= 1LLU << lcore_id; + } + + app->core_mask = mask; + APP_LOG(app, HIGH, "CPU core mask = 0x%016" PRIx64, app->core_mask); +} + +static void +app_init_eal(struct app_params *app) +{ + char buffer[256]; + struct app_eal_params *p = &app->eal_params; + uint32_t n_args = 0; + uint32_t i; + int status; + + app->eal_argv[n_args++] = strdup(app->app_name); + + snprintf(buffer, sizeof(buffer), "-c%" PRIx64, app->core_mask); + app->eal_argv[n_args++] = strdup(buffer); + + if (p->coremap) { + snprintf(buffer, sizeof(buffer), "--lcores=%s", p->coremap); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->master_lcore_present) { + snprintf(buffer, + sizeof(buffer), + "--master-lcore=%" PRIu32, + p->master_lcore); + app->eal_argv[n_args++] = strdup(buffer); + } + + snprintf(buffer, sizeof(buffer), "-n%" PRIu32, p->channels); + app->eal_argv[n_args++] = strdup(buffer); + + if (p->memory_present) { + snprintf(buffer, sizeof(buffer), "-m%" PRIu32, p->memory); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->ranks_present) { + snprintf(buffer, sizeof(buffer), "-r%" PRIu32, p->ranks); + app->eal_argv[n_args++] = strdup(buffer); + } + + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->pci_blacklist[i] == NULL) + break; + + snprintf(buffer, + sizeof(buffer), + "--pci-blacklist=%s", + p->pci_blacklist[i]); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (app->port_mask != 0) + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->pci_whitelist[i] == NULL) + break; + + snprintf(buffer, + sizeof(buffer), + "--pci-whitelist=%s", + p->pci_whitelist[i]); + app->eal_argv[n_args++] = strdup(buffer); + } + else + for (i = 0; i < app->n_links; i++) { + char *pci_bdf = app->link_params[i].pci_bdf; + + snprintf(buffer, + sizeof(buffer), + "--pci-whitelist=%s", + pci_bdf); + app->eal_argv[n_args++] = strdup(buffer); + } + + for (i = 0; i < APP_MAX_LINKS; i++) { + if (p->vdev[i] == NULL) + 
break; + + snprintf(buffer, + sizeof(buffer), + "--vdev=%s", + p->vdev[i]); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->vmware_tsc_map_present) && p->vmware_tsc_map) { + snprintf(buffer, sizeof(buffer), "--vmware-tsc-map"); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->proc_type) { + snprintf(buffer, + sizeof(buffer), + "--proc-type=%s", + p->proc_type); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->syslog) { + snprintf(buffer, sizeof(buffer), "--syslog=%s", p->syslog); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->log_level_present) { + snprintf(buffer, + sizeof(buffer), + "--log-level=%" PRIu32, + p->log_level); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->version_present) && p->version) { + snprintf(buffer, sizeof(buffer), "-v"); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->help_present) && p->help) { + snprintf(buffer, sizeof(buffer), "--help"); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->no_huge_present) && p->no_huge) { + snprintf(buffer, sizeof(buffer), "--no-huge"); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->no_pci_present) && p->no_pci) { + snprintf(buffer, sizeof(buffer), "--no-pci"); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->no_hpet_present) && p->no_hpet) { + snprintf(buffer, sizeof(buffer), "--no-hpet"); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->no_shconf_present) && p->no_shconf) { + snprintf(buffer, sizeof(buffer), "--no-shconf"); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->add_driver) { + snprintf(buffer, sizeof(buffer), "-d=%s", p->add_driver); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->socket_mem) { + snprintf(buffer, + sizeof(buffer), + "--socket-mem=%s", + p->socket_mem); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->huge_dir) { + snprintf(buffer, sizeof(buffer), "--huge-dir=%s", p->huge_dir); + app->eal_argv[n_args++] = strdup(buffer); + } + + if 
(p->file_prefix) { + snprintf(buffer, + sizeof(buffer), + "--file-prefix=%s", + p->file_prefix); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->base_virtaddr) { + snprintf(buffer, + sizeof(buffer), + "--base-virtaddr=%s", + p->base_virtaddr); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->create_uio_dev_present) && p->create_uio_dev) { + snprintf(buffer, sizeof(buffer), "--create-uio-dev"); + app->eal_argv[n_args++] = strdup(buffer); + } + + if (p->vfio_intr) { + snprintf(buffer, + sizeof(buffer), + "--vfio-intr=%s", + p->vfio_intr); + app->eal_argv[n_args++] = strdup(buffer); + } + + if ((p->xen_dom0_present) && (p->xen_dom0)) { + snprintf(buffer, sizeof(buffer), "--xen-dom0"); + app->eal_argv[n_args++] = strdup(buffer); + } + + snprintf(buffer, sizeof(buffer), "--"); + app->eal_argv[n_args++] = strdup(buffer); + + app->eal_argc = n_args; + + APP_LOG(app, HIGH, "Initializing EAL ..."); + if (app->log_level >= APP_LOG_LEVEL_LOW) { + int i; + + fprintf(stdout, "[APP] EAL arguments: \""); + for (i = 1; i < app->eal_argc; i++) + fprintf(stdout, "%s ", app->eal_argv[i]); + fprintf(stdout, "\"\n"); + } + + status = rte_eal_init(app->eal_argc, app->eal_argv); + if (status < 0) + rte_panic("EAL init error\n"); +} + +static void +app_init_mempool(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_mempools; i++) { + struct app_mempool_params *p = &app->mempool_params[i]; + + APP_LOG(app, HIGH, "Initializing %s ...", p->name); + app->mempool[i] = rte_mempool_create( + p->name, + p->pool_size, + p->buffer_size, + p->cache_size, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + p->cpu_socket_id, + 0); + + if (app->mempool[i] == NULL) + rte_panic("%s init error\n", p->name); + } +} + +static inline int +app_link_filter_arp_add(struct app_link_params *link) +{ + struct rte_eth_ethertype_filter filter = { + .ether_type = ETHER_TYPE_ARP, + .flags = 0, + .queue = link->arp_q, + }; + + 
return rte_eth_dev_filter_ctrl(link->pmd_id, + RTE_ETH_FILTER_ETHERTYPE, + RTE_ETH_FILTER_ADD, + &filter); +} + +static inline int +app_link_filter_tcp_syn_add(struct app_link_params *link) +{ + struct rte_eth_syn_filter filter = { + .hig_pri = 1, + .queue = link->tcp_syn_q, + }; + + return rte_eth_dev_filter_ctrl(link->pmd_id, + RTE_ETH_FILTER_SYN, + RTE_ETH_FILTER_ADD, + &filter); +} + +static inline int +app_link_filter_ip_add(struct app_link_params *l1, struct app_link_params *l2) +{ + struct rte_eth_ntuple_filter filter = { + .flags = RTE_5TUPLE_FLAGS, + .dst_ip = rte_bswap32(l2->ip), + .dst_ip_mask = UINT32_MAX, /* Enable */ + .src_ip = 0, + .src_ip_mask = 0, /* Disable */ + .dst_port = 0, + .dst_port_mask = 0, /* Disable */ + .src_port = 0, + .src_port_mask = 0, /* Disable */ + .proto = 0, + .proto_mask = 0, /* Disable */ + .tcp_flags = 0, + .priority = 1, /* Lowest */ + .queue = l1->ip_local_q, + }; + + return rte_eth_dev_filter_ctrl(l1->pmd_id, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_ADD, + &filter); +} + +static inline int +app_link_filter_ip_del(struct app_link_params *l1, struct app_link_params *l2) +{ + struct rte_eth_ntuple_filter filter = { + .flags = RTE_5TUPLE_FLAGS, + .dst_ip = rte_bswap32(l2->ip), + .dst_ip_mask = UINT32_MAX, /* Enable */ + .src_ip = 0, + .src_ip_mask = 0, /* Disable */ + .dst_port = 0, + .dst_port_mask = 0, /* Disable */ + .src_port = 0, + .src_port_mask = 0, /* Disable */ + .proto = 0, + .proto_mask = 0, /* Disable */ + .tcp_flags = 0, + .priority = 1, /* Lowest */ + .queue = l1->ip_local_q, + }; + + return rte_eth_dev_filter_ctrl(l1->pmd_id, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_DELETE, + &filter); +} + +static inline int +app_link_filter_tcp_add(struct app_link_params *l1, struct app_link_params *l2) +{ + struct rte_eth_ntuple_filter filter = { + .flags = RTE_5TUPLE_FLAGS, + .dst_ip = rte_bswap32(l2->ip), + .dst_ip_mask = UINT32_MAX, /* Enable */ + .src_ip = 0, + .src_ip_mask = 0, /* Disable */ + .dst_port = 0, + 
.dst_port_mask = 0, /* Disable */ + .src_port = 0, + .src_port_mask = 0, /* Disable */ + .proto = IPPROTO_TCP, + .proto_mask = UINT8_MAX, /* Enable */ + .tcp_flags = 0, + .priority = 2, /* Higher priority than IP */ + .queue = l1->tcp_local_q, + }; + + return rte_eth_dev_filter_ctrl(l1->pmd_id, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_ADD, + &filter); +} + +static inline int +app_link_filter_tcp_del(struct app_link_params *l1, struct app_link_params *l2) +{ + struct rte_eth_ntuple_filter filter = { + .flags = RTE_5TUPLE_FLAGS, + .dst_ip = rte_bswap32(l2->ip), + .dst_ip_mask = UINT32_MAX, /* Enable */ + .src_ip = 0, + .src_ip_mask = 0, /* Disable */ + .dst_port = 0, + .dst_port_mask = 0, /* Disable */ + .src_port = 0, + .src_port_mask = 0, /* Disable */ + .proto = IPPROTO_TCP, + .proto_mask = UINT8_MAX, /* Enable */ + .tcp_flags = 0, + .priority = 2, /* Higher priority than IP */ + .queue = l1->tcp_local_q, + }; + + return rte_eth_dev_filter_ctrl(l1->pmd_id, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_DELETE, + &filter); +} + +static inline int +app_link_filter_udp_add(struct app_link_params *l1, struct app_link_params *l2) +{ + struct rte_eth_ntuple_filter filter = { + .flags = RTE_5TUPLE_FLAGS, + .dst_ip = rte_bswap32(l2->ip), + .dst_ip_mask = UINT32_MAX, /* Enable */ + .src_ip = 0, + .src_ip_mask = 0, /* Disable */ + .dst_port = 0, + .dst_port_mask = 0, /* Disable */ + .src_port = 0, + .src_port_mask = 0, /* Disable */ + .proto = IPPROTO_UDP, + .proto_mask = UINT8_MAX, /* Enable */ + .tcp_flags = 0, + .priority = 2, /* Higher priority than IP */ + .queue = l1->udp_local_q, + }; + + return rte_eth_dev_filter_ctrl(l1->pmd_id, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_ADD, + &filter); +} + +static inline int +app_link_filter_udp_del(struct app_link_params *l1, struct app_link_params *l2) +{ + struct rte_eth_ntuple_filter filter = { + .flags = RTE_5TUPLE_FLAGS, + .dst_ip = rte_bswap32(l2->ip), + .dst_ip_mask = UINT32_MAX, /* Enable */ + .src_ip = 0, + .src_ip_mask = 
0, /* Disable */ + .dst_port = 0, + .dst_port_mask = 0, /* Disable */ + .src_port = 0, + .src_port_mask = 0, /* Disable */ + .proto = IPPROTO_UDP, + .proto_mask = UINT8_MAX, /* Enable */ + .tcp_flags = 0, + .priority = 2, /* Higher priority than IP */ + .queue = l1->udp_local_q, + }; + + return rte_eth_dev_filter_ctrl(l1->pmd_id, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_DELETE, + &filter); +} + +static inline int +app_link_filter_sctp_add(struct app_link_params *l1, struct app_link_params *l2) +{ + struct rte_eth_ntuple_filter filter = { + .flags = RTE_5TUPLE_FLAGS, + .dst_ip = rte_bswap32(l2->ip), + .dst_ip_mask = UINT32_MAX, /* Enable */ + .src_ip = 0, + .src_ip_mask = 0, /* Disable */ + .dst_port = 0, + .dst_port_mask = 0, /* Disable */ + .src_port = 0, + .src_port_mask = 0, /* Disable */ + .proto = IPPROTO_SCTP, + .proto_mask = UINT8_MAX, /* Enable */ + .tcp_flags = 0, + .priority = 2, /* Higher priority than IP */ + .queue = l1->sctp_local_q, + }; + + return rte_eth_dev_filter_ctrl(l1->pmd_id, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_ADD, + &filter); +} + +static inline int +app_link_filter_sctp_del(struct app_link_params *l1, struct app_link_params *l2) +{ + struct rte_eth_ntuple_filter filter = { + .flags = RTE_5TUPLE_FLAGS, + .dst_ip = rte_bswap32(l2->ip), + .dst_ip_mask = UINT32_MAX, /* Enable */ + .src_ip = 0, + .src_ip_mask = 0, /* Disable */ + .dst_port = 0, + .dst_port_mask = 0, /* Disable */ + .src_port = 0, + .src_port_mask = 0, /* Disable */ + .proto = IPPROTO_SCTP, + .proto_mask = UINT8_MAX, /* Enable */ + .tcp_flags = 0, + .priority = 2, /* Higher priority than IP */ + .queue = l1->sctp_local_q, + }; + + return rte_eth_dev_filter_ctrl(l1->pmd_id, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_DELETE, + &filter); +} + +static void +app_link_set_arp_filter(struct app_params *app, struct app_link_params *cp) +{ + if (cp->arp_q != 0) { + int status = app_link_filter_arp_add(cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 "): " + "Adding ARP filter (queue = %" 
PRIu32 ")", + cp->name, cp->pmd_id, cp->arp_q); + + if (status) + rte_panic("%s (%" PRIu32 "): " + "Error adding ARP filter " + "(queue = %" PRIu32 ") (%" PRId32 ")\n", + cp->name, cp->pmd_id, cp->arp_q, status); + } +} + +static void +app_link_set_tcp_syn_filter(struct app_params *app, struct app_link_params *cp) +{ + if (cp->tcp_syn_q != 0) { + int status = app_link_filter_tcp_syn_add(cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 "): " + "Adding TCP SYN filter (queue = %" PRIu32 ")", + cp->name, cp->pmd_id, cp->tcp_syn_q); + + if (status) + rte_panic("%s (%" PRIu32 "): " + "Error adding TCP SYN filter " + "(queue = %" PRIu32 ") (%" PRId32 ")\n", + cp->name, cp->pmd_id, cp->tcp_syn_q, + status); + } +} + +static int +app_link_is_virtual(struct app_link_params *p) +{ + uint32_t pmd_id = p->pmd_id; + struct rte_eth_dev *dev = &rte_eth_devices[pmd_id]; + + if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) + return 1; + + return 0; +} + +void +app_link_up_internal(struct app_params *app, struct app_link_params *cp) +{ + uint32_t i; + int status; + + if (app_link_is_virtual(cp)) { + cp->state = 1; + return; + } + + /* For each link, add filters for IP of current link */ + if (cp->ip != 0) { + for (i = 0; i < app->n_links; i++) { + struct app_link_params *p = &app->link_params[i]; + + /* IP */ + if (p->ip_local_q != 0) { + int status = app_link_filter_ip_add(p, cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 "): " + "Adding IP filter (queue= %" PRIu32 + ", IP = 0x%08" PRIx32 ")", + p->name, p->pmd_id, p->ip_local_q, + cp->ip); + + if (status) + rte_panic("%s (%" PRIu32 "): " + "Error adding IP " + "filter (queue= %" PRIu32 ", " + "IP = 0x%08" PRIx32 + ") (%" PRId32 ")\n", + p->name, p->pmd_id, + p->ip_local_q, cp->ip, status); + } + + /* TCP */ + if (p->tcp_local_q != 0) { + int status = app_link_filter_tcp_add(p, cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 "): " + "Adding TCP filter " + "(queue = %" PRIu32 + ", IP = 0x%08" PRIx32 ")", + p->name, p->pmd_id, p->tcp_local_q, + cp->ip); + + 
if (status) + rte_panic("%s (%" PRIu32 "): " + "Error adding TCP " + "filter (queue = %" PRIu32 ", " + "IP = 0x%08" PRIx32 + ") (%" PRId32 ")\n", + p->name, p->pmd_id, + p->tcp_local_q, cp->ip, status); + } + + /* UDP */ + if (p->udp_local_q != 0) { + int status = app_link_filter_udp_add(p, cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 "): " + "Adding UDP filter " + "(queue = %" PRIu32 + ", IP = 0x%08" PRIx32 ")", + p->name, p->pmd_id, p->udp_local_q, + cp->ip); + + if (status) + rte_panic("%s (%" PRIu32 "): " + "Error adding UDP " + "filter (queue = %" PRIu32 ", " + "IP = 0x%08" PRIx32 + ") (%" PRId32 ")\n", + p->name, p->pmd_id, + p->udp_local_q, cp->ip, status); + } + + /* SCTP */ + if (p->sctp_local_q != 0) { + int status = app_link_filter_sctp_add(p, cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 + "): Adding SCTP filter " + "(queue = %" PRIu32 + ", IP = 0x%08" PRIx32 ")", + p->name, p->pmd_id, p->sctp_local_q, + cp->ip); + + if (status) + rte_panic("%s (%" PRIu32 "): " + "Error adding SCTP " + "filter (queue = %" PRIu32 ", " + "IP = 0x%08" PRIx32 + ") (%" PRId32 ")\n", + p->name, p->pmd_id, + p->sctp_local_q, cp->ip, + status); + } + } + } + + /* PMD link up */ + status = rte_eth_dev_set_link_up(cp->pmd_id); + if (status < 0) + rte_panic("%s (%" PRIu32 "): PMD set link up error %" + PRId32 "\n", cp->name, cp->pmd_id, status); + + /* Mark link as UP */ + cp->state = 1; +} + +void +app_link_down_internal(struct app_params *app, struct app_link_params *cp) +{ + uint32_t i; + int status; + + if (app_link_is_virtual(cp)) { + cp->state = 0; + return; + } + + /* PMD link down */ + status = rte_eth_dev_set_link_down(cp->pmd_id); + if (status < 0) + rte_panic("%s (%" PRIu32 "): PMD set link down error %" + PRId32 "\n", cp->name, cp->pmd_id, status); + + /* Mark link as DOWN */ + cp->state = 0; + + /* Return if current link IP is not valid */ + if (cp->ip == 0) + return; + + /* For each link, remove filters for IP of current link */ + for (i = 0; i < app->n_links; i++) { + 
struct app_link_params *p = &app->link_params[i]; + + /* IP */ + if (p->ip_local_q != 0) { + int status = app_link_filter_ip_del(p, cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 + "): Deleting IP filter " + "(queue = %" PRIu32 ", IP = 0x%" PRIx32 ")", + p->name, p->pmd_id, p->ip_local_q, cp->ip); + + if (status) + rte_panic("%s (%" PRIu32 + "): Error deleting IP filter " + "(queue = %" PRIu32 + ", IP = 0x%" PRIx32 + ") (%" PRId32 ")\n", + p->name, p->pmd_id, p->ip_local_q, + cp->ip, status); + } + + /* TCP */ + if (p->tcp_local_q != 0) { + int status = app_link_filter_tcp_del(p, cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 + "): Deleting TCP filter " + "(queue = %" PRIu32 + ", IP = 0x%" PRIx32 ")", + p->name, p->pmd_id, p->tcp_local_q, cp->ip); + + if (status) + rte_panic("%s (%" PRIu32 + "): Error deleting TCP filter " + "(queue = %" PRIu32 + ", IP = 0x%" PRIx32 + ") (%" PRId32 ")\n", + p->name, p->pmd_id, p->tcp_local_q, + cp->ip, status); + } + + /* UDP */ + if (p->udp_local_q != 0) { + int status = app_link_filter_udp_del(p, cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 + "): Deleting UDP filter " + "(queue = %" PRIu32 ", IP = 0x%" PRIx32 ")", + p->name, p->pmd_id, p->udp_local_q, cp->ip); + + if (status) + rte_panic("%s (%" PRIu32 + "): Error deleting UDP filter " + "(queue = %" PRIu32 + ", IP = 0x%" PRIx32 + ") (%" PRId32 ")\n", + p->name, p->pmd_id, p->udp_local_q, + cp->ip, status); + } + + /* SCTP */ + if (p->sctp_local_q != 0) { + int status = app_link_filter_sctp_del(p, cp); + + APP_LOG(app, LOW, "%s (%" PRIu32 + "): Deleting SCTP filter " + "(queue = %" PRIu32 + ", IP = 0x%" PRIx32 ")", + p->name, p->pmd_id, p->sctp_local_q, cp->ip); + + if (status) + rte_panic("%s (%" PRIu32 + "): Error deleting SCTP filter " + "(queue = %" PRIu32 + ", IP = 0x%" PRIx32 + ") (%" PRId32 ")\n", + p->name, p->pmd_id, p->sctp_local_q, + cp->ip, status); + } + } +} + +static void +app_check_link(struct app_params *app) +{ + uint32_t all_links_up, i; + + all_links_up = 1; + + for (i = 
0; i < app->n_links; i++) { + struct app_link_params *p = &app->link_params[i]; + struct rte_eth_link link_params; + + memset(&link_params, 0, sizeof(link_params)); + rte_eth_link_get(p->pmd_id, &link_params); + + APP_LOG(app, HIGH, "%s (%" PRIu32 ") (%" PRIu32 " Gbps) %s", + p->name, + p->pmd_id, + link_params.link_speed / 1000, + link_params.link_status ? "UP" : "DOWN"); + + if (link_params.link_status == ETH_LINK_DOWN) + all_links_up = 0; + } + + if (all_links_up == 0) + rte_panic("Some links are DOWN\n"); +} + +static uint32_t +is_any_swq_frag_or_ras(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_swq; i++) { + struct app_pktq_swq_params *p = &app->swq_params[i]; + + if ((p->ipv4_frag == 1) || (p->ipv6_frag == 1) || + (p->ipv4_ras == 1) || (p->ipv6_ras == 1)) + return 1; + } + + return 0; +} + +static void +app_init_link_frag_ras(struct app_params *app) +{ + uint32_t i; + + if (is_any_swq_frag_or_ras(app)) { + for (i = 0; i < app->n_pktq_hwq_out; i++) { + struct app_pktq_hwq_out_params *p_txq = &app->hwq_out_params[i]; + + p_txq->conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + } + } +} + +static inline int +app_get_cpu_socket_id(uint32_t pmd_id) +{ + int status = rte_eth_dev_socket_id(pmd_id); + + return (status != SOCKET_ID_ANY) ? 
status : 0; +} + +static void +app_init_link(struct app_params *app) +{ + uint32_t i; + + app_init_link_frag_ras(app); + + for (i = 0; i < app->n_links; i++) { + struct app_link_params *p_link = &app->link_params[i]; + uint32_t link_id, n_hwq_in, n_hwq_out, j; + int status; + + sscanf(p_link->name, "LINK%" PRIu32, &link_id); + n_hwq_in = app_link_get_n_rxq(app, p_link); + n_hwq_out = app_link_get_n_txq(app, p_link); + + APP_LOG(app, HIGH, "Initializing %s (%" PRIu32") " + "(%" PRIu32 " RXQ, %" PRIu32 " TXQ) ...", + p_link->name, + p_link->pmd_id, + n_hwq_in, + n_hwq_out); + + /* LINK */ + status = rte_eth_dev_configure( + p_link->pmd_id, + n_hwq_in, + n_hwq_out, + &p_link->conf); + if (status < 0) + rte_panic("%s (%" PRId32 "): " + "init error (%" PRId32 ")\n", + p_link->name, p_link->pmd_id, status); + + rte_eth_macaddr_get(p_link->pmd_id, + (struct ether_addr *) &p_link->mac_addr); + + if (p_link->promisc) + rte_eth_promiscuous_enable(p_link->pmd_id); + + /* RXQ */ + for (j = 0; j < app->n_pktq_hwq_in; j++) { + struct app_pktq_hwq_in_params *p_rxq = + &app->hwq_in_params[j]; + uint32_t rxq_link_id, rxq_queue_id; + + sscanf(p_rxq->name, "RXQ%" PRIu32 ".%" PRIu32, + &rxq_link_id, &rxq_queue_id); + if (rxq_link_id != link_id) + continue; + + status = rte_eth_rx_queue_setup( + p_link->pmd_id, + rxq_queue_id, + p_rxq->size, + app_get_cpu_socket_id(p_link->pmd_id), + &p_rxq->conf, + app->mempool[p_rxq->mempool_id]); + if (status < 0) + rte_panic("%s (%" PRIu32 "): " + "%s init error (%" PRId32 ")\n", + p_link->name, + p_link->pmd_id, + p_rxq->name, + status); + } + + /* TXQ */ + for (j = 0; j < app->n_pktq_hwq_out; j++) { + struct app_pktq_hwq_out_params *p_txq = + &app->hwq_out_params[j]; + uint32_t txq_link_id, txq_queue_id; + + sscanf(p_txq->name, "TXQ%" PRIu32 ".%" PRIu32, + &txq_link_id, &txq_queue_id); + if (txq_link_id != link_id) + continue; + + status = rte_eth_tx_queue_setup( + p_link->pmd_id, + txq_queue_id, + p_txq->size, + 
app_get_cpu_socket_id(p_link->pmd_id), + &p_txq->conf); + if (status < 0) + rte_panic("%s (%" PRIu32 "): " + "%s init error (%" PRId32 ")\n", + p_link->name, + p_link->pmd_id, + p_txq->name, + status); + } + + /* LINK START */ + status = rte_eth_dev_start(p_link->pmd_id); + if (status < 0) + rte_panic("Cannot start %s (error %" PRId32 ")\n", + p_link->name, status); + + /* LINK UP */ + app_link_set_arp_filter(app, p_link); + app_link_set_tcp_syn_filter(app, p_link); + app_link_up_internal(app, p_link); + } + + app_check_link(app); +} + +static void +app_init_swq(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_swq; i++) { + struct app_pktq_swq_params *p = &app->swq_params[i]; + unsigned flags = 0; + + if (app_swq_get_readers(app, p) == 1) + flags |= RING_F_SC_DEQ; + if (app_swq_get_writers(app, p) == 1) + flags |= RING_F_SP_ENQ; + + APP_LOG(app, HIGH, "Initializing %s...", p->name); + app->swq[i] = rte_ring_create( + p->name, + p->size, + p->cpu_socket_id, + flags); + + if (app->swq[i] == NULL) + rte_panic("%s init error\n", p->name); + } +} + +static void +app_init_tm(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_pktq_tm; i++) { + struct app_pktq_tm_params *p_tm = &app->tm_params[i]; + struct app_link_params *p_link; + struct rte_eth_link link_eth_params; + struct rte_sched_port *sched; + uint32_t n_subports, subport_id; + int status; + + p_link = app_get_link_for_tm(app, p_tm); + /* LINK */ + rte_eth_link_get(p_link->pmd_id, &link_eth_params); + + /* TM */ + p_tm->sched_port_params.name = p_tm->name; + p_tm->sched_port_params.socket = + app_get_cpu_socket_id(p_link->pmd_id); + p_tm->sched_port_params.rate = + (uint64_t) link_eth_params.link_speed * 1000 * 1000 / 8; + + APP_LOG(app, HIGH, "Initializing %s ...", p_tm->name); + sched = rte_sched_port_config(&p_tm->sched_port_params); + if (sched == NULL) + rte_panic("%s init error\n", p_tm->name); + app->tm[i] = sched; + + /* Subport */ + n_subports = 
p_tm->sched_port_params.n_subports_per_port; + for (subport_id = 0; subport_id < n_subports; subport_id++) { + uint32_t n_pipes_per_subport, pipe_id; + + status = rte_sched_subport_config(sched, + subport_id, + &p_tm->sched_subport_params[subport_id]); + if (status) + rte_panic("%s subport %" PRIu32 + " init error (%" PRId32 ")\n", + p_tm->name, subport_id, status); + + /* Pipe */ + n_pipes_per_subport = + p_tm->sched_port_params.n_pipes_per_subport; + for (pipe_id = 0; + pipe_id < n_pipes_per_subport; + pipe_id++) { + int profile_id = p_tm->sched_pipe_to_profile[ + subport_id * APP_MAX_SCHED_PIPES + + pipe_id]; + + if (profile_id == -1) + continue; + + status = rte_sched_pipe_config(sched, + subport_id, + pipe_id, + profile_id); + if (status) + rte_panic("%s subport %" PRIu32 + " pipe %" PRIu32 + " (profile %" PRId32 ") " + "init error (% " PRId32 ")\n", + p_tm->name, subport_id, pipe_id, + profile_id, status); + } + } + } +} + +static void +app_init_msgq(struct app_params *app) +{ + uint32_t i; + + for (i = 0; i < app->n_msgq; i++) { + struct app_msgq_params *p = &app->msgq_params[i]; + + APP_LOG(app, HIGH, "Initializing %s ...", p->name); + app->msgq[i] = rte_ring_create( + p->name, + p->size, + p->cpu_socket_id, + RING_F_SP_ENQ | RING_F_SC_DEQ); + + if (app->msgq[i] == NULL) + rte_panic("%s init error\n", p->name); + } +} + +static void app_pipeline_params_get(struct app_params *app, + struct app_pipeline_params *p_in, + struct pipeline_params *p_out) +{ + uint32_t i; + uint32_t mempool_id; + + snprintf(p_out->name, PIPELINE_NAME_SIZE, "%s", p_in->name); + + p_out->socket_id = (int) p_in->socket_id; + + p_out->log_level = app->log_level; + + /* pktq_in */ + p_out->n_ports_in = p_in->n_pktq_in; + for (i = 0; i < p_in->n_pktq_in; i++) { + struct app_pktq_in_params *in = &p_in->pktq_in[i]; + struct pipeline_port_in_params *out = &p_out->port_in[i]; + + switch (in->type) { + case APP_PKTQ_IN_HWQ: + { + struct app_pktq_hwq_in_params *p_hwq_in = + 
&app->hwq_in_params[in->id]; + struct app_link_params *p_link = + app_get_link_for_rxq(app, p_hwq_in); + uint32_t rxq_link_id, rxq_queue_id; + + sscanf(p_hwq_in->name, "RXQ%" SCNu32 ".%" SCNu32, + &rxq_link_id, + &rxq_queue_id); + + out->type = PIPELINE_PORT_IN_ETHDEV_READER; + out->params.ethdev.port_id = p_link->pmd_id; + out->params.ethdev.queue_id = rxq_queue_id; + out->burst_size = p_hwq_in->burst; + break; + } + case APP_PKTQ_IN_SWQ: + { + struct app_pktq_swq_params *swq_params = &app->swq_params[in->id]; + + if ((swq_params->ipv4_frag == 0) && (swq_params->ipv6_frag == 0)) { + if (app_swq_get_readers(app, swq_params) == 1) { + out->type = PIPELINE_PORT_IN_RING_READER; + out->params.ring.ring = app->swq[in->id]; + out->burst_size = app->swq_params[in->id].burst_read; + } else { + out->type = PIPELINE_PORT_IN_RING_MULTI_READER; + out->params.ring_multi.ring = app->swq[in->id]; + out->burst_size = swq_params->burst_read; + } + } else { + if (swq_params->ipv4_frag == 1) { + struct rte_port_ring_reader_ipv4_frag_params *params = + &out->params.ring_ipv4_frag; + + out->type = PIPELINE_PORT_IN_RING_READER_IPV4_FRAG; + params->ring = app->swq[in->id]; + params->mtu = swq_params->mtu; + params->metadata_size = swq_params->metadata_size; + params->pool_direct = + app->mempool[swq_params->mempool_direct_id]; + params->pool_indirect = + app->mempool[swq_params->mempool_indirect_id]; + out->burst_size = swq_params->burst_read; + } else { + struct rte_port_ring_reader_ipv6_frag_params *params = + &out->params.ring_ipv6_frag; + + out->type = PIPELINE_PORT_IN_RING_READER_IPV6_FRAG; + params->ring = app->swq[in->id]; + params->mtu = swq_params->mtu; + params->metadata_size = swq_params->metadata_size; + params->pool_direct = + app->mempool[swq_params->mempool_direct_id]; + params->pool_indirect = + app->mempool[swq_params->mempool_indirect_id]; + out->burst_size = swq_params->burst_read; + } + } + break; + } + case APP_PKTQ_IN_TM: + out->type = PIPELINE_PORT_IN_SCHED_READER; 
+ out->params.sched.sched = app->tm[in->id]; + out->burst_size = app->tm_params[in->id].burst_read; + break; + case APP_PKTQ_IN_SOURCE: + mempool_id = app->source_params[in->id].mempool_id; + out->type = PIPELINE_PORT_IN_SOURCE; + out->params.source.mempool = app->mempool[mempool_id]; + out->burst_size = app->source_params[in->id].burst; + +#ifdef RTE_NEXT_ABI + if (app->source_params[in->id].file_name + != NULL) { + out->params.source.file_name = strdup( + app->source_params[in->id]. + file_name); + if (out->params.source.file_name == NULL) { + out->params.source. + n_bytes_per_pkt = 0; + break; + } + out->params.source.n_bytes_per_pkt = + app->source_params[in->id]. + n_bytes_per_pkt; + } +#endif + + break; + default: + break; + } + } + + /* pktq_out */ + p_out->n_ports_out = p_in->n_pktq_out; + for (i = 0; i < p_in->n_pktq_out; i++) { + struct app_pktq_out_params *in = &p_in->pktq_out[i]; + struct pipeline_port_out_params *out = &p_out->port_out[i]; + + switch (in->type) { + case APP_PKTQ_OUT_HWQ: + { + struct app_pktq_hwq_out_params *p_hwq_out = + &app->hwq_out_params[in->id]; + struct app_link_params *p_link = + app_get_link_for_txq(app, p_hwq_out); + uint32_t txq_link_id, txq_queue_id; + + sscanf(p_hwq_out->name, + "TXQ%" SCNu32 ".%" SCNu32, + &txq_link_id, + &txq_queue_id); + + if (p_hwq_out->dropless == 0) { + struct rte_port_ethdev_writer_params *params = + &out->params.ethdev; + + out->type = PIPELINE_PORT_OUT_ETHDEV_WRITER; + params->port_id = p_link->pmd_id; + params->queue_id = txq_queue_id; + params->tx_burst_sz = + app->hwq_out_params[in->id].burst; + } else { + struct rte_port_ethdev_writer_nodrop_params + *params = &out->params.ethdev_nodrop; + + out->type = + PIPELINE_PORT_OUT_ETHDEV_WRITER_NODROP; + params->port_id = p_link->pmd_id; + params->queue_id = txq_queue_id; + params->tx_burst_sz = p_hwq_out->burst; + params->n_retries = p_hwq_out->n_retries; + } + break; + } + case APP_PKTQ_OUT_SWQ: + { + struct app_pktq_swq_params *swq_params = 
&app->swq_params[in->id]; + + if ((swq_params->ipv4_ras == 0) && (swq_params->ipv6_ras == 0)) { + if (app_swq_get_writers(app, swq_params) == 1) { + if (app->swq_params[in->id].dropless == 0) { + struct rte_port_ring_writer_params *params = + &out->params.ring; + + out->type = PIPELINE_PORT_OUT_RING_WRITER; + params->ring = app->swq[in->id]; + params->tx_burst_sz = + app->swq_params[in->id].burst_write; + } else { + struct rte_port_ring_writer_nodrop_params + *params = &out->params.ring_nodrop; + + out->type = + PIPELINE_PORT_OUT_RING_WRITER_NODROP; + params->ring = app->swq[in->id]; + params->tx_burst_sz = + app->swq_params[in->id].burst_write; + params->n_retries = + app->swq_params[in->id].n_retries; + } + } else { + if (swq_params->dropless == 0) { + struct rte_port_ring_multi_writer_params *params = + &out->params.ring_multi; + + out->type = PIPELINE_PORT_OUT_RING_MULTI_WRITER; + params->ring = app->swq[in->id]; + params->tx_burst_sz = swq_params->burst_write; + } else { + struct rte_port_ring_multi_writer_nodrop_params + *params = &out->params.ring_multi_nodrop; + + out->type = PIPELINE_PORT_OUT_RING_MULTI_WRITER_NODROP; + params->ring = app->swq[in->id]; + params->tx_burst_sz = swq_params->burst_write; + params->n_retries = swq_params->n_retries; + } + } + } else { + if (swq_params->ipv4_ras == 1) { + struct rte_port_ring_writer_ipv4_ras_params *params = + &out->params.ring_ipv4_ras; + + out->type = PIPELINE_PORT_OUT_RING_WRITER_IPV4_RAS; + params->ring = app->swq[in->id]; + params->tx_burst_sz = swq_params->burst_write; + } else { + struct rte_port_ring_writer_ipv6_ras_params *params = + &out->params.ring_ipv6_ras; + + out->type = PIPELINE_PORT_OUT_RING_WRITER_IPV6_RAS; + params->ring = app->swq[in->id]; + params->tx_burst_sz = swq_params->burst_write; + } + } + break; + } + case APP_PKTQ_OUT_TM: { + struct rte_port_sched_writer_params *params = + &out->params.sched; + + out->type = PIPELINE_PORT_OUT_SCHED_WRITER; + params->sched = app->tm[in->id]; + 
params->tx_burst_sz = + app->tm_params[in->id].burst_write; + break; + } + case APP_PKTQ_OUT_SINK: + out->type = PIPELINE_PORT_OUT_SINK; + if (app->sink_params[in->id].file_name != NULL) { + out->params.sink.file_name = strdup( + app->sink_params[in->id]. + file_name); + if (out->params.sink.file_name == NULL) { + out->params.sink.max_n_pkts = 0; + break; + } + out->params.sink.max_n_pkts = + app->sink_params[in->id]. + n_pkts_to_dump; + } else { + out->params.sink.file_name = NULL; + out->params.sink.max_n_pkts = 0; + } + break; + default: + break; + } + } + + /* msgq */ + p_out->n_msgq = p_in->n_msgq_in; + + for (i = 0; i < p_in->n_msgq_in; i++) + p_out->msgq_in[i] = app->msgq[p_in->msgq_in[i]]; + + for (i = 0; i < p_in->n_msgq_out; i++) + p_out->msgq_out[i] = app->msgq[p_in->msgq_out[i]]; + + /* args */ + p_out->n_args = p_in->n_args; + for (i = 0; i < p_in->n_args; i++) { + p_out->args_name[i] = p_in->args_name[i]; + p_out->args_value[i] = p_in->args_value[i]; + } +} + +static void +app_init_pipelines(struct app_params *app) +{ + uint32_t p_id; + + for (p_id = 0; p_id < app->n_pipelines; p_id++) { + struct app_pipeline_params *params = + &app->pipeline_params[p_id]; + struct app_pipeline_data *data = &app->pipeline_data[p_id]; + struct pipeline_type *ptype; + struct pipeline_params pp; + + APP_LOG(app, HIGH, "Initializing %s ...", params->name); + + ptype = app_pipeline_type_find(app, params->type); + if (ptype == NULL) + rte_panic("Init error: Unknown pipeline type \"%s\"\n", + params->type); + + app_pipeline_params_get(app, params, &pp); + + /* Back-end */ + data->be = NULL; + if (ptype->be_ops->f_init) { + data->be = ptype->be_ops->f_init(&pp, (void *) app); + + if (data->be == NULL) + rte_panic("Pipeline instance \"%s\" back-end " + "init error\n", params->name); + } + + /* Front-end */ + data->fe = NULL; + if (ptype->fe_ops->f_init) { + data->fe = ptype->fe_ops->f_init(&pp, (void *) app); + + if (data->fe == NULL) + rte_panic("Pipeline instance \"%s\" 
front-end " + "init error\n", params->name); + } + + data->ptype = ptype; + + data->timer_period = (rte_get_tsc_hz() * + params->timer_period) / 100; + } +} + +static void +app_init_threads(struct app_params *app) +{ + uint64_t time = rte_get_tsc_cycles(); + uint32_t p_id; + + for (p_id = 0; p_id < app->n_pipelines; p_id++) { + struct app_pipeline_params *params = + &app->pipeline_params[p_id]; + struct app_pipeline_data *data = &app->pipeline_data[p_id]; + struct pipeline_type *ptype; + struct app_thread_data *t; + struct app_thread_pipeline_data *p; + int lcore_id; + + lcore_id = cpu_core_map_get_lcore_id(app->core_map, + params->socket_id, + params->core_id, + params->hyper_th_id); + + if (lcore_id < 0) + rte_panic("Invalid core s%" PRIu32 "c%" PRIu32 "%s\n", + params->socket_id, + params->core_id, + (params->hyper_th_id) ? "h" : ""); + + t = &app->thread_data[lcore_id]; + + t->timer_period = (rte_get_tsc_hz() * APP_THREAD_TIMER_PERIOD) / 1000; + t->thread_req_deadline = time + t->timer_period; + + t->headroom_cycles = 0; + t->headroom_time = rte_get_tsc_cycles(); + t->headroom_ratio = 0.0; + + t->msgq_in = app_thread_msgq_in_get(app, + params->socket_id, + params->core_id, + params->hyper_th_id); + if (t->msgq_in == NULL) + rte_panic("Init error: Cannot find MSGQ_IN for thread %" PRId32, + lcore_id); + + t->msgq_out = app_thread_msgq_out_get(app, + params->socket_id, + params->core_id, + params->hyper_th_id); + if (t->msgq_out == NULL) + rte_panic("Init error: Cannot find MSGQ_OUT for thread %" PRId32, + lcore_id); + + ptype = app_pipeline_type_find(app, params->type); + if (ptype == NULL) + rte_panic("Init error: Unknown pipeline " + "type \"%s\"\n", params->type); + + p = (ptype->be_ops->f_run == NULL) ? 
+ &t->regular[t->n_regular] : + &t->custom[t->n_custom]; + + p->pipeline_id = p_id; + p->be = data->be; + p->f_run = ptype->be_ops->f_run; + p->f_timer = ptype->be_ops->f_timer; + p->timer_period = data->timer_period; + p->deadline = time + data->timer_period; + + data->enabled = 1; + + if (ptype->be_ops->f_run == NULL) + t->n_regular++; + else + t->n_custom++; + } +} + +int app_init(struct app_params *app) +{ + app_init_core_map(app); + app_init_core_mask(app); + + app_init_eal(app); + app_init_mempool(app); + app_init_link(app); + app_init_swq(app); + app_init_tm(app); + app_init_msgq(app); + + app_pipeline_common_cmd_push(app); + app_pipeline_thread_cmd_push(app); + app_pipeline_type_register(app, &pipeline_master); + app_pipeline_type_register(app, &pipeline_passthrough); + app_pipeline_type_register(app, &pipeline_flow_classification); + app_pipeline_type_register(app, &pipeline_flow_actions); + app_pipeline_type_register(app, &pipeline_firewall); + app_pipeline_type_register(app, &pipeline_routing); + + app_init_pipelines(app); + app_init_threads(app); + + return 0; +} + +static int +app_pipeline_type_cmd_push(struct app_params *app, + struct pipeline_type *ptype) +{ + cmdline_parse_ctx_t *cmds; + uint32_t n_cmds, i; + + /* Check input arguments */ + if ((app == NULL) || + (ptype == NULL)) + return -EINVAL; + + n_cmds = pipeline_type_cmds_count(ptype); + if (n_cmds == 0) + return 0; + + cmds = ptype->fe_ops->cmds; + + /* Check for available slots in the application commands array */ + if (n_cmds > APP_MAX_CMDS - app->n_cmds) + return -ENOMEM; + + /* Push pipeline commands into the application */ + memcpy(&app->cmds[app->n_cmds], + cmds, + n_cmds * sizeof(cmdline_parse_ctx_t)); + + for (i = 0; i < n_cmds; i++) + app->cmds[app->n_cmds + i]->data = app; + + app->n_cmds += n_cmds; + app->cmds[app->n_cmds] = NULL; + + return 0; +} + +int +app_pipeline_type_register(struct app_params *app, struct pipeline_type *ptype) +{ + uint32_t n_cmds, i; + + /* Check input 
arguments */ + if ((app == NULL) || + (ptype == NULL) || + (ptype->name == NULL) || + (strlen(ptype->name) == 0) || + (ptype->be_ops->f_init == NULL) || + (ptype->be_ops->f_timer == NULL)) + return -EINVAL; + + /* Check for duplicate entry */ + for (i = 0; i < app->n_pipeline_types; i++) + if (strcmp(app->pipeline_type[i].name, ptype->name) == 0) + return -EEXIST; + + /* Check for resource availability */ + n_cmds = pipeline_type_cmds_count(ptype); + if ((app->n_pipeline_types == APP_MAX_PIPELINE_TYPES) || + (n_cmds > APP_MAX_CMDS - app->n_cmds)) + return -ENOMEM; + + /* Copy pipeline type */ + memcpy(&app->pipeline_type[app->n_pipeline_types++], + ptype, + sizeof(struct pipeline_type)); + + /* Copy CLI commands */ + if (n_cmds) + app_pipeline_type_cmd_push(app, ptype); + + return 0; +} + +struct +pipeline_type *app_pipeline_type_find(struct app_params *app, char *name) +{ + uint32_t i; + + for (i = 0; i < app->n_pipeline_types; i++) + if (strcmp(app->pipeline_type[i].name, name) == 0) + return &app->pipeline_type[i]; + + return NULL; +} diff --git a/examples/ip_pipeline/main.c b/examples/ip_pipeline/main.c new file mode 100644 index 00000000..4944dcfb --- /dev/null +++ b/examples/ip_pipeline/main.c @@ -0,0 +1,64 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "app.h" + +static struct app_params app; + +int +main(int argc, char **argv) +{ + rte_openlog_stream(stderr); + + /* Config */ + app_config_init(&app); + + app_config_args(&app, argc, argv); + + app_config_preproc(&app); + + app_config_parse(&app, app.parser_file); + + app_config_check(&app); + + /* Init */ + app_init(&app); + + /* Run-time */ + rte_eal_mp_remote_launch( + app_thread, + (void *) &app, + CALL_MASTER); + + return 0; +} diff --git a/examples/ip_pipeline/parser.h b/examples/ip_pipeline/parser.h new file mode 100644 index 00000000..58b59daf --- /dev/null +++ b/examples/ip_pipeline/parser.h @@ -0,0 +1,50 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_PARSER_H__ +#define __INCLUDE_PARSER_H__ + +int +parser_read_arg_bool(const char *p); + +int +parser_read_uint64(uint64_t *value, const char *p); + +int +parser_read_uint32(uint32_t *value, const char *p); + +int +parse_hex_string(char *src, uint8_t *dst, uint32_t *size); + +#endif + diff --git a/examples/ip_pipeline/pipeline.h b/examples/ip_pipeline/pipeline.h new file mode 100644 index 00000000..dab9c36d --- /dev/null +++ b/examples/ip_pipeline/pipeline.h @@ -0,0 +1,93 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_PIPELINE_H__ +#define __INCLUDE_PIPELINE_H__ + +#include <cmdline_parse.h> + +#include "pipeline_be.h" + +/* + * Pipeline type front-end operations + */ + +typedef void* (*pipeline_fe_op_init)(struct pipeline_params *params, void *arg); + +typedef int (*pipeline_fe_op_free)(void *pipeline); + +struct pipeline_fe_ops { + pipeline_fe_op_init f_init; + pipeline_fe_op_free f_free; + cmdline_parse_ctx_t *cmds; +}; + +/* + * Pipeline type + */ + +struct pipeline_type { + const char *name; + + /* pipeline back-end */ + struct pipeline_be_ops *be_ops; + + /* pipeline front-end */ + struct pipeline_fe_ops *fe_ops; +}; + +static inline uint32_t +pipeline_type_cmds_count(struct pipeline_type *ptype) +{ + cmdline_parse_ctx_t *cmds; + uint32_t n_cmds; + + if (ptype->fe_ops == NULL) + return 0; + + cmds = ptype->fe_ops->cmds; + if (cmds == NULL) + return 0; + + for (n_cmds = 0; cmds[n_cmds]; n_cmds++); + + return n_cmds; +} + +int +parse_pipeline_core(uint32_t *socket, + uint32_t *core, + uint32_t *ht, + const char *entry); + +#endif diff --git a/examples/ip_pipeline/pipeline/hash_func.h b/examples/ip_pipeline/pipeline/hash_func.h new file mode 100644 index 00000000..9db7173f --- /dev/null +++ b/examples/ip_pipeline/pipeline/hash_func.h @@ -0,0 +1,351 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef __INCLUDE_HASH_FUNC_H__ +#define __INCLUDE_HASH_FUNC_H__ + +static inline uint64_t +hash_xor_key8(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t xor0; + + xor0 = seed ^ k[0]; + + return (xor0 >> 32) ^ xor0; +} + +static inline uint64_t +hash_xor_key16(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t xor0; + + xor0 = (k[0] ^ seed) ^ k[1]; + + return (xor0 >> 32) ^ xor0; +} + +static inline uint64_t +hash_xor_key24(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t xor0; + + xor0 = (k[0] ^ seed) ^ k[1]; + + xor0 ^= k[2]; + + return (xor0 >> 32) ^ xor0; +} + +static inline uint64_t +hash_xor_key32(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t xor0, xor1; + + xor0 = (k[0] ^ seed) ^ k[1]; + xor1 = k[2] ^ k[3]; + + xor0 ^= xor1; + + return (xor0 >> 32) ^ xor0; +} + +static inline uint64_t +hash_xor_key40(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t xor0, xor1; + + xor0 = (k[0] ^ seed) ^ k[1]; + xor1 = k[2] ^ k[3]; + + xor0 ^= xor1; + + xor0 ^= k[4]; + + return (xor0 >> 32) ^ xor0; +} + +static inline uint64_t +hash_xor_key48(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t xor0, xor1, xor2; + + xor0 = (k[0] ^ seed) ^ k[1]; + xor1 = k[2] ^ k[3]; + xor2 = k[4] ^ k[5]; + + xor0 ^= xor1; + + xor0 ^= xor2; + + return (xor0 >> 32) ^ xor0; +} + +static inline uint64_t +hash_xor_key56(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t xor0, xor1, xor2; + + xor0 = (k[0] ^ seed) ^ k[1]; + xor1 = k[2] ^ k[3]; + xor2 = k[4] ^ k[5]; + + xor0 ^= xor1; + xor2 ^= k[6]; + + xor0 ^= xor2; + + return (xor0 >> 32) ^ xor0; +} + +static inline uint64_t +hash_xor_key64(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t xor0, xor1, 
xor2, xor3; + + xor0 = (k[0] ^ seed) ^ k[1]; + xor1 = k[2] ^ k[3]; + xor2 = k[4] ^ k[5]; + xor3 = k[6] ^ k[7]; + + xor0 ^= xor1; + xor2 ^= xor3; + + xor0 ^= xor2; + + return (xor0 >> 32) ^ xor0; +} + +#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2) + +#include <x86intrin.h> + +static inline uint64_t +hash_crc_key8(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t crc0; + + crc0 = _mm_crc32_u64(seed, k[0]); + + return crc0; +} + +static inline uint64_t +hash_crc_key16(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t k0, crc0, crc1; + + k0 = k[0]; + + crc0 = _mm_crc32_u64(k0, seed); + crc1 = _mm_crc32_u64(k0 >> 32, k[1]); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t +hash_crc_key24(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t k0, k2, crc0, crc1; + + k0 = k[0]; + k2 = k[2]; + + crc0 = _mm_crc32_u64(k0, seed); + crc1 = _mm_crc32_u64(k0 >> 32, k[1]); + + crc0 = _mm_crc32_u64(crc0, k2); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t +hash_crc_key32(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t k0, k2, crc0, crc1, crc2, crc3; + + k0 = k[0]; + k2 = k[2]; + + crc0 = _mm_crc32_u64(k0, seed); + crc1 = _mm_crc32_u64(k0 >> 32, k[1]); + + crc2 = _mm_crc32_u64(k2, k[3]); + crc3 = k2 >> 32; + + crc0 = _mm_crc32_u64(crc0, crc1); + crc1 = _mm_crc32_u64(crc2, crc3); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t +hash_crc_key40(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t k0, k2, crc0, crc1, crc2, crc3; + + k0 = k[0]; + k2 = k[2]; + + crc0 = _mm_crc32_u64(k0, seed); + crc1 = _mm_crc32_u64(k0 >> 32, k[1]); + + crc2 = _mm_crc32_u64(k2, k[3]); + crc3 = _mm_crc32_u64(k2 >> 32, k[4]); + + crc0 = _mm_crc32_u64(crc0, crc1); + crc1 = _mm_crc32_u64(crc2, crc3); + + crc0 ^= crc1; + + return crc0; 
+} + +static inline uint64_t +hash_crc_key48(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3; + + k0 = k[0]; + k2 = k[2]; + k5 = k[5]; + + crc0 = _mm_crc32_u64(k0, seed); + crc1 = _mm_crc32_u64(k0 >> 32, k[1]); + + crc2 = _mm_crc32_u64(k2, k[3]); + crc3 = _mm_crc32_u64(k2 >> 32, k[4]); + + crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2); + crc1 = _mm_crc32_u64(crc3, k5); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t +hash_crc_key56(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5; + + k0 = k[0]; + k2 = k[2]; + k5 = k[5]; + + crc0 = _mm_crc32_u64(k0, seed); + crc1 = _mm_crc32_u64(k0 >> 32, k[1]); + + crc2 = _mm_crc32_u64(k2, k[3]); + crc3 = _mm_crc32_u64(k2 >> 32, k[4]); + + crc4 = _mm_crc32_u64(k5, k[6]); + crc5 = k5 >> 32; + + crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2); + crc1 = _mm_crc32_u64(crc3, (crc4 << 32) ^ crc5); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t +hash_crc_key64(void *key, __rte_unused uint32_t key_size, uint64_t seed) +{ + uint64_t *k = key; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5; + + k0 = k[0]; + k2 = k[2]; + k5 = k[5]; + + crc0 = _mm_crc32_u64(k0, seed); + crc1 = _mm_crc32_u64(k0 >> 32, k[1]); + + crc2 = _mm_crc32_u64(k2, k[3]); + crc3 = _mm_crc32_u64(k2 >> 32, k[4]); + + crc4 = _mm_crc32_u64(k5, k[6]); + crc5 = _mm_crc32_u64(k5 >> 32, k[7]); + + crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2); + crc1 = _mm_crc32_u64(crc3, (crc4 << 32) ^ crc5); + + crc0 ^= crc1; + + return crc0; +} + +#define hash_default_key8 hash_crc_key8 +#define hash_default_key16 hash_crc_key16 +#define hash_default_key24 hash_crc_key24 +#define hash_default_key32 hash_crc_key32 +#define hash_default_key40 hash_crc_key40 +#define hash_default_key48 hash_crc_key48 +#define hash_default_key56 hash_crc_key56 +#define hash_default_key64 
hash_crc_key64 + +#else + +#define hash_default_key8 hash_xor_key8 +#define hash_default_key16 hash_xor_key16 +#define hash_default_key24 hash_xor_key24 +#define hash_default_key32 hash_xor_key32 +#define hash_default_key40 hash_xor_key40 +#define hash_default_key48 hash_xor_key48 +#define hash_default_key56 hash_xor_key56 +#define hash_default_key64 hash_xor_key64 + +#endif + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_actions_common.h b/examples/ip_pipeline/pipeline/pipeline_actions_common.h new file mode 100644 index 00000000..ab08612d --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_actions_common.h @@ -0,0 +1,231 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __INCLUDE_PIPELINE_ACTIONS_COMMON_H__ +#define __INCLUDE_PIPELINE_ACTIONS_COMMON_H__ + +#include <stdint.h> + +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_mbuf.h> +#include <rte_pipeline.h> + +#define PIPELINE_PORT_IN_AH(f_ah, f_pkt_work, f_pkt4_work) \ +static int \ +f_ah( \ + __rte_unused struct rte_pipeline *p, \ + struct rte_mbuf **pkts, \ + uint32_t n_pkts, \ + void *arg) \ +{ \ + uint32_t i; \ + \ + for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) \ + f_pkt4_work(&pkts[i], arg); \ + \ + for ( ; i < n_pkts; i++) \ + f_pkt_work(pkts[i], arg); \ + \ + return 0; \ +} + +#define PIPELINE_PORT_IN_AH_HIJACK_ALL(f_ah, f_pkt_work, f_pkt4_work) \ +static int \ +f_ah( \ + struct rte_pipeline *p, \ + struct rte_mbuf **pkts, \ + uint32_t n_pkts, \ + void *arg) \ +{ \ + uint64_t pkt_mask = RTE_LEN2MASK(n_pkts, uint64_t); \ + uint32_t i; \ + \ + rte_pipeline_ah_packet_hijack(p, pkt_mask); \ + \ + for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) \ + f_pkt4_work(&pkts[i], arg); \ + \ + for ( ; i < n_pkts; i++) \ + f_pkt_work(pkts[i], arg); \ + \ + return 0; \ +} + +#define PIPELINE_TABLE_AH_HIT(f_ah, f_pkt_work, f_pkt4_work) \ +static int \ +f_ah( \ + __rte_unused struct rte_pipeline *p, \ + struct rte_mbuf **pkts, \ + uint64_t pkts_in_mask, \ + struct rte_pipeline_table_entry **entries, \ + void *arg) \ +{ \ + if ((pkts_in_mask & (pkts_in_mask + 1)) == 0) { \ + uint64_t n_pkts = __builtin_popcountll(pkts_in_mask); \ 
+ uint32_t i; \ + \ + for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) \ + f_pkt4_work(&pkts[i], &entries[i], arg); \ + \ + for ( ; i < n_pkts; i++) \ + f_pkt_work(pkts[i], entries[i], arg); \ + } else \ + for ( ; pkts_in_mask; ) { \ + uint32_t pos = __builtin_ctzll(pkts_in_mask); \ + uint64_t pkt_mask = 1LLU << pos; \ + \ + pkts_in_mask &= ~pkt_mask; \ + f_pkt_work(pkts[pos], entries[pos], arg); \ + } \ + \ + return 0; \ +} + +#define PIPELINE_TABLE_AH_MISS(f_ah, f_pkt_work, f_pkt4_work) \ +static int \ +f_ah( \ + __rte_unused struct rte_pipeline *p, \ + struct rte_mbuf **pkts, \ + uint64_t pkts_in_mask, \ + struct rte_pipeline_table_entry *entry, \ + void *arg) \ +{ \ + if ((pkts_in_mask & (pkts_in_mask + 1)) == 0) { \ + uint64_t n_pkts = __builtin_popcountll(pkts_in_mask); \ + uint32_t i; \ + \ + for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) \ + f_pkt4_work(&pkts[i], entry, arg); \ + \ + for ( ; i < n_pkts; i++) \ + f_pkt_work(pkts[i], entry, arg); \ + } else \ + for ( ; pkts_in_mask; ) { \ + uint32_t pos = __builtin_ctzll(pkts_in_mask); \ + uint64_t pkt_mask = 1LLU << pos; \ + \ + pkts_in_mask &= ~pkt_mask; \ + f_pkt_work(pkts[pos], entry, arg); \ + } \ + \ + return 0; \ +} + +#define PIPELINE_TABLE_AH_HIT_DROP_TIME(f_ah, f_pkt_work, f_pkt4_work) \ +static int \ +f_ah( \ + struct rte_pipeline *p, \ + struct rte_mbuf **pkts, \ + uint64_t pkts_mask, \ + struct rte_pipeline_table_entry **entries, \ + void *arg) \ +{ \ + uint64_t pkts_in_mask = pkts_mask; \ + uint64_t pkts_out_mask = pkts_mask; \ + uint64_t time = rte_rdtsc(); \ + \ + if ((pkts_in_mask & (pkts_in_mask + 1)) == 0) { \ + uint64_t n_pkts = __builtin_popcountll(pkts_in_mask); \ + uint32_t i; \ + \ + for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) { \ + uint64_t mask = f_pkt4_work(&pkts[i], \ + &entries[i], arg, time); \ + pkts_out_mask ^= mask << i; \ + } \ + \ + for ( ; i < n_pkts; i++) { \ + uint64_t mask = f_pkt_work(pkts[i], \ + entries[i], arg, time); \ + pkts_out_mask ^= mask << i; \ + } \ + } else \ 
+ for ( ; pkts_in_mask; ) { \ + uint32_t pos = __builtin_ctzll(pkts_in_mask); \ + uint64_t pkt_mask = 1LLU << pos; \ + uint64_t mask = f_pkt_work(pkts[pos], \ + entries[pos], arg, time); \ + \ + pkts_in_mask &= ~pkt_mask; \ + pkts_out_mask ^= mask << pos; \ + } \ + \ + rte_pipeline_ah_packet_drop(p, pkts_out_mask ^ pkts_mask); \ + \ + return 0; \ +} + +#define PIPELINE_TABLE_AH_MISS_DROP_TIME(f_ah, f_pkt_work, f_pkt4_work) \ +static int \ +f_ah( \ + struct rte_pipeline *p, \ + struct rte_mbuf **pkts, \ + uint64_t pkts_mask, \ + struct rte_pipeline_table_entry *entry, \ + void *arg) \ +{ \ + uint64_t pkts_in_mask = pkts_mask; \ + uint64_t pkts_out_mask = pkts_mask; \ + uint64_t time = rte_rdtsc(); \ + \ + if ((pkts_in_mask & (pkts_in_mask + 1)) == 0) { \ + uint64_t n_pkts = __builtin_popcountll(pkts_in_mask); \ + uint32_t i; \ + \ + for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) { \ + uint64_t mask = f_pkt4_work(&pkts[i], \ + entry, arg, time); \ + pkts_out_mask ^= mask << i; \ + } \ + \ + for ( ; i < n_pkts; i++) { \ + uint64_t mask = f_pkt_work(pkts[i], entry, arg, time);\ + pkts_out_mask ^= mask << i; \ + } \ + } else \ + for ( ; pkts_in_mask; ) { \ + uint32_t pos = __builtin_ctzll(pkts_in_mask); \ + uint64_t pkt_mask = 1LLU << pos; \ + uint64_t mask = f_pkt_work(pkts[pos], \ + entry, arg, time); \ + \ + pkts_in_mask &= ~pkt_mask; \ + pkts_out_mask ^= mask << pos; \ + } \ + \ + rte_pipeline_ah_packet_drop(p, pkts_out_mask ^ pkts_mask); \ + \ + return 0; \ +} + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_common_be.c b/examples/ip_pipeline/pipeline/pipeline_common_be.c new file mode 100644 index 00000000..50dcb694 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_common_be.c @@ -0,0 +1,206 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <rte_common.h> +#include <rte_ring.h> +#include <rte_malloc.h> + +#include "pipeline_common_be.h" + +void * +pipeline_msg_req_ping_handler(__rte_unused struct pipeline *p, + void *msg) +{ + struct pipeline_msg_rsp *rsp = msg; + + rsp->status = 0; /* OK */ + + return rsp; +} + +void * +pipeline_msg_req_stats_port_in_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_stats_msg_req *req = msg; + struct pipeline_stats_port_in_msg_rsp *rsp = msg; + uint32_t port_id; + + /* Check request */ + if (req->id >= p->n_ports_in) { + rsp->status = -1; + return rsp; + } + port_id = p->port_in_id[req->id]; + + /* Process request */ + rsp->status = rte_pipeline_port_in_stats_read(p->p, + port_id, + &rsp->stats, + 1); + + return rsp; +} + +void * +pipeline_msg_req_stats_port_out_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_stats_msg_req *req = msg; + struct pipeline_stats_port_out_msg_rsp *rsp = msg; + uint32_t port_id; + + /* Check request */ + if (req->id >= p->n_ports_out) { + rsp->status = -1; + return rsp; + } + port_id = p->port_out_id[req->id]; + + /* Process request */ + rsp->status = rte_pipeline_port_out_stats_read(p->p, + port_id, + &rsp->stats, + 1); + + return rsp; +} + +void * +pipeline_msg_req_stats_table_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_stats_msg_req *req = msg; + struct pipeline_stats_table_msg_rsp *rsp = msg; + uint32_t table_id; + + /* Check request */ + if (req->id >= p->n_tables) { + rsp->status = -1; + return rsp; + } + table_id = p->table_id[req->id]; + + /* Process request */ + rsp->status = rte_pipeline_table_stats_read(p->p, + table_id, + &rsp->stats, + 1); + + return rsp; +} + +void * +pipeline_msg_req_port_in_enable_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_port_in_msg_req *req = msg; + struct pipeline_msg_rsp *rsp = msg; + uint32_t port_id; + + /* Check request */ + if (req->port_id >= p->n_ports_in) { + rsp->status = -1; + return rsp; + } + port_id = 
p->port_in_id[req->port_id]; + + /* Process request */ + rsp->status = rte_pipeline_port_in_enable(p->p, + port_id); + + return rsp; +} + +void * +pipeline_msg_req_port_in_disable_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_port_in_msg_req *req = msg; + struct pipeline_msg_rsp *rsp = msg; + uint32_t port_id; + + /* Check request */ + if (req->port_id >= p->n_ports_in) { + rsp->status = -1; + return rsp; + } + port_id = p->port_in_id[req->port_id]; + + /* Process request */ + rsp->status = rte_pipeline_port_in_disable(p->p, + port_id); + + return rsp; +} + +void * +pipeline_msg_req_invalid_handler(__rte_unused struct pipeline *p, + void *msg) +{ + struct pipeline_msg_rsp *rsp = msg; + + rsp->status = -1; /* Error */ + + return rsp; +} + +int +pipeline_msg_req_handle(struct pipeline *p) +{ + uint32_t msgq_id; + + for (msgq_id = 0; msgq_id < p->n_msgq; msgq_id++) { + for ( ; ; ) { + struct pipeline_msg_req *req; + pipeline_msg_req_handler f_handle; + + req = pipeline_msg_recv(p, msgq_id); + if (req == NULL) + break; + + f_handle = (req->type < PIPELINE_MSG_REQS) ? + p->handlers[req->type] : + pipeline_msg_req_invalid_handler; + + if (f_handle == NULL) + f_handle = pipeline_msg_req_invalid_handler; + + pipeline_msg_send(p, + msgq_id, + f_handle(p, (void *) req)); + } + } + + return 0; +} diff --git a/examples/ip_pipeline/pipeline/pipeline_common_be.h b/examples/ip_pipeline/pipeline/pipeline_common_be.h new file mode 100644 index 00000000..07fdca09 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_common_be.h @@ -0,0 +1,163 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_PIPELINE_COMMON_BE_H__ +#define __INCLUDE_PIPELINE_COMMON_BE_H__ + +#include <rte_common.h> +#include <rte_ring.h> +#include <rte_pipeline.h> + +#include "pipeline_be.h" + +struct pipeline; + +enum pipeline_msg_req_type { + PIPELINE_MSG_REQ_PING = 0, + PIPELINE_MSG_REQ_STATS_PORT_IN, + PIPELINE_MSG_REQ_STATS_PORT_OUT, + PIPELINE_MSG_REQ_STATS_TABLE, + PIPELINE_MSG_REQ_PORT_IN_ENABLE, + PIPELINE_MSG_REQ_PORT_IN_DISABLE, + PIPELINE_MSG_REQ_CUSTOM, + PIPELINE_MSG_REQS +}; + +typedef void *(*pipeline_msg_req_handler)(struct pipeline *p, void *msg); + +struct pipeline { + struct rte_pipeline *p; + uint32_t port_in_id[PIPELINE_MAX_PORT_IN]; + uint32_t port_out_id[PIPELINE_MAX_PORT_OUT]; + uint32_t table_id[PIPELINE_MAX_TABLES]; + struct rte_ring *msgq_in[PIPELINE_MAX_MSGQ_IN]; + struct rte_ring *msgq_out[PIPELINE_MAX_MSGQ_OUT]; + + uint32_t n_ports_in; + uint32_t n_ports_out; + uint32_t n_tables; + uint32_t n_msgq; + + pipeline_msg_req_handler handlers[PIPELINE_MSG_REQS]; + char name[PIPELINE_NAME_SIZE]; + uint32_t log_level; +}; + +enum pipeline_log_level { + PIPELINE_LOG_LEVEL_HIGH = 1, + PIPELINE_LOG_LEVEL_LOW, + PIPELINE_LOG_LEVELS +}; + +#define PLOG(p, level, fmt, ...) 
\ +do { \ + if (p->log_level >= PIPELINE_LOG_LEVEL_ ## level) \ + fprintf(stdout, "[%s] " fmt "\n", p->name, ## __VA_ARGS__);\ +} while (0) + +static inline void * +pipeline_msg_recv(struct pipeline *p, + uint32_t msgq_id) +{ + struct rte_ring *r = p->msgq_in[msgq_id]; + void *msg; + int status = rte_ring_sc_dequeue(r, &msg); + + if (status != 0) + return NULL; + + return msg; +} + +static inline void +pipeline_msg_send(struct pipeline *p, + uint32_t msgq_id, + void *msg) +{ + struct rte_ring *r = p->msgq_out[msgq_id]; + int status; + + do { + status = rte_ring_sp_enqueue(r, msg); + } while (status == -ENOBUFS); +} + +struct pipeline_msg_req { + enum pipeline_msg_req_type type; +}; + +struct pipeline_stats_msg_req { + enum pipeline_msg_req_type type; + uint32_t id; +}; + +struct pipeline_port_in_msg_req { + enum pipeline_msg_req_type type; + uint32_t port_id; +}; + +struct pipeline_custom_msg_req { + enum pipeline_msg_req_type type; + uint32_t subtype; +}; + +struct pipeline_msg_rsp { + int status; +}; + +struct pipeline_stats_port_in_msg_rsp { + int status; + struct rte_pipeline_port_in_stats stats; +}; + +struct pipeline_stats_port_out_msg_rsp { + int status; + struct rte_pipeline_port_out_stats stats; +}; + +struct pipeline_stats_table_msg_rsp { + int status; + struct rte_pipeline_table_stats stats; +}; + +void *pipeline_msg_req_ping_handler(struct pipeline *p, void *msg); +void *pipeline_msg_req_stats_port_in_handler(struct pipeline *p, void *msg); +void *pipeline_msg_req_stats_port_out_handler(struct pipeline *p, void *msg); +void *pipeline_msg_req_stats_table_handler(struct pipeline *p, void *msg); +void *pipeline_msg_req_port_in_enable_handler(struct pipeline *p, void *msg); +void *pipeline_msg_req_port_in_disable_handler(struct pipeline *p, void *msg); +void *pipeline_msg_req_invalid_handler(struct pipeline *p, void *msg); + +int pipeline_msg_req_handle(struct pipeline *p); + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_common_fe.c 
b/examples/ip_pipeline/pipeline/pipeline_common_fe.c new file mode 100644 index 00000000..a691d422 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_common_fe.c @@ -0,0 +1,1310 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_ring.h> +#include <rte_malloc.h> +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_ipaddr.h> +#include <cmdline_parse_etheraddr.h> +#include <cmdline_socket.h> +#include <cmdline.h> + +#include "pipeline_common_fe.h" + +int +app_pipeline_ping(struct app_params *app, + uint32_t pipeline_id) +{ + struct app_pipeline_params *p; + struct pipeline_msg_req *req; + struct pipeline_msg_rsp *rsp; + int status = 0; + + /* Check input arguments */ + if (app == NULL) + return -1; + + APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p); + if (p == NULL) + return -1; + + /* Message buffer allocation */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + /* Fill in request */ + req->type = PIPELINE_MSG_REQ_PING; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Check response */ + status = rsp->status; + + /* Message buffer free */ + app_msg_free(app, rsp); + + return status; +} + +int +app_pipeline_stats_port_in(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id, + struct rte_pipeline_port_in_stats *stats) +{ + struct app_pipeline_params *p; + struct pipeline_stats_msg_req *req; + struct pipeline_stats_port_in_msg_rsp *rsp; + int status = 0; + + /* Check input arguments */ + if ((app == NULL) || + (stats == NULL)) + return -1; + + APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p); + if ((p == NULL) || + (port_id >= p->n_pktq_in)) + return -1; + + /* Message buffer allocation */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + /* Fill in request */ + req->type = PIPELINE_MSG_REQ_STATS_PORT_IN; + req->id = port_id; + + /* Send request and wait for response */ + rsp = (struct 
pipeline_stats_port_in_msg_rsp *) + app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Check response */ + status = rsp->status; + if (status == 0) + memcpy(stats, &rsp->stats, sizeof(rsp->stats)); + + /* Message buffer free */ + app_msg_free(app, rsp); + + return status; +} + +int +app_pipeline_stats_port_out(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id, + struct rte_pipeline_port_out_stats *stats) +{ + struct app_pipeline_params *p; + struct pipeline_stats_msg_req *req; + struct pipeline_stats_port_out_msg_rsp *rsp; + int status = 0; + + /* Check input arguments */ + if ((app == NULL) || + (pipeline_id >= app->n_pipelines) || + (stats == NULL)) + return -1; + + APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p); + if ((p == NULL) || + (port_id >= p->n_pktq_out)) + return -1; + + /* Message buffer allocation */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + /* Fill in request */ + req->type = PIPELINE_MSG_REQ_STATS_PORT_OUT; + req->id = port_id; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Check response */ + status = rsp->status; + if (status == 0) + memcpy(stats, &rsp->stats, sizeof(rsp->stats)); + + /* Message buffer free */ + app_msg_free(app, rsp); + + return status; +} + +int +app_pipeline_stats_table(struct app_params *app, + uint32_t pipeline_id, + uint32_t table_id, + struct rte_pipeline_table_stats *stats) +{ + struct app_pipeline_params *p; + struct pipeline_stats_msg_req *req; + struct pipeline_stats_table_msg_rsp *rsp; + int status = 0; + + /* Check input arguments */ + if ((app == NULL) || + (stats == NULL)) + return -1; + + APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p); + if (p == NULL) + return -1; + + /* Message buffer allocation */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + /* Fill in 
request */ + req->type = PIPELINE_MSG_REQ_STATS_TABLE; + req->id = table_id; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Check response */ + status = rsp->status; + if (status == 0) + memcpy(stats, &rsp->stats, sizeof(rsp->stats)); + + /* Message buffer free */ + app_msg_free(app, rsp); + + return status; +} + +int +app_pipeline_port_in_enable(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id) +{ + struct app_pipeline_params *p; + struct pipeline_port_in_msg_req *req; + struct pipeline_msg_rsp *rsp; + int status = 0; + + /* Check input arguments */ + if (app == NULL) + return -1; + + APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p); + if ((p == NULL) || + (port_id >= p->n_pktq_in)) + return -1; + + /* Message buffer allocation */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + /* Fill in request */ + req->type = PIPELINE_MSG_REQ_PORT_IN_ENABLE; + req->port_id = port_id; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Check response */ + status = rsp->status; + + /* Message buffer free */ + app_msg_free(app, rsp); + + return status; +} + +int +app_pipeline_port_in_disable(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id) +{ + struct app_pipeline_params *p; + struct pipeline_port_in_msg_req *req; + struct pipeline_msg_rsp *rsp; + int status = 0; + + /* Check input arguments */ + if (app == NULL) + return -1; + + APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p); + if ((p == NULL) || + (port_id >= p->n_pktq_in)) + return -1; + + /* Message buffer allocation */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + /* Fill in request */ + req->type = PIPELINE_MSG_REQ_PORT_IN_DISABLE; + req->port_id = port_id; + + /* Send request and wait for response */ 
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Check response */ + status = rsp->status; + + /* Message buffer free */ + app_msg_free(app, rsp); + + return status; +} + +int +app_link_config(struct app_params *app, + uint32_t link_id, + uint32_t ip, + uint32_t depth) +{ + struct app_link_params *p; + uint32_t i, netmask, host, bcast; + + /* Check input arguments */ + if (app == NULL) + return -1; + + APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p); + if (p == NULL) { + APP_LOG(app, HIGH, "LINK%" PRIu32 " is not a valid link", + link_id); + return -1; + } + + if (p->state) { + APP_LOG(app, HIGH, "%s is UP, please bring it DOWN first", + p->name); + return -1; + } + + netmask = (~0U) << (32 - depth); + host = ip & netmask; + bcast = host | (~netmask); + + if ((ip == 0) || + (ip == UINT32_MAX) || + (ip == host) || + (ip == bcast)) { + APP_LOG(app, HIGH, "Illegal IP address"); + return -1; + } + + for (i = 0; i < app->n_links; i++) { + struct app_link_params *link = &app->link_params[i]; + + if (strcmp(p->name, link->name) == 0) + continue; + + if (link->ip == ip) { + APP_LOG(app, HIGH, + "%s is already assigned this IP address", + link->name); + return -1; + } + } + + if ((depth == 0) || (depth > 32)) { + APP_LOG(app, HIGH, "Illegal value for depth parameter " + "(%" PRIu32 ")", + depth); + return -1; + } + + /* Save link parameters */ + p->ip = ip; + p->depth = depth; + + return 0; +} + +int +app_link_up(struct app_params *app, + uint32_t link_id) +{ + struct app_link_params *p; + + /* Check input arguments */ + if (app == NULL) + return -1; + + APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p); + if (p == NULL) { + APP_LOG(app, HIGH, "LINK%" PRIu32 " is not a valid link", + link_id); + return -1; + } + + /* Check link state */ + if (p->state) { + APP_LOG(app, HIGH, "%s is already UP", p->name); + return 0; + } + + /* Check that IP address is valid */ + if (p->ip == 0) { + APP_LOG(app, 
HIGH, "%s IP address is not set", p->name); + return 0; + } + + app_link_up_internal(app, p); + + return 0; +} + +int +app_link_down(struct app_params *app, + uint32_t link_id) +{ + struct app_link_params *p; + + /* Check input arguments */ + if (app == NULL) + return -1; + + APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p); + if (p == NULL) { + APP_LOG(app, HIGH, "LINK%" PRIu32 " is not a valid link", + link_id); + return -1; + } + + /* Check link state */ + if (p->state == 0) { + APP_LOG(app, HIGH, "%s is already DOWN", p->name); + return 0; + } + + app_link_down_internal(app, p); + + return 0; +} + +/* + * ping + */ + +struct cmd_ping_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t ping_string; +}; + +static void +cmd_ping_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_ping_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_ping(app, params->pipeline_id); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_ping_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_ping_result, p_string, "p"); + +cmdline_parse_token_num_t cmd_ping_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_ping_result, pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_ping_ping_string = + TOKEN_STRING_INITIALIZER(struct cmd_ping_result, ping_string, "ping"); + +cmdline_parse_inst_t cmd_ping = { + .f = cmd_ping_parsed, + .data = NULL, + .help_str = "Pipeline ping", + .tokens = { + (void *) &cmd_ping_p_string, + (void *) &cmd_ping_pipeline_id, + (void *) &cmd_ping_ping_string, + NULL, + }, +}; + +/* + * stats port in + */ + +struct cmd_stats_port_in_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t stats_string; + cmdline_fixed_string_t port_string; + cmdline_fixed_string_t in_string; + uint32_t port_in_id; + +}; +static void +cmd_stats_port_in_parsed( + 
void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_stats_port_in_result *params = parsed_result; + struct app_params *app = data; + struct rte_pipeline_port_in_stats stats; + int status; + + status = app_pipeline_stats_port_in(app, + params->pipeline_id, + params->port_in_id, + &stats); + + if (status != 0) { + printf("Command failed\n"); + return; + } + + /* Display stats */ + printf("Pipeline %" PRIu32 " - stats for input port %" PRIu32 ":\n" + "\tPkts in: %" PRIu64 "\n" + "\tPkts dropped by AH: %" PRIu64 "\n" + "\tPkts dropped by other: %" PRIu64 "\n", + params->pipeline_id, + params->port_in_id, + stats.stats.n_pkts_in, + stats.n_pkts_dropped_by_ah, + stats.stats.n_pkts_drop); +} + +cmdline_parse_token_string_t cmd_stats_port_in_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_port_in_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_stats_port_in_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_stats_port_in_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_stats_port_in_stats_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_port_in_result, stats_string, + "stats"); + +cmdline_parse_token_string_t cmd_stats_port_in_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_port_in_result, port_string, + "port"); + +cmdline_parse_token_string_t cmd_stats_port_in_in_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_port_in_result, in_string, + "in"); + + cmdline_parse_token_num_t cmd_stats_port_in_port_in_id = + TOKEN_NUM_INITIALIZER(struct cmd_stats_port_in_result, port_in_id, + UINT32); + +cmdline_parse_inst_t cmd_stats_port_in = { + .f = cmd_stats_port_in_parsed, + .data = NULL, + .help_str = "Pipeline input port stats", + .tokens = { + (void *) &cmd_stats_port_in_p_string, + (void *) &cmd_stats_port_in_pipeline_id, + (void *) &cmd_stats_port_in_stats_string, + (void *) &cmd_stats_port_in_port_string, + (void *) &cmd_stats_port_in_in_string, + (void *) &cmd_stats_port_in_port_in_id, + 
NULL, + }, +}; + +/* + * stats port out + */ + +struct cmd_stats_port_out_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t stats_string; + cmdline_fixed_string_t port_string; + cmdline_fixed_string_t out_string; + uint32_t port_out_id; +}; + +static void +cmd_stats_port_out_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + + struct cmd_stats_port_out_result *params = parsed_result; + struct app_params *app = data; + struct rte_pipeline_port_out_stats stats; + int status; + + status = app_pipeline_stats_port_out(app, + params->pipeline_id, + params->port_out_id, + &stats); + + if (status != 0) { + printf("Command failed\n"); + return; + } + + /* Display stats */ + printf("Pipeline %" PRIu32 " - stats for output port %" PRIu32 ":\n" + "\tPkts in: %" PRIu64 "\n" + "\tPkts dropped by AH: %" PRIu64 "\n" + "\tPkts dropped by other: %" PRIu64 "\n", + params->pipeline_id, + params->port_out_id, + stats.stats.n_pkts_in, + stats.n_pkts_dropped_by_ah, + stats.stats.n_pkts_drop); +} + +cmdline_parse_token_string_t cmd_stats_port_out_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_port_out_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_stats_port_out_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_stats_port_out_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_stats_port_out_stats_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_port_out_result, stats_string, + "stats"); + +cmdline_parse_token_string_t cmd_stats_port_out_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_port_out_result, port_string, + "port"); + +cmdline_parse_token_string_t cmd_stats_port_out_out_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_port_out_result, out_string, + "out"); + +cmdline_parse_token_num_t cmd_stats_port_out_port_out_id = + TOKEN_NUM_INITIALIZER(struct cmd_stats_port_out_result, port_out_id, + UINT32); + +cmdline_parse_inst_t cmd_stats_port_out = { + .f = 
cmd_stats_port_out_parsed, + .data = NULL, + .help_str = "Pipeline output port stats", + .tokens = { + (void *) &cmd_stats_port_out_p_string, + (void *) &cmd_stats_port_out_pipeline_id, + (void *) &cmd_stats_port_out_stats_string, + (void *) &cmd_stats_port_out_port_string, + (void *) &cmd_stats_port_out_out_string, + (void *) &cmd_stats_port_out_port_out_id, + NULL, + }, +}; + +/* + * stats table + */ + +struct cmd_stats_table_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t stats_string; + cmdline_fixed_string_t table_string; + uint32_t table_id; +}; + +static void +cmd_stats_table_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_stats_table_result *params = parsed_result; + struct app_params *app = data; + struct rte_pipeline_table_stats stats; + int status; + + status = app_pipeline_stats_table(app, + params->pipeline_id, + params->table_id, + &stats); + + if (status != 0) { + printf("Command failed\n"); + return; + } + + /* Display stats */ + printf("Pipeline %" PRIu32 " - stats for table %" PRIu32 ":\n" + "\tPkts in: %" PRIu64 "\n" + "\tPkts in with lookup miss: %" PRIu64 "\n" + "\tPkts in with lookup hit dropped by AH: %" PRIu64 "\n" + "\tPkts in with lookup hit dropped by others: %" PRIu64 "\n" + "\tPkts in with lookup miss dropped by AH: %" PRIu64 "\n" + "\tPkts in with lookup miss dropped by others: %" PRIu64 "\n", + params->pipeline_id, + params->table_id, + stats.stats.n_pkts_in, + stats.stats.n_pkts_lookup_miss, + stats.n_pkts_dropped_by_lkp_hit_ah, + stats.n_pkts_dropped_lkp_hit, + stats.n_pkts_dropped_by_lkp_miss_ah, + stats.n_pkts_dropped_lkp_miss); +} + +cmdline_parse_token_string_t cmd_stats_table_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_table_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_stats_table_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_stats_table_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t 
cmd_stats_table_stats_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_table_result, stats_string, + "stats"); + +cmdline_parse_token_string_t cmd_stats_table_table_string = + TOKEN_STRING_INITIALIZER(struct cmd_stats_table_result, table_string, + "table"); + +cmdline_parse_token_num_t cmd_stats_table_table_id = + TOKEN_NUM_INITIALIZER(struct cmd_stats_table_result, table_id, UINT32); + +cmdline_parse_inst_t cmd_stats_table = { + .f = cmd_stats_table_parsed, + .data = NULL, + .help_str = "Pipeline table stats", + .tokens = { + (void *) &cmd_stats_table_p_string, + (void *) &cmd_stats_table_pipeline_id, + (void *) &cmd_stats_table_stats_string, + (void *) &cmd_stats_table_table_string, + (void *) &cmd_stats_table_table_id, + NULL, + }, +}; + +/* + * port in enable + */ + +struct cmd_port_in_enable_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t port_string; + cmdline_fixed_string_t in_string; + uint32_t port_in_id; + cmdline_fixed_string_t enable_string; +}; + +static void +cmd_port_in_enable_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_port_in_enable_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_port_in_enable(app, + params->pipeline_id, + params->port_in_id); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_port_in_enable_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_port_in_enable_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_port_in_enable_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_port_in_enable_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_port_in_enable_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_port_in_enable_result, port_string, + "port"); + +cmdline_parse_token_string_t cmd_port_in_enable_in_string = + TOKEN_STRING_INITIALIZER(struct cmd_port_in_enable_result, in_string, + "in"); + +cmdline_parse_token_num_t 
cmd_port_in_enable_port_in_id = + TOKEN_NUM_INITIALIZER(struct cmd_port_in_enable_result, port_in_id, + UINT32); + +cmdline_parse_token_string_t cmd_port_in_enable_enable_string = + TOKEN_STRING_INITIALIZER(struct cmd_port_in_enable_result, + enable_string, "enable"); + +cmdline_parse_inst_t cmd_port_in_enable = { + .f = cmd_port_in_enable_parsed, + .data = NULL, + .help_str = "Pipeline input port enable", + .tokens = { + (void *) &cmd_port_in_enable_p_string, + (void *) &cmd_port_in_enable_pipeline_id, + (void *) &cmd_port_in_enable_port_string, + (void *) &cmd_port_in_enable_in_string, + (void *) &cmd_port_in_enable_port_in_id, + (void *) &cmd_port_in_enable_enable_string, + NULL, + }, +}; + +/* + * port in disable + */ + +struct cmd_port_in_disable_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t port_string; + cmdline_fixed_string_t in_string; + uint32_t port_in_id; + cmdline_fixed_string_t disable_string; +}; + +static void +cmd_port_in_disable_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_port_in_disable_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_port_in_disable(app, + params->pipeline_id, + params->port_in_id); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_port_in_disable_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_port_in_disable_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_port_in_disable_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_port_in_disable_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_port_in_disable_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_port_in_disable_result, port_string, + "port"); + +cmdline_parse_token_string_t cmd_port_in_disable_in_string = + TOKEN_STRING_INITIALIZER(struct cmd_port_in_disable_result, in_string, + "in"); + +cmdline_parse_token_num_t cmd_port_in_disable_port_in_id = + 
TOKEN_NUM_INITIALIZER(struct cmd_port_in_disable_result, port_in_id, + UINT32); + +cmdline_parse_token_string_t cmd_port_in_disable_disable_string = + TOKEN_STRING_INITIALIZER(struct cmd_port_in_disable_result, + disable_string, "disable"); + +cmdline_parse_inst_t cmd_port_in_disable = { + .f = cmd_port_in_disable_parsed, + .data = NULL, + .help_str = "Pipeline input port disable", + .tokens = { + (void *) &cmd_port_in_disable_p_string, + (void *) &cmd_port_in_disable_pipeline_id, + (void *) &cmd_port_in_disable_port_string, + (void *) &cmd_port_in_disable_in_string, + (void *) &cmd_port_in_disable_port_in_id, + (void *) &cmd_port_in_disable_disable_string, + NULL, + }, +}; + +/* + * link config + */ + +static void +print_link_info(struct app_link_params *p) +{ + struct rte_eth_stats stats; + struct ether_addr *mac_addr; + uint32_t netmask = (~0U) << (32 - p->depth); + uint32_t host = p->ip & netmask; + uint32_t bcast = host | (~netmask); + + memset(&stats, 0, sizeof(stats)); + rte_eth_stats_get(p->pmd_id, &stats); + + mac_addr = (struct ether_addr *) &p->mac_addr; + + if (strlen(p->pci_bdf)) + printf("%s(%s): flags=<%s>\n", + p->name, + p->pci_bdf, + (p->state) ? "UP" : "DOWN"); + else + printf("%s: flags=<%s>\n", + p->name, + (p->state) ? 
"UP" : "DOWN"); + + if (p->ip) + printf("\tinet %" PRIu32 ".%" PRIu32 + ".%" PRIu32 ".%" PRIu32 + " netmask %" PRIu32 ".%" PRIu32 + ".%" PRIu32 ".%" PRIu32 " " + "broadcast %" PRIu32 ".%" PRIu32 + ".%" PRIu32 ".%" PRIu32 "\n", + (p->ip >> 24) & 0xFF, + (p->ip >> 16) & 0xFF, + (p->ip >> 8) & 0xFF, + p->ip & 0xFF, + (netmask >> 24) & 0xFF, + (netmask >> 16) & 0xFF, + (netmask >> 8) & 0xFF, + netmask & 0xFF, + (bcast >> 24) & 0xFF, + (bcast >> 16) & 0xFF, + (bcast >> 8) & 0xFF, + bcast & 0xFF); + + printf("\tether %02" PRIx32 ":%02" PRIx32 ":%02" PRIx32 + ":%02" PRIx32 ":%02" PRIx32 ":%02" PRIx32 "\n", + mac_addr->addr_bytes[0], + mac_addr->addr_bytes[1], + mac_addr->addr_bytes[2], + mac_addr->addr_bytes[3], + mac_addr->addr_bytes[4], + mac_addr->addr_bytes[5]); + + printf("\tRX packets %" PRIu64 + " bytes %" PRIu64 + "\n", + stats.ipackets, + stats.ibytes); + + printf("\tRX errors %" PRIu64 + " missed %" PRIu64 + " no-mbuf %" PRIu64 + "\n", + stats.ierrors, + stats.imissed, + stats.rx_nombuf); + + printf("\tTX packets %" PRIu64 + " bytes %" PRIu64 "\n", + stats.opackets, + stats.obytes); + + printf("\tTX errors %" PRIu64 + "\n", + stats.oerrors); + + printf("\n"); +} + +struct cmd_link_config_result { + cmdline_fixed_string_t link_string; + uint32_t link_id; + cmdline_fixed_string_t config_string; + cmdline_ipaddr_t ip; + uint32_t depth; +}; + +static void +cmd_link_config_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_link_config_result *params = parsed_result; + struct app_params *app = data; + int status; + + uint32_t link_id = params->link_id; + uint32_t ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr); + uint32_t depth = params->depth; + + status = app_link_config(app, link_id, ip, depth); + if (status) + printf("Command failed\n"); + else { + struct app_link_params *p; + + APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p); + print_link_info(p); + } +} + +cmdline_parse_token_string_t 
cmd_link_config_link_string = + TOKEN_STRING_INITIALIZER(struct cmd_link_config_result, link_string, + "link"); + +cmdline_parse_token_num_t cmd_link_config_link_id = + TOKEN_NUM_INITIALIZER(struct cmd_link_config_result, link_id, UINT32); + +cmdline_parse_token_string_t cmd_link_config_config_string = + TOKEN_STRING_INITIALIZER(struct cmd_link_config_result, config_string, + "config"); + +cmdline_parse_token_ipaddr_t cmd_link_config_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_link_config_result, ip); + +cmdline_parse_token_num_t cmd_link_config_depth = + TOKEN_NUM_INITIALIZER(struct cmd_link_config_result, depth, UINT32); + +cmdline_parse_inst_t cmd_link_config = { + .f = cmd_link_config_parsed, + .data = NULL, + .help_str = "Link configuration", + .tokens = { + (void *)&cmd_link_config_link_string, + (void *)&cmd_link_config_link_id, + (void *)&cmd_link_config_config_string, + (void *)&cmd_link_config_ip, + (void *)&cmd_link_config_depth, + NULL, + }, +}; + +/* + * link up + */ + +struct cmd_link_up_result { + cmdline_fixed_string_t link_string; + uint32_t link_id; + cmdline_fixed_string_t up_string; +}; + +static void +cmd_link_up_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_link_up_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_link_up(app, params->link_id); + if (status != 0) + printf("Command failed\n"); + else { + struct app_link_params *p; + + APP_PARAM_FIND_BY_ID(app->link_params, "LINK", params->link_id, + p); + print_link_info(p); + } +} + +cmdline_parse_token_string_t cmd_link_up_link_string = + TOKEN_STRING_INITIALIZER(struct cmd_link_up_result, link_string, + "link"); + +cmdline_parse_token_num_t cmd_link_up_link_id = + TOKEN_NUM_INITIALIZER(struct cmd_link_up_result, link_id, UINT32); + +cmdline_parse_token_string_t cmd_link_up_up_string = + TOKEN_STRING_INITIALIZER(struct cmd_link_up_result, up_string, "up"); + +cmdline_parse_inst_t 
cmd_link_up = { + .f = cmd_link_up_parsed, + .data = NULL, + .help_str = "Link UP", + .tokens = { + (void *)&cmd_link_up_link_string, + (void *)&cmd_link_up_link_id, + (void *)&cmd_link_up_up_string, + NULL, + }, +}; + +/* + * link down + */ + +struct cmd_link_down_result { + cmdline_fixed_string_t link_string; + uint32_t link_id; + cmdline_fixed_string_t down_string; +}; + +static void +cmd_link_down_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_link_down_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_link_down(app, params->link_id); + if (status != 0) + printf("Command failed\n"); + else { + struct app_link_params *p; + + APP_PARAM_FIND_BY_ID(app->link_params, "LINK", params->link_id, + p); + print_link_info(p); + } +} + +cmdline_parse_token_string_t cmd_link_down_link_string = + TOKEN_STRING_INITIALIZER(struct cmd_link_down_result, link_string, + "link"); + +cmdline_parse_token_num_t cmd_link_down_link_id = + TOKEN_NUM_INITIALIZER(struct cmd_link_down_result, link_id, UINT32); + +cmdline_parse_token_string_t cmd_link_down_down_string = + TOKEN_STRING_INITIALIZER(struct cmd_link_down_result, down_string, + "down"); + +cmdline_parse_inst_t cmd_link_down = { + .f = cmd_link_down_parsed, + .data = NULL, + .help_str = "Link DOWN", + .tokens = { + (void *) &cmd_link_down_link_string, + (void *) &cmd_link_down_link_id, + (void *) &cmd_link_down_down_string, + NULL, + }, +}; + +/* + * link ls + */ + +struct cmd_link_ls_result { + cmdline_fixed_string_t link_string; + cmdline_fixed_string_t ls_string; +}; + +static void +cmd_link_ls_parsed( + __attribute__((unused)) void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct app_params *app = data; + uint32_t link_id; + + for (link_id = 0; link_id < app->n_links; link_id++) { + struct app_link_params *p; + + APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p); + 
print_link_info(p); + } +} + +cmdline_parse_token_string_t cmd_link_ls_link_string = + TOKEN_STRING_INITIALIZER(struct cmd_link_ls_result, link_string, + "link"); + +cmdline_parse_token_string_t cmd_link_ls_ls_string = + TOKEN_STRING_INITIALIZER(struct cmd_link_ls_result, ls_string, "ls"); + +cmdline_parse_inst_t cmd_link_ls = { + .f = cmd_link_ls_parsed, + .data = NULL, + .help_str = "Link list", + .tokens = { + (void *)&cmd_link_ls_link_string, + (void *)&cmd_link_ls_ls_string, + NULL, + }, +}; + +/* + * quit + */ + +struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +static void +cmd_quit_parsed( + __rte_unused void *parsed_result, + struct cmdline *cl, + __rte_unused void *data) +{ + cmdline_quit(cl); +} + +static cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + +static cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, + .data = NULL, + .help_str = "Quit", + .tokens = { + (void *) &cmd_quit_quit, + NULL, + }, +}; + +/* + * run + */ + +static void +app_run_file( + cmdline_parse_ctx_t *ctx, + const char *file_name) +{ + struct cmdline *file_cl; + int fd; + + fd = open(file_name, O_RDONLY); + if (fd < 0) { + printf("Cannot open file \"%s\"\n", file_name); + return; + } + + file_cl = cmdline_new(ctx, "", fd, 1); + cmdline_interact(file_cl); + close(fd); +} + +struct cmd_run_file_result { + cmdline_fixed_string_t run_string; + char file_name[APP_FILE_NAME_SIZE]; +}; + +static void +cmd_run_parsed( + void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_run_file_result *params = parsed_result; + + app_run_file(cl->ctx, params->file_name); +} + +cmdline_parse_token_string_t cmd_run_run_string = + TOKEN_STRING_INITIALIZER(struct cmd_run_file_result, run_string, + "run"); + +cmdline_parse_token_string_t cmd_run_file_name = + TOKEN_STRING_INITIALIZER(struct cmd_run_file_result, file_name, NULL); + +cmdline_parse_inst_t cmd_run = { + .f = 
cmd_run_parsed, + .data = NULL, + .help_str = "Run CLI script file", + .tokens = { + (void *) &cmd_run_run_string, + (void *) &cmd_run_file_name, + NULL, + }, +}; + +static cmdline_parse_ctx_t pipeline_common_cmds[] = { + (cmdline_parse_inst_t *) &cmd_quit, + (cmdline_parse_inst_t *) &cmd_run, + + (cmdline_parse_inst_t *) &cmd_link_config, + (cmdline_parse_inst_t *) &cmd_link_up, + (cmdline_parse_inst_t *) &cmd_link_down, + (cmdline_parse_inst_t *) &cmd_link_ls, + + (cmdline_parse_inst_t *) &cmd_ping, + (cmdline_parse_inst_t *) &cmd_stats_port_in, + (cmdline_parse_inst_t *) &cmd_stats_port_out, + (cmdline_parse_inst_t *) &cmd_stats_table, + (cmdline_parse_inst_t *) &cmd_port_in_enable, + (cmdline_parse_inst_t *) &cmd_port_in_disable, + NULL, +}; + +int +app_pipeline_common_cmd_push(struct app_params *app) +{ + uint32_t n_cmds, i; + + /* Check for available slots in the application commands array */ + n_cmds = RTE_DIM(pipeline_common_cmds) - 1; + if (n_cmds > APP_MAX_CMDS - app->n_cmds) + return -ENOMEM; + + /* Push pipeline commands into the application */ + memcpy(&app->cmds[app->n_cmds], + pipeline_common_cmds, + n_cmds * sizeof(cmdline_parse_ctx_t)); + + for (i = 0; i < n_cmds; i++) + app->cmds[app->n_cmds + i]->data = app; + + app->n_cmds += n_cmds; + app->cmds[app->n_cmds] = NULL; + + return 0; +} diff --git a/examples/ip_pipeline/pipeline/pipeline_common_fe.h b/examples/ip_pipeline/pipeline/pipeline_common_fe.h new file mode 100644 index 00000000..cfad963d --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_common_fe.h @@ -0,0 +1,234 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_PIPELINE_COMMON_FE_H__ +#define __INCLUDE_PIPELINE_COMMON_FE_H__ + +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_malloc.h> +#include <cmdline_parse.h> + +#include "pipeline_common_be.h" +#include "pipeline.h" +#include "app.h" + +#ifndef MSG_TIMEOUT_DEFAULT +#define MSG_TIMEOUT_DEFAULT 1000 +#endif + +static inline struct app_pipeline_data * +app_pipeline_data(struct app_params *app, uint32_t id) +{ + struct app_pipeline_params *params; + + APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", id, params); + if (params == NULL) + return NULL; + + return &app->pipeline_data[params - app->pipeline_params]; +} + +static inline void * +app_pipeline_data_fe(struct app_params *app, uint32_t id, struct pipeline_type *ptype) +{ + struct app_pipeline_data *pipeline_data; + + pipeline_data = app_pipeline_data(app, id); + if (pipeline_data == NULL) + return NULL; + + if (strcmp(pipeline_data->ptype->name, ptype->name) != 0) + return NULL; + + if (pipeline_data->enabled == 0) + return NULL; + + return pipeline_data->fe; +} + +static inline struct rte_ring * +app_pipeline_msgq_in_get(struct app_params *app, + uint32_t pipeline_id) +{ + struct app_msgq_params *p; + + APP_PARAM_FIND_BY_ID(app->msgq_params, + "MSGQ-REQ-PIPELINE", + pipeline_id, + p); + if (p == NULL) + return NULL; + + return app->msgq[p - app->msgq_params]; +} + +static inline struct rte_ring * +app_pipeline_msgq_out_get(struct app_params *app, + uint32_t pipeline_id) +{ + struct app_msgq_params *p; + + APP_PARAM_FIND_BY_ID(app->msgq_params, + "MSGQ-RSP-PIPELINE", + pipeline_id, + p); + if (p == NULL) + return NULL; + + return app->msgq[p - app->msgq_params]; +} + +static inline void * +app_msg_alloc(__rte_unused struct app_params *app) +{ + return rte_malloc(NULL, 2048, RTE_CACHE_LINE_SIZE); +} + +static inline void +app_msg_free(__rte_unused struct app_params *app, + void *msg) +{ + rte_free(msg); +} + +static inline void +app_msg_send(struct app_params *app, + 
/*
 * Send a request message to a pipeline and wait for its response.
 *
 * app:         application parameters
 * pipeline_id: id used to look up the pipeline's request/response rings
 * msg:         request message to enqueue
 * timeout_ms:  maximum time to wait for the response, in milliseconds;
 *              0 means wait without a deadline
 *
 * Returns the dequeued response message, or NULL when either ring cannot
 * be found for pipeline_id or when no response arrives before the
 * deadline. When a ring is missing the request is not enqueued.
 */
static inline void *
app_msg_send_recv(struct app_params *app,
	uint32_t pipeline_id,
	void *msg,
	uint32_t timeout_ms)
{
	struct rte_ring *r_req = app_pipeline_msgq_in_get(app, pipeline_id);
	struct rte_ring *r_rsp = app_pipeline_msgq_out_get(app, pipeline_id);
	uint64_t hz = rte_get_tsc_hz();
	void *msg_recv;
	uint64_t deadline;
	int status;

	/* Both lookups return NULL for an unknown pipeline id */
	if ((r_req == NULL) || (r_rsp == NULL))
		return NULL;

	/* send: retry for as long as the request ring is full */
	do {
		status = rte_ring_sp_enqueue(r_req, (void *) msg);
	} while (status == -ENOBUFS);

	/* recv: poll the response ring until the TSC passes the deadline */
	deadline = (timeout_ms) ?
		(rte_rdtsc() + ((hz * timeout_ms) / 1000)) :
		UINT64_MAX;

	do {
		if (rte_rdtsc() > deadline)
			return NULL;

		status = rte_ring_sc_dequeue(r_rsp, &msg_recv);
	} while (status != 0);

	return msg_recv;
}
uint32_t ip, + uint32_t depth); + +int +app_link_up(struct app_params *app, + uint32_t link_id); + +int +app_link_down(struct app_params *app, + uint32_t link_id); + +int +app_pipeline_common_cmd_push(struct app_params *app); + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_firewall.c b/examples/ip_pipeline/pipeline/pipeline_firewall.c new file mode 100644 index 00000000..fd897d5c --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_firewall.c @@ -0,0 +1,1869 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <string.h> +#include <sys/queue.h> +#include <netinet/in.h> + +#include <rte_common.h> +#include <rte_hexdump.h> +#include <rte_malloc.h> +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_ipaddr.h> +#include <cmdline_parse_etheraddr.h> +#include <cmdline_socket.h> + +#include "app.h" +#include "pipeline_common_fe.h" +#include "pipeline_firewall.h" + +#define BUF_SIZE 1024 + +struct app_pipeline_firewall_rule { + struct pipeline_firewall_key key; + int32_t priority; + uint32_t port_id; + void *entry_ptr; + + TAILQ_ENTRY(app_pipeline_firewall_rule) node; +}; + +struct app_pipeline_firewall { + /* parameters */ + uint32_t n_ports_in; + uint32_t n_ports_out; + + /* rules */ + TAILQ_HEAD(, app_pipeline_firewall_rule) rules; + uint32_t n_rules; + uint32_t default_rule_present; + uint32_t default_rule_port_id; + void *default_rule_entry_ptr; +}; + +struct app_pipeline_add_bulk_params { + struct pipeline_firewall_key *keys; + uint32_t n_keys; + uint32_t *priorities; + uint32_t *port_ids; +}; + +struct app_pipeline_del_bulk_params { + struct pipeline_firewall_key *keys; + uint32_t n_keys; +}; + +static void +print_firewall_ipv4_rule(struct app_pipeline_firewall_rule *rule) +{ + printf("Prio = %" PRId32 " (SA = %" PRIu32 ".%" PRIu32 + ".%" PRIu32 ".%" PRIu32 "/%" PRIu32 ", " + "DA = %" PRIu32 ".%" PRIu32 + 
".%"PRIu32 ".%" PRIu32 "/%" PRIu32 ", " + "SP = %" PRIu32 "-%" PRIu32 ", " + "DP = %" PRIu32 "-%" PRIu32 ", " + "Proto = %" PRIu32 " / 0x%" PRIx32 ") => " + "Port = %" PRIu32 " (entry ptr = %p)\n", + + rule->priority, + + (rule->key.key.ipv4_5tuple.src_ip >> 24) & 0xFF, + (rule->key.key.ipv4_5tuple.src_ip >> 16) & 0xFF, + (rule->key.key.ipv4_5tuple.src_ip >> 8) & 0xFF, + rule->key.key.ipv4_5tuple.src_ip & 0xFF, + rule->key.key.ipv4_5tuple.src_ip_mask, + + (rule->key.key.ipv4_5tuple.dst_ip >> 24) & 0xFF, + (rule->key.key.ipv4_5tuple.dst_ip >> 16) & 0xFF, + (rule->key.key.ipv4_5tuple.dst_ip >> 8) & 0xFF, + rule->key.key.ipv4_5tuple.dst_ip & 0xFF, + rule->key.key.ipv4_5tuple.dst_ip_mask, + + rule->key.key.ipv4_5tuple.src_port_from, + rule->key.key.ipv4_5tuple.src_port_to, + + rule->key.key.ipv4_5tuple.dst_port_from, + rule->key.key.ipv4_5tuple.dst_port_to, + + rule->key.key.ipv4_5tuple.proto, + rule->key.key.ipv4_5tuple.proto_mask, + + rule->port_id, + rule->entry_ptr); +} + +static struct app_pipeline_firewall_rule * +app_pipeline_firewall_rule_find(struct app_pipeline_firewall *p, + struct pipeline_firewall_key *key) +{ + struct app_pipeline_firewall_rule *r; + + TAILQ_FOREACH(r, &p->rules, node) + if (memcmp(key, + &r->key, + sizeof(struct pipeline_firewall_key)) == 0) + return r; + + return NULL; +} + +static int +app_pipeline_firewall_ls( + struct app_params *app, + uint32_t pipeline_id) +{ + struct app_pipeline_firewall *p; + struct app_pipeline_firewall_rule *rule; + uint32_t n_rules; + int priority; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall); + if (p == NULL) + return -1; + + n_rules = p->n_rules; + for (priority = 0; n_rules; priority++) + TAILQ_FOREACH(rule, &p->rules, node) + if (rule->priority == priority) { + print_firewall_ipv4_rule(rule); + n_rules--; + } + + if (p->default_rule_present) + printf("Default rule: port %" PRIu32 " (entry ptr = %p)\n", + 
p->default_rule_port_id, + p->default_rule_entry_ptr); + else + printf("Default rule: DROP\n"); + + printf("\n"); + + return 0; +} + +static void* +app_pipeline_firewall_init(struct pipeline_params *params, + __rte_unused void *arg) +{ + struct app_pipeline_firewall *p; + uint32_t size; + + /* Check input arguments */ + if ((params == NULL) || + (params->n_ports_in == 0) || + (params->n_ports_out == 0)) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct app_pipeline_firewall)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + if (p == NULL) + return NULL; + + /* Initialization */ + p->n_ports_in = params->n_ports_in; + p->n_ports_out = params->n_ports_out; + + TAILQ_INIT(&p->rules); + p->n_rules = 0; + p->default_rule_present = 0; + p->default_rule_port_id = 0; + p->default_rule_entry_ptr = NULL; + + return (void *) p; +} + +static int +app_pipeline_firewall_free(void *pipeline) +{ + struct app_pipeline_firewall *p = pipeline; + + /* Check input arguments */ + if (p == NULL) + return -1; + + /* Free resources */ + while (!TAILQ_EMPTY(&p->rules)) { + struct app_pipeline_firewall_rule *rule; + + rule = TAILQ_FIRST(&p->rules); + TAILQ_REMOVE(&p->rules, rule, node); + rte_free(rule); + } + + rte_free(p); + return 0; +} + +static int +app_pipeline_firewall_key_check_and_normalize(struct pipeline_firewall_key *key) +{ + switch (key->type) { + case PIPELINE_FIREWALL_IPV4_5TUPLE: + { + uint32_t src_ip_depth = key->key.ipv4_5tuple.src_ip_mask; + uint32_t dst_ip_depth = key->key.ipv4_5tuple.dst_ip_mask; + uint16_t src_port_from = key->key.ipv4_5tuple.src_port_from; + uint16_t src_port_to = key->key.ipv4_5tuple.src_port_to; + uint16_t dst_port_from = key->key.ipv4_5tuple.dst_port_from; + uint16_t dst_port_to = key->key.ipv4_5tuple.dst_port_to; + + uint32_t src_ip_netmask = 0; + uint32_t dst_ip_netmask = 0; + + if ((src_ip_depth > 32) || + (dst_ip_depth > 32) || + (src_port_from > src_port_to) || + (dst_port_from > dst_port_to)) + 
return -1; + + if (src_ip_depth) + src_ip_netmask = (~0U) << (32 - src_ip_depth); + + if (dst_ip_depth) + dst_ip_netmask = ((~0U) << (32 - dst_ip_depth)); + + key->key.ipv4_5tuple.src_ip &= src_ip_netmask; + key->key.ipv4_5tuple.dst_ip &= dst_ip_netmask; + + return 0; + } + + default: + return -1; + } +} + +static int +app_pipeline_add_bulk_parse_file(char *filename, + struct app_pipeline_add_bulk_params *params) +{ + FILE *f; + char file_buf[BUF_SIZE]; + uint32_t i; + int status = 0; + + f = fopen(filename, "r"); + if (f == NULL) + return -1; + + params->n_keys = 0; + while (fgets(file_buf, BUF_SIZE, f) != NULL) + params->n_keys++; + rewind(f); + + if (params->n_keys == 0) { + status = -1; + goto end; + } + + params->keys = rte_malloc(NULL, + params->n_keys * sizeof(struct pipeline_firewall_key), + RTE_CACHE_LINE_SIZE); + if (params->keys == NULL) { + status = -1; + goto end; + } + + params->priorities = rte_malloc(NULL, + params->n_keys * sizeof(uint32_t), + RTE_CACHE_LINE_SIZE); + if (params->priorities == NULL) { + status = -1; + goto end; + } + + params->port_ids = rte_malloc(NULL, + params->n_keys * sizeof(uint32_t), + RTE_CACHE_LINE_SIZE); + if (params->port_ids == NULL) { + status = -1; + goto end; + } + + i = 0; + while (fgets(file_buf, BUF_SIZE, f) != NULL) { + char *str; + + str = strtok(file_buf, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->priorities[i] = atoi(str); + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip = atoi(str)<<24; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str)<<16; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str)<<8; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str); + + str = 
strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip_mask = atoi(str); + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip = atoi(str)<<24; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str)<<16; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str)<<8; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip_mask = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_port_from = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_port_to = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_port_from = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_port_to = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.proto = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + /* Need to add 2 to str to skip leading 0x */ + params->keys[i].key.ipv4_5tuple.proto_mask = strtol(str+2, NULL, 16); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->port_ids[i] = atoi(str); + params->keys[i].type = PIPELINE_FIREWALL_IPV4_5TUPLE; + + i++; + } + +end: + fclose(f); + return status; +} + +static int 
+app_pipeline_del_bulk_parse_file(char *filename, + struct app_pipeline_del_bulk_params *params) +{ + FILE *f; + char file_buf[BUF_SIZE]; + uint32_t i; + int status = 0; + + f = fopen(filename, "r"); + if (f == NULL) + return -1; + + params->n_keys = 0; + while (fgets(file_buf, BUF_SIZE, f) != NULL) + params->n_keys++; + rewind(f); + + if (params->n_keys == 0) { + status = -1; + goto end; + } + + params->keys = rte_malloc(NULL, + params->n_keys * sizeof(struct pipeline_firewall_key), + RTE_CACHE_LINE_SIZE); + if (params->keys == NULL) { + status = -1; + goto end; + } + + i = 0; + while (fgets(file_buf, BUF_SIZE, f) != NULL) { + char *str; + + str = strtok(file_buf, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip = atoi(str)<<24; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str)<<16; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str)<<8; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_ip_mask = atoi(str); + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip = atoi(str)<<24; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str)<<16; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str)<<8; + + str = strtok(NULL, " ."); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + 
params->keys[i].key.ipv4_5tuple.dst_ip_mask = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_port_from = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.src_port_to = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_port_from = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.dst_port_to = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + params->keys[i].key.ipv4_5tuple.proto = atoi(str); + + str = strtok(NULL, " "); + if (str == NULL) { + status = -1; + goto end; + } + /* Need to add 2 to str to skip leading 0x */ + params->keys[i].key.ipv4_5tuple.proto_mask = strtol(str+2, NULL, 16); + + params->keys[i].type = PIPELINE_FIREWALL_IPV4_5TUPLE; + + i++; + } + + for (i = 0; i < params->n_keys; i++) { + if (app_pipeline_firewall_key_check_and_normalize(¶ms->keys[i]) != 0) { + status = -1; + goto end; + } + } + +end: + fclose(f); + return status; +} + +int +app_pipeline_firewall_add_rule(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_firewall_key *key, + uint32_t priority, + uint32_t port_id) +{ + struct app_pipeline_firewall *p; + struct app_pipeline_firewall_rule *rule; + struct pipeline_firewall_add_msg_req *req; + struct pipeline_firewall_add_msg_rsp *rsp; + int new_rule; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL) || + (key->type != PIPELINE_FIREWALL_IPV4_5TUPLE)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall); + if (p == NULL) + return -1; + + if (port_id >= p->n_ports_out) + return -1; + + if (app_pipeline_firewall_key_check_and_normalize(key) != 0) + return -1; + + /* Find existing rule or allocate new rule */ + rule = 
app_pipeline_firewall_rule_find(p, key); + new_rule = (rule == NULL); + if (rule == NULL) { + rule = rte_malloc(NULL, sizeof(*rule), RTE_CACHE_LINE_SIZE); + + if (rule == NULL) + return -1; + } + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) { + if (new_rule) + rte_free(rule); + return -1; + } + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FIREWALL_MSG_REQ_ADD; + memcpy(&req->key, key, sizeof(*key)); + req->priority = priority; + req->port_id = port_id; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) { + if (new_rule) + rte_free(rule); + return -1; + } + + /* Read response and write rule */ + if (rsp->status || + (rsp->entry_ptr == NULL) || + ((new_rule == 0) && (rsp->key_found == 0)) || + ((new_rule == 1) && (rsp->key_found == 1))) { + app_msg_free(app, rsp); + if (new_rule) + rte_free(rule); + return -1; + } + + memcpy(&rule->key, key, sizeof(*key)); + rule->priority = priority; + rule->port_id = port_id; + rule->entry_ptr = rsp->entry_ptr; + + /* Commit rule */ + if (new_rule) { + TAILQ_INSERT_TAIL(&p->rules, rule, node); + p->n_rules++; + } + + print_firewall_ipv4_rule(rule); + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_firewall_delete_rule(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_firewall_key *key) +{ + struct app_pipeline_firewall *p; + struct app_pipeline_firewall_rule *rule; + struct pipeline_firewall_del_msg_req *req; + struct pipeline_firewall_del_msg_rsp *rsp; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL) || + (key->type != PIPELINE_FIREWALL_IPV4_5TUPLE)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall); + if (p == NULL) + return -1; + + if (app_pipeline_firewall_key_check_and_normalize(key) != 0) + return -1; + + /* Find rule */ + rule = app_pipeline_firewall_rule_find(p, key); + if (rule == 
NULL) + return 0; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FIREWALL_MSG_REQ_DEL; + memcpy(&req->key, key, sizeof(*key)); + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response */ + if (rsp->status || !rsp->key_found) { + app_msg_free(app, rsp); + return -1; + } + + /* Remove rule */ + TAILQ_REMOVE(&p->rules, rule, node); + p->n_rules--; + rte_free(rule); + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_firewall_add_bulk(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_firewall_key *keys, + uint32_t n_keys, + uint32_t *priorities, + uint32_t *port_ids) +{ + struct app_pipeline_firewall *p; + struct pipeline_firewall_add_bulk_msg_req *req; + struct pipeline_firewall_add_bulk_msg_rsp *rsp; + + struct app_pipeline_firewall_rule **rules; + int *new_rules; + + int *keys_found; + void **entries_ptr; + + uint32_t i; + int status = 0; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall); + if (p == NULL) + return -1; + + rules = rte_malloc(NULL, + n_keys * sizeof(struct app_pipeline_firewall_rule *), + RTE_CACHE_LINE_SIZE); + if (rules == NULL) + return -1; + + new_rules = rte_malloc(NULL, + n_keys * sizeof(int), + RTE_CACHE_LINE_SIZE); + if (new_rules == NULL) { + rte_free(rules); + return -1; + } + + /* check data integrity and add to rule list */ + for (i = 0; i < n_keys; i++) { + if (port_ids[i] >= p->n_ports_out) { + rte_free(rules); + rte_free(new_rules); + return -1; + } + + if (app_pipeline_firewall_key_check_and_normalize(&keys[i]) != 0) { + rte_free(rules); + rte_free(new_rules); + return -1; + } + + rules[i] = app_pipeline_firewall_rule_find(p, &keys[i]); + new_rules[i] = (rules[i] == NULL); + if 
(rules[i] == NULL) { + rules[i] = rte_malloc(NULL, sizeof(*rules[i]), + RTE_CACHE_LINE_SIZE); + + if (rules[i] == NULL) { + uint32_t j; + + for (j = 0; j <= i; j++) + if (new_rules[j]) + rte_free(rules[j]); + + rte_free(rules); + rte_free(new_rules); + return -1; + } + } + } + + keys_found = rte_malloc(NULL, + n_keys * sizeof(int), + RTE_CACHE_LINE_SIZE); + if (keys_found == NULL) { + uint32_t j; + + for (j = 0; j < n_keys; j++) + if (new_rules[j]) + rte_free(rules[j]); + + rte_free(rules); + rte_free(new_rules); + return -1; + } + + entries_ptr = rte_malloc(NULL, + n_keys * sizeof(struct rte_pipeline_table_entry *), + RTE_CACHE_LINE_SIZE); + if (entries_ptr == NULL) { + uint32_t j; + + for (j = 0; j < n_keys; j++) + if (new_rules[j]) + rte_free(rules[j]); + + rte_free(rules); + rte_free(new_rules); + rte_free(keys_found); + return -1; + } + for (i = 0; i < n_keys; i++) { + entries_ptr[i] = rte_malloc(NULL, + sizeof(struct rte_pipeline_table_entry), + RTE_CACHE_LINE_SIZE); + + if (entries_ptr[i] == NULL) { + uint32_t j; + + for (j = 0; j < n_keys; j++) + if (new_rules[j]) + rte_free(rules[j]); + + for (j = 0; j <= i; j++) + rte_free(entries_ptr[j]); + + rte_free(rules); + rte_free(new_rules); + rte_free(keys_found); + rte_free(entries_ptr); + return -1; + } + } + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) { + uint32_t j; + + for (j = 0; j < n_keys; j++) + if (new_rules[j]) + rte_free(rules[j]); + + for (j = 0; j < n_keys; j++) + rte_free(entries_ptr[j]); + + rte_free(rules); + rte_free(new_rules); + rte_free(keys_found); + rte_free(entries_ptr); + return -1; + } + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FIREWALL_MSG_REQ_ADD_BULK; + + req->keys = keys; + req->n_keys = n_keys; + req->port_ids = port_ids; + req->priorities = priorities; + req->keys_found = keys_found; + req->entries_ptr = entries_ptr; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, 
MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) { + uint32_t j; + + for (j = 0; j < n_keys; j++) + if (new_rules[j]) + rte_free(rules[j]); + + for (j = 0; j < n_keys; j++) + rte_free(entries_ptr[j]); + + rte_free(rules); + rte_free(new_rules); + rte_free(keys_found); + rte_free(entries_ptr); + return -1; + } + + if (rsp->status) { + for (i = 0; i < n_keys; i++) + if (new_rules[i]) + rte_free(rules[i]); + + for (i = 0; i < n_keys; i++) + rte_free(entries_ptr[i]); + + status = -1; + goto cleanup; + } + + for (i = 0; i < n_keys; i++) { + if (entries_ptr[i] == NULL || + ((new_rules[i] == 0) && (keys_found[i] == 0)) || + ((new_rules[i] == 1) && (keys_found[i] == 1))) { + for (i = 0; i < n_keys; i++) + if (new_rules[i]) + rte_free(rules[i]); + + for (i = 0; i < n_keys; i++) + rte_free(entries_ptr[i]); + + status = -1; + goto cleanup; + } + } + + for (i = 0; i < n_keys; i++) { + memcpy(&rules[i]->key, &keys[i], sizeof(keys[i])); + rules[i]->priority = priorities[i]; + rules[i]->port_id = port_ids[i]; + rules[i]->entry_ptr = entries_ptr[i]; + + /* Commit rule */ + if (new_rules[i]) { + TAILQ_INSERT_TAIL(&p->rules, rules[i], node); + p->n_rules++; + } + + print_firewall_ipv4_rule(rules[i]); + } + +cleanup: + app_msg_free(app, rsp); + rte_free(rules); + rte_free(new_rules); + rte_free(keys_found); + rte_free(entries_ptr); + + return status; +} + +int +app_pipeline_firewall_delete_bulk(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_firewall_key *keys, + uint32_t n_keys) +{ + struct app_pipeline_firewall *p; + struct pipeline_firewall_del_bulk_msg_req *req; + struct pipeline_firewall_del_bulk_msg_rsp *rsp; + + struct app_pipeline_firewall_rule **rules; + int *keys_found; + + uint32_t i; + int status = 0; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall); + if (p == NULL) + return -1; + + rules = rte_malloc(NULL, + n_keys * sizeof(struct app_pipeline_firewall_rule *), + 
RTE_CACHE_LINE_SIZE); + if (rules == NULL) + return -1; + + for (i = 0; i < n_keys; i++) { + if (app_pipeline_firewall_key_check_and_normalize(&keys[i]) != 0) { + return -1; + } + + rules[i] = app_pipeline_firewall_rule_find(p, &keys[i]); + } + + keys_found = rte_malloc(NULL, + n_keys * sizeof(int), + RTE_CACHE_LINE_SIZE); + if (keys_found == NULL) { + rte_free(rules); + return -1; + } + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) { + rte_free(rules); + rte_free(keys_found); + return -1; + } + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FIREWALL_MSG_REQ_DEL_BULK; + + req->keys = keys; + req->n_keys = n_keys; + req->keys_found = keys_found; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) { + rte_free(rules); + rte_free(keys_found); + return -1; + } + + if (rsp->status) { + status = -1; + goto cleanup; + } + + for (i = 0; i < n_keys; i++) { + if (keys_found[i] == 0) { + status = -1; + goto cleanup; + } + } + + for (i = 0; i < n_keys; i++) { + TAILQ_REMOVE(&p->rules, rules[i], node); + p->n_rules--; + rte_free(rules[i]); + } + +cleanup: + app_msg_free(app, rsp); + rte_free(rules); + rte_free(keys_found); + + return status; +} + +int +app_pipeline_firewall_add_default_rule(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id) +{ + struct app_pipeline_firewall *p; + struct pipeline_firewall_add_default_msg_req *req; + struct pipeline_firewall_add_default_msg_rsp *rsp; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall); + if (p == NULL) + return -1; + + if (port_id >= p->n_ports_out) + return -1; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FIREWALL_MSG_REQ_ADD_DEFAULT; + req->port_id = port_id; + + /* Send request and 
wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response and write rule */ + if (rsp->status || (rsp->entry_ptr == NULL)) { + app_msg_free(app, rsp); + return -1; + } + + p->default_rule_port_id = port_id; + p->default_rule_entry_ptr = rsp->entry_ptr; + + /* Commit rule */ + p->default_rule_present = 1; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_firewall_delete_default_rule(struct app_params *app, + uint32_t pipeline_id) +{ + struct app_pipeline_firewall *p; + struct pipeline_firewall_del_default_msg_req *req; + struct pipeline_firewall_del_default_msg_rsp *rsp; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall); + if (p == NULL) + return -1; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FIREWALL_MSG_REQ_DEL_DEFAULT; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response and write rule */ + if (rsp->status) { + app_msg_free(app, rsp); + return -1; + } + + /* Commit rule */ + p->default_rule_present = 0; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +/* + * p firewall add ipv4 + */ + +struct cmd_firewall_add_ipv4_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t firewall_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t ipv4_string; + int32_t priority; + cmdline_ipaddr_t src_ip; + uint32_t src_ip_mask; + cmdline_ipaddr_t dst_ip; + uint32_t dst_ip_mask; + uint16_t src_port_from; + uint16_t src_port_to; + uint16_t dst_port_from; + uint16_t dst_port_to; + uint8_t proto; + uint8_t proto_mask; + uint8_t port_id; +}; + +static void 
+cmd_firewall_add_ipv4_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_firewall_add_ipv4_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_firewall_key key; + int status; + + key.type = PIPELINE_FIREWALL_IPV4_5TUPLE; + key.key.ipv4_5tuple.src_ip = rte_bswap32( + (uint32_t) params->src_ip.addr.ipv4.s_addr); + key.key.ipv4_5tuple.src_ip_mask = params->src_ip_mask; + key.key.ipv4_5tuple.dst_ip = rte_bswap32( + (uint32_t) params->dst_ip.addr.ipv4.s_addr); + key.key.ipv4_5tuple.dst_ip_mask = params->dst_ip_mask; + key.key.ipv4_5tuple.src_port_from = params->src_port_from; + key.key.ipv4_5tuple.src_port_to = params->src_port_to; + key.key.ipv4_5tuple.dst_port_from = params->dst_port_from; + key.key.ipv4_5tuple.dst_port_to = params->dst_port_to; + key.key.ipv4_5tuple.proto = params->proto; + key.key.ipv4_5tuple.proto_mask = params->proto_mask; + + status = app_pipeline_firewall_add_rule(app, + params->pipeline_id, + &key, + params->priority, + params->port_id); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +cmdline_parse_token_string_t cmd_firewall_add_ipv4_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_ipv4_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_firewall_add_ipv4_firewall_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_ipv4_result, + firewall_string, "firewall"); + +cmdline_parse_token_string_t cmd_firewall_add_ipv4_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_ipv4_result, + add_string, "add"); + +cmdline_parse_token_string_t cmd_firewall_add_ipv4_ipv4_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_ipv4_result, + ipv4_string, "ipv4"); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_priority = + TOKEN_NUM_INITIALIZER(struct 
cmd_firewall_add_ipv4_result, priority, + INT32); + +cmdline_parse_token_ipaddr_t cmd_firewall_add_ipv4_src_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_firewall_add_ipv4_result, src_ip); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_src_ip_mask = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, src_ip_mask, + UINT32); + +cmdline_parse_token_ipaddr_t cmd_firewall_add_ipv4_dst_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_firewall_add_ipv4_result, dst_ip); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_dst_ip_mask = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, dst_ip_mask, + UINT32); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_src_port_from = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, + src_port_from, UINT16); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_src_port_to = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, + src_port_to, UINT16); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_dst_port_from = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, + dst_port_from, UINT16); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_dst_port_to = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, + dst_port_to, UINT16); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_proto = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, + proto, UINT8); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_proto_mask = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, + proto_mask, UINT8); + +cmdline_parse_token_num_t cmd_firewall_add_ipv4_port_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, + port_id, UINT8); + +cmdline_parse_inst_t cmd_firewall_add_ipv4 = { + .f = cmd_firewall_add_ipv4_parsed, + .data = NULL, + .help_str = "Firewall rule add", + .tokens = { + (void *) &cmd_firewall_add_ipv4_p_string, + (void *) &cmd_firewall_add_ipv4_pipeline_id, + (void *) &cmd_firewall_add_ipv4_firewall_string, + (void *) &cmd_firewall_add_ipv4_add_string, + (void 
*) &cmd_firewall_add_ipv4_ipv4_string, + (void *) &cmd_firewall_add_ipv4_priority, + (void *) &cmd_firewall_add_ipv4_src_ip, + (void *) &cmd_firewall_add_ipv4_src_ip_mask, + (void *) &cmd_firewall_add_ipv4_dst_ip, + (void *) &cmd_firewall_add_ipv4_dst_ip_mask, + (void *) &cmd_firewall_add_ipv4_src_port_from, + (void *) &cmd_firewall_add_ipv4_src_port_to, + (void *) &cmd_firewall_add_ipv4_dst_port_from, + (void *) &cmd_firewall_add_ipv4_dst_port_to, + (void *) &cmd_firewall_add_ipv4_proto, + (void *) &cmd_firewall_add_ipv4_proto_mask, + (void *) &cmd_firewall_add_ipv4_port_id, + NULL, + }, +}; + +/* + * p firewall del ipv4 + */ + +struct cmd_firewall_del_ipv4_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t firewall_string; + cmdline_fixed_string_t del_string; + cmdline_fixed_string_t ipv4_string; + cmdline_ipaddr_t src_ip; + uint32_t src_ip_mask; + cmdline_ipaddr_t dst_ip; + uint32_t dst_ip_mask; + uint16_t src_port_from; + uint16_t src_port_to; + uint16_t dst_port_from; + uint16_t dst_port_to; + uint8_t proto; + uint8_t proto_mask; +}; + +static void +cmd_firewall_del_ipv4_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_firewall_del_ipv4_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_firewall_key key; + int status; + + key.type = PIPELINE_FIREWALL_IPV4_5TUPLE; + key.key.ipv4_5tuple.src_ip = rte_bswap32( + (uint32_t) params->src_ip.addr.ipv4.s_addr); + key.key.ipv4_5tuple.src_ip_mask = params->src_ip_mask; + key.key.ipv4_5tuple.dst_ip = rte_bswap32( + (uint32_t) params->dst_ip.addr.ipv4.s_addr); + key.key.ipv4_5tuple.dst_ip_mask = params->dst_ip_mask; + key.key.ipv4_5tuple.src_port_from = params->src_port_from; + key.key.ipv4_5tuple.src_port_to = params->src_port_to; + key.key.ipv4_5tuple.dst_port_from = params->dst_port_from; + key.key.ipv4_5tuple.dst_port_to = params->dst_port_to; + key.key.ipv4_5tuple.proto = params->proto; + 
key.key.ipv4_5tuple.proto_mask = params->proto_mask; + + status = app_pipeline_firewall_delete_rule(app, + params->pipeline_id, + &key); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +cmdline_parse_token_string_t cmd_firewall_del_ipv4_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_ipv4_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_firewall_del_ipv4_firewall_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_ipv4_result, + firewall_string, "firewall"); + +cmdline_parse_token_string_t cmd_firewall_del_ipv4_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_ipv4_result, + del_string, "del"); + +cmdline_parse_token_string_t cmd_firewall_del_ipv4_ipv4_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_ipv4_result, + ipv4_string, "ipv4"); + +cmdline_parse_token_ipaddr_t cmd_firewall_del_ipv4_src_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_firewall_del_ipv4_result, src_ip); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_src_ip_mask = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, src_ip_mask, + UINT32); + +cmdline_parse_token_ipaddr_t cmd_firewall_del_ipv4_dst_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_firewall_del_ipv4_result, dst_ip); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_dst_ip_mask = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, dst_ip_mask, + UINT32); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_src_port_from = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, + src_port_from, UINT16); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_src_port_to = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, src_port_to, + UINT16); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_dst_port_from = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, + dst_port_from, 
UINT16); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_dst_port_to = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, + dst_port_to, UINT16); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_proto = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, + proto, UINT8); + +cmdline_parse_token_num_t cmd_firewall_del_ipv4_proto_mask = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, proto_mask, + UINT8); + +cmdline_parse_inst_t cmd_firewall_del_ipv4 = { + .f = cmd_firewall_del_ipv4_parsed, + .data = NULL, + .help_str = "Firewall rule delete", + .tokens = { + (void *) &cmd_firewall_del_ipv4_p_string, + (void *) &cmd_firewall_del_ipv4_pipeline_id, + (void *) &cmd_firewall_del_ipv4_firewall_string, + (void *) &cmd_firewall_del_ipv4_del_string, + (void *) &cmd_firewall_del_ipv4_ipv4_string, + (void *) &cmd_firewall_del_ipv4_src_ip, + (void *) &cmd_firewall_del_ipv4_src_ip_mask, + (void *) &cmd_firewall_del_ipv4_dst_ip, + (void *) &cmd_firewall_del_ipv4_dst_ip_mask, + (void *) &cmd_firewall_del_ipv4_src_port_from, + (void *) &cmd_firewall_del_ipv4_src_port_to, + (void *) &cmd_firewall_del_ipv4_dst_port_from, + (void *) &cmd_firewall_del_ipv4_dst_port_to, + (void *) &cmd_firewall_del_ipv4_proto, + (void *) &cmd_firewall_del_ipv4_proto_mask, + NULL, + }, +}; + +/* + * p firewall add bulk + */ + +struct cmd_firewall_add_bulk_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t firewall_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t bulk_string; + cmdline_fixed_string_t file_path; +}; + +static void +cmd_firewall_add_bulk_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_firewall_add_bulk_result *params = parsed_result; + struct app_params *app = data; + int status; + + struct app_pipeline_add_bulk_params add_bulk_params; + + status = app_pipeline_add_bulk_parse_file(params->file_path, &add_bulk_params); + if (status != 
0) { + printf("Command failed\n"); + goto end; + } + + status = app_pipeline_firewall_add_bulk(app, params->pipeline_id, add_bulk_params.keys, + add_bulk_params.n_keys, add_bulk_params.priorities, add_bulk_params.port_ids); + if (status != 0) { + printf("Command failed\n"); + goto end; + } + +end: + rte_free(add_bulk_params.keys); + rte_free(add_bulk_params.priorities); + rte_free(add_bulk_params.port_ids); +} + +cmdline_parse_token_string_t cmd_firewall_add_bulk_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_firewall_add_bulk_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_bulk_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_firewall_add_bulk_firewall_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result, + firewall_string, "firewall"); + +cmdline_parse_token_string_t cmd_firewall_add_bulk_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result, + add_string, "add"); + +cmdline_parse_token_string_t cmd_firewall_add_bulk_bulk_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result, + bulk_string, "bulk"); + +cmdline_parse_token_string_t cmd_firewall_add_bulk_file_path_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result, + file_path, NULL); + +cmdline_parse_inst_t cmd_firewall_add_bulk = { + .f = cmd_firewall_add_bulk_parsed, + .data = NULL, + .help_str = "Firewall rule add bulk", + .tokens = { + (void *) &cmd_firewall_add_bulk_p_string, + (void *) &cmd_firewall_add_bulk_pipeline_id, + (void *) &cmd_firewall_add_bulk_firewall_string, + (void *) &cmd_firewall_add_bulk_add_string, + (void *) &cmd_firewall_add_bulk_bulk_string, + (void *) &cmd_firewall_add_bulk_file_path_string, + NULL, + }, +}; + +/* + * p firewall del bulk + */ + +struct cmd_firewall_del_bulk_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t firewall_string; + 
cmdline_fixed_string_t del_string; + cmdline_fixed_string_t bulk_string; + cmdline_fixed_string_t file_path; +}; + +static void +cmd_firewall_del_bulk_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_firewall_del_bulk_result *params = parsed_result; + struct app_params *app = data; + int status; + + struct app_pipeline_del_bulk_params del_bulk_params; + + status = app_pipeline_del_bulk_parse_file(params->file_path, &del_bulk_params); + if (status != 0) { + printf("Command failed\n"); + goto end; + } + + status = app_pipeline_firewall_delete_bulk(app, params->pipeline_id, + del_bulk_params.keys, del_bulk_params.n_keys); + if (status != 0) { + printf("Command failed\n"); + goto end; + } + +end: + rte_free(del_bulk_params.keys); +} + +cmdline_parse_token_string_t cmd_firewall_del_bulk_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_firewall_del_bulk_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_bulk_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_firewall_del_bulk_firewall_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result, + firewall_string, "firewall"); + +cmdline_parse_token_string_t cmd_firewall_del_bulk_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result, + del_string, "del"); + +cmdline_parse_token_string_t cmd_firewall_del_bulk_bulk_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result, + bulk_string, "bulk"); + +cmdline_parse_token_string_t cmd_firewall_del_bulk_file_path_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result, + file_path, NULL); + +cmdline_parse_inst_t cmd_firewall_del_bulk = { + .f = cmd_firewall_del_bulk_parsed, + .data = NULL, + .help_str = "Firewall rule del bulk", + .tokens = { + (void *) &cmd_firewall_del_bulk_p_string, + (void *) &cmd_firewall_del_bulk_pipeline_id, + (void *) 
&cmd_firewall_del_bulk_firewall_string, + (void *) &cmd_firewall_del_bulk_add_string, + (void *) &cmd_firewall_del_bulk_bulk_string, + (void *) &cmd_firewall_del_bulk_file_path_string, + NULL, + }, +}; + +/* + * p firewall add default + */ +struct cmd_firewall_add_default_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t firewall_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t default_string; + uint8_t port_id; +}; + +static void +cmd_firewall_add_default_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_firewall_add_default_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_firewall_add_default_rule(app, + params->pipeline_id, + params->port_id); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +cmdline_parse_token_string_t cmd_firewall_add_default_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_default_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_firewall_add_default_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_default_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_firewall_add_default_firewall_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_default_result, + firewall_string, "firewall"); + +cmdline_parse_token_string_t cmd_firewall_add_default_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_default_result, + add_string, "add"); + +cmdline_parse_token_string_t cmd_firewall_add_default_default_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_default_result, + default_string, "default"); + +cmdline_parse_token_num_t cmd_firewall_add_default_port_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_default_result, port_id, + UINT8); + +cmdline_parse_inst_t cmd_firewall_add_default = { + .f = cmd_firewall_add_default_parsed, + .data = NULL, + .help_str = "Firewall 
default rule add", + .tokens = { + (void *) &cmd_firewall_add_default_p_string, + (void *) &cmd_firewall_add_default_pipeline_id, + (void *) &cmd_firewall_add_default_firewall_string, + (void *) &cmd_firewall_add_default_add_string, + (void *) &cmd_firewall_add_default_default_string, + (void *) &cmd_firewall_add_default_port_id, + NULL, + }, +}; + +/* + * p firewall del default + */ +struct cmd_firewall_del_default_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t firewall_string; + cmdline_fixed_string_t del_string; + cmdline_fixed_string_t default_string; +}; + +static void +cmd_firewall_del_default_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_firewall_del_default_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_firewall_delete_default_rule(app, + params->pipeline_id); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +cmdline_parse_token_string_t cmd_firewall_del_default_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_default_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_firewall_del_default_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_default_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_firewall_del_default_firewall_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_default_result, + firewall_string, "firewall"); + +cmdline_parse_token_string_t cmd_firewall_del_default_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_default_result, + del_string, "del"); + +cmdline_parse_token_string_t cmd_firewall_del_default_default_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_default_result, + default_string, "default"); + +cmdline_parse_inst_t cmd_firewall_del_default = { + .f = cmd_firewall_del_default_parsed, + .data = NULL, + .help_str = "Firewall default rule delete", + .tokens = { + 
(void *) &cmd_firewall_del_default_p_string, + (void *) &cmd_firewall_del_default_pipeline_id, + (void *) &cmd_firewall_del_default_firewall_string, + (void *) &cmd_firewall_del_default_del_string, + (void *) &cmd_firewall_del_default_default_string, + NULL, + }, +}; + +/* + * p firewall ls + */ + +struct cmd_firewall_ls_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t firewall_string; + cmdline_fixed_string_t ls_string; +}; + +static void +cmd_firewall_ls_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_firewall_ls_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_firewall_ls(app, params->pipeline_id); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +cmdline_parse_token_string_t cmd_firewall_ls_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_ls_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_firewall_ls_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_firewall_ls_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_firewall_ls_firewall_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_ls_result, + firewall_string, "firewall"); + +cmdline_parse_token_string_t cmd_firewall_ls_ls_string = + TOKEN_STRING_INITIALIZER(struct cmd_firewall_ls_result, ls_string, + "ls"); + +cmdline_parse_inst_t cmd_firewall_ls = { + .f = cmd_firewall_ls_parsed, + .data = NULL, + .help_str = "Firewall rule list", + .tokens = { + (void *) &cmd_firewall_ls_p_string, + (void *) &cmd_firewall_ls_pipeline_id, + (void *) &cmd_firewall_ls_firewall_string, + (void *) &cmd_firewall_ls_ls_string, + NULL, + }, +}; + +static cmdline_parse_ctx_t pipeline_cmds[] = { + (cmdline_parse_inst_t *) &cmd_firewall_add_ipv4, + (cmdline_parse_inst_t *) &cmd_firewall_del_ipv4, + (cmdline_parse_inst_t *) &cmd_firewall_add_bulk, + (cmdline_parse_inst_t *) &cmd_firewall_del_bulk, + 
(cmdline_parse_inst_t *) &cmd_firewall_add_default, + (cmdline_parse_inst_t *) &cmd_firewall_del_default, + (cmdline_parse_inst_t *) &cmd_firewall_ls, + NULL, +}; + +static struct pipeline_fe_ops pipeline_firewall_fe_ops = { + .f_init = app_pipeline_firewall_init, + .f_free = app_pipeline_firewall_free, + .cmds = pipeline_cmds, +}; + +struct pipeline_type pipeline_firewall = { + .name = "FIREWALL", + .be_ops = &pipeline_firewall_be_ops, + .fe_ops = &pipeline_firewall_fe_ops, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_firewall.h b/examples/ip_pipeline/pipeline/pipeline_firewall.h new file mode 100644 index 00000000..ccc4e64b --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_firewall.h @@ -0,0 +1,77 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_PIPELINE_FIREWALL_H__ +#define __INCLUDE_PIPELINE_FIREWALL_H__ + +#include "pipeline.h" +#include "pipeline_firewall_be.h" + +int +app_pipeline_firewall_add_rule(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_firewall_key *key, + uint32_t priority, + uint32_t port_id); + +int +app_pipeline_firewall_delete_rule(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_firewall_key *key); + +int +app_pipeline_firewall_add_bulk(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_firewall_key *keys, + uint32_t n_keys, + uint32_t *priorities, + uint32_t *port_ids); + +int +app_pipeline_firewall_delete_bulk(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_firewall_key *keys, + uint32_t n_keys); + +int +app_pipeline_firewall_add_default_rule(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id); + +int +app_pipeline_firewall_delete_default_rule(struct app_params *app, + uint32_t pipeline_id); + +extern struct pipeline_type pipeline_firewall; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_firewall_be.c b/examples/ip_pipeline/pipeline/pipeline_firewall_be.c new file mode 100644 index 00000000..e7a8a4c5 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_firewall_be.c @@ -0,0 +1,907 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <string.h> + +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_ether.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_byteorder.h> +#include <rte_table_acl.h> + +#include "pipeline_firewall_be.h" +#include "parser.h" + +struct pipeline_firewall { + struct pipeline p; + pipeline_msg_req_handler custom_handlers[PIPELINE_FIREWALL_MSG_REQS]; + + uint32_t n_rules; + uint32_t n_rule_fields; + struct rte_acl_field_def *field_format; + uint32_t field_format_size; +} __rte_cache_aligned; + +static void * +pipeline_firewall_msg_req_custom_handler(struct pipeline *p, void *msg); + +static pipeline_msg_req_handler handlers[] = { + [PIPELINE_MSG_REQ_PING] = + pipeline_msg_req_ping_handler, + [PIPELINE_MSG_REQ_STATS_PORT_IN] = + pipeline_msg_req_stats_port_in_handler, + [PIPELINE_MSG_REQ_STATS_PORT_OUT] = + pipeline_msg_req_stats_port_out_handler, + [PIPELINE_MSG_REQ_STATS_TABLE] = + pipeline_msg_req_stats_table_handler, + [PIPELINE_MSG_REQ_PORT_IN_ENABLE] = + pipeline_msg_req_port_in_enable_handler, + [PIPELINE_MSG_REQ_PORT_IN_DISABLE] = + pipeline_msg_req_port_in_disable_handler, + [PIPELINE_MSG_REQ_CUSTOM] = + pipeline_firewall_msg_req_custom_handler, +}; + +static void * +pipeline_firewall_msg_req_add_handler(struct pipeline *p, void *msg); + +static void * +pipeline_firewall_msg_req_del_handler(struct pipeline *p, void *msg); + +static void * +pipeline_firewall_msg_req_add_bulk_handler(struct pipeline *p, void *msg); + +static void * +pipeline_firewall_msg_req_del_bulk_handler(struct pipeline *p, void *msg); + +static void * +pipeline_firewall_msg_req_add_default_handler(struct pipeline *p, void *msg); + +static void * +pipeline_firewall_msg_req_del_default_handler(struct pipeline *p, void *msg); + +static pipeline_msg_req_handler custom_handlers[] = { + [PIPELINE_FIREWALL_MSG_REQ_ADD] = + pipeline_firewall_msg_req_add_handler, + [PIPELINE_FIREWALL_MSG_REQ_DEL] = + pipeline_firewall_msg_req_del_handler, + 
[PIPELINE_FIREWALL_MSG_REQ_ADD_BULK] = + pipeline_firewall_msg_req_add_bulk_handler, + [PIPELINE_FIREWALL_MSG_REQ_DEL_BULK] = + pipeline_firewall_msg_req_del_bulk_handler, + [PIPELINE_FIREWALL_MSG_REQ_ADD_DEFAULT] = + pipeline_firewall_msg_req_add_default_handler, + [PIPELINE_FIREWALL_MSG_REQ_DEL_DEFAULT] = + pipeline_firewall_msg_req_del_default_handler, +}; + +/* + * Firewall table + */ +struct firewall_table_entry { + struct rte_pipeline_table_entry head; +}; + +static struct rte_acl_field_def field_format_ipv4[] = { + /* Protocol */ + [0] = { + .type = RTE_ACL_FIELD_TYPE_BITMASK, + .size = sizeof(uint8_t), + .field_index = 0, + .input_index = 0, + .offset = sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, next_proto_id), + }, + + /* Source IP address (IPv4) */ + [1] = { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = 1, + .input_index = 1, + .offset = sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, src_addr), + }, + + /* Destination IP address (IPv4) */ + [2] = { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = 2, + .input_index = 2, + .offset = sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, dst_addr), + }, + + /* Source Port */ + [3] = { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = 3, + .input_index = 3, + .offset = sizeof(struct ether_hdr) + + sizeof(struct ipv4_hdr) + + offsetof(struct tcp_hdr, src_port), + }, + + /* Destination Port */ + [4] = { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = 4, + .input_index = 4, + .offset = sizeof(struct ether_hdr) + + sizeof(struct ipv4_hdr) + + offsetof(struct tcp_hdr, dst_port), + }, +}; + +#define SIZEOF_VLAN_HDR 4 + +static struct rte_acl_field_def field_format_vlan_ipv4[] = { + /* Protocol */ + [0] = { + .type = RTE_ACL_FIELD_TYPE_BITMASK, + .size = sizeof(uint8_t), + .field_index = 0, + .input_index = 0, + .offset = sizeof(struct ether_hdr) + + SIZEOF_VLAN_HDR + + 
offsetof(struct ipv4_hdr, next_proto_id), + }, + + /* Source IP address (IPv4) */ + [1] = { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = 1, + .input_index = 1, + .offset = sizeof(struct ether_hdr) + + SIZEOF_VLAN_HDR + + offsetof(struct ipv4_hdr, src_addr), + }, + + /* Destination IP address (IPv4) */ + [2] = { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = 2, + .input_index = 2, + .offset = sizeof(struct ether_hdr) + + SIZEOF_VLAN_HDR + + offsetof(struct ipv4_hdr, dst_addr), + }, + + /* Source Port */ + [3] = { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = 3, + .input_index = 3, + .offset = sizeof(struct ether_hdr) + + SIZEOF_VLAN_HDR + + sizeof(struct ipv4_hdr) + + offsetof(struct tcp_hdr, src_port), + }, + + /* Destination Port */ + [4] = { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = 4, + .input_index = 4, + .offset = sizeof(struct ether_hdr) + + SIZEOF_VLAN_HDR + + sizeof(struct ipv4_hdr) + + offsetof(struct tcp_hdr, dst_port), + }, +}; + +#define SIZEOF_QINQ_HEADER 8 + +static struct rte_acl_field_def field_format_qinq_ipv4[] = { + /* Protocol */ + [0] = { + .type = RTE_ACL_FIELD_TYPE_BITMASK, + .size = sizeof(uint8_t), + .field_index = 0, + .input_index = 0, + .offset = sizeof(struct ether_hdr) + + SIZEOF_QINQ_HEADER + + offsetof(struct ipv4_hdr, next_proto_id), + }, + + /* Source IP address (IPv4) */ + [1] = { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = 1, + .input_index = 1, + .offset = sizeof(struct ether_hdr) + + SIZEOF_QINQ_HEADER + + offsetof(struct ipv4_hdr, src_addr), + }, + + /* Destination IP address (IPv4) */ + [2] = { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = 2, + .input_index = 2, + .offset = sizeof(struct ether_hdr) + + SIZEOF_QINQ_HEADER + + offsetof(struct ipv4_hdr, dst_addr), + }, + + /* Source Port */ + [3] = { + .type = 
RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = 3, + .input_index = 3, + .offset = sizeof(struct ether_hdr) + + SIZEOF_QINQ_HEADER + + sizeof(struct ipv4_hdr) + + offsetof(struct tcp_hdr, src_port), + }, + + /* Destination Port */ + [4] = { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = 4, + .input_index = 4, + .offset = sizeof(struct ether_hdr) + + SIZEOF_QINQ_HEADER + + sizeof(struct ipv4_hdr) + + offsetof(struct tcp_hdr, dst_port), + }, +}; + +static int +pipeline_firewall_parse_args(struct pipeline_firewall *p, + struct pipeline_params *params) +{ + uint32_t n_rules_present = 0; + uint32_t pkt_type_present = 0; + uint32_t i; + + /* defaults */ + p->n_rules = 4 * 1024; + p->n_rule_fields = RTE_DIM(field_format_ipv4); + p->field_format = field_format_ipv4; + p->field_format_size = sizeof(field_format_ipv4); + + for (i = 0; i < params->n_args; i++) { + char *arg_name = params->args_name[i]; + char *arg_value = params->args_value[i]; + + if (strcmp(arg_name, "n_rules") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + n_rules_present == 0, params->name, + arg_name); + n_rules_present = 1; + + status = parser_read_uint32(&p->n_rules, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + continue; + } + + if (strcmp(arg_name, "pkt_type") == 0) { + PIPELINE_PARSE_ERR_DUPLICATE( + pkt_type_present == 0, params->name, + arg_name); + pkt_type_present = 1; + + /* ipv4 */ + if (strcmp(arg_value, "ipv4") == 0) { + p->n_rule_fields = RTE_DIM(field_format_ipv4); + p->field_format = field_format_ipv4; + p->field_format_size = + sizeof(field_format_ipv4); + continue; + } + + /* vlan_ipv4 */ + if (strcmp(arg_value, "vlan_ipv4") == 0) { + p->n_rule_fields = + RTE_DIM(field_format_vlan_ipv4); + p->field_format = field_format_vlan_ipv4; + p->field_format_size = + 
sizeof(field_format_vlan_ipv4); + continue; + } + + /* qinq_ipv4 */ + if (strcmp(arg_value, "qinq_ipv4") == 0) { + p->n_rule_fields = + RTE_DIM(field_format_qinq_ipv4); + p->field_format = field_format_qinq_ipv4; + p->field_format_size = + sizeof(field_format_qinq_ipv4); + continue; + } + + /* other */ + PIPELINE_PARSE_ERR_INV_VAL(0, params->name, + arg_name, arg_value); + } + + /* other */ + PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name); + } + + return 0; +} + +static void * +pipeline_firewall_init(struct pipeline_params *params, + __rte_unused void *arg) +{ + struct pipeline *p; + struct pipeline_firewall *p_fw; + uint32_t size, i; + + /* Check input arguments */ + if ((params == NULL) || + (params->n_ports_in == 0) || + (params->n_ports_out == 0)) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_firewall)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + p_fw = (struct pipeline_firewall *) p; + if (p == NULL) + return NULL; + + strcpy(p->name, params->name); + p->log_level = params->log_level; + + PLOG(p, HIGH, "Firewall"); + + /* Parse arguments */ + if (pipeline_firewall_parse_args(p_fw, params)) + return NULL; + + /* Pipeline */ + { + struct rte_pipeline_params pipeline_params = { + .name = params->name, + .socket_id = params->socket_id, + .offset_port_id = 0, + }; + + p->p = rte_pipeline_create(&pipeline_params); + if (p->p == NULL) { + rte_free(p); + return NULL; + } + } + + /* Input ports */ + p->n_ports_in = params->n_ports_in; + for (i = 0; i < p->n_ports_in; i++) { + struct rte_pipeline_port_in_params port_params = { + .ops = pipeline_port_in_params_get_ops( + ¶ms->port_in[i]), + .arg_create = pipeline_port_in_params_convert( + ¶ms->port_in[i]), + .f_action = NULL, + .arg_ah = NULL, + .burst_size = params->port_in[i].burst_size, + }; + + int status = rte_pipeline_port_in_create(p->p, + &port_params, + &p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return 
NULL; + } + } + + /* Output ports */ + p->n_ports_out = params->n_ports_out; + for (i = 0; i < p->n_ports_out; i++) { + struct rte_pipeline_port_out_params port_params = { + .ops = pipeline_port_out_params_get_ops( + ¶ms->port_out[i]), + .arg_create = pipeline_port_out_params_convert( + ¶ms->port_out[i]), + .f_action = NULL, + .arg_ah = NULL, + }; + + int status = rte_pipeline_port_out_create(p->p, + &port_params, + &p->port_out_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Tables */ + p->n_tables = 1; + { + struct rte_table_acl_params table_acl_params = { + .name = params->name, + .n_rules = p_fw->n_rules, + .n_rule_fields = p_fw->n_rule_fields, + }; + + struct rte_pipeline_table_params table_params = { + .ops = &rte_table_acl_ops, + .arg_create = &table_acl_params, + .f_action_hit = NULL, + .f_action_miss = NULL, + .arg_ah = NULL, + .action_data_size = + sizeof(struct firewall_table_entry) - + sizeof(struct rte_pipeline_table_entry), + }; + + int status; + + memcpy(table_acl_params.field_format, + p_fw->field_format, + p_fw->field_format_size); + + status = rte_pipeline_table_create(p->p, + &table_params, + &p->table_id[0]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Connecting input ports to tables */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_connect_to_table(p->p, + p->port_in_id[i], + p->table_id[0]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Enable input ports */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_enable(p->p, + p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Check pipeline consistency */ + if (rte_pipeline_check(p->p) < 0) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + + /* Message queues */ + p->n_msgq = params->n_msgq; + for (i = 0; i < p->n_msgq; i++) + 
p->msgq_in[i] = params->msgq_in[i]; + for (i = 0; i < p->n_msgq; i++) + p->msgq_out[i] = params->msgq_out[i]; + + /* Message handlers */ + memcpy(p->handlers, handlers, sizeof(p->handlers)); + memcpy(p_fw->custom_handlers, + custom_handlers, + sizeof(p_fw->custom_handlers)); + + return p; +} + +static int +pipeline_firewall_free(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if (p == NULL) + return -1; + + /* Free resources */ + rte_pipeline_free(p->p); + rte_free(p); + return 0; +} + +static int +pipeline_firewall_track(void *pipeline, + __rte_unused uint32_t port_in, + uint32_t *port_out) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if ((p == NULL) || + (port_in >= p->n_ports_in) || + (port_out == NULL)) + return -1; + + if (p->n_ports_in == 1) { + *port_out = 0; + return 0; + } + + return -1; +} + +static int +pipeline_firewall_timer(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + pipeline_msg_req_handle(p); + rte_pipeline_flush(p->p); + + return 0; +} + +void * +pipeline_firewall_msg_req_custom_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_firewall *p_fw = (struct pipeline_firewall *) p; + struct pipeline_custom_msg_req *req = msg; + pipeline_msg_req_handler f_handle; + + f_handle = (req->subtype < PIPELINE_FIREWALL_MSG_REQS) ? 
+ p_fw->custom_handlers[req->subtype] : + pipeline_msg_req_invalid_handler; + + if (f_handle == NULL) + f_handle = pipeline_msg_req_invalid_handler; + + return f_handle(p, req); +} + +void * +pipeline_firewall_msg_req_add_handler(struct pipeline *p, void *msg) +{ + struct pipeline_firewall_add_msg_req *req = msg; + struct pipeline_firewall_add_msg_rsp *rsp = msg; + + struct rte_table_acl_rule_add_params params; + struct firewall_table_entry entry = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[req->port_id]}, + }, + }; + + memset(¶ms, 0, sizeof(params)); + + switch (req->key.type) { + case PIPELINE_FIREWALL_IPV4_5TUPLE: + params.priority = req->priority; + params.field_value[0].value.u8 = + req->key.key.ipv4_5tuple.proto; + params.field_value[0].mask_range.u8 = + req->key.key.ipv4_5tuple.proto_mask; + params.field_value[1].value.u32 = + req->key.key.ipv4_5tuple.src_ip; + params.field_value[1].mask_range.u32 = + req->key.key.ipv4_5tuple.src_ip_mask; + params.field_value[2].value.u32 = + req->key.key.ipv4_5tuple.dst_ip; + params.field_value[2].mask_range.u32 = + req->key.key.ipv4_5tuple.dst_ip_mask; + params.field_value[3].value.u16 = + req->key.key.ipv4_5tuple.src_port_from; + params.field_value[3].mask_range.u16 = + req->key.key.ipv4_5tuple.src_port_to; + params.field_value[4].value.u16 = + req->key.key.ipv4_5tuple.dst_port_from; + params.field_value[4].mask_range.u16 = + req->key.key.ipv4_5tuple.dst_port_to; + break; + + default: + rsp->status = -1; /* Error */ + return rsp; + } + + rsp->status = rte_pipeline_table_entry_add(p->p, + p->table_id[0], + ¶ms, + (struct rte_pipeline_table_entry *) &entry, + &rsp->key_found, + (struct rte_pipeline_table_entry **) &rsp->entry_ptr); + + return rsp; +} + +void * +pipeline_firewall_msg_req_del_handler(struct pipeline *p, void *msg) +{ + struct pipeline_firewall_del_msg_req *req = msg; + struct pipeline_firewall_del_msg_rsp *rsp = msg; + + struct rte_table_acl_rule_delete_params params; + + 
memset(¶ms, 0, sizeof(params)); + + switch (req->key.type) { + case PIPELINE_FIREWALL_IPV4_5TUPLE: + params.field_value[0].value.u8 = + req->key.key.ipv4_5tuple.proto; + params.field_value[0].mask_range.u8 = + req->key.key.ipv4_5tuple.proto_mask; + params.field_value[1].value.u32 = + req->key.key.ipv4_5tuple.src_ip; + params.field_value[1].mask_range.u32 = + req->key.key.ipv4_5tuple.src_ip_mask; + params.field_value[2].value.u32 = + req->key.key.ipv4_5tuple.dst_ip; + params.field_value[2].mask_range.u32 = + req->key.key.ipv4_5tuple.dst_ip_mask; + params.field_value[3].value.u16 = + req->key.key.ipv4_5tuple.src_port_from; + params.field_value[3].mask_range.u16 = + req->key.key.ipv4_5tuple.src_port_to; + params.field_value[4].value.u16 = + req->key.key.ipv4_5tuple.dst_port_from; + params.field_value[4].mask_range.u16 = + req->key.key.ipv4_5tuple.dst_port_to; + break; + + default: + rsp->status = -1; /* Error */ + return rsp; + } + + rsp->status = rte_pipeline_table_entry_delete(p->p, + p->table_id[0], + ¶ms, + &rsp->key_found, + NULL); + + return rsp; +} + +static void * +pipeline_firewall_msg_req_add_bulk_handler(struct pipeline *p, void *msg) +{ + struct pipeline_firewall_add_bulk_msg_req *req = msg; + struct pipeline_firewall_add_bulk_msg_rsp *rsp = msg; + + struct rte_table_acl_rule_add_params *params[req->n_keys]; + struct firewall_table_entry *entries[req->n_keys]; + + uint32_t i, n_keys; + + n_keys = req->n_keys; + + for (i = 0; i < n_keys; i++) { + entries[i] = rte_malloc(NULL, + sizeof(struct firewall_table_entry), + RTE_CACHE_LINE_SIZE); + if (entries[i] == NULL) { + rsp->status = -1; + return rsp; + } + + params[i] = rte_malloc(NULL, + sizeof(struct rte_table_acl_rule_add_params), + RTE_CACHE_LINE_SIZE); + if (params[i] == NULL) { + rsp->status = -1; + return rsp; + } + + entries[i]->head.action = RTE_PIPELINE_ACTION_PORT; + entries[i]->head.port_id = p->port_out_id[req->port_ids[i]]; + + switch (req->keys[i].type) { + case PIPELINE_FIREWALL_IPV4_5TUPLE: + 
params[i]->priority = req->priorities[i]; + params[i]->field_value[0].value.u8 = + req->keys[i].key.ipv4_5tuple.proto; + params[i]->field_value[0].mask_range.u8 = + req->keys[i].key.ipv4_5tuple.proto_mask; + params[i]->field_value[1].value.u32 = + req->keys[i].key.ipv4_5tuple.src_ip; + params[i]->field_value[1].mask_range.u32 = + req->keys[i].key.ipv4_5tuple.src_ip_mask; + params[i]->field_value[2].value.u32 = + req->keys[i].key.ipv4_5tuple.dst_ip; + params[i]->field_value[2].mask_range.u32 = + req->keys[i].key.ipv4_5tuple.dst_ip_mask; + params[i]->field_value[3].value.u16 = + req->keys[i].key.ipv4_5tuple.src_port_from; + params[i]->field_value[3].mask_range.u16 = + req->keys[i].key.ipv4_5tuple.src_port_to; + params[i]->field_value[4].value.u16 = + req->keys[i].key.ipv4_5tuple.dst_port_from; + params[i]->field_value[4].mask_range.u16 = + req->keys[i].key.ipv4_5tuple.dst_port_to; + break; + + default: + rsp->status = -1; /* Error */ + + for (i = 0; i < n_keys; i++) { + rte_free(entries[i]); + rte_free(params[i]); + } + + return rsp; + } + } + + rsp->status = rte_pipeline_table_entry_add_bulk(p->p, p->table_id[0], + (void *)params, (struct rte_pipeline_table_entry **)entries, + n_keys, req->keys_found, + (struct rte_pipeline_table_entry **)req->entries_ptr); + + for (i = 0; i < n_keys; i++) { + rte_free(entries[i]); + rte_free(params[i]); + } + + return rsp; +} + +static void * +pipeline_firewall_msg_req_del_bulk_handler(struct pipeline *p, void *msg) +{ + struct pipeline_firewall_del_bulk_msg_req *req = msg; + struct pipeline_firewall_del_bulk_msg_rsp *rsp = msg; + + struct rte_table_acl_rule_delete_params *params[req->n_keys]; + + uint32_t i, n_keys; + + n_keys = req->n_keys; + + for (i = 0; i < n_keys; i++) { + params[i] = rte_malloc(NULL, + sizeof(struct rte_table_acl_rule_delete_params), + RTE_CACHE_LINE_SIZE); + if (params[i] == NULL) { + rsp->status = -1; + return rsp; + } + + switch (req->keys[i].type) { + case PIPELINE_FIREWALL_IPV4_5TUPLE: + 
params[i]->field_value[0].value.u8 = + req->keys[i].key.ipv4_5tuple.proto; + params[i]->field_value[0].mask_range.u8 = + req->keys[i].key.ipv4_5tuple.proto_mask; + params[i]->field_value[1].value.u32 = + req->keys[i].key.ipv4_5tuple.src_ip; + params[i]->field_value[1].mask_range.u32 = + req->keys[i].key.ipv4_5tuple.src_ip_mask; + params[i]->field_value[2].value.u32 = + req->keys[i].key.ipv4_5tuple.dst_ip; + params[i]->field_value[2].mask_range.u32 = + req->keys[i].key.ipv4_5tuple.dst_ip_mask; + params[i]->field_value[3].value.u16 = + req->keys[i].key.ipv4_5tuple.src_port_from; + params[i]->field_value[3].mask_range.u16 = + req->keys[i].key.ipv4_5tuple.src_port_to; + params[i]->field_value[4].value.u16 = + req->keys[i].key.ipv4_5tuple.dst_port_from; + params[i]->field_value[4].mask_range.u16 = + req->keys[i].key.ipv4_5tuple.dst_port_to; + break; + + default: + rsp->status = -1; /* Error */ + + for (i = 0; i < n_keys; i++) + rte_free(params[i]); + + return rsp; + } + } + + rsp->status = rte_pipeline_table_entry_delete_bulk(p->p, p->table_id[0], + (void **)¶ms, n_keys, req->keys_found, NULL); + + for (i = 0; i < n_keys; i++) + rte_free(params[i]); + + return rsp; +} + +void * +pipeline_firewall_msg_req_add_default_handler(struct pipeline *p, void *msg) +{ + struct pipeline_firewall_add_default_msg_req *req = msg; + struct pipeline_firewall_add_default_msg_rsp *rsp = msg; + + struct firewall_table_entry default_entry = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[req->port_id]}, + }, + }; + + rsp->status = rte_pipeline_table_default_entry_add(p->p, + p->table_id[0], + (struct rte_pipeline_table_entry *) &default_entry, + (struct rte_pipeline_table_entry **) &rsp->entry_ptr); + + return rsp; +} + +void * +pipeline_firewall_msg_req_del_default_handler(struct pipeline *p, void *msg) +{ + struct pipeline_firewall_del_default_msg_rsp *rsp = msg; + + rsp->status = rte_pipeline_table_default_entry_delete(p->p, + p->table_id[0], + NULL); + + 
return rsp; +} + +struct pipeline_be_ops pipeline_firewall_be_ops = { + .f_init = pipeline_firewall_init, + .f_free = pipeline_firewall_free, + .f_run = NULL, + .f_timer = pipeline_firewall_timer, + .f_track = pipeline_firewall_track, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_firewall_be.h b/examples/ip_pipeline/pipeline/pipeline_firewall_be.h new file mode 100644 index 00000000..f5b0522f --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_firewall_be.h @@ -0,0 +1,176 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_PIPELINE_FIREWALL_BE_H__ +#define __INCLUDE_PIPELINE_FIREWALL_BE_H__ + +#include "pipeline_common_be.h" + +enum pipeline_firewall_key_type { + PIPELINE_FIREWALL_IPV4_5TUPLE, +}; + +struct pipeline_firewall_key_ipv4_5tuple { + uint32_t src_ip; + uint32_t src_ip_mask; + uint32_t dst_ip; + uint32_t dst_ip_mask; + uint16_t src_port_from; + uint16_t src_port_to; + uint16_t dst_port_from; + uint16_t dst_port_to; + uint8_t proto; + uint8_t proto_mask; +}; + +struct pipeline_firewall_key { + enum pipeline_firewall_key_type type; + union { + struct pipeline_firewall_key_ipv4_5tuple ipv4_5tuple; + } key; +}; + +enum pipeline_firewall_msg_req_type { + PIPELINE_FIREWALL_MSG_REQ_ADD = 0, + PIPELINE_FIREWALL_MSG_REQ_DEL, + PIPELINE_FIREWALL_MSG_REQ_ADD_BULK, + PIPELINE_FIREWALL_MSG_REQ_DEL_BULK, + PIPELINE_FIREWALL_MSG_REQ_ADD_DEFAULT, + PIPELINE_FIREWALL_MSG_REQ_DEL_DEFAULT, + PIPELINE_FIREWALL_MSG_REQS +}; + +/* + * MSG ADD + */ +struct pipeline_firewall_add_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_firewall_msg_req_type subtype; + + /* key */ + struct pipeline_firewall_key key; + + /* data */ + int32_t priority; + uint32_t port_id; +}; + +struct pipeline_firewall_add_msg_rsp { + int status; + int key_found; + void *entry_ptr; +}; + +/* + * MSG DEL + */ +struct pipeline_firewall_del_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_firewall_msg_req_type subtype; + + /* key */ + struct 
pipeline_firewall_key key; +}; + +struct pipeline_firewall_del_msg_rsp { + int status; + int key_found; +}; + +/* + * MSG ADD BULK + */ +struct pipeline_firewall_add_bulk_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_firewall_msg_req_type subtype; + + struct pipeline_firewall_key *keys; + uint32_t n_keys; + + uint32_t *priorities; + uint32_t *port_ids; + int *keys_found; + void **entries_ptr; +}; +struct pipeline_firewall_add_bulk_msg_rsp { + int status; +}; + +/* + * MSG DEL BULK + */ +struct pipeline_firewall_del_bulk_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_firewall_msg_req_type subtype; + + /* key */ + struct pipeline_firewall_key *keys; + uint32_t n_keys; + int *keys_found; +}; + +struct pipeline_firewall_del_bulk_msg_rsp { + int status; +}; + +/* + * MSG ADD DEFAULT + */ +struct pipeline_firewall_add_default_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_firewall_msg_req_type subtype; + + /* data */ + uint32_t port_id; +}; + +struct pipeline_firewall_add_default_msg_rsp { + int status; + void *entry_ptr; +}; + +/* + * MSG DEL DEFAULT + */ +struct pipeline_firewall_del_default_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_firewall_msg_req_type subtype; +}; + +struct pipeline_firewall_del_default_msg_rsp { + int status; +}; + +extern struct pipeline_be_ops pipeline_firewall_be_ops; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_actions.c b/examples/ip_pipeline/pipeline/pipeline_flow_actions.c new file mode 100644 index 00000000..4012121f --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_flow_actions.c @@ -0,0 +1,1814 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <sys/queue.h> +#include <netinet/in.h> + +#include <rte_common.h> +#include <rte_hexdump.h> +#include <rte_malloc.h> +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_ipaddr.h> +#include <cmdline_parse_etheraddr.h> + +#include "app.h" +#include "pipeline_common_fe.h" +#include "pipeline_flow_actions.h" +#include "hash_func.h" + +/* + * Flow actions pipeline + */ +#ifndef N_FLOWS_BULK +#define N_FLOWS_BULK 4096 +#endif + +struct app_pipeline_fa_flow { + struct pipeline_fa_flow_params params; + void *entry_ptr; +}; + +struct app_pipeline_fa_dscp { + uint32_t traffic_class; + enum rte_meter_color color; +}; + +struct app_pipeline_fa { + /* Parameters */ + uint32_t n_ports_in; + uint32_t n_ports_out; + struct pipeline_fa_params params; + + /* Flows */ + struct app_pipeline_fa_dscp dscp[PIPELINE_FA_N_DSCP]; + struct app_pipeline_fa_flow *flows; +} __rte_cache_aligned; + +static void* +app_pipeline_fa_init(struct pipeline_params *params, + __rte_unused void *arg) +{ + struct app_pipeline_fa *p; + uint32_t size, i; + + /* Check input arguments */ + if ((params == NULL) || + (params->n_ports_in == 0) || + (params->n_ports_out == 0)) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct app_pipeline_fa)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + if (p == NULL) + return NULL; + + /* Initialization */ + p->n_ports_in = params->n_ports_in; + p->n_ports_out = params->n_ports_out; + if (pipeline_fa_parse_args(&p->params, params)) { + rte_free(p); + return NULL; + } + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP( + p->params.n_flows * sizeof(struct app_pipeline_fa_flow)); + p->flows = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + if (p->flows == NULL) { + rte_free(p); + return NULL; + } + + /* Initialization of flow table */ + for (i = 0; i < p->params.n_flows; 
i++) + pipeline_fa_flow_params_set_default(&p->flows[i].params); + + /* Initialization of DSCP table */ + for (i = 0; i < RTE_DIM(p->dscp); i++) { + p->dscp[i].traffic_class = 0; + p->dscp[i].color = e_RTE_METER_GREEN; + } + + return (void *) p; +} + +static int +app_pipeline_fa_free(void *pipeline) +{ + struct app_pipeline_fa *p = pipeline; + + /* Check input arguments */ + if (p == NULL) + return -1; + + /* Free resources */ + rte_free(p->flows); + rte_free(p); + + return 0; +} + +static int +flow_params_check(struct app_pipeline_fa *p, + __rte_unused uint32_t meter_update_mask, + uint32_t policer_update_mask, + uint32_t port_update, + struct pipeline_fa_flow_params *params) +{ + uint32_t mask, i; + + /* Meter */ + + /* Policer */ + for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) { + struct pipeline_fa_policer_params *p = ¶ms->p[i]; + uint32_t j; + + if ((mask & policer_update_mask) == 0) + continue; + + for (j = 0; j < e_RTE_METER_COLORS; j++) { + struct pipeline_fa_policer_action *action = + &p->action[j]; + + if ((action->drop == 0) && + (action->color >= e_RTE_METER_COLORS)) + return -1; + } + } + + /* Port */ + if (port_update && (params->port_id >= p->n_ports_out)) + return -1; + + return 0; +} + +int +app_pipeline_fa_flow_config(struct app_params *app, + uint32_t pipeline_id, + uint32_t flow_id, + uint32_t meter_update_mask, + uint32_t policer_update_mask, + uint32_t port_update, + struct pipeline_fa_flow_params *params) +{ + struct app_pipeline_fa *p; + struct app_pipeline_fa_flow *flow; + + struct pipeline_fa_flow_config_msg_req *req; + struct pipeline_fa_flow_config_msg_rsp *rsp; + + uint32_t i, mask; + + /* Check input arguments */ + if ((app == NULL) || + ((meter_update_mask == 0) && + (policer_update_mask == 0) && + (port_update == 0)) || + (meter_update_mask >= (1 << PIPELINE_FA_N_TC_MAX)) || + (policer_update_mask >= (1 << PIPELINE_FA_N_TC_MAX)) || + (params == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, + 
&pipeline_flow_actions); + if (p == NULL) + return -1; + + if (flow_params_check(p, + meter_update_mask, + policer_update_mask, + port_update, + params) != 0) + return -1; + + flow_id %= p->params.n_flows; + flow = &p->flows[flow_id]; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FA_MSG_REQ_FLOW_CONFIG; + req->entry_ptr = flow->entry_ptr; + req->flow_id = flow_id; + req->meter_update_mask = meter_update_mask; + req->policer_update_mask = policer_update_mask; + req->port_update = port_update; + memcpy(&req->params, params, sizeof(*params)); + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response */ + if (rsp->status || + (rsp->entry_ptr == NULL)) { + app_msg_free(app, rsp); + return -1; + } + + /* Commit flow */ + for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) { + if ((mask & meter_update_mask) == 0) + continue; + + memcpy(&flow->params.m[i], ¶ms->m[i], sizeof(params->m[i])); + } + + for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) { + if ((mask & policer_update_mask) == 0) + continue; + + memcpy(&flow->params.p[i], ¶ms->p[i], sizeof(params->p[i])); + } + + if (port_update) + flow->params.port_id = params->port_id; + + flow->entry_ptr = rsp->entry_ptr; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_fa_flow_config_bulk(struct app_params *app, + uint32_t pipeline_id, + uint32_t *flow_id, + uint32_t n_flows, + uint32_t meter_update_mask, + uint32_t policer_update_mask, + uint32_t port_update, + struct pipeline_fa_flow_params *params) +{ + struct app_pipeline_fa *p; + struct pipeline_fa_flow_config_bulk_msg_req *req; + struct pipeline_fa_flow_config_bulk_msg_rsp *rsp; + void **req_entry_ptr; + uint32_t *req_flow_id; + uint32_t i; + + /* Check input arguments */ + if 
((app == NULL) || + (flow_id == NULL) || + (n_flows == 0) || + ((meter_update_mask == 0) && + (policer_update_mask == 0) && + (port_update == 0)) || + (meter_update_mask >= (1 << PIPELINE_FA_N_TC_MAX)) || + (policer_update_mask >= (1 << PIPELINE_FA_N_TC_MAX)) || + (params == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, + &pipeline_flow_actions); + if (p == NULL) + return -1; + + for (i = 0; i < n_flows; i++) { + struct pipeline_fa_flow_params *flow_params = ¶ms[i]; + + if (flow_params_check(p, + meter_update_mask, + policer_update_mask, + port_update, + flow_params) != 0) + return -1; + } + + /* Allocate and write request */ + req_entry_ptr = (void **) rte_malloc(NULL, + n_flows * sizeof(void *), + RTE_CACHE_LINE_SIZE); + if (req_entry_ptr == NULL) + return -1; + + req_flow_id = (uint32_t *) rte_malloc(NULL, + n_flows * sizeof(uint32_t), + RTE_CACHE_LINE_SIZE); + if (req_flow_id == NULL) { + rte_free(req_entry_ptr); + return -1; + } + + for (i = 0; i < n_flows; i++) { + uint32_t fid = flow_id[i] % p->params.n_flows; + struct app_pipeline_fa_flow *flow = &p->flows[fid]; + + req_flow_id[i] = fid; + req_entry_ptr[i] = flow->entry_ptr; + } + + req = app_msg_alloc(app); + if (req == NULL) { + rte_free(req_flow_id); + rte_free(req_entry_ptr); + return -1; + } + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FA_MSG_REQ_FLOW_CONFIG_BULK; + req->entry_ptr = req_entry_ptr; + req->flow_id = req_flow_id; + req->n_flows = n_flows; + req->meter_update_mask = meter_update_mask; + req->policer_update_mask = policer_update_mask; + req->port_update = port_update; + req->params = params; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) { + rte_free(req_flow_id); + rte_free(req_entry_ptr); + return -1; + } + + /* Read response */ + + /* Commit flows */ + for (i = 0; i < rsp->n_flows; i++) { + uint32_t fid = flow_id[i] % p->params.n_flows; + struct 
app_pipeline_fa_flow *flow = &p->flows[fid]; + struct pipeline_fa_flow_params *flow_params = ¶ms[i]; + void *entry_ptr = req_entry_ptr[i]; + uint32_t j, mask; + + for (j = 0, mask = 1; j < PIPELINE_FA_N_TC_MAX; + j++, mask <<= 1) { + if ((mask & meter_update_mask) == 0) + continue; + + memcpy(&flow->params.m[j], + &flow_params->m[j], + sizeof(flow_params->m[j])); + } + + for (j = 0, mask = 1; j < PIPELINE_FA_N_TC_MAX; + j++, mask <<= 1) { + if ((mask & policer_update_mask) == 0) + continue; + + memcpy(&flow->params.p[j], + &flow_params->p[j], + sizeof(flow_params->p[j])); + } + + if (port_update) + flow->params.port_id = flow_params->port_id; + + flow->entry_ptr = entry_ptr; + } + + /* Free response */ + app_msg_free(app, rsp); + rte_free(req_flow_id); + rte_free(req_entry_ptr); + + return (rsp->n_flows == n_flows) ? 0 : -1; +} + +int +app_pipeline_fa_dscp_config(struct app_params *app, + uint32_t pipeline_id, + uint32_t dscp, + uint32_t traffic_class, + enum rte_meter_color color) +{ + struct app_pipeline_fa *p; + + struct pipeline_fa_dscp_config_msg_req *req; + struct pipeline_fa_dscp_config_msg_rsp *rsp; + + /* Check input arguments */ + if ((app == NULL) || + (dscp >= PIPELINE_FA_N_DSCP) || + (traffic_class >= PIPELINE_FA_N_TC_MAX) || + (color >= e_RTE_METER_COLORS)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, + &pipeline_flow_actions); + if (p == NULL) + return -1; + + if (p->params.dscp_enabled == 0) + return -1; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FA_MSG_REQ_DSCP_CONFIG; + req->dscp = dscp; + req->traffic_class = traffic_class; + req->color = color; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response */ + if (rsp->status) { + app_msg_free(app, rsp); + return -1; + } + + /* Commit DSCP */ + 
p->dscp[dscp].traffic_class = traffic_class; + p->dscp[dscp].color = color; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_fa_flow_policer_stats_read(struct app_params *app, + uint32_t pipeline_id, + uint32_t flow_id, + uint32_t policer_id, + int clear, + struct pipeline_fa_policer_stats *stats) +{ + struct app_pipeline_fa *p; + struct app_pipeline_fa_flow *flow; + + struct pipeline_fa_policer_stats_msg_req *req; + struct pipeline_fa_policer_stats_msg_rsp *rsp; + + /* Check input arguments */ + if ((app == NULL) || (stats == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, + &pipeline_flow_actions); + if (p == NULL) + return -1; + + flow_id %= p->params.n_flows; + flow = &p->flows[flow_id]; + + if ((policer_id >= p->params.n_meters_per_flow) || + (flow->entry_ptr == NULL)) + return -1; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FA_MSG_REQ_POLICER_STATS_READ; + req->entry_ptr = flow->entry_ptr; + req->policer_id = policer_id; + req->clear = clear; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response */ + if (rsp->status) { + app_msg_free(app, rsp); + return -1; + } + + memcpy(stats, &rsp->stats, sizeof(*stats)); + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +static const char * +color_to_string(enum rte_meter_color color) +{ + switch (color) { + case e_RTE_METER_GREEN: return "G"; + case e_RTE_METER_YELLOW: return "Y"; + case e_RTE_METER_RED: return "R"; + default: return "?"; + } +} + +static int +string_to_color(char *s, enum rte_meter_color *c) +{ + if (strcmp(s, "G") == 0) { + *c = e_RTE_METER_GREEN; + return 0; + } + + if (strcmp(s, "Y") == 0) { + *c = e_RTE_METER_YELLOW; + return 0; + } + + if (strcmp(s, "R") == 0) { + *c = e_RTE_METER_RED; + 
return 0; + } + + return -1; +} + +static const char * +policer_action_to_string(struct pipeline_fa_policer_action *a) +{ + if (a->drop) + return "D"; + + return color_to_string(a->color); +} + +static int +string_to_policer_action(char *s, struct pipeline_fa_policer_action *a) +{ + if (strcmp(s, "G") == 0) { + a->drop = 0; + a->color = e_RTE_METER_GREEN; + return 0; + } + + if (strcmp(s, "Y") == 0) { + a->drop = 0; + a->color = e_RTE_METER_YELLOW; + return 0; + } + + if (strcmp(s, "R") == 0) { + a->drop = 0; + a->color = e_RTE_METER_RED; + return 0; + } + + if (strcmp(s, "D") == 0) { + a->drop = 1; + a->color = e_RTE_METER_GREEN; + return 0; + } + + return -1; +} + +static void +print_flow(struct app_pipeline_fa *p, + uint32_t flow_id, + struct app_pipeline_fa_flow *flow) +{ + uint32_t i; + + printf("Flow ID = %" PRIu32 "\n", flow_id); + + for (i = 0; i < p->params.n_meters_per_flow; i++) { + struct rte_meter_trtcm_params *meter = &flow->params.m[i]; + struct pipeline_fa_policer_params *policer = &flow->params.p[i]; + + printf("\ttrTCM [CIR = %" PRIu64 + ", CBS = %" PRIu64 ", PIR = %" PRIu64 + ", PBS = %" PRIu64 "] Policer [G : %s, Y : %s, R : %s]\n", + meter->cir, + meter->cbs, + meter->pir, + meter->pbs, + policer_action_to_string(&policer->action[e_RTE_METER_GREEN]), + policer_action_to_string(&policer->action[e_RTE_METER_YELLOW]), + policer_action_to_string(&policer->action[e_RTE_METER_RED])); + } + + printf("\tPort %u (entry_ptr = %p)\n", + flow->params.port_id, + flow->entry_ptr); +} + + +static int +app_pipeline_fa_flow_ls(struct app_params *app, + uint32_t pipeline_id) +{ + struct app_pipeline_fa *p; + uint32_t i; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, + &pipeline_flow_actions); + if (p == NULL) + return -1; + + for (i = 0; i < p->params.n_flows; i++) { + struct app_pipeline_fa_flow *flow = &p->flows[i]; + + print_flow(p, i, flow); + } + + return 0; +} + +static int 
+app_pipeline_fa_dscp_ls(struct app_params *app, + uint32_t pipeline_id) +{ + struct app_pipeline_fa *p; + uint32_t i; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, + &pipeline_flow_actions); + if (p == NULL) + return -1; + + if (p->params.dscp_enabled == 0) + return -1; + + for (i = 0; i < RTE_DIM(p->dscp); i++) { + struct app_pipeline_fa_dscp *dscp = &p->dscp[i]; + + printf("DSCP = %2" PRIu32 ": Traffic class = %" PRIu32 + ", Color = %s\n", + i, + dscp->traffic_class, + color_to_string(dscp->color)); + } + + return 0; +} + +/* + * Flow meter configuration (single flow) + * + * p <pipeline ID> flow <flow ID> meter <meter ID> trtcm <trtcm params> + */ + +struct cmd_fa_meter_config_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + uint32_t flow_id; + cmdline_fixed_string_t meter_string; + uint32_t meter_id; + cmdline_fixed_string_t trtcm_string; + uint64_t cir; + uint64_t pir; + uint64_t cbs; + uint64_t pbs; +}; + +static void +cmd_fa_meter_config_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_meter_config_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fa_flow_params flow_params; + int status; + + if (params->meter_id >= PIPELINE_FA_N_TC_MAX) { + printf("Command failed\n"); + return; + } + + flow_params.m[params->meter_id].cir = params->cir; + flow_params.m[params->meter_id].pir = params->pir; + flow_params.m[params->meter_id].cbs = params->cbs; + flow_params.m[params->meter_id].pbs = params->pbs; + + status = app_pipeline_fa_flow_config(app, + params->pipeline_id, + params->flow_id, + 1 << params->meter_id, + 0, + 0, + &flow_params); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fa_meter_config_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_result, + p_string, "p"); + +cmdline_parse_token_num_t 
cmd_fa_meter_config_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_meter_config_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_result, + flow_string, "flow"); + +cmdline_parse_token_num_t cmd_fa_meter_config_flow_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, + flow_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_meter_config_meter_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_result, + meter_string, "meter"); + +cmdline_parse_token_num_t cmd_fa_meter_config_meter_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, + meter_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_meter_config_trtcm_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_result, + trtcm_string, "trtcm"); + +cmdline_parse_token_num_t cmd_fa_meter_config_cir = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, cir, UINT64); + +cmdline_parse_token_num_t cmd_fa_meter_config_pir = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, pir, UINT64); + +cmdline_parse_token_num_t cmd_fa_meter_config_cbs = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, cbs, UINT64); + +cmdline_parse_token_num_t cmd_fa_meter_config_pbs = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, pbs, UINT64); + +cmdline_parse_inst_t cmd_fa_meter_config = { + .f = cmd_fa_meter_config_parsed, + .data = NULL, + .help_str = "Flow meter configuration (single flow) ", + .tokens = { + (void *) &cmd_fa_meter_config_p_string, + (void *) &cmd_fa_meter_config_pipeline_id, + (void *) &cmd_fa_meter_config_flow_string, + (void *) &cmd_fa_meter_config_flow_id, + (void *) &cmd_fa_meter_config_meter_string, + (void *) &cmd_fa_meter_config_meter_id, + (void *) &cmd_fa_meter_config_trtcm_string, + (void *) &cmd_fa_meter_config_cir, + (void *) &cmd_fa_meter_config_pir, + (void *) &cmd_fa_meter_config_cbs, + (void *) &cmd_fa_meter_config_pbs, + 
NULL, + }, +}; + +/* + * Flow meter configuration (multiple flows) + * + * p <pipeline ID> flows <n_flows> meter <meter ID> trtcm <trtcm params> + */ + +struct cmd_fa_meter_config_bulk_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flows_string; + uint32_t n_flows; + cmdline_fixed_string_t meter_string; + uint32_t meter_id; + cmdline_fixed_string_t trtcm_string; + uint64_t cir; + uint64_t pir; + uint64_t cbs; + uint64_t pbs; +}; + +static void +cmd_fa_meter_config_bulk_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_meter_config_bulk_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fa_flow_params flow_template, *flow_params; + uint32_t *flow_id; + uint32_t i; + + if ((params->n_flows == 0) || + (params->meter_id >= PIPELINE_FA_N_TC_MAX)) { + printf("Invalid arguments\n"); + return; + } + + flow_id = (uint32_t *) rte_malloc(NULL, + N_FLOWS_BULK * sizeof(uint32_t), + RTE_CACHE_LINE_SIZE); + if (flow_id == NULL) { + printf("Memory allocation failed\n"); + return; + } + + flow_params = (struct pipeline_fa_flow_params *) rte_malloc(NULL, + N_FLOWS_BULK * sizeof(struct pipeline_fa_flow_params), + RTE_CACHE_LINE_SIZE); + if (flow_params == NULL) { + rte_free(flow_id); + printf("Memory allocation failed\n"); + return; + } + + memset(&flow_template, 0, sizeof(flow_template)); + flow_template.m[params->meter_id].cir = params->cir; + flow_template.m[params->meter_id].pir = params->pir; + flow_template.m[params->meter_id].cbs = params->cbs; + flow_template.m[params->meter_id].pbs = params->pbs; + + for (i = 0; i < params->n_flows; i++) { + uint32_t pos = i % N_FLOWS_BULK; + + flow_id[pos] = i; + memcpy(&flow_params[pos], + &flow_template, + sizeof(flow_template)); + + if ((pos == N_FLOWS_BULK - 1) || + (i == params->n_flows - 1)) { + int status; + + status = app_pipeline_fa_flow_config_bulk(app, + params->pipeline_id, + flow_id, + pos + 1, + 1 << 
params->meter_id, + 0, + 0, + flow_params); + + if (status != 0) { + printf("Command failed\n"); + + break; + } + } + } + + rte_free(flow_params); + rte_free(flow_id); + +} + +cmdline_parse_token_string_t cmd_fa_meter_config_bulk_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fa_meter_config_bulk_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_meter_config_bulk_flows_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + flows_string, "flows"); + +cmdline_parse_token_num_t cmd_fa_meter_config_bulk_n_flows = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + n_flows, UINT32); + +cmdline_parse_token_string_t cmd_fa_meter_config_bulk_meter_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + meter_string, "meter"); + +cmdline_parse_token_num_t cmd_fa_meter_config_bulk_meter_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + meter_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_meter_config_bulk_trtcm_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + trtcm_string, "trtcm"); + +cmdline_parse_token_num_t cmd_fa_meter_config_bulk_cir = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + cir, UINT64); + +cmdline_parse_token_num_t cmd_fa_meter_config_bulk_pir = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + pir, UINT64); + +cmdline_parse_token_num_t cmd_fa_meter_config_bulk_cbs = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + cbs, UINT64); + +cmdline_parse_token_num_t cmd_fa_meter_config_bulk_pbs = + TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result, + pbs, UINT64); + +cmdline_parse_inst_t cmd_fa_meter_config_bulk = { + .f = cmd_fa_meter_config_bulk_parsed, + .data = NULL, + .help_str = "Flow meter configuration (multiple 
flows)", + .tokens = { + (void *) &cmd_fa_meter_config_bulk_p_string, + (void *) &cmd_fa_meter_config_bulk_pipeline_id, + (void *) &cmd_fa_meter_config_bulk_flows_string, + (void *) &cmd_fa_meter_config_bulk_n_flows, + (void *) &cmd_fa_meter_config_bulk_meter_string, + (void *) &cmd_fa_meter_config_bulk_meter_id, + (void *) &cmd_fa_meter_config_bulk_trtcm_string, + (void *) &cmd_fa_meter_config_cir, + (void *) &cmd_fa_meter_config_pir, + (void *) &cmd_fa_meter_config_cbs, + (void *) &cmd_fa_meter_config_pbs, + NULL, + }, +}; + +/* + * Flow policer configuration (single flow) + * + * p <pipeline ID> flow <flow ID> policer <policer ID> + * G <action> Y <action> R <action> + * + * <action> = G (green) | Y (yellow) | R (red) | D (drop) + */ + +struct cmd_fa_policer_config_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + uint32_t flow_id; + cmdline_fixed_string_t policer_string; + uint32_t policer_id; + cmdline_fixed_string_t green_string; + cmdline_fixed_string_t g_action; + cmdline_fixed_string_t yellow_string; + cmdline_fixed_string_t y_action; + cmdline_fixed_string_t red_string; + cmdline_fixed_string_t r_action; +}; + +static void +cmd_fa_policer_config_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_policer_config_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fa_flow_params flow_params; + int status; + + if (params->policer_id >= PIPELINE_FA_N_TC_MAX) { + printf("Command failed\n"); + return; + } + + status = string_to_policer_action(params->g_action, + &flow_params.p[params->policer_id].action[e_RTE_METER_GREEN]); + if (status) + printf("Invalid policer green action\n"); + + status = string_to_policer_action(params->y_action, + &flow_params.p[params->policer_id].action[e_RTE_METER_YELLOW]); + if (status) + printf("Invalid policer yellow action\n"); + + status = string_to_policer_action(params->r_action, + 
&flow_params.p[params->policer_id].action[e_RTE_METER_RED]); + if (status) + printf("Invalid policer red action\n"); + + status = app_pipeline_fa_flow_config(app, + params->pipeline_id, + params->flow_id, + 0, + 1 << params->policer_id, + 0, + &flow_params); + + if (status != 0) + printf("Command failed\n"); + +} + +cmdline_parse_token_string_t cmd_fa_policer_config_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fa_policer_config_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_policer_config_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + flow_string, "flow"); + +cmdline_parse_token_num_t cmd_fa_policer_config_flow_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_result, + flow_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_policer_config_policer_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + policer_string, "policer"); + +cmdline_parse_token_num_t cmd_fa_policer_config_policer_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_result, + policer_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_policer_config_green_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + green_string, "G"); + +cmdline_parse_token_string_t cmd_fa_policer_config_g_action = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + g_action, "R#Y#G#D"); + +cmdline_parse_token_string_t cmd_fa_policer_config_yellow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + yellow_string, "Y"); + +cmdline_parse_token_string_t cmd_fa_policer_config_y_action = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + y_action, "R#Y#G#D"); + +cmdline_parse_token_string_t cmd_fa_policer_config_red_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + red_string, "R"); + 
+cmdline_parse_token_string_t cmd_fa_policer_config_r_action = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result, + r_action, "R#Y#G#D"); + +cmdline_parse_inst_t cmd_fa_policer_config = { + .f = cmd_fa_policer_config_parsed, + .data = NULL, + .help_str = "Flow policer configuration (single flow)", + .tokens = { + (void *) &cmd_fa_policer_config_p_string, + (void *) &cmd_fa_policer_config_pipeline_id, + (void *) &cmd_fa_policer_config_flow_string, + (void *) &cmd_fa_policer_config_flow_id, + (void *) &cmd_fa_policer_config_policer_string, + (void *) &cmd_fa_policer_config_policer_id, + (void *) &cmd_fa_policer_config_green_string, + (void *) &cmd_fa_policer_config_g_action, + (void *) &cmd_fa_policer_config_yellow_string, + (void *) &cmd_fa_policer_config_y_action, + (void *) &cmd_fa_policer_config_red_string, + (void *) &cmd_fa_policer_config_r_action, + NULL, + }, +}; + +/* + * Flow policer configuration (multiple flows) + * + * p <pipeline ID> flows <n_flows> policer <policer ID> + * G <action> Y <action> R <action> + * + * <action> = G (green) | Y (yellow) | R (red) | D (drop) + */ + +struct cmd_fa_policer_config_bulk_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flows_string; + uint32_t n_flows; + cmdline_fixed_string_t policer_string; + uint32_t policer_id; + cmdline_fixed_string_t green_string; + cmdline_fixed_string_t g_action; + cmdline_fixed_string_t yellow_string; + cmdline_fixed_string_t y_action; + cmdline_fixed_string_t red_string; + cmdline_fixed_string_t r_action; +}; + +static void +cmd_fa_policer_config_bulk_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_policer_config_bulk_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fa_flow_params flow_template, *flow_params; + uint32_t *flow_id, i; + int status; + + if ((params->n_flows == 0) || + (params->policer_id >= PIPELINE_FA_N_TC_MAX)) { + printf("Invalid 
arguments\n"); + return; + } + + flow_id = (uint32_t *) rte_malloc(NULL, + N_FLOWS_BULK * sizeof(uint32_t), + RTE_CACHE_LINE_SIZE); + if (flow_id == NULL) { + printf("Memory allocation failed\n"); + return; + } + + flow_params = (struct pipeline_fa_flow_params *) rte_malloc(NULL, + N_FLOWS_BULK * sizeof(struct pipeline_fa_flow_params), + RTE_CACHE_LINE_SIZE); + if (flow_params == NULL) { + rte_free(flow_id); + printf("Memory allocation failed\n"); + return; + } + + memset(&flow_template, 0, sizeof(flow_template)); + + status = string_to_policer_action(params->g_action, + &flow_template.p[params->policer_id].action[e_RTE_METER_GREEN]); + if (status) + printf("Invalid policer green action\n"); + + status = string_to_policer_action(params->y_action, + &flow_template.p[params->policer_id].action[e_RTE_METER_YELLOW]); + if (status) + printf("Invalid policer yellow action\n"); + + status = string_to_policer_action(params->r_action, + &flow_template.p[params->policer_id].action[e_RTE_METER_RED]); + if (status) + printf("Invalid policer red action\n"); + + for (i = 0; i < params->n_flows; i++) { + uint32_t pos = i % N_FLOWS_BULK; + + flow_id[pos] = i; + memcpy(&flow_params[pos], &flow_template, + sizeof(flow_template)); + + if ((pos == N_FLOWS_BULK - 1) || + (i == params->n_flows - 1)) { + int status; + + status = app_pipeline_fa_flow_config_bulk(app, + params->pipeline_id, + flow_id, + pos + 1, + 0, + 1 << params->policer_id, + 0, + flow_params); + + if (status != 0) { + printf("Command failed\n"); + + break; + } + } + } + + rte_free(flow_params); + rte_free(flow_id); + +} + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fa_policer_config_bulk_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_flows_string = + 
TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + flows_string, "flows"); + +cmdline_parse_token_num_t cmd_fa_policer_config_bulk_n_flows = + TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + n_flows, UINT32); + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_policer_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + policer_string, "policer"); + +cmdline_parse_token_num_t cmd_fa_policer_config_bulk_policer_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + policer_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_green_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + green_string, "G"); + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_g_action = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + g_action, "R#Y#G#D"); + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_yellow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + yellow_string, "Y"); + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_y_action = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + y_action, "R#Y#G#D"); + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_red_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + red_string, "R"); + +cmdline_parse_token_string_t cmd_fa_policer_config_bulk_r_action = + TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result, + r_action, "R#Y#G#D"); + +cmdline_parse_inst_t cmd_fa_policer_config_bulk = { + .f = cmd_fa_policer_config_bulk_parsed, + .data = NULL, + .help_str = "Flow policer configuration (multiple flows)", + .tokens = { + (void *) &cmd_fa_policer_config_bulk_p_string, + (void *) &cmd_fa_policer_config_bulk_pipeline_id, + (void *) &cmd_fa_policer_config_bulk_flows_string, + (void *) &cmd_fa_policer_config_bulk_n_flows, + (void *) 
&cmd_fa_policer_config_bulk_policer_string, + (void *) &cmd_fa_policer_config_bulk_policer_id, + (void *) &cmd_fa_policer_config_bulk_green_string, + (void *) &cmd_fa_policer_config_bulk_g_action, + (void *) &cmd_fa_policer_config_bulk_yellow_string, + (void *) &cmd_fa_policer_config_bulk_y_action, + (void *) &cmd_fa_policer_config_bulk_red_string, + (void *) &cmd_fa_policer_config_bulk_r_action, + NULL, + }, +}; + +/* + * Flow output port configuration (single flow) + * + * p <pipeline ID> flow <flow ID> port <port ID> + */ + +struct cmd_fa_output_port_config_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + uint32_t flow_id; + cmdline_fixed_string_t port_string; + uint32_t port_id; +}; + +static void +cmd_fa_output_port_config_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_output_port_config_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fa_flow_params flow_params; + int status; + + flow_params.port_id = params->port_id; + + status = app_pipeline_fa_flow_config(app, + params->pipeline_id, + params->flow_id, + 0, + 0, + 1, + &flow_params); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fa_output_port_config_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fa_output_port_config_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_output_port_config_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_result, + flow_string, "flow"); + +cmdline_parse_token_num_t cmd_fa_output_port_config_flow_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_result, + flow_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_output_port_config_port_string = + TOKEN_STRING_INITIALIZER(struct 
cmd_fa_output_port_config_result, + port_string, "port"); + +cmdline_parse_token_num_t cmd_fa_output_port_config_port_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_result, + port_id, UINT32); + +cmdline_parse_inst_t cmd_fa_output_port_config = { + .f = cmd_fa_output_port_config_parsed, + .data = NULL, + .help_str = "Flow output port configuration (single flow)", + .tokens = { + (void *) &cmd_fa_output_port_config_p_string, + (void *) &cmd_fa_output_port_config_pipeline_id, + (void *) &cmd_fa_output_port_config_flow_string, + (void *) &cmd_fa_output_port_config_flow_id, + (void *) &cmd_fa_output_port_config_port_string, + (void *) &cmd_fa_output_port_config_port_id, + NULL, + }, +}; + +/* + * Flow output port configuration (multiple flows) + * + * p <pipeline ID> flows <n_flows> ports <n_ports> + */ + +struct cmd_fa_output_port_config_bulk_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flows_string; + uint32_t n_flows; + cmdline_fixed_string_t ports_string; + uint32_t n_ports; +}; + +static void +cmd_fa_output_port_config_bulk_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_output_port_config_bulk_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fa_flow_params *flow_params; + uint32_t *flow_id; + uint32_t i; + + if (params->n_flows == 0) { + printf("Invalid arguments\n"); + return; + } + + flow_id = (uint32_t *) rte_malloc(NULL, + N_FLOWS_BULK * sizeof(uint32_t), + RTE_CACHE_LINE_SIZE); + if (flow_id == NULL) { + printf("Memory allocation failed\n"); + return; + } + + flow_params = (struct pipeline_fa_flow_params *) rte_malloc(NULL, + N_FLOWS_BULK * sizeof(struct pipeline_fa_flow_params), + RTE_CACHE_LINE_SIZE); + if (flow_params == NULL) { + rte_free(flow_id); + printf("Memory allocation failed\n"); + return; + } + + for (i = 0; i < params->n_flows; i++) { + uint32_t pos = i % N_FLOWS_BULK; + uint32_t port_id = i % 
params->n_ports; + + flow_id[pos] = i; + + memset(&flow_params[pos], 0, sizeof(flow_params[pos])); + flow_params[pos].port_id = port_id; + + if ((pos == N_FLOWS_BULK - 1) || + (i == params->n_flows - 1)) { + int status; + + status = app_pipeline_fa_flow_config_bulk(app, + params->pipeline_id, + flow_id, + pos + 1, + 0, + 0, + 1, + flow_params); + + if (status != 0) { + printf("Command failed\n"); + + break; + } + } + } + + rte_free(flow_params); + rte_free(flow_id); + +} + +cmdline_parse_token_string_t cmd_fa_output_port_config_bulk_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_bulk_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fa_output_port_config_bulk_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_bulk_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_output_port_config_bulk_flows_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_bulk_result, + flows_string, "flows"); + +cmdline_parse_token_num_t cmd_fa_output_port_config_bulk_n_flows = + TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_bulk_result, + n_flows, UINT32); + +cmdline_parse_token_string_t cmd_fa_output_port_config_bulk_ports_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_bulk_result, + ports_string, "ports"); + +cmdline_parse_token_num_t cmd_fa_output_port_config_bulk_n_ports = + TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_bulk_result, + n_ports, UINT32); + +cmdline_parse_inst_t cmd_fa_output_port_config_bulk = { + .f = cmd_fa_output_port_config_bulk_parsed, + .data = NULL, + .help_str = "Flow output port configuration (multiple flows)", + .tokens = { + (void *) &cmd_fa_output_port_config_bulk_p_string, + (void *) &cmd_fa_output_port_config_bulk_pipeline_id, + (void *) &cmd_fa_output_port_config_bulk_flows_string, + (void *) &cmd_fa_output_port_config_bulk_n_flows, + (void *) &cmd_fa_output_port_config_bulk_ports_string, + (void *) 
&cmd_fa_output_port_config_bulk_n_ports, + NULL, + }, +}; + +/* + * Flow DiffServ Code Point (DSCP) translation table configuration + * + * p <pipeline ID> dscp <DSCP ID> class <traffic class ID> color <color> + * + * <color> = G (green) | Y (yellow) | R (red) +*/ + +struct cmd_fa_dscp_config_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t dscp_string; + uint32_t dscp_id; + cmdline_fixed_string_t class_string; + uint32_t traffic_class_id; + cmdline_fixed_string_t color_string; + cmdline_fixed_string_t color; + +}; + +static void +cmd_fa_dscp_config_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_dscp_config_result *params = parsed_result; + struct app_params *app = data; + enum rte_meter_color color; + int status; + + status = string_to_color(params->color, &color); + if (status) { + printf("Invalid color\n"); + return; + } + + status = app_pipeline_fa_dscp_config(app, + params->pipeline_id, + params->dscp_id, + params->traffic_class_id, + color); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fa_dscp_config_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fa_dscp_config_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_dscp_config_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_dscp_config_dscp_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result, + dscp_string, "dscp"); + +cmdline_parse_token_num_t cmd_fa_dscp_config_dscp_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_dscp_config_result, + dscp_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_dscp_config_class_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result, + class_string, "class"); + +cmdline_parse_token_num_t cmd_fa_dscp_config_traffic_class_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_dscp_config_result, + traffic_class_id, UINT32); + 
+cmdline_parse_token_string_t cmd_fa_dscp_config_color_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result, + color_string, "color"); + +cmdline_parse_token_string_t cmd_fa_dscp_config_color = + TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result, + color, "G#Y#R"); + +cmdline_parse_inst_t cmd_fa_dscp_config = { + .f = cmd_fa_dscp_config_parsed, + .data = NULL, + .help_str = "Flow DSCP translation table configuration", + .tokens = { + (void *) &cmd_fa_dscp_config_p_string, + (void *) &cmd_fa_dscp_config_pipeline_id, + (void *) &cmd_fa_dscp_config_dscp_string, + (void *) &cmd_fa_dscp_config_dscp_id, + (void *) &cmd_fa_dscp_config_class_string, + (void *) &cmd_fa_dscp_config_traffic_class_id, + (void *) &cmd_fa_dscp_config_color_string, + (void *) &cmd_fa_dscp_config_color, + NULL, + }, +}; + +/* + * Flow policer stats read + * + * p <pipeline ID> flow <flow ID> policer <policer ID> stats + */ + +struct cmd_fa_policer_stats_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + uint32_t flow_id; + cmdline_fixed_string_t policer_string; + uint32_t policer_id; + cmdline_fixed_string_t stats_string; +}; + +static void +cmd_fa_policer_stats_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_policer_stats_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fa_policer_stats stats; + int status; + + status = app_pipeline_fa_flow_policer_stats_read(app, + params->pipeline_id, + params->flow_id, + params->policer_id, + 1, + &stats); + if (status != 0) { + printf("Command failed\n"); + return; + } + + /* Display stats */ + printf("\tPkts G: %" PRIu64 + "\tPkts Y: %" PRIu64 + "\tPkts R: %" PRIu64 + "\tPkts D: %" PRIu64 "\n", + stats.n_pkts[e_RTE_METER_GREEN], + stats.n_pkts[e_RTE_METER_YELLOW], + stats.n_pkts[e_RTE_METER_RED], + stats.n_pkts_drop); +} + +cmdline_parse_token_string_t cmd_fa_policer_stats_p_string = + 
/*
 * Flow actions list
 *
 * p <pipeline ID> flow actions ls
 */
params->pipeline_id); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fa_flow_ls_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_flow_ls_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fa_flow_ls_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_flow_ls_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fa_flow_ls_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_flow_ls_result, + flow_string, "flow"); + +cmdline_parse_token_string_t cmd_fa_flow_ls_actions_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_flow_ls_result, + actions_string, "actions"); + +cmdline_parse_token_string_t cmd_fa_flow_ls_ls_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_flow_ls_result, + ls_string, "ls"); + +cmdline_parse_inst_t cmd_fa_flow_ls = { + .f = cmd_fa_flow_ls_parsed, + .data = NULL, + .help_str = "Flow actions list", + .tokens = { + (void *) &cmd_fa_flow_ls_p_string, + (void *) &cmd_fa_flow_ls_pipeline_id, + (void *) &cmd_fa_flow_ls_flow_string, + (void *) &cmd_fa_flow_ls_actions_string, + (void *) &cmd_fa_flow_ls_ls_string, + NULL, + }, +}; + +/* + * Flow DiffServ Code Point (DSCP) translation table list + * + * p <pipeline ID> dscp ls + */ + +struct cmd_fa_dscp_ls_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t dscp_string; + cmdline_fixed_string_t ls_string; +}; + +static void +cmd_fa_dscp_ls_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fa_dscp_ls_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_fa_dscp_ls(app, params->pipeline_id); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fa_dscp_ls_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_ls_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fa_dscp_ls_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fa_dscp_ls_result, + pipeline_id, UINT32); 
+ +cmdline_parse_token_string_t cmd_fa_dscp_ls_dscp_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_ls_result, + dscp_string, "dscp"); + +cmdline_parse_token_string_t cmd_fa_dscp_ls_string = + TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_ls_result, ls_string, + "ls"); + +cmdline_parse_inst_t cmd_fa_dscp_ls = { + .f = cmd_fa_dscp_ls_parsed, + .data = NULL, + .help_str = "Flow DSCP translaton table list", + .tokens = { + (void *) &cmd_fa_dscp_ls_p_string, + (void *) &cmd_fa_dscp_ls_pipeline_id, + (void *) &cmd_fa_dscp_ls_dscp_string, + (void *) &cmd_fa_dscp_ls_string, + NULL, + }, +}; + +static cmdline_parse_ctx_t pipeline_cmds[] = { + (cmdline_parse_inst_t *) &cmd_fa_meter_config, + (cmdline_parse_inst_t *) &cmd_fa_meter_config_bulk, + (cmdline_parse_inst_t *) &cmd_fa_policer_config, + (cmdline_parse_inst_t *) &cmd_fa_policer_config_bulk, + (cmdline_parse_inst_t *) &cmd_fa_output_port_config, + (cmdline_parse_inst_t *) &cmd_fa_output_port_config_bulk, + (cmdline_parse_inst_t *) &cmd_fa_dscp_config, + (cmdline_parse_inst_t *) &cmd_fa_policer_stats, + (cmdline_parse_inst_t *) &cmd_fa_flow_ls, + (cmdline_parse_inst_t *) &cmd_fa_dscp_ls, + NULL, +}; + +static struct pipeline_fe_ops pipeline_flow_actions_fe_ops = { + .f_init = app_pipeline_fa_init, + .f_free = app_pipeline_fa_free, + .cmds = pipeline_cmds, +}; + +struct pipeline_type pipeline_flow_actions = { + .name = "FLOW_ACTIONS", + .be_ops = &pipeline_flow_actions_be_ops, + .fe_ops = &pipeline_flow_actions_fe_ops, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_actions.h b/examples/ip_pipeline/pipeline/pipeline_flow_actions.h new file mode 100644 index 00000000..f2cd0cbb --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_flow_actions.h @@ -0,0 +1,78 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_PIPELINE_FLOW_ACTIONS_H__ +#define __INCLUDE_PIPELINE_FLOW_ACTIONS_H__ + +#include <rte_meter.h> + +#include "pipeline.h" +#include "pipeline_flow_actions_be.h" + +int +app_pipeline_fa_flow_config(struct app_params *app, + uint32_t pipeline_id, + uint32_t flow_id, + uint32_t meter_update_mask, + uint32_t policer_update_mask, + uint32_t port_update, + struct pipeline_fa_flow_params *params); + +int +app_pipeline_fa_flow_config_bulk(struct app_params *app, + uint32_t pipeline_id, + uint32_t *flow_id, + uint32_t n_flows, + uint32_t meter_update_mask, + uint32_t policer_update_mask, + uint32_t port_update, + struct pipeline_fa_flow_params *params); + +int +app_pipeline_fa_dscp_config(struct app_params *app, + uint32_t pipeline_id, + uint32_t dscp, + uint32_t traffic_class, + enum rte_meter_color color); + +int +app_pipeline_fa_flow_policer_stats_read(struct app_params *app, + uint32_t pipeline_id, + uint32_t flow_id, + uint32_t policer_id, + int clear, + struct pipeline_fa_policer_stats *stats); + +extern struct pipeline_type pipeline_flow_actions; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.c b/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.c new file mode 100644 index 00000000..3ad3ee63 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.c @@ -0,0 +1,1011 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <string.h> + +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_cycles.h> +#include <rte_table_array.h> +#include <rte_byteorder.h> +#include <rte_ip.h> + +#include "pipeline_actions_common.h" +#include "pipeline_flow_actions_be.h" +#include "parser.h" +#include "hash_func.h" + +int +pipeline_fa_flow_params_set_default(struct pipeline_fa_flow_params *params) +{ + uint32_t i; + + if (params == NULL) + return -1; + + for (i = 0; i < PIPELINE_FA_N_TC_MAX; i++) { + struct rte_meter_trtcm_params *m = ¶ms->m[i]; + + m->cir = 1; + m->cbs = 1; + m->pir = 1; + m->pbs = 2; + } + + for (i = 0; i < PIPELINE_FA_N_TC_MAX; i++) { + struct pipeline_fa_policer_params *p = ¶ms->p[i]; + uint32_t j; + + for (j = 0; j < e_RTE_METER_COLORS; j++) { + struct pipeline_fa_policer_action *a = &p->action[j]; + + a->drop = 0; + a->color = (enum rte_meter_color) j; + } + } + + params->port_id = 0; + + return 0; +} + +struct dscp_entry { + uint32_t traffic_class; + enum rte_meter_color color; +}; + +struct pipeline_flow_actions { + struct pipeline p; + struct pipeline_fa_params params; + pipeline_msg_req_handler custom_handlers[PIPELINE_FA_MSG_REQS]; + + struct dscp_entry dscp[PIPELINE_FA_N_DSCP]; +} __rte_cache_aligned; + +static void * +pipeline_fa_msg_req_custom_handler(struct pipeline *p, void *msg); + +static pipeline_msg_req_handler handlers[] = { + [PIPELINE_MSG_REQ_PING] = + pipeline_msg_req_ping_handler, + [PIPELINE_MSG_REQ_STATS_PORT_IN] = + pipeline_msg_req_stats_port_in_handler, + [PIPELINE_MSG_REQ_STATS_PORT_OUT] = + pipeline_msg_req_stats_port_out_handler, + [PIPELINE_MSG_REQ_STATS_TABLE] = + pipeline_msg_req_stats_table_handler, + [PIPELINE_MSG_REQ_PORT_IN_ENABLE] = + pipeline_msg_req_port_in_enable_handler, + [PIPELINE_MSG_REQ_PORT_IN_DISABLE] = + pipeline_msg_req_port_in_disable_handler, + [PIPELINE_MSG_REQ_CUSTOM] = + pipeline_fa_msg_req_custom_handler, +}; + +static void * +pipeline_fa_msg_req_flow_config_handler(struct pipeline *p, void *msg); + 
+static void * +pipeline_fa_msg_req_flow_config_bulk_handler(struct pipeline *p, void *msg); + +static void * +pipeline_fa_msg_req_dscp_config_handler(struct pipeline *p, void *msg); + +static void * +pipeline_fa_msg_req_policer_stats_read_handler(struct pipeline *p, void *msg); + +static pipeline_msg_req_handler custom_handlers[] = { + [PIPELINE_FA_MSG_REQ_FLOW_CONFIG] = + pipeline_fa_msg_req_flow_config_handler, + [PIPELINE_FA_MSG_REQ_FLOW_CONFIG_BULK] = + pipeline_fa_msg_req_flow_config_bulk_handler, + [PIPELINE_FA_MSG_REQ_DSCP_CONFIG] = + pipeline_fa_msg_req_dscp_config_handler, + [PIPELINE_FA_MSG_REQ_POLICER_STATS_READ] = + pipeline_fa_msg_req_policer_stats_read_handler, +}; + +/* + * Flow table + */ +struct meter_policer { + struct rte_meter_trtcm meter; + struct pipeline_fa_policer_params policer; + struct pipeline_fa_policer_stats stats; +}; + +struct flow_table_entry { + struct rte_pipeline_table_entry head; + struct meter_policer mp[PIPELINE_FA_N_TC_MAX]; +}; + +static int +flow_table_entry_set_meter(struct flow_table_entry *entry, + uint32_t meter_id, + struct pipeline_fa_flow_params *params) +{ + struct rte_meter_trtcm *meter = &entry->mp[meter_id].meter; + struct rte_meter_trtcm_params *meter_params = ¶ms->m[meter_id]; + + return rte_meter_trtcm_config(meter, meter_params); +} + +static void +flow_table_entry_set_policer(struct flow_table_entry *entry, + uint32_t policer_id, + struct pipeline_fa_flow_params *params) +{ + struct pipeline_fa_policer_params *p0 = &entry->mp[policer_id].policer; + struct pipeline_fa_policer_params *p1 = ¶ms->p[policer_id]; + + memcpy(p0, p1, sizeof(*p0)); +} + +static void +flow_table_entry_set_port_id(struct pipeline_flow_actions *p, + struct flow_table_entry *entry, + struct pipeline_fa_flow_params *params) +{ + entry->head.action = RTE_PIPELINE_ACTION_PORT; + entry->head.port_id = p->p.port_out_id[params->port_id]; +} + +static int +flow_table_entry_set_default(struct pipeline_flow_actions *p, + struct flow_table_entry 
*entry) +{ + struct pipeline_fa_flow_params params; + uint32_t i; + + pipeline_fa_flow_params_set_default(¶ms); + + memset(entry, 0, sizeof(*entry)); + + flow_table_entry_set_port_id(p, entry, ¶ms); + + for (i = 0; i < PIPELINE_FA_N_TC_MAX; i++) { + int status; + + status = flow_table_entry_set_meter(entry, i, ¶ms); + if (status) + return status; + } + + for (i = 0; i < PIPELINE_FA_N_TC_MAX; i++) + flow_table_entry_set_policer(entry, i, ¶ms); + + return 0; +} + +static inline uint64_t +pkt_work( + struct rte_mbuf *pkt, + struct rte_pipeline_table_entry *table_entry, + void *arg, + uint64_t time) +{ + struct pipeline_flow_actions *p = arg; + struct flow_table_entry *entry = + (struct flow_table_entry *) table_entry; + + struct ipv4_hdr *pkt_ip = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT32_PTR(pkt, p->params.ip_hdr_offset); + enum rte_meter_color *pkt_color = (enum rte_meter_color *) + RTE_MBUF_METADATA_UINT32_PTR(pkt, p->params.color_offset); + + /* Read (IP header) */ + uint32_t total_length = rte_bswap16(pkt_ip->total_length); + uint32_t dscp = pkt_ip->type_of_service >> 2; + + uint32_t tc = p->dscp[dscp].traffic_class; + enum rte_meter_color color = p->dscp[dscp].color; + + struct rte_meter_trtcm *meter = &entry->mp[tc].meter; + struct pipeline_fa_policer_params *policer = &entry->mp[tc].policer; + struct pipeline_fa_policer_stats *stats = &entry->mp[tc].stats; + + /* Read (entry), compute */ + enum rte_meter_color color2 = rte_meter_trtcm_color_aware_check(meter, + time, + total_length, + color); + + enum rte_meter_color color3 = policer->action[color2].color; + uint64_t drop = policer->action[color2].drop; + + /* Read (entry), write (entry, color) */ + stats->n_pkts[color3] += drop ^ 1LLU; + stats->n_pkts_drop += drop; + *pkt_color = color3; + + return drop; +} + +static inline uint64_t +pkt4_work( + struct rte_mbuf **pkts, + struct rte_pipeline_table_entry **table_entries, + void *arg, + uint64_t time) +{ + struct pipeline_flow_actions *p = arg; + + struct 
flow_table_entry *entry0 = + (struct flow_table_entry *) table_entries[0]; + struct flow_table_entry *entry1 = + (struct flow_table_entry *) table_entries[1]; + struct flow_table_entry *entry2 = + (struct flow_table_entry *) table_entries[2]; + struct flow_table_entry *entry3 = + (struct flow_table_entry *) table_entries[3]; + + struct ipv4_hdr *pkt0_ip = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT32_PTR(pkts[0], p->params.ip_hdr_offset); + struct ipv4_hdr *pkt1_ip = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT32_PTR(pkts[1], p->params.ip_hdr_offset); + struct ipv4_hdr *pkt2_ip = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT32_PTR(pkts[2], p->params.ip_hdr_offset); + struct ipv4_hdr *pkt3_ip = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT32_PTR(pkts[3], p->params.ip_hdr_offset); + + enum rte_meter_color *pkt0_color = (enum rte_meter_color *) + RTE_MBUF_METADATA_UINT32_PTR(pkts[0], p->params.color_offset); + enum rte_meter_color *pkt1_color = (enum rte_meter_color *) + RTE_MBUF_METADATA_UINT32_PTR(pkts[1], p->params.color_offset); + enum rte_meter_color *pkt2_color = (enum rte_meter_color *) + RTE_MBUF_METADATA_UINT32_PTR(pkts[2], p->params.color_offset); + enum rte_meter_color *pkt3_color = (enum rte_meter_color *) + RTE_MBUF_METADATA_UINT32_PTR(pkts[3], p->params.color_offset); + + /* Read (IP header) */ + uint32_t total_length0 = rte_bswap16(pkt0_ip->total_length); + uint32_t dscp0 = pkt0_ip->type_of_service >> 2; + + uint32_t total_length1 = rte_bswap16(pkt1_ip->total_length); + uint32_t dscp1 = pkt1_ip->type_of_service >> 2; + + uint32_t total_length2 = rte_bswap16(pkt2_ip->total_length); + uint32_t dscp2 = pkt2_ip->type_of_service >> 2; + + uint32_t total_length3 = rte_bswap16(pkt3_ip->total_length); + uint32_t dscp3 = pkt3_ip->type_of_service >> 2; + + uint32_t tc0 = p->dscp[dscp0].traffic_class; + enum rte_meter_color color0 = p->dscp[dscp0].color; + + uint32_t tc1 = p->dscp[dscp1].traffic_class; + enum rte_meter_color color1 = p->dscp[dscp1].color; + + 
uint32_t tc2 = p->dscp[dscp2].traffic_class; + enum rte_meter_color color2 = p->dscp[dscp2].color; + + uint32_t tc3 = p->dscp[dscp3].traffic_class; + enum rte_meter_color color3 = p->dscp[dscp3].color; + + struct rte_meter_trtcm *meter0 = &entry0->mp[tc0].meter; + struct pipeline_fa_policer_params *policer0 = &entry0->mp[tc0].policer; + struct pipeline_fa_policer_stats *stats0 = &entry0->mp[tc0].stats; + + struct rte_meter_trtcm *meter1 = &entry1->mp[tc1].meter; + struct pipeline_fa_policer_params *policer1 = &entry1->mp[tc1].policer; + struct pipeline_fa_policer_stats *stats1 = &entry1->mp[tc1].stats; + + struct rte_meter_trtcm *meter2 = &entry2->mp[tc2].meter; + struct pipeline_fa_policer_params *policer2 = &entry2->mp[tc2].policer; + struct pipeline_fa_policer_stats *stats2 = &entry2->mp[tc2].stats; + + struct rte_meter_trtcm *meter3 = &entry3->mp[tc3].meter; + struct pipeline_fa_policer_params *policer3 = &entry3->mp[tc3].policer; + struct pipeline_fa_policer_stats *stats3 = &entry3->mp[tc3].stats; + + /* Read (entry), compute, write (entry) */ + enum rte_meter_color color2_0 = rte_meter_trtcm_color_aware_check( + meter0, + time, + total_length0, + color0); + + enum rte_meter_color color2_1 = rte_meter_trtcm_color_aware_check( + meter1, + time, + total_length1, + color1); + + enum rte_meter_color color2_2 = rte_meter_trtcm_color_aware_check( + meter2, + time, + total_length2, + color2); + + enum rte_meter_color color2_3 = rte_meter_trtcm_color_aware_check( + meter3, + time, + total_length3, + color3); + + enum rte_meter_color color3_0 = policer0->action[color2_0].color; + enum rte_meter_color color3_1 = policer1->action[color2_1].color; + enum rte_meter_color color3_2 = policer2->action[color2_2].color; + enum rte_meter_color color3_3 = policer3->action[color2_3].color; + + uint64_t drop0 = policer0->action[color2_0].drop; + uint64_t drop1 = policer1->action[color2_1].drop; + uint64_t drop2 = policer2->action[color2_2].drop; + uint64_t drop3 = 
policer3->action[color2_3].drop; + + /* Read (entry), write (entry, color) */ + stats0->n_pkts[color3_0] += drop0 ^ 1LLU; + stats0->n_pkts_drop += drop0; + + stats1->n_pkts[color3_1] += drop1 ^ 1LLU; + stats1->n_pkts_drop += drop1; + + stats2->n_pkts[color3_2] += drop2 ^ 1LLU; + stats2->n_pkts_drop += drop2; + + stats3->n_pkts[color3_3] += drop3 ^ 1LLU; + stats3->n_pkts_drop += drop3; + + *pkt0_color = color3_0; + *pkt1_color = color3_1; + *pkt2_color = color3_2; + *pkt3_color = color3_3; + + return drop0 | (drop1 << 1) | (drop2 << 2) | (drop3 << 3); +} + +PIPELINE_TABLE_AH_HIT_DROP_TIME(fa_table_ah_hit, pkt_work, pkt4_work); + +static rte_pipeline_table_action_handler_hit +get_fa_table_ah_hit(__rte_unused struct pipeline_flow_actions *p) +{ + return fa_table_ah_hit; +} + +/* + * Argument parsing + */ +int +pipeline_fa_parse_args(struct pipeline_fa_params *p, + struct pipeline_params *params) +{ + uint32_t n_flows_present = 0; + uint32_t n_meters_per_flow_present = 0; + uint32_t flow_id_offset_present = 0; + uint32_t ip_hdr_offset_present = 0; + uint32_t color_offset_present = 0; + uint32_t i; + + /* Default values */ + p->n_meters_per_flow = 1; + p->dscp_enabled = 0; + + for (i = 0; i < params->n_args; i++) { + char *arg_name = params->args_name[i]; + char *arg_value = params->args_value[i]; + + /* n_flows */ + if (strcmp(arg_name, "n_flows") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + n_flows_present == 0, params->name, + arg_name); + n_flows_present = 1; + + status = parser_read_uint32(&p->n_flows, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) && + (p->n_flows != 0)), params->name, + arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* n_meters_per_flow */ + if (strcmp(arg_name, "n_meters_per_flow") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + n_meters_per_flow_present == 0, + params->name, arg_name); + n_meters_per_flow_present = 1; + + 
status = parser_read_uint32(&p->n_meters_per_flow, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) && + (p->n_meters_per_flow != 0)), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG(((status != -ERANGE) && + (p->n_meters_per_flow <= + PIPELINE_FA_N_TC_MAX)), params->name, + arg_name, arg_value); + + continue; + } + + /* flow_id_offset */ + if (strcmp(arg_name, "flow_id_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + flow_id_offset_present == 0, + params->name, arg_name); + flow_id_offset_present = 1; + + status = parser_read_uint32(&p->flow_id_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* ip_hdr_offset */ + if (strcmp(arg_name, "ip_hdr_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + ip_hdr_offset_present == 0, + params->name, arg_name); + ip_hdr_offset_present = 1; + + status = parser_read_uint32(&p->ip_hdr_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* color_offset */ + if (strcmp(arg_name, "color_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + color_offset_present == 0, params->name, + arg_name); + color_offset_present = 1; + + status = parser_read_uint32(&p->color_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + p->dscp_enabled = 1; + + continue; + } + + /* Unknown argument */ + PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name); + } + + /* Check that mandatory arguments are present */ + PIPELINE_PARSE_ERR_MANDATORY((n_flows_present), params->name, + "n_flows"); + 
PIPELINE_PARSE_ERR_MANDATORY((flow_id_offset_present), + params->name, "flow_id_offset"); + PIPELINE_PARSE_ERR_MANDATORY((ip_hdr_offset_present), + params->name, "ip_hdr_offset"); + PIPELINE_PARSE_ERR_MANDATORY((color_offset_present), params->name, + "color_offset"); + + return 0; +} + +static void +dscp_init(struct pipeline_flow_actions *p) +{ + uint32_t i; + + for (i = 0; i < PIPELINE_FA_N_DSCP; i++) { + p->dscp[i].traffic_class = 0; + p->dscp[i].color = e_RTE_METER_GREEN; + } +} + +static void *pipeline_fa_init(struct pipeline_params *params, + __rte_unused void *arg) +{ + struct pipeline *p; + struct pipeline_flow_actions *p_fa; + uint32_t size, i; + + /* Check input arguments */ + if (params == NULL) + return NULL; + + if (params->n_ports_in != params->n_ports_out) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP( + sizeof(struct pipeline_flow_actions)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + if (p == NULL) + return NULL; + p_fa = (struct pipeline_flow_actions *) p; + + strcpy(p->name, params->name); + p->log_level = params->log_level; + + PLOG(p, HIGH, "Flow actions"); + + /* Parse arguments */ + if (pipeline_fa_parse_args(&p_fa->params, params)) + return NULL; + + dscp_init(p_fa); + + /* Pipeline */ + { + struct rte_pipeline_params pipeline_params = { + .name = params->name, + .socket_id = params->socket_id, + .offset_port_id = 0, + }; + + p->p = rte_pipeline_create(&pipeline_params); + if (p->p == NULL) { + rte_free(p); + return NULL; + } + } + + /* Input ports */ + p->n_ports_in = params->n_ports_in; + for (i = 0; i < p->n_ports_in; i++) { + struct rte_pipeline_port_in_params port_params = { + .ops = pipeline_port_in_params_get_ops( + &params->port_in[i]), + .arg_create = pipeline_port_in_params_convert( + &params->port_in[i]), + .f_action = NULL, + .arg_ah = NULL, + .burst_size = params->port_in[i].burst_size, + }; + + int status = rte_pipeline_port_in_create(p->p, + &port_params, + &p->port_in_id[i]); + + if (status) { +
rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Output ports */ + p->n_ports_out = params->n_ports_out; + for (i = 0; i < p->n_ports_out; i++) { + struct rte_pipeline_port_out_params port_params = { + .ops = pipeline_port_out_params_get_ops( + &params->port_out[i]), + .arg_create = pipeline_port_out_params_convert( + &params->port_out[i]), + .f_action = NULL, + .arg_ah = NULL, + }; + + int status = rte_pipeline_port_out_create(p->p, + &port_params, + &p->port_out_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Tables */ + p->n_tables = 1; + { + struct rte_table_array_params table_array_params = { + .n_entries = p_fa->params.n_flows, + .offset = p_fa->params.flow_id_offset, + }; + + struct rte_pipeline_table_params table_params = { + .ops = &rte_table_array_ops, + .arg_create = &table_array_params, + .f_action_hit = get_fa_table_ah_hit(p_fa), + .f_action_miss = NULL, + .arg_ah = p_fa, + .action_data_size = + sizeof(struct flow_table_entry) - + sizeof(struct rte_pipeline_table_entry), + }; + + int status; + + status = rte_pipeline_table_create(p->p, + &table_params, + &p->table_id[0]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Connecting input ports to tables */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_connect_to_table(p->p, + p->port_in_id[i], + p->table_id[0]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Enable input ports */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_enable(p->p, + p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Initialize table entries */ + for (i = 0; i < p_fa->params.n_flows; i++) { + struct rte_table_array_key key = { + .pos = i, + }; + + struct flow_table_entry entry; + struct rte_pipeline_table_entry *entry_ptr; + int key_found, status; + +
flow_table_entry_set_default(p_fa, &entry); + + status = rte_pipeline_table_entry_add(p->p, + p->table_id[0], + &key, + (struct rte_pipeline_table_entry *) &entry, + &key_found, + &entry_ptr); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Check pipeline consistency */ + if (rte_pipeline_check(p->p) < 0) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + + /* Message queues */ + p->n_msgq = params->n_msgq; + for (i = 0; i < p->n_msgq; i++) + p->msgq_in[i] = params->msgq_in[i]; + for (i = 0; i < p->n_msgq; i++) + p->msgq_out[i] = params->msgq_out[i]; + + /* Message handlers */ + memcpy(p->handlers, handlers, sizeof(p->handlers)); + memcpy(p_fa->custom_handlers, + custom_handlers, + sizeof(p_fa->custom_handlers)); + + return p; +} + +static int +pipeline_fa_free(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if (p == NULL) + return -1; + + /* Free resources */ + rte_pipeline_free(p->p); + rte_free(p); + return 0; +} + +static int +pipeline_fa_track(void *pipeline, + __rte_unused uint32_t port_in, + uint32_t *port_out) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if ((p == NULL) || + (port_in >= p->n_ports_in) || + (port_out == NULL)) + return -1; + + if (p->n_ports_in == 1) { + *port_out = 0; + return 0; + } + + return -1; +} + +static int +pipeline_fa_timer(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + pipeline_msg_req_handle(p); + rte_pipeline_flush(p->p); + + return 0; +} + +void * +pipeline_fa_msg_req_custom_handler(struct pipeline *p, void *msg) +{ + struct pipeline_flow_actions *p_fa = + (struct pipeline_flow_actions *) p; + struct pipeline_custom_msg_req *req = msg; + pipeline_msg_req_handler f_handle; + + f_handle = (req->subtype < PIPELINE_FA_MSG_REQS) ? 
+ p_fa->custom_handlers[req->subtype] : + pipeline_msg_req_invalid_handler; + + if (f_handle == NULL) + f_handle = pipeline_msg_req_invalid_handler; + + return f_handle(p, req); +} + +void * +pipeline_fa_msg_req_flow_config_handler(struct pipeline *p, void *msg) +{ + struct pipeline_flow_actions *p_fa = (struct pipeline_flow_actions *) p; + struct pipeline_fa_flow_config_msg_req *req = msg; + struct pipeline_fa_flow_config_msg_rsp *rsp = msg; + struct flow_table_entry *entry; + uint32_t mask, i; + + /* Set flow table entry to default if not configured before */ + if (req->entry_ptr == NULL) { + struct rte_table_array_key key = { + .pos = req->flow_id % p_fa->params.n_flows, + }; + + struct flow_table_entry default_entry; + + int key_found, status; + + flow_table_entry_set_default(p_fa, &default_entry); + + status = rte_pipeline_table_entry_add(p->p, + p->table_id[0], + &key, + (struct rte_pipeline_table_entry *) &default_entry, + &key_found, + (struct rte_pipeline_table_entry **) &entry); + if (status) { + rsp->status = -1; + return rsp; + } + } else + entry = (struct flow_table_entry *) req->entry_ptr; + + /* Meter */ + for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) { + int status; + + if ((mask & req->meter_update_mask) == 0) + continue; + + status = flow_table_entry_set_meter(entry, i, &req->params); + if (status) { + rsp->status = -1; + return rsp; + } + } + + /* Policer */ + for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) { + if ((mask & req->policer_update_mask) == 0) + continue; + + flow_table_entry_set_policer(entry, i, &req->params); + } + + /* Port */ + if (req->port_update) + flow_table_entry_set_port_id(p_fa, entry, &req->params); + + /* Response */ + rsp->status = 0; + rsp->entry_ptr = (void *) entry; + return rsp; +} + +void * +pipeline_fa_msg_req_flow_config_bulk_handler(struct pipeline *p, void *msg) +{ + struct pipeline_flow_actions *p_fa = (struct pipeline_flow_actions *) p; + struct 
pipeline_fa_flow_config_bulk_msg_req *req = msg; + struct pipeline_fa_flow_config_bulk_msg_rsp *rsp = msg; + uint32_t i; + + for (i = 0; i < req->n_flows; i++) { + struct flow_table_entry *entry; + uint32_t j, mask; + + /* Set flow table entry to default if not configured before */ + if (req->entry_ptr[i] == NULL) { + struct rte_table_array_key key = { + .pos = req->flow_id[i] % p_fa->params.n_flows, + }; + + struct flow_table_entry entry_to_add; + + int key_found, status; + + flow_table_entry_set_default(p_fa, &entry_to_add); + + status = rte_pipeline_table_entry_add(p->p, + p->table_id[0], + &key, + (struct rte_pipeline_table_entry *) &entry_to_add, + &key_found, + (struct rte_pipeline_table_entry **) &entry); + if (status) { + rsp->n_flows = i; + return rsp; + } + + req->entry_ptr[i] = (void *) entry; + } else + entry = (struct flow_table_entry *) req->entry_ptr[i]; + + /* Meter */ + for (j = 0, mask = 1; + j < PIPELINE_FA_N_TC_MAX; + j++, mask <<= 1) { + int status; + + if ((mask & req->meter_update_mask) == 0) + continue; + + status = flow_table_entry_set_meter(entry, + j, &req->params[i]); + if (status) { + rsp->n_flows = i; + return rsp; + } + } + + /* Policer */ + for (j = 0, mask = 1; + j < PIPELINE_FA_N_TC_MAX; + j++, mask <<= 1) { + if ((mask & req->policer_update_mask) == 0) + continue; + + flow_table_entry_set_policer(entry, + j, &req->params[i]); + } + + /* Port */ + if (req->port_update) + flow_table_entry_set_port_id(p_fa, + entry, &req->params[i]); + } + + /* Response */ + rsp->n_flows = i; + return rsp; +} + +void * +pipeline_fa_msg_req_dscp_config_handler(struct pipeline *p, void *msg) +{ + struct pipeline_flow_actions *p_fa = (struct pipeline_flow_actions *) p; + struct pipeline_fa_dscp_config_msg_req *req = msg; + struct pipeline_fa_dscp_config_msg_rsp *rsp = msg; + + /* Check request */ + if ((req->dscp >= PIPELINE_FA_N_DSCP) || + (req->traffic_class >= PIPELINE_FA_N_TC_MAX) || + (req->color >= e_RTE_METER_COLORS)) { + rsp->status = -1; + 
return rsp; + } + + p_fa->dscp[req->dscp].traffic_class = req->traffic_class; + p_fa->dscp[req->dscp].color = req->color; + rsp->status = 0; + return rsp; +} + +void * +pipeline_fa_msg_req_policer_stats_read_handler(__rte_unused struct pipeline *p, + void *msg) +{ + struct pipeline_fa_policer_stats_msg_req *req = msg; + struct pipeline_fa_policer_stats_msg_rsp *rsp = msg; + + struct flow_table_entry *entry = req->entry_ptr; + uint32_t policer_id = req->policer_id; + int clear = req->clear; + + /* Check request */ + if ((req->entry_ptr == NULL) || + (req->policer_id >= PIPELINE_FA_N_TC_MAX)) { + rsp->status = -1; + return rsp; + } + + memcpy(&rsp->stats, + &entry->mp[policer_id].stats, + sizeof(rsp->stats)); + if (clear) + memset(&entry->mp[policer_id].stats, + 0, sizeof(entry->mp[policer_id].stats)); + rsp->status = 0; + return rsp; +} + +struct pipeline_be_ops pipeline_flow_actions_be_ops = { + .f_init = pipeline_fa_init, + .f_free = pipeline_fa_free, + .f_run = NULL, + .f_timer = pipeline_fa_timer, + .f_track = pipeline_fa_track, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.h b/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.h new file mode 100644 index 00000000..456f2cca --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.h @@ -0,0 +1,168 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_PIPELINE_FLOW_ACTIONS_BE_H__ +#define __INCLUDE_PIPELINE_FLOW_ACTIONS_BE_H__ + +#include <rte_meter.h> + +#include "pipeline_common_be.h" + +#ifndef PIPELINE_FA_N_TC_MAX +#define PIPELINE_FA_N_TC_MAX 4 +#endif + +#define PIPELINE_FA_N_DSCP 64 + +struct pipeline_fa_params { + uint32_t n_flows; + uint32_t n_meters_per_flow; + uint32_t flow_id_offset; + uint32_t ip_hdr_offset; + uint32_t color_offset; + uint32_t dscp_enabled; +}; + +int +pipeline_fa_parse_args(struct pipeline_fa_params *p, + struct pipeline_params *params); + +struct pipeline_fa_policer_action { + uint32_t drop; + enum rte_meter_color color; +}; + +struct pipeline_fa_policer_params { + struct pipeline_fa_policer_action action[e_RTE_METER_COLORS]; +}; + +struct pipeline_fa_flow_params { + struct rte_meter_trtcm_params m[PIPELINE_FA_N_TC_MAX]; + struct pipeline_fa_policer_params p[PIPELINE_FA_N_TC_MAX]; + uint32_t port_id; +}; + +int +pipeline_fa_flow_params_set_default(struct pipeline_fa_flow_params *params); + +struct pipeline_fa_policer_stats { + uint64_t n_pkts[e_RTE_METER_COLORS]; + uint64_t n_pkts_drop; +}; + +enum pipeline_fa_msg_req_type { + PIPELINE_FA_MSG_REQ_FLOW_CONFIG = 0, + PIPELINE_FA_MSG_REQ_FLOW_CONFIG_BULK, + PIPELINE_FA_MSG_REQ_DSCP_CONFIG, + PIPELINE_FA_MSG_REQ_POLICER_STATS_READ, + PIPELINE_FA_MSG_REQS, +}; + +/* + * MSG FLOW CONFIG + */ +struct pipeline_fa_flow_config_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fa_msg_req_type subtype; + + void *entry_ptr; + uint32_t flow_id; + + uint32_t meter_update_mask; + uint32_t policer_update_mask; + uint32_t port_update; + struct pipeline_fa_flow_params params; +}; + +struct pipeline_fa_flow_config_msg_rsp { + int status; + void *entry_ptr; +}; + +/* + * MSG FLOW CONFIG BULK + */ +struct pipeline_fa_flow_config_bulk_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fa_msg_req_type subtype; + + void **entry_ptr; + uint32_t *flow_id; + uint32_t n_flows; + + uint32_t meter_update_mask; + 
uint32_t policer_update_mask; + uint32_t port_update; + struct pipeline_fa_flow_params *params; +}; + +struct pipeline_fa_flow_config_bulk_msg_rsp { + uint32_t n_flows; +}; + +/* + * MSG DSCP CONFIG + */ +struct pipeline_fa_dscp_config_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fa_msg_req_type subtype; + + uint32_t dscp; + uint32_t traffic_class; + enum rte_meter_color color; +}; + +struct pipeline_fa_dscp_config_msg_rsp { + int status; +}; + +/* + * MSG POLICER STATS READ + */ +struct pipeline_fa_policer_stats_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fa_msg_req_type subtype; + + void *entry_ptr; + uint32_t policer_id; + int clear; +}; + +struct pipeline_fa_policer_stats_msg_rsp { + int status; + struct pipeline_fa_policer_stats stats; +}; + +extern struct pipeline_be_ops pipeline_flow_actions_be_ops; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_classification.c b/examples/ip_pipeline/pipeline/pipeline_flow_classification.c new file mode 100644 index 00000000..19215748 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_flow_classification.c @@ -0,0 +1,2215 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <string.h> +#include <sys/queue.h> +#include <netinet/in.h> + +#include <rte_common.h> +#include <rte_hexdump.h> +#include <rte_malloc.h> +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_ipaddr.h> +#include <cmdline_parse_etheraddr.h> + +#include "app.h" +#include "pipeline_common_fe.h" +#include "pipeline_flow_classification.h" +#include "hash_func.h" + +/* + * Key conversion + */ + +struct pkt_key_qinq { + uint16_t ethertype_svlan; + uint16_t svlan; + uint16_t ethertype_cvlan; + uint16_t cvlan; +} __attribute__((__packed__)); + +struct pkt_key_ipv4_5tuple { + uint8_t ttl; + uint8_t proto; + uint16_t checksum; + uint32_t ip_src; + uint32_t ip_dst; + uint16_t port_src; + uint16_t port_dst; +} __attribute__((__packed__)); + +struct pkt_key_ipv6_5tuple { + uint16_t payload_length; + uint8_t proto; + uint8_t hop_limit; + uint8_t ip_src[16]; + uint8_t ip_dst[16]; + uint16_t port_src; + uint16_t port_dst; +} __attribute__((__packed__)); + +static int +app_pipeline_fc_key_convert(struct pipeline_fc_key *key_in, + uint8_t *key_out, + uint32_t 
*signature) +{ + uint8_t buffer[PIPELINE_FC_FLOW_KEY_MAX_SIZE]; + void *key_buffer = (key_out) ? key_out : buffer; + + switch (key_in->type) { + case FLOW_KEY_QINQ: + { + struct pkt_key_qinq *qinq = key_buffer; + + qinq->ethertype_svlan = 0; + qinq->svlan = rte_bswap16(key_in->key.qinq.svlan); + qinq->ethertype_cvlan = 0; + qinq->cvlan = rte_bswap16(key_in->key.qinq.cvlan); + + if (signature) + *signature = (uint32_t) hash_default_key8(qinq, 8, 0); + return 0; + } + + case FLOW_KEY_IPV4_5TUPLE: + { + struct pkt_key_ipv4_5tuple *ipv4 = key_buffer; + + ipv4->ttl = 0; + ipv4->proto = key_in->key.ipv4_5tuple.proto; + ipv4->checksum = 0; + ipv4->ip_src = rte_bswap32(key_in->key.ipv4_5tuple.ip_src); + ipv4->ip_dst = rte_bswap32(key_in->key.ipv4_5tuple.ip_dst); + ipv4->port_src = rte_bswap16(key_in->key.ipv4_5tuple.port_src); + ipv4->port_dst = rte_bswap16(key_in->key.ipv4_5tuple.port_dst); + + if (signature) + *signature = (uint32_t) hash_default_key16(ipv4, 16, 0); + return 0; + } + + case FLOW_KEY_IPV6_5TUPLE: + { + struct pkt_key_ipv6_5tuple *ipv6 = key_buffer; + + memset(ipv6, 0, 64); + ipv6->payload_length = 0; + ipv6->proto = key_in->key.ipv6_5tuple.proto; + ipv6->hop_limit = 0; + memcpy(&ipv6->ip_src, &key_in->key.ipv6_5tuple.ip_src, 16); + memcpy(&ipv6->ip_dst, &key_in->key.ipv6_5tuple.ip_dst, 16); + ipv6->port_src = rte_bswap16(key_in->key.ipv6_5tuple.port_src); + ipv6->port_dst = rte_bswap16(key_in->key.ipv6_5tuple.port_dst); + + if (signature) + *signature = (uint32_t) hash_default_key64(ipv6, 64, 0); + return 0; + } + + default: + return -1; + } +} + +/* + * Flow classification pipeline + */ + +struct app_pipeline_fc_flow { + struct pipeline_fc_key key; + uint32_t port_id; + uint32_t flow_id; + uint32_t signature; + void *entry_ptr; + + TAILQ_ENTRY(app_pipeline_fc_flow) node; +}; + +#define N_BUCKETS 65536 + +struct app_pipeline_fc { + /* Parameters */ + uint32_t n_ports_in; + uint32_t n_ports_out; + + /* Flows */ + TAILQ_HEAD(, app_pipeline_fc_flow) 
flows[N_BUCKETS]; + uint32_t n_flows; + + /* Default flow */ + uint32_t default_flow_present; + uint32_t default_flow_port_id; + void *default_flow_entry_ptr; +}; + +static struct app_pipeline_fc_flow * +app_pipeline_fc_flow_find(struct app_pipeline_fc *p, + struct pipeline_fc_key *key) +{ + struct app_pipeline_fc_flow *f; + uint32_t signature, bucket_id; + + app_pipeline_fc_key_convert(key, NULL, &signature); + bucket_id = signature & (N_BUCKETS - 1); + + TAILQ_FOREACH(f, &p->flows[bucket_id], node) + if ((signature == f->signature) && + (memcmp(key, + &f->key, + sizeof(struct pipeline_fc_key)) == 0)) + return f; + + return NULL; +} + +static void* +app_pipeline_fc_init(struct pipeline_params *params, + __rte_unused void *arg) +{ + struct app_pipeline_fc *p; + uint32_t size, i; + + /* Check input arguments */ + if ((params == NULL) || + (params->n_ports_in == 0) || + (params->n_ports_out == 0)) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct app_pipeline_fc)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + if (p == NULL) + return NULL; + + /* Initialization */ + p->n_ports_in = params->n_ports_in; + p->n_ports_out = params->n_ports_out; + + for (i = 0; i < N_BUCKETS; i++) + TAILQ_INIT(&p->flows[i]); + p->n_flows = 0; + + return (void *) p; +} + +static int +app_pipeline_fc_free(void *pipeline) +{ + struct app_pipeline_fc *p = pipeline; + uint32_t i; + + /* Check input arguments */ + if (p == NULL) + return -1; + + /* Free resources */ + for (i = 0; i < N_BUCKETS; i++) + while (!TAILQ_EMPTY(&p->flows[i])) { + struct app_pipeline_fc_flow *flow; + + flow = TAILQ_FIRST(&p->flows[i]); + TAILQ_REMOVE(&p->flows[i], flow, node); + rte_free(flow); + } + + rte_free(p); + return 0; +} + +static int +app_pipeline_fc_key_check(struct pipeline_fc_key *key) +{ + switch (key->type) { + case FLOW_KEY_QINQ: + { + uint16_t svlan = key->key.qinq.svlan; + uint16_t cvlan = key->key.qinq.cvlan; + + if ((svlan & 0xF000) || + (cvlan & 
0xF000)) + return -1; + + return 0; + } + + case FLOW_KEY_IPV4_5TUPLE: + return 0; + + case FLOW_KEY_IPV6_5TUPLE: + return 0; + + default: + return -1; + } +} + +int +app_pipeline_fc_add(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_fc_key *key, + uint32_t port_id, + uint32_t flow_id) +{ + struct app_pipeline_fc *p; + struct app_pipeline_fc_flow *flow; + + struct pipeline_fc_add_msg_req *req; + struct pipeline_fc_add_msg_rsp *rsp; + + uint32_t signature; + int new_flow; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification); + if (p == NULL) + return -1; + + if (port_id >= p->n_ports_out) + return -1; + + if (app_pipeline_fc_key_check(key) != 0) + return -1; + + /* Find existing flow or allocate new flow */ + flow = app_pipeline_fc_flow_find(p, key); + new_flow = (flow == NULL); + if (flow == NULL) { + flow = rte_malloc(NULL, sizeof(*flow), RTE_CACHE_LINE_SIZE); + + if (flow == NULL) + return -1; + } + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FC_MSG_REQ_FLOW_ADD; + app_pipeline_fc_key_convert(key, req->key, &signature); + req->port_id = port_id; + req->flow_id = flow_id; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) { + if (new_flow) + rte_free(flow); + return -1; + } + + /* Read response and write flow */ + if (rsp->status || + (rsp->entry_ptr == NULL) || + ((new_flow == 0) && (rsp->key_found == 0)) || + ((new_flow == 1) && (rsp->key_found == 1))) { + app_msg_free(app, rsp); + if (new_flow) + rte_free(flow); + return -1; + } + + memset(&flow->key, 0, sizeof(flow->key)); + memcpy(&flow->key, key, sizeof(flow->key)); + flow->port_id = port_id; + flow->flow_id = flow_id; + flow->signature = signature; + flow->entry_ptr = 
rsp->entry_ptr; + + /* Commit rule */ + if (new_flow) { + uint32_t bucket_id = signature & (N_BUCKETS - 1); + + TAILQ_INSERT_TAIL(&p->flows[bucket_id], flow, node); + p->n_flows++; + } + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_fc_add_bulk(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_fc_key *key, + uint32_t *port_id, + uint32_t *flow_id, + uint32_t n_keys) +{ + struct app_pipeline_fc *p; + struct pipeline_fc_add_bulk_msg_req *req; + struct pipeline_fc_add_bulk_msg_rsp *rsp; + + struct app_pipeline_fc_flow **flow; + uint32_t *signature; + int *new_flow; + struct pipeline_fc_add_bulk_flow_req *flow_req; + struct pipeline_fc_add_bulk_flow_rsp *flow_rsp; + + uint32_t i; + int status; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL) || + (port_id == NULL) || + (flow_id == NULL) || + (n_keys == 0)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification); + if (p == NULL) + return -1; + + for (i = 0; i < n_keys; i++) + if (port_id[i] >= p->n_ports_out) + return -1; + + for (i = 0; i < n_keys; i++) + if (app_pipeline_fc_key_check(&key[i]) != 0) + return -1; + + /* Memory allocation */ + flow = rte_malloc(NULL, + n_keys * sizeof(struct app_pipeline_fc_flow *), + RTE_CACHE_LINE_SIZE); + if (flow == NULL) + return -1; + + signature = rte_malloc(NULL, + n_keys * sizeof(uint32_t), + RTE_CACHE_LINE_SIZE); + if (signature == NULL) { + rte_free(flow); + return -1; + } + + new_flow = rte_malloc( + NULL, + n_keys * sizeof(int), + RTE_CACHE_LINE_SIZE); + if (new_flow == NULL) { + rte_free(signature); + rte_free(flow); + return -1; + } + + flow_req = rte_malloc(NULL, + n_keys * sizeof(struct pipeline_fc_add_bulk_flow_req), + RTE_CACHE_LINE_SIZE); + if (flow_req == NULL) { + rte_free(new_flow); + rte_free(signature); + rte_free(flow); + return -1; + } + + flow_rsp = rte_malloc(NULL, + n_keys * sizeof(struct pipeline_fc_add_bulk_flow_rsp), + RTE_CACHE_LINE_SIZE); + 
if (flow_rsp == NULL) { + rte_free(flow_req); + rte_free(new_flow); + rte_free(signature); + rte_free(flow); + return -1; + } + + /* Find existing flow or allocate new flow */ + for (i = 0; i < n_keys; i++) { + flow[i] = app_pipeline_fc_flow_find(p, &key[i]); + new_flow[i] = (flow[i] == NULL); + if (flow[i] == NULL) { + flow[i] = rte_zmalloc(NULL, + sizeof(struct app_pipeline_fc_flow), + RTE_CACHE_LINE_SIZE); + + if (flow[i] == NULL) { + uint32_t j; + + for (j = 0; j < i; j++) + if (new_flow[j]) + rte_free(flow[j]); + + rte_free(flow_rsp); + rte_free(flow_req); + rte_free(new_flow); + rte_free(signature); + rte_free(flow); + return -1; + } + } + } + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) { + for (i = 0; i < n_keys; i++) + if (new_flow[i]) + rte_free(flow[i]); + + rte_free(flow_rsp); + rte_free(flow_req); + rte_free(new_flow); + rte_free(signature); + rte_free(flow); + return -1; + } + + for (i = 0; i < n_keys; i++) { + app_pipeline_fc_key_convert(&key[i], + flow_req[i].key, + &signature[i]); + flow_req[i].port_id = port_id[i]; + flow_req[i].flow_id = flow_id[i]; + } + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FC_MSG_REQ_FLOW_ADD_BULK; + req->req = flow_req; + req->rsp = flow_rsp; + req->n_keys = n_keys; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, 10000); + if (rsp == NULL) { + for (i = 0; i < n_keys; i++) + if (new_flow[i]) + rte_free(flow[i]); + + rte_free(flow_rsp); + rte_free(flow_req); + rte_free(new_flow); + rte_free(signature); + rte_free(flow); + return -1; + } + + /* Read response */ + status = 0; + + for (i = 0; i < rsp->n_keys; i++) + if ((flow_rsp[i].entry_ptr == NULL) || + ((new_flow[i] == 0) && (flow_rsp[i].key_found == 0)) || + ((new_flow[i] == 1) && (flow_rsp[i].key_found == 1))) + status = -1; + + if (rsp->n_keys < n_keys) + status = -1; + + /* Commit flows */ + for (i = 0; i < rsp->n_keys; i++) { + memcpy(&flow[i]->key, &key[i], 
sizeof(flow[i]->key)); + flow[i]->port_id = port_id[i]; + flow[i]->flow_id = flow_id[i]; + flow[i]->signature = signature[i]; + flow[i]->entry_ptr = flow_rsp[i].entry_ptr; + + if (new_flow[i]) { + uint32_t bucket_id = signature[i] & (N_BUCKETS - 1); + + TAILQ_INSERT_TAIL(&p->flows[bucket_id], flow[i], node); + p->n_flows++; + } + } + + /* Free resources */ + app_msg_free(app, rsp); + + for (i = rsp->n_keys; i < n_keys; i++) + if (new_flow[i]) + rte_free(flow[i]); + + rte_free(flow_rsp); + rte_free(flow_req); + rte_free(new_flow); + rte_free(signature); + rte_free(flow); + + return status; +} + +int +app_pipeline_fc_del(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_fc_key *key) +{ + struct app_pipeline_fc *p; + struct app_pipeline_fc_flow *flow; + + struct pipeline_fc_del_msg_req *req; + struct pipeline_fc_del_msg_rsp *rsp; + + uint32_t signature, bucket_id; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification); + if (p == NULL) + return -1; + + if (app_pipeline_fc_key_check(key) != 0) + return -1; + + /* Find rule */ + flow = app_pipeline_fc_flow_find(p, key); + if (flow == NULL) + return 0; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FC_MSG_REQ_FLOW_DEL; + app_pipeline_fc_key_convert(key, req->key, &signature); + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response */ + if (rsp->status || !rsp->key_found) { + app_msg_free(app, rsp); + return -1; + } + + /* Remove rule */ + bucket_id = signature & (N_BUCKETS - 1); + TAILQ_REMOVE(&p->flows[bucket_id], flow, node); + p->n_flows--; + rte_free(flow); + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_fc_add_default(struct 
app_params *app, + uint32_t pipeline_id, + uint32_t port_id) +{ + struct app_pipeline_fc *p; + + struct pipeline_fc_add_default_msg_req *req; + struct pipeline_fc_add_default_msg_rsp *rsp; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification); + if (p == NULL) + return -1; + + if (port_id >= p->n_ports_out) + return -1; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FC_MSG_REQ_FLOW_ADD_DEFAULT; + req->port_id = port_id; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response and write flow */ + if (rsp->status || (rsp->entry_ptr == NULL)) { + app_msg_free(app, rsp); + return -1; + } + + p->default_flow_port_id = port_id; + p->default_flow_entry_ptr = rsp->entry_ptr; + + /* Commit route */ + p->default_flow_present = 1; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_fc_del_default(struct app_params *app, + uint32_t pipeline_id) +{ + struct app_pipeline_fc *p; + + struct pipeline_fc_del_default_msg_req *req; + struct pipeline_fc_del_default_msg_rsp *rsp; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification); + if (p == NULL) + return -EINVAL; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_FC_MSG_REQ_FLOW_DEL_DEFAULT; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response */ + if (rsp->status) { + app_msg_free(app, rsp); + return -1; + } + + /* Commit route */ + p->default_flow_present = 0; + + /* Free 
response */ + app_msg_free(app, rsp); + + return 0; +} + +/* + * Flow ls + */ + +static void +print_fc_qinq_flow(struct app_pipeline_fc_flow *flow) +{ + printf("(SVLAN = %" PRIu32 ", " + "CVLAN = %" PRIu32 ") => " + "Port = %" PRIu32 ", " + "Flow ID = %" PRIu32 ", " + "(signature = 0x%08" PRIx32 ", " + "entry_ptr = %p)\n", + + flow->key.key.qinq.svlan, + flow->key.key.qinq.cvlan, + flow->port_id, + flow->flow_id, + flow->signature, + flow->entry_ptr); +} + +static void +print_fc_ipv4_5tuple_flow(struct app_pipeline_fc_flow *flow) +{ + printf("(SA = %" PRIu32 ".%" PRIu32 ".%" PRIu32 ".%" PRIu32 ", " + "DA = %" PRIu32 ".%" PRIu32 ".%" PRIu32 ".%" PRIu32 ", " + "SP = %" PRIu32 ", " + "DP = %" PRIu32 ", " + "Proto = %" PRIu32 ") => " + "Port = %" PRIu32 ", " + "Flow ID = %" PRIu32 " " + "(signature = 0x%08" PRIx32 ", " + "entry_ptr = %p)\n", + + (flow->key.key.ipv4_5tuple.ip_src >> 24) & 0xFF, + (flow->key.key.ipv4_5tuple.ip_src >> 16) & 0xFF, + (flow->key.key.ipv4_5tuple.ip_src >> 8) & 0xFF, + flow->key.key.ipv4_5tuple.ip_src & 0xFF, + + (flow->key.key.ipv4_5tuple.ip_dst >> 24) & 0xFF, + (flow->key.key.ipv4_5tuple.ip_dst >> 16) & 0xFF, + (flow->key.key.ipv4_5tuple.ip_dst >> 8) & 0xFF, + flow->key.key.ipv4_5tuple.ip_dst & 0xFF, + + flow->key.key.ipv4_5tuple.port_src, + flow->key.key.ipv4_5tuple.port_dst, + + flow->key.key.ipv4_5tuple.proto, + + flow->port_id, + flow->flow_id, + flow->signature, + flow->entry_ptr); +} + +static void +print_fc_ipv6_5tuple_flow(struct app_pipeline_fc_flow *flow) { + printf("(SA = %02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 + ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 + ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 + ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 ", " + "DA = %02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 + ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 + ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 + ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 ", " + "SP = 
%" PRIu32 ", " + "DP = %" PRIu32 " " + "Proto = %" PRIu32 " " + "=> Port = %" PRIu32 ", " + "Flow ID = %" PRIu32 " " + "(signature = 0x%08" PRIx32 ", " + "entry_ptr = %p)\n", + + flow->key.key.ipv6_5tuple.ip_src[0], + flow->key.key.ipv6_5tuple.ip_src[1], + flow->key.key.ipv6_5tuple.ip_src[2], + flow->key.key.ipv6_5tuple.ip_src[3], + flow->key.key.ipv6_5tuple.ip_src[4], + flow->key.key.ipv6_5tuple.ip_src[5], + flow->key.key.ipv6_5tuple.ip_src[6], + flow->key.key.ipv6_5tuple.ip_src[7], + flow->key.key.ipv6_5tuple.ip_src[8], + flow->key.key.ipv6_5tuple.ip_src[9], + flow->key.key.ipv6_5tuple.ip_src[10], + flow->key.key.ipv6_5tuple.ip_src[11], + flow->key.key.ipv6_5tuple.ip_src[12], + flow->key.key.ipv6_5tuple.ip_src[13], + flow->key.key.ipv6_5tuple.ip_src[14], + flow->key.key.ipv6_5tuple.ip_src[15], + + flow->key.key.ipv6_5tuple.ip_dst[0], + flow->key.key.ipv6_5tuple.ip_dst[1], + flow->key.key.ipv6_5tuple.ip_dst[2], + flow->key.key.ipv6_5tuple.ip_dst[3], + flow->key.key.ipv6_5tuple.ip_dst[4], + flow->key.key.ipv6_5tuple.ip_dst[5], + flow->key.key.ipv6_5tuple.ip_dst[6], + flow->key.key.ipv6_5tuple.ip_dst[7], + flow->key.key.ipv6_5tuple.ip_dst[8], + flow->key.key.ipv6_5tuple.ip_dst[9], + flow->key.key.ipv6_5tuple.ip_dst[10], + flow->key.key.ipv6_5tuple.ip_dst[11], + flow->key.key.ipv6_5tuple.ip_dst[12], + flow->key.key.ipv6_5tuple.ip_dst[13], + flow->key.key.ipv6_5tuple.ip_dst[14], + flow->key.key.ipv6_5tuple.ip_dst[15], + + flow->key.key.ipv6_5tuple.port_src, + flow->key.key.ipv6_5tuple.port_dst, + + flow->key.key.ipv6_5tuple.proto, + + flow->port_id, + flow->flow_id, + flow->signature, + flow->entry_ptr); +} + +static void +print_fc_flow(struct app_pipeline_fc_flow *flow) +{ + switch (flow->key.type) { + case FLOW_KEY_QINQ: + print_fc_qinq_flow(flow); + break; + + case FLOW_KEY_IPV4_5TUPLE: + print_fc_ipv4_5tuple_flow(flow); + break; + + case FLOW_KEY_IPV6_5TUPLE: + print_fc_ipv6_5tuple_flow(flow); + break; + } +} + +static int +app_pipeline_fc_ls(struct app_params 
*app, + uint32_t pipeline_id) +{ + struct app_pipeline_fc *p; + struct app_pipeline_fc_flow *flow; + uint32_t i; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification); + if (p == NULL) + return -1; + + for (i = 0; i < N_BUCKETS; i++) + TAILQ_FOREACH(flow, &p->flows[i], node) + print_fc_flow(flow); + + if (p->default_flow_present) + printf("Default flow: port %" PRIu32 " (entry ptr = %p)\n", + p->default_flow_port_id, + p->default_flow_entry_ptr); + else + printf("Default: DROP\n"); + + return 0; +} + +/* + * flow add qinq + */ + +struct cmd_fc_add_qinq_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t qinq_string; + uint16_t svlan; + uint16_t cvlan; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t flowid_string; + uint32_t flow_id; +}; + +static void +cmd_fc_add_qinq_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_add_qinq_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key key; + int status; + + memset(&key, 0, sizeof(key)); + key.type = FLOW_KEY_QINQ; + key.key.qinq.svlan = params->svlan; + key.key.qinq.cvlan = params->cvlan; + + status = app_pipeline_fc_add(app, + params->pipeline_id, + &key, + params->port, + params->flow_id); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_add_qinq_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, p_string, "p"); + +cmdline_parse_token_num_t cmd_fc_add_qinq_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_qinq_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, flow_string, + "flow"); + +cmdline_parse_token_string_t 
cmd_fc_add_qinq_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, add_string, + "add"); + +cmdline_parse_token_string_t cmd_fc_add_qinq_qinq_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, qinq_string, + "qinq"); + +cmdline_parse_token_num_t cmd_fc_add_qinq_svlan = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, svlan, UINT16); + +cmdline_parse_token_num_t cmd_fc_add_qinq_cvlan = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, cvlan, UINT16); + +cmdline_parse_token_string_t cmd_fc_add_qinq_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, port_string, + "port"); + +cmdline_parse_token_num_t cmd_fc_add_qinq_port = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, port, UINT32); + +cmdline_parse_token_string_t cmd_fc_add_qinq_flowid_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, flowid_string, + "flowid"); + +cmdline_parse_token_num_t cmd_fc_add_qinq_flow_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, flow_id, UINT32); + +cmdline_parse_inst_t cmd_fc_add_qinq = { + .f = cmd_fc_add_qinq_parsed, + .data = NULL, + .help_str = "Flow add (Q-in-Q)", + .tokens = { + (void *) &cmd_fc_add_qinq_p_string, + (void *) &cmd_fc_add_qinq_pipeline_id, + (void *) &cmd_fc_add_qinq_flow_string, + (void *) &cmd_fc_add_qinq_add_string, + (void *) &cmd_fc_add_qinq_qinq_string, + (void *) &cmd_fc_add_qinq_svlan, + (void *) &cmd_fc_add_qinq_cvlan, + (void *) &cmd_fc_add_qinq_port_string, + (void *) &cmd_fc_add_qinq_port, + (void *) &cmd_fc_add_qinq_flowid_string, + (void *) &cmd_fc_add_qinq_flow_id, + NULL, + }, +}; + +/* + * flow add qinq all + */ + +struct cmd_fc_add_qinq_all_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t qinq_string; + cmdline_fixed_string_t all_string; + uint32_t n_flows; + uint32_t n_ports; +}; + +#ifndef N_FLOWS_BULK +#define N_FLOWS_BULK 
4096 +#endif + +static void +cmd_fc_add_qinq_all_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_add_qinq_all_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key *key; + uint32_t *port_id; + uint32_t *flow_id; + uint32_t id; + + /* Check input arguments */ + if (params->n_flows == 0) { + printf("Invalid number of flows\n"); + return; + } + + if (params->n_ports == 0) { + printf("Invalid number of output ports\n"); + return; + } + + /* Memory allocation */ + key = rte_zmalloc(NULL, + N_FLOWS_BULK * sizeof(*key), + RTE_CACHE_LINE_SIZE); + if (key == NULL) { + printf("Memory allocation failed\n"); + return; + } + + port_id = rte_malloc(NULL, + N_FLOWS_BULK * sizeof(*port_id), + RTE_CACHE_LINE_SIZE); + if (port_id == NULL) { + rte_free(key); + printf("Memory allocation failed\n"); + return; + } + + flow_id = rte_malloc(NULL, + N_FLOWS_BULK * sizeof(*flow_id), + RTE_CACHE_LINE_SIZE); + if (flow_id == NULL) { + rte_free(port_id); + rte_free(key); + printf("Memory allocation failed\n"); + return; + } + + /* Flow add */ + for (id = 0; id < params->n_flows; id++) { + uint32_t pos = id & (N_FLOWS_BULK - 1); + + key[pos].type = FLOW_KEY_QINQ; + key[pos].key.qinq.svlan = id >> 12; + key[pos].key.qinq.cvlan = id & 0xFFF; + + port_id[pos] = id % params->n_ports; + flow_id[pos] = id; + + if ((pos == N_FLOWS_BULK - 1) || + (id == params->n_flows - 1)) { + int status; + + status = app_pipeline_fc_add_bulk(app, + params->pipeline_id, + key, + port_id, + flow_id, + pos + 1); + + if (status != 0) { + printf("Command failed\n"); + + break; + } + } + } + + /* Memory free */ + rte_free(flow_id); + rte_free(port_id); + rte_free(key); +} + +cmdline_parse_token_string_t cmd_fc_add_qinq_all_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_fc_add_qinq_all_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_all_result, 
pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_qinq_all_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, flow_string, + "flow"); + +cmdline_parse_token_string_t cmd_fc_add_qinq_all_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, add_string, + "add"); + +cmdline_parse_token_string_t cmd_fc_add_qinq_all_qinq_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, qinq_string, + "qinq"); + +cmdline_parse_token_string_t cmd_fc_add_qinq_all_all_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, all_string, + "all"); + +cmdline_parse_token_num_t cmd_fc_add_qinq_all_n_flows = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_all_result, n_flows, + UINT32); + +cmdline_parse_token_num_t cmd_fc_add_qinq_all_n_ports = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_all_result, n_ports, + UINT32); + +cmdline_parse_inst_t cmd_fc_add_qinq_all = { + .f = cmd_fc_add_qinq_all_parsed, + .data = NULL, + .help_str = "Flow add all (Q-in-Q)", + .tokens = { + (void *) &cmd_fc_add_qinq_all_p_string, + (void *) &cmd_fc_add_qinq_all_pipeline_id, + (void *) &cmd_fc_add_qinq_all_flow_string, + (void *) &cmd_fc_add_qinq_all_add_string, + (void *) &cmd_fc_add_qinq_all_qinq_string, + (void *) &cmd_fc_add_qinq_all_all_string, + (void *) &cmd_fc_add_qinq_all_n_flows, + (void *) &cmd_fc_add_qinq_all_n_ports, + NULL, + }, +}; + +/* + * flow add ipv4_5tuple + */ + +struct cmd_fc_add_ipv4_5tuple_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t ipv4_5tuple_string; + cmdline_ipaddr_t ip_src; + cmdline_ipaddr_t ip_dst; + uint16_t port_src; + uint16_t port_dst; + uint32_t proto; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t flowid_string; + uint32_t flow_id; +}; + +static void +cmd_fc_add_ipv4_5tuple_parsed( + void *parsed_result, + __rte_unused struct 
cmdline *cl, + void *data) +{ + struct cmd_fc_add_ipv4_5tuple_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key key; + int status; + + memset(&key, 0, sizeof(key)); + key.type = FLOW_KEY_IPV4_5TUPLE; + key.key.ipv4_5tuple.ip_src = rte_bswap32( + params->ip_src.addr.ipv4.s_addr); + key.key.ipv4_5tuple.ip_dst = rte_bswap32( + params->ip_dst.addr.ipv4.s_addr); + key.key.ipv4_5tuple.port_src = params->port_src; + key.key.ipv4_5tuple.port_dst = params->port_dst; + key.key.ipv4_5tuple.proto = params->proto; + + status = app_pipeline_fc_add(app, + params->pipeline_id, + &key, + params->port, + params->flow_id); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, + flow_string, "flow"); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, + add_string, "add"); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_ipv4_5tuple_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, + ipv4_5tuple_string, "ipv4_5tuple"); + +cmdline_parse_token_ipaddr_t cmd_fc_add_ipv4_5tuple_ip_src = + TOKEN_IPV4_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, ip_src); + +cmdline_parse_token_ipaddr_t cmd_fc_add_ipv4_5tuple_ip_dst = + TOKEN_IPV4_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, ip_dst); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_port_src = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, port_src, + UINT16); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_port_dst = + 
TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, port_dst, + UINT16); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_proto = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, proto, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, port_string, + "port"); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_port = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, port, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_flowid_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, + flowid_string, "flowid"); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_flow_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, flow_id, + UINT32); + +cmdline_parse_inst_t cmd_fc_add_ipv4_5tuple = { + .f = cmd_fc_add_ipv4_5tuple_parsed, + .data = NULL, + .help_str = "Flow add (IPv4 5-tuple)", + .tokens = { + (void *) &cmd_fc_add_ipv4_5tuple_p_string, + (void *) &cmd_fc_add_ipv4_5tuple_pipeline_id, + (void *) &cmd_fc_add_ipv4_5tuple_flow_string, + (void *) &cmd_fc_add_ipv4_5tuple_add_string, + (void *) &cmd_fc_add_ipv4_5tuple_ipv4_5tuple_string, + (void *) &cmd_fc_add_ipv4_5tuple_ip_src, + (void *) &cmd_fc_add_ipv4_5tuple_ip_dst, + (void *) &cmd_fc_add_ipv4_5tuple_port_src, + (void *) &cmd_fc_add_ipv4_5tuple_port_dst, + (void *) &cmd_fc_add_ipv4_5tuple_proto, + (void *) &cmd_fc_add_ipv4_5tuple_port_string, + (void *) &cmd_fc_add_ipv4_5tuple_port, + (void *) &cmd_fc_add_ipv4_5tuple_flowid_string, + (void *) &cmd_fc_add_ipv4_5tuple_flow_id, + NULL, + }, +}; + +/* + * flow add ipv4_5tuple all + */ + +struct cmd_fc_add_ipv4_5tuple_all_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t ipv4_5tuple_string; + cmdline_fixed_string_t all_string; + uint32_t n_flows; + uint32_t n_ports; +}; + 
+static void +cmd_fc_add_ipv4_5tuple_all_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_add_ipv4_5tuple_all_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key *key; + uint32_t *port_id; + uint32_t *flow_id; + uint32_t id; + + /* Check input parameters */ + if (params->n_flows == 0) { + printf("Invalid number of flows\n"); + return; + } + + if (params->n_ports == 0) { + printf("Invalid number of ports\n"); + return; + } + + /* Memory allocation */ + key = rte_zmalloc(NULL, + N_FLOWS_BULK * sizeof(*key), + RTE_CACHE_LINE_SIZE); + if (key == NULL) { + printf("Memory allocation failed\n"); + return; + } + + port_id = rte_malloc(NULL, + N_FLOWS_BULK * sizeof(*port_id), + RTE_CACHE_LINE_SIZE); + if (port_id == NULL) { + rte_free(key); + printf("Memory allocation failed\n"); + return; + } + + flow_id = rte_malloc(NULL, + N_FLOWS_BULK * sizeof(*flow_id), + RTE_CACHE_LINE_SIZE); + if (flow_id == NULL) { + rte_free(port_id); + rte_free(key); + printf("Memory allocation failed\n"); + return; + } + + /* Flow add */ + for (id = 0; id < params->n_flows; id++) { + uint32_t pos = id & (N_FLOWS_BULK - 1); + + key[pos].type = FLOW_KEY_IPV4_5TUPLE; + key[pos].key.ipv4_5tuple.ip_src = 0; + key[pos].key.ipv4_5tuple.ip_dst = id; + key[pos].key.ipv4_5tuple.port_src = 0; + key[pos].key.ipv4_5tuple.port_dst = 0; + key[pos].key.ipv4_5tuple.proto = 6; + + port_id[pos] = id % params->n_ports; + flow_id[pos] = id; + + if ((pos == N_FLOWS_BULK - 1) || + (id == params->n_flows - 1)) { + int status; + + status = app_pipeline_fc_add_bulk(app, + params->pipeline_id, + key, + port_id, + flow_id, + pos + 1); + + if (status != 0) { + printf("Command failed\n"); + + break; + } + } + } + + /* Memory free */ + rte_free(flow_id); + rte_free(port_id); + rte_free(key); +} + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result, + 
p_string, "p"); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_all_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result, + flow_string, "flow"); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result, + add_string, "add"); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_ipv4_5tuple_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result, + ipv4_5tuple_string, "ipv4_5tuple"); + +cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_all_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result, + all_string, "all"); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_all_n_flows = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result, + n_flows, UINT32); + +cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_all_n_ports = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result, + n_ports, UINT32); + +cmdline_parse_inst_t cmd_fc_add_ipv4_5tuple_all = { + .f = cmd_fc_add_ipv4_5tuple_all_parsed, + .data = NULL, + .help_str = "Flow add all (IPv4 5-tuple)", + .tokens = { + (void *) &cmd_fc_add_ipv4_5tuple_all_p_string, + (void *) &cmd_fc_add_ipv4_5tuple_all_pipeline_id, + (void *) &cmd_fc_add_ipv4_5tuple_all_flow_string, + (void *) &cmd_fc_add_ipv4_5tuple_all_add_string, + (void *) &cmd_fc_add_ipv4_5tuple_all_ipv4_5tuple_string, + (void *) &cmd_fc_add_ipv4_5tuple_all_all_string, + (void *) &cmd_fc_add_ipv4_5tuple_all_n_flows, + (void *) &cmd_fc_add_ipv4_5tuple_all_n_ports, + NULL, + }, +}; + +/* + * flow add ipv6_5tuple + */ + +struct cmd_fc_add_ipv6_5tuple_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t 
ipv6_5tuple_string; + cmdline_ipaddr_t ip_src; + cmdline_ipaddr_t ip_dst; + uint16_t port_src; + uint16_t port_dst; + uint32_t proto; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t flowid_string; + uint32_t flow_id; +}; + +static void +cmd_fc_add_ipv6_5tuple_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_add_ipv6_5tuple_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key key; + int status; + + memset(&key, 0, sizeof(key)); + key.type = FLOW_KEY_IPV6_5TUPLE; + memcpy(key.key.ipv6_5tuple.ip_src, + params->ip_src.addr.ipv6.s6_addr, + 16); + memcpy(key.key.ipv6_5tuple.ip_dst, + params->ip_dst.addr.ipv6.s6_addr, + 16); + key.key.ipv6_5tuple.port_src = params->port_src; + key.key.ipv6_5tuple.port_dst = params->port_dst; + key.key.ipv6_5tuple.proto = params->proto; + + status = app_pipeline_fc_add(app, + params->pipeline_id, + &key, + params->port, + params->flow_id); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, + flow_string, "flow"); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, + add_string, "add"); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_ipv6_5tuple_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, + ipv6_5tuple_string, "ipv6_5tuple"); + +cmdline_parse_token_ipaddr_t cmd_fc_add_ipv6_5tuple_ip_src = + TOKEN_IPV6_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, ip_src); + 
+cmdline_parse_token_ipaddr_t cmd_fc_add_ipv6_5tuple_ip_dst = + TOKEN_IPV6_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, ip_dst); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_port_src = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, port_src, + UINT16); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_port_dst = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, port_dst, + UINT16); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_proto = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, proto, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, + port_string, "port"); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_port = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, port, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_flowid_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, + flowid_string, "flowid"); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_flow_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, flow_id, + UINT32); + +cmdline_parse_inst_t cmd_fc_add_ipv6_5tuple = { + .f = cmd_fc_add_ipv6_5tuple_parsed, + .data = NULL, + .help_str = "Flow add (IPv6 5-tuple)", + .tokens = { + (void *) &cmd_fc_add_ipv6_5tuple_p_string, + (void *) &cmd_fc_add_ipv6_5tuple_pipeline_id, + (void *) &cmd_fc_add_ipv6_5tuple_flow_string, + (void *) &cmd_fc_add_ipv6_5tuple_add_string, + (void *) &cmd_fc_add_ipv6_5tuple_ipv6_5tuple_string, + (void *) &cmd_fc_add_ipv6_5tuple_ip_src, + (void *) &cmd_fc_add_ipv6_5tuple_ip_dst, + (void *) &cmd_fc_add_ipv6_5tuple_port_src, + (void *) &cmd_fc_add_ipv6_5tuple_port_dst, + (void *) &cmd_fc_add_ipv6_5tuple_proto, + (void *) &cmd_fc_add_ipv6_5tuple_port_string, + (void *) &cmd_fc_add_ipv6_5tuple_port, + (void *) &cmd_fc_add_ipv6_5tuple_flowid_string, + (void *) &cmd_fc_add_ipv6_5tuple_flow_id, + NULL, + }, +}; + +/* + 
* flow add ipv6_5tuple all + */ + +struct cmd_fc_add_ipv6_5tuple_all_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t ipv6_5tuple_string; + cmdline_fixed_string_t all_string; + uint32_t n_flows; + uint32_t n_ports; +}; + +static void +cmd_fc_add_ipv6_5tuple_all_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_add_ipv6_5tuple_all_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key *key; + uint32_t *port_id; + uint32_t *flow_id; + uint32_t id; + + /* Check input parameters */ + if (params->n_flows == 0) { + printf("Invalid number of flows\n"); + return; + } + + if (params->n_ports == 0) { + printf("Invalid number of ports\n"); + return; + } + + /* Memory allocation */ + key = rte_zmalloc(NULL, + N_FLOWS_BULK * sizeof(*key), + RTE_CACHE_LINE_SIZE); + if (key == NULL) { + printf("Memory allocation failed\n"); + return; + } + + port_id = rte_malloc(NULL, + N_FLOWS_BULK * sizeof(*port_id), + RTE_CACHE_LINE_SIZE); + if (port_id == NULL) { + rte_free(key); + printf("Memory allocation failed\n"); + return; + } + + flow_id = rte_malloc(NULL, + N_FLOWS_BULK * sizeof(*flow_id), + RTE_CACHE_LINE_SIZE); + if (flow_id == NULL) { + rte_free(port_id); + rte_free(key); + printf("Memory allocation failed\n"); + return; + } + + /* Flow add */ + for (id = 0; id < params->n_flows; id++) { + uint32_t pos = id & (N_FLOWS_BULK - 1); + uint32_t *x; + + key[pos].type = FLOW_KEY_IPV6_5TUPLE; + x = (uint32_t *) key[pos].key.ipv6_5tuple.ip_dst; + *x = rte_bswap32(id); + key[pos].key.ipv6_5tuple.proto = 6; + + port_id[pos] = id % params->n_ports; + flow_id[pos] = id; + + if ((pos == N_FLOWS_BULK - 1) || + (id == params->n_flows - 1)) { + int status; + + status = app_pipeline_fc_add_bulk(app, + params->pipeline_id, + key, + port_id, + flow_id, + pos + 1); + + if (status != 0) { + 
printf("Command failed\n"); + + break; + } + } + } + + /* Memory free */ + rte_free(flow_id); + rte_free(port_id); + rte_free(key); +} + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_all_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result, + flow_string, "flow"); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result, + add_string, "add"); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_ipv6_5tuple_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result, + ipv6_5tuple_string, "ipv6_5tuple"); + +cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_all_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result, + all_string, "all"); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_all_n_flows = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result, + n_flows, UINT32); + +cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_all_n_ports = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result, + n_ports, UINT32); + +cmdline_parse_inst_t cmd_fc_add_ipv6_5tuple_all = { + .f = cmd_fc_add_ipv6_5tuple_all_parsed, + .data = NULL, + .help_str = "Flow add all (ipv6 5-tuple)", + .tokens = { + (void *) &cmd_fc_add_ipv6_5tuple_all_p_string, + (void *) &cmd_fc_add_ipv6_5tuple_all_pipeline_id, + (void *) &cmd_fc_add_ipv6_5tuple_all_flow_string, + (void *) &cmd_fc_add_ipv6_5tuple_all_add_string, + (void *) &cmd_fc_add_ipv6_5tuple_all_ipv6_5tuple_string, + (void *) &cmd_fc_add_ipv6_5tuple_all_all_string, + (void *) &cmd_fc_add_ipv6_5tuple_all_n_flows, + (void *) 
&cmd_fc_add_ipv6_5tuple_all_n_ports, + NULL, + }, +}; + +/* + * flow del qinq + */ +struct cmd_fc_del_qinq_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t del_string; + cmdline_fixed_string_t qinq_string; + uint16_t svlan; + uint16_t cvlan; +}; + +static void +cmd_fc_del_qinq_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_del_qinq_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key key; + int status; + + memset(&key, 0, sizeof(key)); + key.type = FLOW_KEY_QINQ; + key.key.qinq.svlan = params->svlan; + key.key.qinq.cvlan = params->cvlan; + status = app_pipeline_fc_del(app, params->pipeline_id, &key); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_del_qinq_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_qinq_result, p_string, "p"); + +cmdline_parse_token_num_t cmd_fc_del_qinq_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_qinq_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_fc_del_qinq_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_qinq_result, flow_string, + "flow"); + +cmdline_parse_token_string_t cmd_fc_del_qinq_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_qinq_result, del_string, + "del"); + +cmdline_parse_token_string_t cmd_fc_del_qinq_qinq_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_qinq_result, qinq_string, + "qinq"); + +cmdline_parse_token_num_t cmd_fc_del_qinq_svlan = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_qinq_result, svlan, UINT16); + +cmdline_parse_token_num_t cmd_fc_del_qinq_cvlan = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_qinq_result, cvlan, UINT16); + +cmdline_parse_inst_t cmd_fc_del_qinq = { + .f = cmd_fc_del_qinq_parsed, + .data = NULL, + .help_str = "Flow delete (Q-in-Q)", + .tokens = { + (void *) &cmd_fc_del_qinq_p_string, + (void *) 
&cmd_fc_del_qinq_pipeline_id, + (void *) &cmd_fc_del_qinq_flow_string, + (void *) &cmd_fc_del_qinq_del_string, + (void *) &cmd_fc_del_qinq_qinq_string, + (void *) &cmd_fc_del_qinq_svlan, + (void *) &cmd_fc_del_qinq_cvlan, + NULL, + }, +}; + +/* + * flow del ipv4_5tuple + */ + +struct cmd_fc_del_ipv4_5tuple_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t del_string; + cmdline_fixed_string_t ipv4_5tuple_string; + cmdline_ipaddr_t ip_src; + cmdline_ipaddr_t ip_dst; + uint16_t port_src; + uint16_t port_dst; + uint32_t proto; +}; + +static void +cmd_fc_del_ipv4_5tuple_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_del_ipv4_5tuple_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key key; + int status; + + memset(&key, 0, sizeof(key)); + key.type = FLOW_KEY_IPV4_5TUPLE; + key.key.ipv4_5tuple.ip_src = rte_bswap32( + params->ip_src.addr.ipv4.s_addr); + key.key.ipv4_5tuple.ip_dst = rte_bswap32( + params->ip_dst.addr.ipv4.s_addr); + key.key.ipv4_5tuple.port_src = params->port_src; + key.key.ipv4_5tuple.port_dst = params->port_dst; + key.key.ipv4_5tuple.proto = params->proto; + + status = app_pipeline_fc_del(app, params->pipeline_id, &key); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_del_ipv4_5tuple_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fc_del_ipv4_5tuple_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fc_del_ipv4_5tuple_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + flow_string, "flow"); + +cmdline_parse_token_string_t cmd_fc_del_ipv4_5tuple_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + del_string, "del"); + 
+cmdline_parse_token_string_t cmd_fc_del_ipv4_5tuple_ipv4_5tuple_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + ipv4_5tuple_string, "ipv4_5tuple"); + +cmdline_parse_token_ipaddr_t cmd_fc_del_ipv4_5tuple_ip_src = + TOKEN_IPV4_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + ip_src); + +cmdline_parse_token_ipaddr_t cmd_fc_del_ipv4_5tuple_ip_dst = + TOKEN_IPV4_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, ip_dst); + +cmdline_parse_token_num_t cmd_fc_del_ipv4_5tuple_port_src = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + port_src, UINT16); + +cmdline_parse_token_num_t cmd_fc_del_ipv4_5tuple_port_dst = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + port_dst, UINT16); + +cmdline_parse_token_num_t cmd_fc_del_ipv4_5tuple_proto = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, + proto, UINT32); + +cmdline_parse_inst_t cmd_fc_del_ipv4_5tuple = { + .f = cmd_fc_del_ipv4_5tuple_parsed, + .data = NULL, + .help_str = "Flow delete (IPv4 5-tuple)", + .tokens = { + (void *) &cmd_fc_del_ipv4_5tuple_p_string, + (void *) &cmd_fc_del_ipv4_5tuple_pipeline_id, + (void *) &cmd_fc_del_ipv4_5tuple_flow_string, + (void *) &cmd_fc_del_ipv4_5tuple_del_string, + (void *) &cmd_fc_del_ipv4_5tuple_ipv4_5tuple_string, + (void *) &cmd_fc_del_ipv4_5tuple_ip_src, + (void *) &cmd_fc_del_ipv4_5tuple_ip_dst, + (void *) &cmd_fc_del_ipv4_5tuple_port_src, + (void *) &cmd_fc_del_ipv4_5tuple_port_dst, + (void *) &cmd_fc_del_ipv4_5tuple_proto, + NULL, + }, +}; + +/* + * flow del ipv6_5tuple + */ + +struct cmd_fc_del_ipv6_5tuple_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t del_string; + cmdline_fixed_string_t ipv6_5tuple_string; + cmdline_ipaddr_t ip_src; + cmdline_ipaddr_t ip_dst; + uint16_t port_src; + uint16_t port_dst; + uint32_t proto; +}; + +static void +cmd_fc_del_ipv6_5tuple_parsed( + void *parsed_result, + __rte_unused struct cmdline 
*cl, + void *data) +{ + struct cmd_fc_del_ipv6_5tuple_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_fc_key key; + int status; + + memset(&key, 0, sizeof(key)); + key.type = FLOW_KEY_IPV6_5TUPLE; + memcpy(key.key.ipv6_5tuple.ip_src, + params->ip_src.addr.ipv6.s6_addr, + 16); + memcpy(key.key.ipv6_5tuple.ip_dst, + params->ip_dst.addr.ipv6.s6_addr, + 16); + key.key.ipv6_5tuple.port_src = params->port_src; + key.key.ipv6_5tuple.port_dst = params->port_dst; + key.key.ipv6_5tuple.proto = params->proto; + + status = app_pipeline_fc_del(app, params->pipeline_id, &key); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_del_ipv6_5tuple_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, + p_string, "p"); + +cmdline_parse_token_num_t cmd_fc_del_ipv6_5tuple_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, + pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fc_del_ipv6_5tuple_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, + flow_string, "flow"); + +cmdline_parse_token_string_t cmd_fc_del_ipv6_5tuple_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, + del_string, "del"); + +cmdline_parse_token_string_t cmd_fc_del_ipv6_5tuple_ipv6_5tuple_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, + ipv6_5tuple_string, "ipv6_5tuple"); + +cmdline_parse_token_ipaddr_t cmd_fc_del_ipv6_5tuple_ip_src = + TOKEN_IPV6_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, ip_src); + +cmdline_parse_token_ipaddr_t cmd_fc_del_ipv6_5tuple_ip_dst = + TOKEN_IPV6_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, ip_dst); + +cmdline_parse_token_num_t cmd_fc_del_ipv6_5tuple_port_src = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, port_src, + UINT16); + +cmdline_parse_token_num_t cmd_fc_del_ipv6_5tuple_port_dst = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, 
port_dst, + UINT16); + +cmdline_parse_token_num_t cmd_fc_del_ipv6_5tuple_proto = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, proto, + UINT32); + +cmdline_parse_inst_t cmd_fc_del_ipv6_5tuple = { + .f = cmd_fc_del_ipv6_5tuple_parsed, + .data = NULL, + .help_str = "Flow delete (IPv6 5-tuple)", + .tokens = { + (void *) &cmd_fc_del_ipv6_5tuple_p_string, + (void *) &cmd_fc_del_ipv6_5tuple_pipeline_id, + (void *) &cmd_fc_del_ipv6_5tuple_flow_string, + (void *) &cmd_fc_del_ipv6_5tuple_del_string, + (void *) &cmd_fc_del_ipv6_5tuple_ipv6_5tuple_string, + (void *) &cmd_fc_del_ipv6_5tuple_ip_src, + (void *) &cmd_fc_del_ipv6_5tuple_ip_dst, + (void *) &cmd_fc_del_ipv6_5tuple_port_src, + (void *) &cmd_fc_del_ipv6_5tuple_port_dst, + (void *) &cmd_fc_del_ipv6_5tuple_proto, + NULL, + }, +}; + +/* + * flow add default + */ + +struct cmd_fc_add_default_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t default_string; + uint32_t port; +}; + +static void +cmd_fc_add_default_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_add_default_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_fc_add_default(app, params->pipeline_id, + params->port); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_add_default_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_default_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_fc_add_default_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_default_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_fc_add_default_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_default_result, flow_string, + "flow"); + +cmdline_parse_token_string_t cmd_fc_add_default_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_default_result, 
add_string, + "add"); + +cmdline_parse_token_string_t cmd_fc_add_default_default_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_add_default_result, + default_string, "default"); + +cmdline_parse_token_num_t cmd_fc_add_default_port = + TOKEN_NUM_INITIALIZER(struct cmd_fc_add_default_result, port, UINT32); + +cmdline_parse_inst_t cmd_fc_add_default = { + .f = cmd_fc_add_default_parsed, + .data = NULL, + .help_str = "Flow add default", + .tokens = { + (void *) &cmd_fc_add_default_p_string, + (void *) &cmd_fc_add_default_pipeline_id, + (void *) &cmd_fc_add_default_flow_string, + (void *) &cmd_fc_add_default_add_string, + (void *) &cmd_fc_add_default_default_string, + (void *) &cmd_fc_add_default_port, + NULL, + }, +}; + +/* + * flow del default + */ + +struct cmd_fc_del_default_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t del_string; + cmdline_fixed_string_t default_string; +}; + +static void +cmd_fc_del_default_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_fc_del_default_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_fc_del_default(app, params->pipeline_id); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_del_default_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_default_result, p_string, + "p"); + +cmdline_parse_token_num_t cmd_fc_del_default_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_del_default_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_fc_del_default_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_default_result, flow_string, + "flow"); + +cmdline_parse_token_string_t cmd_fc_del_default_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_del_default_result, del_string, + "del"); + +cmdline_parse_token_string_t cmd_fc_del_default_default_string = + 
TOKEN_STRING_INITIALIZER(struct cmd_fc_del_default_result, + default_string, "default"); + +cmdline_parse_inst_t cmd_fc_del_default = { + .f = cmd_fc_del_default_parsed, + .data = NULL, + .help_str = "Flow delete default", + .tokens = { + (void *) &cmd_fc_del_default_p_string, + (void *) &cmd_fc_del_default_pipeline_id, + (void *) &cmd_fc_del_default_flow_string, + (void *) &cmd_fc_del_default_del_string, + (void *) &cmd_fc_del_default_default_string, + NULL, + }, +}; + +/* + * flow ls + */ + +struct cmd_fc_ls_result { + cmdline_fixed_string_t p_string; + uint32_t pipeline_id; + cmdline_fixed_string_t flow_string; + cmdline_fixed_string_t ls_string; +}; + +static void +cmd_fc_ls_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_fc_ls_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_fc_ls(app, params->pipeline_id); + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_fc_ls_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_ls_result, p_string, "p"); + +cmdline_parse_token_num_t cmd_fc_ls_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_fc_ls_result, pipeline_id, UINT32); + +cmdline_parse_token_string_t cmd_fc_ls_flow_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_ls_result, + flow_string, "flow"); + +cmdline_parse_token_string_t cmd_fc_ls_ls_string = + TOKEN_STRING_INITIALIZER(struct cmd_fc_ls_result, ls_string, + "ls"); + +cmdline_parse_inst_t cmd_fc_ls = { + .f = cmd_fc_ls_parsed, + .data = NULL, + .help_str = "Flow list", + .tokens = { + (void *) &cmd_fc_ls_p_string, + (void *) &cmd_fc_ls_pipeline_id, + (void *) &cmd_fc_ls_flow_string, + (void *) &cmd_fc_ls_ls_string, + NULL, + }, +}; + +static cmdline_parse_ctx_t pipeline_cmds[] = { + (cmdline_parse_inst_t *) &cmd_fc_add_qinq, + (cmdline_parse_inst_t *) &cmd_fc_add_ipv4_5tuple, + (cmdline_parse_inst_t *) &cmd_fc_add_ipv6_5tuple, + + (cmdline_parse_inst_t *) 
&cmd_fc_del_qinq, + (cmdline_parse_inst_t *) &cmd_fc_del_ipv4_5tuple, + (cmdline_parse_inst_t *) &cmd_fc_del_ipv6_5tuple, + + (cmdline_parse_inst_t *) &cmd_fc_add_default, + (cmdline_parse_inst_t *) &cmd_fc_del_default, + + (cmdline_parse_inst_t *) &cmd_fc_add_qinq_all, + (cmdline_parse_inst_t *) &cmd_fc_add_ipv4_5tuple_all, + (cmdline_parse_inst_t *) &cmd_fc_add_ipv6_5tuple_all, + + (cmdline_parse_inst_t *) &cmd_fc_ls, + NULL, +}; + +static struct pipeline_fe_ops pipeline_flow_classification_fe_ops = { + .f_init = app_pipeline_fc_init, + .f_free = app_pipeline_fc_free, + .cmds = pipeline_cmds, +}; + +struct pipeline_type pipeline_flow_classification = { + .name = "FLOW_CLASSIFICATION", + .be_ops = &pipeline_flow_classification_be_ops, + .fe_ops = &pipeline_flow_classification_fe_ops, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_classification.h b/examples/ip_pipeline/pipeline/pipeline_flow_classification.h new file mode 100644 index 00000000..9c775006 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_flow_classification.h @@ -0,0 +1,107 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __INCLUDE_PIPELINE_FLOW_CLASSIFICATION_H__
#define __INCLUDE_PIPELINE_FLOW_CLASSIFICATION_H__

#include "pipeline.h"
#include "pipeline_flow_classification_be.h"

/* Discriminator telling which member of pipeline_fc_key.key is in use. */
enum flow_key_type {
	FLOW_KEY_QINQ,
	FLOW_KEY_IPV4_5TUPLE,
	FLOW_KEY_IPV6_5TUPLE,
};

/* Q-in-Q flow key: service VLAN and customer VLAN identifiers. */
struct flow_key_qinq {
	uint16_t svlan;
	uint16_t cvlan;
};

/*
 * IPv4 5-tuple flow key.
 * NOTE(review): the CLI handlers fill ip_src/ip_dst with the byte-swapped
 * s_addr of the parsed address - presumably host byte order; confirm
 * against the code that serializes the key.
 */
struct flow_key_ipv4_5tuple {
	uint32_t ip_src;
	uint32_t ip_dst;
	uint16_t port_src;
	uint16_t port_dst;
	uint32_t proto;
};

/* IPv6 5-tuple flow key; each address is 16 raw bytes. */
struct flow_key_ipv6_5tuple {
	uint8_t ip_src[16];
	uint8_t ip_dst[16];
	uint16_t port_src;
	uint16_t port_dst;
	uint32_t proto;
};

/* Tagged union describing one flow key; "type" selects the "key" member. */
struct pipeline_fc_key {
	enum flow_key_type type;
	union {
		struct flow_key_qinq qinq;
		struct flow_key_ipv4_5tuple ipv4_5tuple;
		struct flow_key_ipv6_5tuple ipv6_5tuple;
	} key;
};

/*
 * Add one flow, identified by "key", to pipeline "pipeline_id", associating
 * it with "port_id" and "flow_id". The CLI callers treat a non-zero return
 * value as failure.
 */
int
app_pipeline_fc_add(struct app_params *app,
	uint32_t pipeline_id,
	struct pipeline_fc_key *key,
	uint32_t port_id,
	uint32_t flow_id);

/*
 * Add "n_keys" flows in one request. "key", "port_id" and "flow_id" are
 * parallel arrays indexed by flow. The CLI callers treat a non-zero return
 * value as failure.
 */
int
app_pipeline_fc_add_bulk(struct app_params *app,
	uint32_t pipeline_id,
	struct pipeline_fc_key *key,
	uint32_t *port_id,
	uint32_t *flow_id,
	uint32_t n_keys);

/*
 * Delete the flow identified by "key" from pipeline "pipeline_id". The CLI
 * callers treat a non-zero return value as failure.
 */
int
app_pipeline_fc_del(struct app_params *app,
	uint32_t pipeline_id,
	struct pipeline_fc_key *key);

/*
 * Set the default flow of pipeline "pipeline_id" to use "port_id". The CLI
 * caller treats a non-zero return value as failure.
 */
int
app_pipeline_fc_add_default(struct app_params *app,
	uint32_t pipeline_id,
	uint32_t port_id);

/*
 * Remove the default flow of pipeline "pipeline_id". The CLI caller treats
 * a non-zero return value as failure.
 */
int
app_pipeline_fc_del_default(struct app_params *app,
	uint32_t pipeline_id);

/* Pipeline type descriptor for the flow classification pipeline. */
extern struct pipeline_type pipeline_flow_classification;

#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.c b/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.c
new file mode 100644
index 00000000..70d976d5
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.c
@@ -0,0 +1,811 @@
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> + +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_table_hash.h> +#include <rte_byteorder.h> +#include <pipeline.h> + +#include "pipeline_flow_classification_be.h" +#include "pipeline_actions_common.h" +#include "parser.h" +#include "hash_func.h" + +struct pipeline_flow_classification { + struct pipeline p; + pipeline_msg_req_handler custom_handlers[PIPELINE_FC_MSG_REQS]; + + uint32_t n_flows; + uint32_t key_size; + uint32_t flow_id; + + uint32_t key_offset; + uint32_t hash_offset; + uint8_t key_mask[PIPELINE_FC_FLOW_KEY_MAX_SIZE]; + uint32_t key_mask_present; + uint32_t flow_id_offset; + +} __rte_cache_aligned; + +static void * +pipeline_fc_msg_req_custom_handler(struct pipeline *p, void *msg); + +static pipeline_msg_req_handler handlers[] = { + [PIPELINE_MSG_REQ_PING] = + pipeline_msg_req_ping_handler, + [PIPELINE_MSG_REQ_STATS_PORT_IN] = + pipeline_msg_req_stats_port_in_handler, + [PIPELINE_MSG_REQ_STATS_PORT_OUT] = + pipeline_msg_req_stats_port_out_handler, + [PIPELINE_MSG_REQ_STATS_TABLE] = + pipeline_msg_req_stats_table_handler, + [PIPELINE_MSG_REQ_PORT_IN_ENABLE] = + pipeline_msg_req_port_in_enable_handler, + [PIPELINE_MSG_REQ_PORT_IN_DISABLE] = + pipeline_msg_req_port_in_disable_handler, + [PIPELINE_MSG_REQ_CUSTOM] = + pipeline_fc_msg_req_custom_handler, +}; + +static void * +pipeline_fc_msg_req_add_handler(struct pipeline *p, void *msg); + +static void * 
+pipeline_fc_msg_req_add_bulk_handler(struct pipeline *p, void *msg); + +static void * +pipeline_fc_msg_req_del_handler(struct pipeline *p, void *msg); + +static void * +pipeline_fc_msg_req_add_default_handler(struct pipeline *p, void *msg); + +static void * +pipeline_fc_msg_req_del_default_handler(struct pipeline *p, void *msg); + +static pipeline_msg_req_handler custom_handlers[] = { + [PIPELINE_FC_MSG_REQ_FLOW_ADD] = + pipeline_fc_msg_req_add_handler, + [PIPELINE_FC_MSG_REQ_FLOW_ADD_BULK] = + pipeline_fc_msg_req_add_bulk_handler, + [PIPELINE_FC_MSG_REQ_FLOW_DEL] = + pipeline_fc_msg_req_del_handler, + [PIPELINE_FC_MSG_REQ_FLOW_ADD_DEFAULT] = + pipeline_fc_msg_req_add_default_handler, + [PIPELINE_FC_MSG_REQ_FLOW_DEL_DEFAULT] = + pipeline_fc_msg_req_del_default_handler, +}; + +/* + * Flow table + */ +struct flow_table_entry { + struct rte_pipeline_table_entry head; + + uint32_t flow_id; + uint32_t pad; +}; + +rte_table_hash_op_hash hash_func[] = { + hash_default_key8, + hash_default_key16, + hash_default_key24, + hash_default_key32, + hash_default_key40, + hash_default_key48, + hash_default_key56, + hash_default_key64 +}; + +/* + * Flow table AH - Write flow_id to packet meta-data + */ +static inline void +pkt_work_flow_id( + struct rte_mbuf *pkt, + struct rte_pipeline_table_entry *table_entry, + void *arg) +{ + struct pipeline_flow_classification *p_fc = arg; + uint32_t *flow_id_ptr = + RTE_MBUF_METADATA_UINT32_PTR(pkt, p_fc->flow_id_offset); + struct flow_table_entry *entry = + (struct flow_table_entry *) table_entry; + + /* Read */ + uint32_t flow_id = entry->flow_id; + + /* Compute */ + + /* Write */ + *flow_id_ptr = flow_id; +} + +static inline void +pkt4_work_flow_id( + struct rte_mbuf **pkts, + struct rte_pipeline_table_entry **table_entries, + void *arg) +{ + struct pipeline_flow_classification *p_fc = arg; + + uint32_t *flow_id_ptr0 = + RTE_MBUF_METADATA_UINT32_PTR(pkts[0], p_fc->flow_id_offset); + uint32_t *flow_id_ptr1 = + 
RTE_MBUF_METADATA_UINT32_PTR(pkts[1], p_fc->flow_id_offset); + uint32_t *flow_id_ptr2 = + RTE_MBUF_METADATA_UINT32_PTR(pkts[2], p_fc->flow_id_offset); + uint32_t *flow_id_ptr3 = + RTE_MBUF_METADATA_UINT32_PTR(pkts[3], p_fc->flow_id_offset); + + struct flow_table_entry *entry0 = + (struct flow_table_entry *) table_entries[0]; + struct flow_table_entry *entry1 = + (struct flow_table_entry *) table_entries[1]; + struct flow_table_entry *entry2 = + (struct flow_table_entry *) table_entries[2]; + struct flow_table_entry *entry3 = + (struct flow_table_entry *) table_entries[3]; + + /* Read */ + uint32_t flow_id0 = entry0->flow_id; + uint32_t flow_id1 = entry1->flow_id; + uint32_t flow_id2 = entry2->flow_id; + uint32_t flow_id3 = entry3->flow_id; + + /* Compute */ + + /* Write */ + *flow_id_ptr0 = flow_id0; + *flow_id_ptr1 = flow_id1; + *flow_id_ptr2 = flow_id2; + *flow_id_ptr3 = flow_id3; +} + +PIPELINE_TABLE_AH_HIT(fc_table_ah_hit, + pkt_work_flow_id, pkt4_work_flow_id); + +static rte_pipeline_table_action_handler_hit +get_fc_table_ah_hit(struct pipeline_flow_classification *p) +{ + if (p->flow_id) + return fc_table_ah_hit; + + return NULL; +} + +/* + * Argument parsing + */ +static int +pipeline_fc_parse_args(struct pipeline_flow_classification *p, + struct pipeline_params *params) +{ + uint32_t n_flows_present = 0; + uint32_t key_offset_present = 0; + uint32_t key_size_present = 0; + uint32_t hash_offset_present = 0; + uint32_t key_mask_present = 0; + uint32_t flow_id_offset_present = 0; + + uint32_t i; + char key_mask_str[PIPELINE_FC_FLOW_KEY_MAX_SIZE * 2]; + + p->hash_offset = 0; + + /* default values */ + p->flow_id = 0; + + for (i = 0; i < params->n_args; i++) { + char *arg_name = params->args_name[i]; + char *arg_value = params->args_value[i]; + + /* n_flows */ + if (strcmp(arg_name, "n_flows") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + n_flows_present == 0, params->name, + arg_name); + n_flows_present = 1; + + status = 
parser_read_uint32(&p->n_flows, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) && + (p->n_flows != 0)), params->name, + arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* key_offset */ + if (strcmp(arg_name, "key_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + key_offset_present == 0, params->name, + arg_name); + key_offset_present = 1; + + status = parser_read_uint32(&p->key_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* key_size */ + if (strcmp(arg_name, "key_size") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + key_size_present == 0, params->name, + arg_name); + key_size_present = 1; + + status = parser_read_uint32(&p->key_size, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) && + (p->key_size != 0) && + (p->key_size % 8 == 0)), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG(((status != -ERANGE) && + (p->key_size <= + PIPELINE_FC_FLOW_KEY_MAX_SIZE)), + params->name, arg_name, arg_value); + + continue; + } + + /* key_mask */ + if (strcmp(arg_name, "key_mask") == 0) { + int mask_str_len = strlen(arg_value); + + PIPELINE_PARSE_ERR_DUPLICATE( + key_mask_present == 0, + params->name, arg_name); + key_mask_present = 1; + + PIPELINE_ARG_CHECK((mask_str_len < + (PIPELINE_FC_FLOW_KEY_MAX_SIZE * 2)), + "Parse error in section \"%s\": entry " + "\"%s\" is too long", params->name, + arg_name); + + snprintf(key_mask_str, sizeof(key_mask_str), "%s", + arg_value); + + continue; + } + + /* hash_offset */ + if (strcmp(arg_name, "hash_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + hash_offset_present == 0, params->name, + arg_name); + hash_offset_present = 1; + + status = parser_read_uint32(&p->hash_offset, + arg_value); + 
PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* flow_id_offset */ + if (strcmp(arg_name, "flowid_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + flow_id_offset_present == 0, params->name, + arg_name); + flow_id_offset_present = 1; + + status = parser_read_uint32(&p->flow_id_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + p->flow_id = 1; + + continue; + } + + /* Unknown argument */ + PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name); + } + + /* Check that mandatory arguments are present */ + PIPELINE_PARSE_ERR_MANDATORY((n_flows_present), params->name, + "n_flows"); + PIPELINE_PARSE_ERR_MANDATORY((key_offset_present), params->name, + "key_offset"); + PIPELINE_PARSE_ERR_MANDATORY((key_size_present), params->name, + "key_size"); + + if (key_mask_present) { + uint32_t key_size = p->key_size; + int status; + + PIPELINE_ARG_CHECK(((key_size == 8) || (key_size == 16)), + "Parse error in section \"%s\": entry key_mask " + "only allowed for key_size of 8 or 16 bytes", + params->name); + + PIPELINE_ARG_CHECK((strlen(key_mask_str) == + (key_size * 2)), "Parse error in section " + "\"%s\": key_mask should have exactly %u hex " + "digits", params->name, (key_size * 2)); + + PIPELINE_ARG_CHECK((hash_offset_present == 0), "Parse " + "error in section \"%s\": entry hash_offset only " + "allowed when key_mask is not present", + params->name); + + status = parse_hex_string(key_mask_str, p->key_mask, + &p->key_size); + + PIPELINE_PARSE_ERR_INV_VAL(((status == 0) && + (key_size == p->key_size)), params->name, + "key_mask", key_mask_str); + } + + p->key_mask_present = key_mask_present; + + return 0; +} + +static void *pipeline_fc_init(struct pipeline_params 
*params, + __rte_unused void *arg) +{ + struct pipeline *p; + struct pipeline_flow_classification *p_fc; + uint32_t size, i; + + /* Check input arguments */ + if (params == NULL) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP( + sizeof(struct pipeline_flow_classification)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + if (p == NULL) + return NULL; + p_fc = (struct pipeline_flow_classification *) p; + + strcpy(p->name, params->name); + p->log_level = params->log_level; + + PLOG(p, HIGH, "Flow classification"); + + /* Parse arguments */ + if (pipeline_fc_parse_args(p_fc, params)) + return NULL; + + /* Pipeline */ + { + struct rte_pipeline_params pipeline_params = { + .name = params->name, + .socket_id = params->socket_id, + .offset_port_id = 0, + }; + + p->p = rte_pipeline_create(&pipeline_params); + if (p->p == NULL) { + rte_free(p); + return NULL; + } + } + + /* Input ports */ + p->n_ports_in = params->n_ports_in; + for (i = 0; i < p->n_ports_in; i++) { + struct rte_pipeline_port_in_params port_params = { + .ops = pipeline_port_in_params_get_ops( + ¶ms->port_in[i]), + .arg_create = pipeline_port_in_params_convert( + ¶ms->port_in[i]), + .f_action = NULL, + .arg_ah = NULL, + .burst_size = params->port_in[i].burst_size, + }; + + int status = rte_pipeline_port_in_create(p->p, + &port_params, + &p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Output ports */ + p->n_ports_out = params->n_ports_out; + for (i = 0; i < p->n_ports_out; i++) { + struct rte_pipeline_port_out_params port_params = { + .ops = pipeline_port_out_params_get_ops( + ¶ms->port_out[i]), + .arg_create = pipeline_port_out_params_convert( + ¶ms->port_out[i]), + .f_action = NULL, + .arg_ah = NULL, + }; + + int status = rte_pipeline_port_out_create(p->p, + &port_params, + &p->port_out_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Tables */ + p->n_tables = 1; + 
{ + struct rte_table_hash_key8_ext_params + table_hash_key8_params = { + .n_entries = p_fc->n_flows, + .n_entries_ext = p_fc->n_flows, + .signature_offset = p_fc->hash_offset, + .key_offset = p_fc->key_offset, + .f_hash = hash_func[(p_fc->key_size / 8) - 1], + .key_mask = (p_fc->key_mask_present) ? + p_fc->key_mask : NULL, + .seed = 0, + }; + + struct rte_table_hash_key16_ext_params + table_hash_key16_params = { + .n_entries = p_fc->n_flows, + .n_entries_ext = p_fc->n_flows, + .signature_offset = p_fc->hash_offset, + .key_offset = p_fc->key_offset, + .f_hash = hash_func[(p_fc->key_size / 8) - 1], + .key_mask = (p_fc->key_mask_present) ? + p_fc->key_mask : NULL, + .seed = 0, + }; + + struct rte_table_hash_ext_params + table_hash_params = { + .key_size = p_fc->key_size, + .n_keys = p_fc->n_flows, + .n_buckets = p_fc->n_flows / 4, + .n_buckets_ext = p_fc->n_flows / 4, + .f_hash = hash_func[(p_fc->key_size / 8) - 1], + .seed = 0, + .signature_offset = p_fc->hash_offset, + .key_offset = p_fc->key_offset, + }; + + struct rte_pipeline_table_params table_params = { + .ops = NULL, /* set below */ + .arg_create = NULL, /* set below */ + .f_action_hit = get_fc_table_ah_hit(p_fc), + .f_action_miss = NULL, + .arg_ah = p_fc, + .action_data_size = sizeof(struct flow_table_entry) - + sizeof(struct rte_pipeline_table_entry), + }; + + int status; + + switch (p_fc->key_size) { + case 8: + if (p_fc->hash_offset != 0) { + table_params.ops = + &rte_table_hash_key8_ext_ops; + } else { + table_params.ops = + &rte_table_hash_key8_ext_dosig_ops; + } + table_params.arg_create = &table_hash_key8_params; + break; + + case 16: + if (p_fc->hash_offset != 0) { + table_params.ops = + &rte_table_hash_key16_ext_ops; + } else { + table_params.ops = + &rte_table_hash_key16_ext_dosig_ops; + } + table_params.arg_create = &table_hash_key16_params; + break; + + default: + table_params.ops = &rte_table_hash_ext_ops; + table_params.arg_create = &table_hash_params; + } + + status = 
rte_pipeline_table_create(p->p, + &table_params, + &p->table_id[0]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Connecting input ports to tables */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_connect_to_table(p->p, + p->port_in_id[i], + p->table_id[0]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Enable input ports */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_enable(p->p, + p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Check pipeline consistency */ + if (rte_pipeline_check(p->p) < 0) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + + /* Message queues */ + p->n_msgq = params->n_msgq; + for (i = 0; i < p->n_msgq; i++) + p->msgq_in[i] = params->msgq_in[i]; + for (i = 0; i < p->n_msgq; i++) + p->msgq_out[i] = params->msgq_out[i]; + + /* Message handlers */ + memcpy(p->handlers, handlers, sizeof(p->handlers)); + memcpy(p_fc->custom_handlers, + custom_handlers, + sizeof(p_fc->custom_handlers)); + + return p; +} + +static int +pipeline_fc_free(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if (p == NULL) + return -1; + + /* Free resources */ + rte_pipeline_free(p->p); + rte_free(p); + return 0; +} + +static int +pipeline_fc_track(void *pipeline, + __rte_unused uint32_t port_in, + uint32_t *port_out) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if ((p == NULL) || + (port_in >= p->n_ports_in) || + (port_out == NULL)) + return -1; + + if (p->n_ports_in == 1) { + *port_out = 0; + return 0; + } + + return -1; +} + +static int +pipeline_fc_timer(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + pipeline_msg_req_handle(p); + rte_pipeline_flush(p->p); + + return 0; +} + +static void * 
+pipeline_fc_msg_req_custom_handler(struct pipeline *p, void *msg) +{ + struct pipeline_flow_classification *p_fc = + (struct pipeline_flow_classification *) p; + struct pipeline_custom_msg_req *req = msg; + pipeline_msg_req_handler f_handle; + + f_handle = (req->subtype < PIPELINE_FC_MSG_REQS) ? + p_fc->custom_handlers[req->subtype] : + pipeline_msg_req_invalid_handler; + + if (f_handle == NULL) + f_handle = pipeline_msg_req_invalid_handler; + + return f_handle(p, req); +} + +static void * +pipeline_fc_msg_req_add_handler(struct pipeline *p, void *msg) +{ + struct pipeline_fc_add_msg_req *req = msg; + struct pipeline_fc_add_msg_rsp *rsp = msg; + + struct flow_table_entry entry = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[req->port_id]}, + }, + .flow_id = req->flow_id, + }; + + rsp->status = rte_pipeline_table_entry_add(p->p, + p->table_id[0], + &req->key, + (struct rte_pipeline_table_entry *) &entry, + &rsp->key_found, + (struct rte_pipeline_table_entry **) &rsp->entry_ptr); + + return rsp; +} + +static void * +pipeline_fc_msg_req_add_bulk_handler(struct pipeline *p, void *msg) +{ + struct pipeline_fc_add_bulk_msg_req *req = msg; + struct pipeline_fc_add_bulk_msg_rsp *rsp = msg; + uint32_t i; + + for (i = 0; i < req->n_keys; i++) { + struct pipeline_fc_add_bulk_flow_req *flow_req = &req->req[i]; + struct pipeline_fc_add_bulk_flow_rsp *flow_rsp = &req->rsp[i]; + + struct flow_table_entry entry = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[flow_req->port_id]}, + }, + .flow_id = flow_req->flow_id, + }; + + int status = rte_pipeline_table_entry_add(p->p, + p->table_id[0], + &flow_req->key, + (struct rte_pipeline_table_entry *) &entry, + &flow_rsp->key_found, + (struct rte_pipeline_table_entry **) + &flow_rsp->entry_ptr); + + if (status) + break; + } + + rsp->n_keys = i; + + return rsp; +} + +static void * +pipeline_fc_msg_req_del_handler(struct pipeline *p, void *msg) +{ + struct 
pipeline_fc_del_msg_req *req = msg; + struct pipeline_fc_del_msg_rsp *rsp = msg; + + rsp->status = rte_pipeline_table_entry_delete(p->p, + p->table_id[0], + &req->key, + &rsp->key_found, + NULL); + + return rsp; +} + +static void * +pipeline_fc_msg_req_add_default_handler(struct pipeline *p, void *msg) +{ + struct pipeline_fc_add_default_msg_req *req = msg; + struct pipeline_fc_add_default_msg_rsp *rsp = msg; + + struct flow_table_entry default_entry = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[req->port_id]}, + }, + + .flow_id = 0, + }; + + rsp->status = rte_pipeline_table_default_entry_add(p->p, + p->table_id[0], + (struct rte_pipeline_table_entry *) &default_entry, + (struct rte_pipeline_table_entry **) &rsp->entry_ptr); + + return rsp; +} + +static void * +pipeline_fc_msg_req_del_default_handler(struct pipeline *p, void *msg) +{ + struct pipeline_fc_del_default_msg_rsp *rsp = msg; + + rsp->status = rte_pipeline_table_default_entry_delete(p->p, + p->table_id[0], + NULL); + + return rsp; +} + +struct pipeline_be_ops pipeline_flow_classification_be_ops = { + .f_init = pipeline_fc_init, + .f_free = pipeline_fc_free, + .f_run = NULL, + .f_timer = pipeline_fc_timer, + .f_track = pipeline_fc_track, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.h b/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.h new file mode 100644 index 00000000..d8129b21 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.h @@ -0,0 +1,142 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_PIPELINE_FLOW_CLASSIFICATION_BE_H__ +#define __INCLUDE_PIPELINE_FLOW_CLASSIFICATION_BE_H__ + +#include "pipeline_common_be.h" + +enum pipeline_fc_msg_req_type { + PIPELINE_FC_MSG_REQ_FLOW_ADD = 0, + PIPELINE_FC_MSG_REQ_FLOW_ADD_BULK, + PIPELINE_FC_MSG_REQ_FLOW_DEL, + PIPELINE_FC_MSG_REQ_FLOW_ADD_DEFAULT, + PIPELINE_FC_MSG_REQ_FLOW_DEL_DEFAULT, + PIPELINE_FC_MSG_REQS, +}; + +#ifndef PIPELINE_FC_FLOW_KEY_MAX_SIZE +#define PIPELINE_FC_FLOW_KEY_MAX_SIZE 64 +#endif + +/* + * MSG ADD + */ +struct pipeline_fc_add_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fc_msg_req_type subtype; + + uint8_t key[PIPELINE_FC_FLOW_KEY_MAX_SIZE]; + + uint32_t port_id; + uint32_t flow_id; +}; + +struct pipeline_fc_add_msg_rsp { + int status; + int key_found; + void *entry_ptr; +}; + +/* + * MSG ADD BULK + */ +struct pipeline_fc_add_bulk_flow_req { + uint8_t key[PIPELINE_FC_FLOW_KEY_MAX_SIZE]; + uint32_t port_id; + uint32_t flow_id; +}; + +struct pipeline_fc_add_bulk_flow_rsp { + int key_found; + void *entry_ptr; +}; + +struct pipeline_fc_add_bulk_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fc_msg_req_type subtype; + + struct pipeline_fc_add_bulk_flow_req *req; + struct pipeline_fc_add_bulk_flow_rsp *rsp; + uint32_t n_keys; +}; + +struct pipeline_fc_add_bulk_msg_rsp { + uint32_t n_keys; +}; + +/* + * MSG DEL + */ +struct pipeline_fc_del_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fc_msg_req_type subtype; + + uint8_t key[PIPELINE_FC_FLOW_KEY_MAX_SIZE]; +}; + +struct pipeline_fc_del_msg_rsp { + int status; + int key_found; +}; + +/* + * MSG ADD DEFAULT + */ +struct pipeline_fc_add_default_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fc_msg_req_type subtype; + + uint32_t port_id; +}; + +struct pipeline_fc_add_default_msg_rsp { + int status; + void *entry_ptr; +}; + +/* + * MSG DEL DEFAULT + */ +struct pipeline_fc_del_default_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_fc_msg_req_type 
subtype; +}; + +struct pipeline_fc_del_default_msg_rsp { + int status; +}; + +extern struct pipeline_be_ops pipeline_flow_classification_be_ops; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_master.c b/examples/ip_pipeline/pipeline/pipeline_master.c new file mode 100644 index 00000000..1ccdad14 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_master.c @@ -0,0 +1,47 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "pipeline_master.h" +#include "pipeline_master_be.h" + +static struct pipeline_fe_ops pipeline_master_fe_ops = { + .f_init = NULL, + .f_free = NULL, + .cmds = NULL, +}; + +struct pipeline_type pipeline_master = { + .name = "MASTER", + .be_ops = &pipeline_master_be_ops, + .fe_ops = &pipeline_master_fe_ops, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_master.h b/examples/ip_pipeline/pipeline/pipeline_master.h new file mode 100644 index 00000000..3fe3030f --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_master.h @@ -0,0 +1,41 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_PIPELINE_MASTER_H__ +#define __INCLUDE_PIPELINE_MASTER_H__ + +#include "pipeline.h" + +extern struct pipeline_type pipeline_master; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_master_be.c b/examples/ip_pipeline/pipeline/pipeline_master_be.c new file mode 100644 index 00000000..ac0cbbc5 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_master_be.c @@ -0,0 +1,150 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <fcntl.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_malloc.h> + +#include <cmdline_parse.h> +#include <cmdline_parse_string.h> +#include <cmdline_socket.h> +#include <cmdline.h> + +#include "app.h" +#include "pipeline_master_be.h" + +struct pipeline_master { + struct app_params *app; + struct cmdline *cl; + int script_file_done; +} __rte_cache_aligned; + +static void* +pipeline_init(__rte_unused struct pipeline_params *params, void *arg) +{ + struct app_params *app = (struct app_params *) arg; + struct pipeline_master *p; + uint32_t size; + + /* Check input arguments */ + if (app == NULL) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_master)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + if (p == NULL) + return NULL; + + /* Initialization */ + p->app = app; + + p->cl = cmdline_stdin_new(app->cmds, "pipeline> "); + if (p->cl == NULL) { + rte_free(p); + return NULL; + } + + p->script_file_done = 0; + if (app->script_file == NULL) + p->script_file_done = 1; + + return (void *) p; +} + +static int +pipeline_free(void *pipeline) +{ + struct pipeline_master *p = (struct pipeline_master *) pipeline; + + if (p == NULL) + return -EINVAL; + + cmdline_stdin_exit(p->cl); + rte_free(p); + + return 0; +} + +static int +pipeline_run(void *pipeline) +{ + struct pipeline_master *p = (struct pipeline_master *) pipeline; + int status; + + if (p->script_file_done == 0) { + struct app_params *app = p->app; + int fd = open(app->script_file, O_RDONLY); + + if (fd < 0) + printf("Cannot open CLI script file \"%s\"\n", + app->script_file); + else { + struct cmdline *file_cl; + + printf("Running CLI script file \"%s\" ...\n", + app->script_file); + file_cl = cmdline_new(p->cl->ctx, "", fd, 1); + cmdline_interact(file_cl); + close(fd); + } + + p->script_file_done = 1; + } + + status = cmdline_poll(p->cl); + if (status < 0) + rte_panic("CLI poll error (%" PRId32 ")\n", status); + else if (status 
== RDLINE_EXITED) { + cmdline_stdin_exit(p->cl); + rte_exit(0, "Bye!\n"); + } + + return 0; +} + +static int +pipeline_timer(__rte_unused void *pipeline) +{ + return 0; +} + +struct pipeline_be_ops pipeline_master_be_ops = { + .f_init = pipeline_init, + .f_free = pipeline_free, + .f_run = pipeline_run, + .f_timer = pipeline_timer, + .f_track = NULL, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_master_be.h b/examples/ip_pipeline/pipeline/pipeline_master_be.h new file mode 100644 index 00000000..00b71fe8 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_master_be.h @@ -0,0 +1,41 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_PIPELINE_MASTER_BE_H__ +#define __INCLUDE_PIPELINE_MASTER_BE_H__ + +#include "pipeline_common_be.h" + +extern struct pipeline_be_ops pipeline_master_be_ops; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough.c b/examples/ip_pipeline/pipeline/pipeline_passthrough.c new file mode 100644 index 00000000..fc2cae5e --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_passthrough.c @@ -0,0 +1,47 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "pipeline_passthrough.h" +#include "pipeline_passthrough_be.h" + +static struct pipeline_fe_ops pipeline_passthrough_fe_ops = { + .f_init = NULL, + .f_free = NULL, + .cmds = NULL, +}; + +struct pipeline_type pipeline_passthrough = { + .name = "PASS-THROUGH", + .be_ops = &pipeline_passthrough_be_ops, + .fe_ops = &pipeline_passthrough_fe_ops, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough.h b/examples/ip_pipeline/pipeline/pipeline_passthrough.h new file mode 100644 index 00000000..420a8768 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_passthrough.h @@ -0,0 +1,41 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_PIPELINE_PASSTHROUGH_H__ +#define __INCLUDE_PIPELINE_PASSTHROUGH_H__ + +#include "pipeline.h" + +extern struct pipeline_type pipeline_passthrough; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c new file mode 100644 index 00000000..a0d11aea --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c @@ -0,0 +1,804 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <string.h> + +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_byteorder.h> +#include <rte_table_stub.h> +#include <rte_table_hash.h> +#include <rte_pipeline.h> + +#include "pipeline_passthrough_be.h" +#include "pipeline_actions_common.h" +#include "parser.h" +#include "hash_func.h" + +struct pipeline_passthrough { + struct pipeline p; + struct pipeline_passthrough_params params; + rte_table_hash_op_hash f_hash; +} __rte_cache_aligned; + +static pipeline_msg_req_handler handlers[] = { + [PIPELINE_MSG_REQ_PING] = + pipeline_msg_req_ping_handler, + [PIPELINE_MSG_REQ_STATS_PORT_IN] = + pipeline_msg_req_stats_port_in_handler, + [PIPELINE_MSG_REQ_STATS_PORT_OUT] = + pipeline_msg_req_stats_port_out_handler, + [PIPELINE_MSG_REQ_STATS_TABLE] = + pipeline_msg_req_stats_table_handler, + [PIPELINE_MSG_REQ_PORT_IN_ENABLE] = + pipeline_msg_req_port_in_enable_handler, + [PIPELINE_MSG_REQ_PORT_IN_DISABLE] = + pipeline_msg_req_port_in_disable_handler, + [PIPELINE_MSG_REQ_CUSTOM] = + pipeline_msg_req_invalid_handler, +}; + +static inline __attribute__((always_inline)) void +pkt_work( + struct rte_mbuf *pkt, + void *arg, + uint32_t dma_size, + uint32_t hash_enabled, + uint32_t lb_hash, + uint32_t port_out_pow2) +{ + struct pipeline_passthrough *p = arg; + + uint64_t *dma_dst = RTE_MBUF_METADATA_UINT64_PTR(pkt, + p->params.dma_dst_offset); + uint64_t *dma_src = RTE_MBUF_METADATA_UINT64_PTR(pkt, + p->params.dma_src_offset); + uint64_t *dma_mask = (uint64_t *) p->params.dma_src_mask; + uint32_t *dma_hash = RTE_MBUF_METADATA_UINT32_PTR(pkt, + p->params.dma_hash_offset); + uint32_t i; + + /* Read (dma_src), compute (dma_dst), write (dma_dst) */ + for (i = 0; i < (dma_size / 8); i++) + dma_dst[i] = dma_src[i] & dma_mask[i]; + + /* Read (dma_dst), compute (hash), write (hash) */ + if (hash_enabled) { + uint32_t hash = p->f_hash(dma_dst, dma_size, 0); + *dma_hash = hash; + + if (lb_hash) { + uint32_t port_out; + + if (port_out_pow2) + port_out + = hash & 
(p->p.n_ports_out - 1); + else + port_out + = hash % p->p.n_ports_out; + + rte_pipeline_port_out_packet_insert(p->p.p, + port_out, pkt); + } + } +} + +static inline __attribute__((always_inline)) void +pkt4_work( + struct rte_mbuf **pkts, + void *arg, + uint32_t dma_size, + uint32_t hash_enabled, + uint32_t lb_hash, + uint32_t port_out_pow2) +{ + struct pipeline_passthrough *p = arg; + + uint64_t *dma_dst0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0], + p->params.dma_dst_offset); + uint64_t *dma_dst1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1], + p->params.dma_dst_offset); + uint64_t *dma_dst2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2], + p->params.dma_dst_offset); + uint64_t *dma_dst3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3], + p->params.dma_dst_offset); + + uint64_t *dma_src0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0], + p->params.dma_src_offset); + uint64_t *dma_src1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1], + p->params.dma_src_offset); + uint64_t *dma_src2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2], + p->params.dma_src_offset); + uint64_t *dma_src3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3], + p->params.dma_src_offset); + + uint64_t *dma_mask = (uint64_t *) p->params.dma_src_mask; + + uint32_t *dma_hash0 = RTE_MBUF_METADATA_UINT32_PTR(pkts[0], + p->params.dma_hash_offset); + uint32_t *dma_hash1 = RTE_MBUF_METADATA_UINT32_PTR(pkts[1], + p->params.dma_hash_offset); + uint32_t *dma_hash2 = RTE_MBUF_METADATA_UINT32_PTR(pkts[2], + p->params.dma_hash_offset); + uint32_t *dma_hash3 = RTE_MBUF_METADATA_UINT32_PTR(pkts[3], + p->params.dma_hash_offset); + + uint32_t i; + + /* Read (dma_src), compute (dma_dst), write (dma_dst) */ + for (i = 0; i < (dma_size / 8); i++) { + dma_dst0[i] = dma_src0[i] & dma_mask[i]; + dma_dst1[i] = dma_src1[i] & dma_mask[i]; + dma_dst2[i] = dma_src2[i] & dma_mask[i]; + dma_dst3[i] = dma_src3[i] & dma_mask[i]; + } + + /* Read (dma_dst), compute (hash), write (hash) */ + if (hash_enabled) { + uint32_t hash0 = p->f_hash(dma_dst0, dma_size, 0); + uint32_t hash1 = 
p->f_hash(dma_dst1, dma_size, 0); + uint32_t hash2 = p->f_hash(dma_dst2, dma_size, 0); + uint32_t hash3 = p->f_hash(dma_dst3, dma_size, 0); + + *dma_hash0 = hash0; + *dma_hash1 = hash1; + *dma_hash2 = hash2; + *dma_hash3 = hash3; + + if (lb_hash) { + uint32_t port_out0, port_out1, port_out2, port_out3; + + if (port_out_pow2) { + port_out0 + = hash0 & (p->p.n_ports_out - 1); + port_out1 + = hash1 & (p->p.n_ports_out - 1); + port_out2 + = hash2 & (p->p.n_ports_out - 1); + port_out3 + = hash3 & (p->p.n_ports_out - 1); + } else { + port_out0 + = hash0 % p->p.n_ports_out; + port_out1 + = hash1 % p->p.n_ports_out; + port_out2 + = hash2 % p->p.n_ports_out; + port_out3 + = hash3 % p->p.n_ports_out; + } + rte_pipeline_port_out_packet_insert(p->p.p, + port_out0, pkts[0]); + rte_pipeline_port_out_packet_insert(p->p.p, + port_out1, pkts[1]); + rte_pipeline_port_out_packet_insert(p->p.p, + port_out2, pkts[2]); + rte_pipeline_port_out_packet_insert(p->p.p, + port_out3, pkts[3]); + } + } +} + +#define PKT_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \ +static inline void \ +pkt_work_size##dma_size##_hash##hash_enabled \ + ##_lb##lb_hash##_pw##port_pow2( \ + struct rte_mbuf *pkt, \ + void *arg) \ +{ \ + pkt_work(pkt, arg, dma_size, hash_enabled, lb_hash, port_pow2); \ +} + +#define PKT4_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \ +static inline void \ +pkt4_work_size##dma_size##_hash##hash_enabled \ + ##_lb##lb_hash##_pw##port_pow2( \ + struct rte_mbuf **pkts, \ + void *arg) \ +{ \ + pkt4_work(pkts, arg, dma_size, hash_enabled, lb_hash, port_pow2); \ +} + +#define port_in_ah(dma_size, hash_enabled, lb_hash, port_pow2) \ +PKT_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \ +PKT4_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \ +PIPELINE_PORT_IN_AH(port_in_ah_size##dma_size##_hash \ + ##hash_enabled##_lb##lb_hash##_pw##port_pow2, \ + pkt_work_size##dma_size##_hash##hash_enabled \ + ##_lb##lb_hash##_pw##port_pow2, \ + pkt4_work_size##dma_size##_hash##hash_enabled \ + 
##_lb##lb_hash##_pw##port_pow2) + + +#define port_in_ah_lb(dma_size, hash_enabled, lb_hash, port_pow2) \ +PKT_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \ +PKT4_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \ +PIPELINE_PORT_IN_AH_HIJACK_ALL( \ + port_in_ah_size##dma_size##_hash##hash_enabled \ + ##_lb##lb_hash##_pw##port_pow2, \ + pkt_work_size##dma_size##_hash##hash_enabled \ + ##_lb##lb_hash##_pw##port_pow2, \ + pkt4_work_size##dma_size##_hash##hash_enabled \ + ##_lb##lb_hash##_pw##port_pow2) + +/* Port in AH (dma_size, hash_enabled, lb_hash, port_pow2) */ + +port_in_ah(8, 0, 0, 0) +port_in_ah(8, 1, 0, 0) +port_in_ah_lb(8, 1, 1, 0) +port_in_ah_lb(8, 1, 1, 1) + +port_in_ah(16, 0, 0, 0) +port_in_ah(16, 1, 0, 0) +port_in_ah_lb(16, 1, 1, 0) +port_in_ah_lb(16, 1, 1, 1) + +port_in_ah(24, 0, 0, 0) +port_in_ah(24, 1, 0, 0) +port_in_ah_lb(24, 1, 1, 0) +port_in_ah_lb(24, 1, 1, 1) + +port_in_ah(32, 0, 0, 0) +port_in_ah(32, 1, 0, 0) +port_in_ah_lb(32, 1, 1, 0) +port_in_ah_lb(32, 1, 1, 1) + +port_in_ah(40, 0, 0, 0) +port_in_ah(40, 1, 0, 0) +port_in_ah_lb(40, 1, 1, 0) +port_in_ah_lb(40, 1, 1, 1) + +port_in_ah(48, 0, 0, 0) +port_in_ah(48, 1, 0, 0) +port_in_ah_lb(48, 1, 1, 0) +port_in_ah_lb(48, 1, 1, 1) + +port_in_ah(56, 0, 0, 0) +port_in_ah(56, 1, 0, 0) +port_in_ah_lb(56, 1, 1, 0) +port_in_ah_lb(56, 1, 1, 1) + +port_in_ah(64, 0, 0, 0) +port_in_ah(64, 1, 0, 0) +port_in_ah_lb(64, 1, 1, 0) +port_in_ah_lb(64, 1, 1, 1) + +static rte_pipeline_port_in_action_handler +get_port_in_ah(struct pipeline_passthrough *p) +{ + if (p->params.dma_enabled == 0) + return NULL; + + if (p->params.dma_hash_enabled) { + if (p->params.lb_hash_enabled) { + if (rte_is_power_of_2(p->p.n_ports_out)) + switch (p->params.dma_size) { + + case 8: return port_in_ah_size8_hash1_lb1_pw1; + case 16: return port_in_ah_size16_hash1_lb1_pw1; + case 24: return port_in_ah_size24_hash1_lb1_pw1; + case 32: return port_in_ah_size32_hash1_lb1_pw1; + case 40: return port_in_ah_size40_hash1_lb1_pw1; + case 48: 
return port_in_ah_size48_hash1_lb1_pw1; + case 56: return port_in_ah_size56_hash1_lb1_pw1; + case 64: return port_in_ah_size64_hash1_lb1_pw1; + default: return NULL; + } + else + switch (p->params.dma_size) { + + case 8: return port_in_ah_size8_hash1_lb1_pw0; + case 16: return port_in_ah_size16_hash1_lb1_pw0; + case 24: return port_in_ah_size24_hash1_lb1_pw0; + case 32: return port_in_ah_size32_hash1_lb1_pw0; + case 40: return port_in_ah_size40_hash1_lb1_pw0; + case 48: return port_in_ah_size48_hash1_lb1_pw0; + case 56: return port_in_ah_size56_hash1_lb1_pw0; + case 64: return port_in_ah_size64_hash1_lb1_pw0; + default: return NULL; + } + } else + switch (p->params.dma_size) { + + case 8: return port_in_ah_size8_hash1_lb0_pw0; + case 16: return port_in_ah_size16_hash1_lb0_pw0; + case 24: return port_in_ah_size24_hash1_lb0_pw0; + case 32: return port_in_ah_size32_hash1_lb0_pw0; + case 40: return port_in_ah_size40_hash1_lb0_pw0; + case 48: return port_in_ah_size48_hash1_lb0_pw0; + case 56: return port_in_ah_size56_hash1_lb0_pw0; + case 64: return port_in_ah_size64_hash1_lb0_pw0; + default: return NULL; + } + } else + switch (p->params.dma_size) { + + case 8: return port_in_ah_size8_hash0_lb0_pw0; + case 16: return port_in_ah_size16_hash0_lb0_pw0; + case 24: return port_in_ah_size24_hash0_lb0_pw0; + case 32: return port_in_ah_size32_hash0_lb0_pw0; + case 40: return port_in_ah_size40_hash0_lb0_pw0; + case 48: return port_in_ah_size48_hash0_lb0_pw0; + case 56: return port_in_ah_size56_hash0_lb0_pw0; + case 64: return port_in_ah_size64_hash0_lb0_pw0; + default: return NULL; + } +} + +int +pipeline_passthrough_parse_args(struct pipeline_passthrough_params *p, + struct pipeline_params *params) +{ + uint32_t dma_dst_offset_present = 0; + uint32_t dma_src_offset_present = 0; + uint32_t dma_src_mask_present = 0; + uint32_t dma_size_present = 0; + uint32_t dma_hash_offset_present = 0; + uint32_t lb_present = 0; + uint32_t i; + char 
dma_mask_str[PIPELINE_PASSTHROUGH_DMA_SIZE_MAX * 2 + 1]; /* hex digits of the largest mask + NUL */ + + /* default values */ + p->dma_enabled = 0; + p->dma_hash_enabled = 0; + p->lb_hash_enabled = 0; + memset(p->dma_src_mask, 0xFF, sizeof(p->dma_src_mask)); + + for (i = 0; i < params->n_args; i++) { + char *arg_name = params->args_name[i]; + char *arg_value = params->args_value[i]; + + /* dma_dst_offset */ + if (strcmp(arg_name, "dma_dst_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + dma_dst_offset_present == 0, params->name, + arg_name); + dma_dst_offset_present = 1; + + status = parser_read_uint32(&p->dma_dst_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + p->dma_enabled = 1; + + continue; + } + + /* dma_src_offset */ + if (strcmp(arg_name, "dma_src_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + dma_src_offset_present == 0, params->name, + arg_name); + dma_src_offset_present = 1; + + status = parser_read_uint32(&p->dma_src_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + p->dma_enabled = 1; + + continue; + } + + /* dma_size */ + if (strcmp(arg_name, "dma_size") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + dma_size_present == 0, params->name, + arg_name); + dma_size_present = 1; + + status = parser_read_uint32(&p->dma_size, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) && + (p->dma_size != 0) && + ((p->dma_size % 8) == 0)), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG(((status != -ERANGE) && + (p->dma_size <= + PIPELINE_PASSTHROUGH_DMA_SIZE_MAX)), + params->name, arg_name, arg_value); + + p->dma_enabled = 1; + + continue; + } + + /* dma_src_mask */ + if (strcmp(arg_name, "dma_src_mask") == 0) { + int mask_str_len = 
strlen(arg_value); + + PIPELINE_PARSE_ERR_DUPLICATE( + dma_src_mask_present == 0, + params->name, arg_name); + dma_src_mask_present = 1; + + /* + * The mask is later required to have exactly dma_size * 2 + * hex digits and dma_size may be as large as + * PIPELINE_PASSTHROUGH_DMA_SIZE_MAX, so a string of + * PIPELINE_PASSTHROUGH_DMA_SIZE_MAX * 2 characters must be + * accepted here; dma_mask_str holds that many plus the NUL. + */ + PIPELINE_ARG_CHECK((mask_str_len <= + (PIPELINE_PASSTHROUGH_DMA_SIZE_MAX * 2)), + "Parse error in section \"%s\": entry " + "\"%s\" too long", params->name, + arg_name); + + /* Keep a private copy; it is decoded once dma_size is known */ + snprintf(dma_mask_str, mask_str_len + 1, + "%s", arg_value); + + p->dma_enabled = 1; + + continue; + } + + /* dma_hash_offset */ + if (strcmp(arg_name, "dma_hash_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + dma_hash_offset_present == 0, + params->name, arg_name); + dma_hash_offset_present = 1; + + status = parser_read_uint32(&p->dma_hash_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + p->dma_hash_enabled = 1; + p->dma_enabled = 1; + + continue; + } + + /* load_balance mode */ + if (strcmp(arg_name, "lb") == 0) { + PIPELINE_PARSE_ERR_DUPLICATE( + lb_present == 0, + params->name, arg_name); + lb_present = 1; + + if ((strcmp(arg_value, "hash") == 0) || + (strcmp(arg_value, "HASH") == 0)) + p->lb_hash_enabled = 1; + else + PIPELINE_PARSE_ERR_INV_VAL(0, + params->name, + arg_name, + arg_value); + + continue; + } + + /* any other */ + PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name); + } + + /* Check correlations between arguments */ + PIPELINE_ARG_CHECK((dma_dst_offset_present == p->dma_enabled), + "Parse error in section \"%s\": missing entry " + "\"dma_dst_offset\"", params->name); + PIPELINE_ARG_CHECK((dma_src_offset_present == p->dma_enabled), + "Parse error in section \"%s\": missing entry " + "\"dma_src_offset\"", params->name); + PIPELINE_ARG_CHECK((dma_size_present == p->dma_enabled), + "Parse error in section \"%s\": missing entry " + "\"dma_size\"", params->name); + PIPELINE_ARG_CHECK((dma_hash_offset_present == p->dma_enabled), + "Parse error in section \"%s\": missing 
entry " + "\"dma_hash_offset\"", params->name); + PIPELINE_ARG_CHECK((p->lb_hash_enabled <= p->dma_hash_enabled), + "Parse error in section \"%s\": missing entry " + "\"dma_hash_offset\"", params->name); + + if (dma_src_mask_present) { + uint32_t dma_size = p->dma_size; + int status; + + PIPELINE_ARG_CHECK((strlen(dma_mask_str) == + (dma_size * 2)), "Parse error in section " + "\"%s\": dma_src_mask should have exactly %u hex " + "digits", params->name, (dma_size * 2)); + + status = parse_hex_string(dma_mask_str, p->dma_src_mask, + &p->dma_size); + + PIPELINE_PARSE_ERR_INV_VAL(((status == 0) && + (dma_size == p->dma_size)), params->name, + "dma_src_mask", dma_mask_str); + } + + return 0; +} + + +static rte_table_hash_op_hash +get_hash_function(struct pipeline_passthrough *p) +{ + switch (p->params.dma_size) { + + case 8: return hash_default_key8; + case 16: return hash_default_key16; + case 24: return hash_default_key24; + case 32: return hash_default_key32; + case 40: return hash_default_key40; + case 48: return hash_default_key48; + case 56: return hash_default_key56; + case 64: return hash_default_key64; + default: return NULL; + } +} + +static void* +pipeline_passthrough_init(struct pipeline_params *params, + __rte_unused void *arg) +{ + struct pipeline *p; + struct pipeline_passthrough *p_pt; + uint32_t size, i; + + /* Check input arguments */ + if ((params == NULL) || + (params->n_ports_in == 0) || + (params->n_ports_out == 0) || + (params->n_ports_in < params->n_ports_out) || + (params->n_ports_in % params->n_ports_out)) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_passthrough)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + p_pt = (struct pipeline_passthrough *) p; + if (p == NULL) + return NULL; + + strcpy(p->name, params->name); + p->log_level = params->log_level; + + PLOG(p, HIGH, "Pass-through"); + + /* Parse arguments */ + if (pipeline_passthrough_parse_args(&p_pt->params, params)) + return 
NULL; + p_pt->f_hash = get_hash_function(p_pt); + + /* Pipeline */ + { + struct rte_pipeline_params pipeline_params = { + .name = "PASS-THROUGH", + .socket_id = params->socket_id, + .offset_port_id = 0, + }; + + p->p = rte_pipeline_create(&pipeline_params); + if (p->p == NULL) { + rte_free(p); + return NULL; + } + } + + p->n_ports_in = params->n_ports_in; + p->n_ports_out = params->n_ports_out; + p->n_tables = p->n_ports_in; + + /*Input ports*/ + for (i = 0; i < p->n_ports_in; i++) { + struct rte_pipeline_port_in_params port_params = { + .ops = pipeline_port_in_params_get_ops( + ¶ms->port_in[i]), + .arg_create = pipeline_port_in_params_convert( + ¶ms->port_in[i]), + .f_action = get_port_in_ah(p_pt), + .arg_ah = p_pt, + .burst_size = params->port_in[i].burst_size, + }; + + int status = rte_pipeline_port_in_create(p->p, + &port_params, + &p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Output ports */ + for (i = 0; i < p->n_ports_out; i++) { + struct rte_pipeline_port_out_params port_params = { + .ops = pipeline_port_out_params_get_ops( + ¶ms->port_out[i]), + .arg_create = pipeline_port_out_params_convert( + ¶ms->port_out[i]), + .f_action = NULL, + .arg_ah = NULL, + }; + + int status = rte_pipeline_port_out_create(p->p, + &port_params, + &p->port_out_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Tables */ + for (i = 0; i < p->n_ports_in; i++) { + struct rte_pipeline_table_params table_params = { + .ops = &rte_table_stub_ops, + .arg_create = NULL, + .f_action_hit = NULL, + .f_action_miss = NULL, + .arg_ah = NULL, + .action_data_size = 0, + }; + + int status = rte_pipeline_table_create(p->p, + &table_params, + &p->table_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Connecting input ports to tables */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_connect_to_table(p->p, + 
p->port_in_id[i], + p->table_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Add entries to tables */ + for (i = 0; i < p->n_ports_in; i++) { + struct rte_pipeline_table_entry default_entry = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[ + i / (p->n_ports_in / p->n_ports_out)]}, + }; + + struct rte_pipeline_table_entry *default_entry_ptr; + + int status = rte_pipeline_table_default_entry_add(p->p, + p->table_id[i], + &default_entry, + &default_entry_ptr); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Enable input ports */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_enable(p->p, + p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Check pipeline consistency */ + if (rte_pipeline_check(p->p) < 0) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + + /* Message queues */ + p->n_msgq = params->n_msgq; + for (i = 0; i < p->n_msgq; i++) + p->msgq_in[i] = params->msgq_in[i]; + for (i = 0; i < p->n_msgq; i++) + p->msgq_out[i] = params->msgq_out[i]; + + /* Message handlers */ + memcpy(p->handlers, handlers, sizeof(p->handlers)); + + return p; +} + +static int +pipeline_passthrough_free(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if (p == NULL) + return -1; + + /* Free resources */ + rte_pipeline_free(p->p); + rte_free(p); + return 0; +} + +static int +pipeline_passthrough_timer(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + pipeline_msg_req_handle(p); + rte_pipeline_flush(p->p); + + return 0; +} + +static int +pipeline_passthrough_track(void *pipeline, uint32_t port_in, uint32_t *port_out) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if ((p == NULL) || + (port_in >= p->n_ports_in) || + (port_out == NULL)) + return -1; + + 
/* + * Report the same mapping as the default table entries installed at + * init: input port i goes to output port i / (n_ports_in / n_ports_out). + * Init only accepts n_ports_in as a non-zero multiple of n_ports_out, + * so the divisor is at least 1. + * NOTE(review): with "lb = hash" the output port is picked per packet + * from the hash, so this static answer may not apply -- confirm with + * the users of f_track. + */ + *port_out = port_in / (p->n_ports_in / p->n_ports_out); + return 0; +} + +struct pipeline_be_ops pipeline_passthrough_be_ops = { + .f_init = pipeline_passthrough_init, + .f_free = pipeline_passthrough_free, + .f_run = NULL, + .f_timer = pipeline_passthrough_timer, + .f_track = pipeline_passthrough_track, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough_be.h b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.h new file mode 100644 index 00000000..9368cec7 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.h @@ -0,0 +1,59 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_PIPELINE_PASSTHROUGH_BE_H__ +#define __INCLUDE_PIPELINE_PASSTHROUGH_BE_H__ + +#include "pipeline_common_be.h" + +#define PIPELINE_PASSTHROUGH_DMA_SIZE_MAX 64 + +struct pipeline_passthrough_params { + uint32_t dma_enabled; + uint32_t dma_dst_offset; + uint32_t dma_src_offset; + uint8_t dma_src_mask[PIPELINE_PASSTHROUGH_DMA_SIZE_MAX]; + uint32_t dma_size; + + uint32_t dma_hash_enabled; + uint32_t dma_hash_offset; + uint32_t lb_hash_enabled; +}; + +int +pipeline_passthrough_parse_args(struct pipeline_passthrough_params *p, + struct pipeline_params *params); + +extern struct pipeline_be_ops pipeline_passthrough_be_ops; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_routing.c b/examples/ip_pipeline/pipeline/pipeline_routing.c new file mode 100644 index 00000000..eab89f2e --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_routing.c @@ -0,0 +1,2239 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_ipaddr.h> +#include <cmdline_parse_etheraddr.h> + +#include "app.h" +#include "pipeline_common_fe.h" +#include "pipeline_routing.h" + +struct app_pipeline_routing_route { + struct pipeline_routing_route_key key; + struct pipeline_routing_route_data data; + void *entry_ptr; + + TAILQ_ENTRY(app_pipeline_routing_route) node; +}; + +struct app_pipeline_routing_arp_entry { + struct pipeline_routing_arp_key key; + struct ether_addr macaddr; + void *entry_ptr; + + TAILQ_ENTRY(app_pipeline_routing_arp_entry) node; +}; + +struct pipeline_routing { + /* Parameters */ + uint32_t n_ports_in; + uint32_t n_ports_out; + + /* Routes */ + TAILQ_HEAD(, app_pipeline_routing_route) routes; + uint32_t n_routes; + + uint32_t default_route_present; + uint32_t default_route_port_id; + void *default_route_entry_ptr; + + /* ARP entries */ + TAILQ_HEAD(, app_pipeline_routing_arp_entry) arp_entries; + uint32_t n_arp_entries; + + uint32_t default_arp_entry_present; + uint32_t default_arp_entry_port_id; + void *default_arp_entry_ptr; +}; + +static void * +pipeline_routing_init(struct pipeline_params *params, + __rte_unused void *arg) +{ + struct pipeline_routing *p; + uint32_t size; + + /* Check input arguments */ + if ((params == NULL) || + (params->n_ports_in == 0) || + (params->n_ports_out == 0)) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_routing)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + if (p == NULL) + return NULL; + + /* Initialization */ + p->n_ports_in = params->n_ports_in; + p->n_ports_out = params->n_ports_out; + + TAILQ_INIT(&p->routes); + p->n_routes = 0; + + TAILQ_INIT(&p->arp_entries); + p->n_arp_entries = 0; + + return p; +} + +static int +app_pipeline_routing_free(void *pipeline) +{ + struct pipeline_routing *p = pipeline; + + /* Check input arguments */ + if (p == NULL) + return -1; + 
+ /* Free resources */ + while (!TAILQ_EMPTY(&p->routes)) { + struct app_pipeline_routing_route *route; + + route = TAILQ_FIRST(&p->routes); + TAILQ_REMOVE(&p->routes, route, node); + rte_free(route); + } + + while (!TAILQ_EMPTY(&p->arp_entries)) { + struct app_pipeline_routing_arp_entry *arp_entry; + + arp_entry = TAILQ_FIRST(&p->arp_entries); + TAILQ_REMOVE(&p->arp_entries, arp_entry, node); + rte_free(arp_entry); + } + + rte_free(p); + return 0; +} + +static struct app_pipeline_routing_route * +app_pipeline_routing_find_route(struct pipeline_routing *p, + const struct pipeline_routing_route_key *key) +{ + struct app_pipeline_routing_route *it, *found; + + found = NULL; + TAILQ_FOREACH(it, &p->routes, node) { + if ((key->type == it->key.type) && + (key->key.ipv4.ip == it->key.key.ipv4.ip) && + (key->key.ipv4.depth == it->key.key.ipv4.depth)) { + found = it; + break; + } + } + + return found; +} + +static struct app_pipeline_routing_arp_entry * +app_pipeline_routing_find_arp_entry(struct pipeline_routing *p, + const struct pipeline_routing_arp_key *key) +{ + struct app_pipeline_routing_arp_entry *it, *found; + + found = NULL; + TAILQ_FOREACH(it, &p->arp_entries, node) { + if ((key->type == it->key.type) && + (key->key.ipv4.port_id == it->key.key.ipv4.port_id) && + (key->key.ipv4.ip == it->key.key.ipv4.ip)) { + found = it; + break; + } + } + + return found; +} + +static void +print_route(const struct app_pipeline_routing_route *route) +{ + if (route->key.type == PIPELINE_ROUTING_ROUTE_IPV4) { + const struct pipeline_routing_route_key_ipv4 *key = + &route->key.key.ipv4; + + printf("IP Prefix = %" PRIu32 ".%" PRIu32 + ".%" PRIu32 ".%" PRIu32 "/%" PRIu32 + " => (Port = %" PRIu32, + + (key->ip >> 24) & 0xFF, + (key->ip >> 16) & 0xFF, + (key->ip >> 8) & 0xFF, + key->ip & 0xFF, + + key->depth, + route->data.port_id); + + if (route->data.flags & PIPELINE_ROUTING_ROUTE_ARP) + printf( + ", Next Hop IP = %" PRIu32 ".%" PRIu32 + ".%" PRIu32 ".%" PRIu32, + + 
(route->data.ethernet.ip >> 24) & 0xFF, + (route->data.ethernet.ip >> 16) & 0xFF, + (route->data.ethernet.ip >> 8) & 0xFF, + route->data.ethernet.ip & 0xFF); + else + printf( + ", Next Hop HWaddress = %02" PRIx32 + ":%02" PRIx32 ":%02" PRIx32 + ":%02" PRIx32 ":%02" PRIx32 + ":%02" PRIx32, + + route->data.ethernet.macaddr.addr_bytes[0], + route->data.ethernet.macaddr.addr_bytes[1], + route->data.ethernet.macaddr.addr_bytes[2], + route->data.ethernet.macaddr.addr_bytes[3], + route->data.ethernet.macaddr.addr_bytes[4], + route->data.ethernet.macaddr.addr_bytes[5]); + + if (route->data.flags & PIPELINE_ROUTING_ROUTE_QINQ) + printf(", QinQ SVLAN = %" PRIu32 " CVLAN = %" PRIu32, + route->data.l2.qinq.svlan, + route->data.l2.qinq.cvlan); + + if (route->data.flags & PIPELINE_ROUTING_ROUTE_MPLS) { + uint32_t i; + + printf(", MPLS labels"); + for (i = 0; i < route->data.l2.mpls.n_labels; i++) + printf(" %" PRIu32, + route->data.l2.mpls.labels[i]); + } + + printf(")\n"); + } +} + +static void +print_arp_entry(const struct app_pipeline_routing_arp_entry *entry) +{ + printf("(Port = %" PRIu32 ", IP = %" PRIu32 ".%" PRIu32 + ".%" PRIu32 ".%" PRIu32 + ") => HWaddress = %02" PRIx32 ":%02" PRIx32 ":%02" PRIx32 + ":%02" PRIx32 ":%02" PRIx32 ":%02" PRIx32 "\n", + + entry->key.key.ipv4.port_id, + (entry->key.key.ipv4.ip >> 24) & 0xFF, + (entry->key.key.ipv4.ip >> 16) & 0xFF, + (entry->key.key.ipv4.ip >> 8) & 0xFF, + entry->key.key.ipv4.ip & 0xFF, + + entry->macaddr.addr_bytes[0], + entry->macaddr.addr_bytes[1], + entry->macaddr.addr_bytes[2], + entry->macaddr.addr_bytes[3], + entry->macaddr.addr_bytes[4], + entry->macaddr.addr_bytes[5]); +} + +static int +app_pipeline_routing_route_ls(struct app_params *app, uint32_t pipeline_id) +{ + struct pipeline_routing *p; + struct app_pipeline_routing_route *it; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -EINVAL; + + TAILQ_FOREACH(it, &p->routes, node) + print_route(it); + + if 
(p->default_route_present) + printf("Default route: port %" PRIu32 " (entry ptr = %p)\n", + p->default_route_port_id, + p->default_route_entry_ptr); + else + printf("Default: DROP\n"); + + return 0; +} + +int +app_pipeline_routing_add_route(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_routing_route_key *key, + struct pipeline_routing_route_data *data) +{ + struct pipeline_routing *p; + + struct pipeline_routing_route_add_msg_req *req; + struct pipeline_routing_route_add_msg_rsp *rsp; + + struct app_pipeline_routing_route *entry; + + int new_entry; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL) || + (data == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -1; + + switch (key->type) { + case PIPELINE_ROUTING_ROUTE_IPV4: + { + uint32_t depth = key->key.ipv4.depth; + uint32_t netmask; + + /* key */ + if ((depth == 0) || (depth > 32)) + return -1; + + netmask = (~0U) << (32 - depth); + key->key.ipv4.ip &= netmask; + + /* data */ + if (data->port_id >= p->n_ports_out) + return -1; + } + break; + + default: + return -1; + } + + /* Find existing rule or allocate new rule */ + entry = app_pipeline_routing_find_route(p, key); + new_entry = (entry == NULL); + if (entry == NULL) { + entry = rte_malloc(NULL, sizeof(*entry), RTE_CACHE_LINE_SIZE); + + if (entry == NULL) + return -1; + } + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) { + if (new_entry) + rte_free(entry); + return -1; + } + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD; + memcpy(&req->key, key, sizeof(*key)); + memcpy(&req->data, data, sizeof(*data)); + + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) { + if (new_entry) + rte_free(entry); + return -1; + } + + /* Read response and write entry */ + if (rsp->status || + (rsp->entry_ptr == NULL) || + ((new_entry == 0) && (rsp->key_found == 
0)) || + ((new_entry == 1) && (rsp->key_found == 1))) { + app_msg_free(app, rsp); + if (new_entry) + rte_free(entry); + return -1; + } + + memcpy(&entry->key, key, sizeof(*key)); + memcpy(&entry->data, data, sizeof(*data)); + entry->entry_ptr = rsp->entry_ptr; + + /* Commit entry */ + if (new_entry) { + TAILQ_INSERT_TAIL(&p->routes, entry, node); + p->n_routes++; + } + + print_route(entry); + + /* Message buffer free */ + app_msg_free(app, rsp); + return 0; +} + +int +app_pipeline_routing_delete_route(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_routing_route_key *key) +{ + struct pipeline_routing *p; + + struct pipeline_routing_route_delete_msg_req *req; + struct pipeline_routing_route_delete_msg_rsp *rsp; + + struct app_pipeline_routing_route *entry; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -1; + + switch (key->type) { + case PIPELINE_ROUTING_ROUTE_IPV4: + { + uint32_t depth = key->key.ipv4.depth; + uint32_t netmask; + + /* key */ + if ((depth == 0) || (depth > 32)) + return -1; + + netmask = (~0U) << (32 - depth); + key->key.ipv4.ip &= netmask; + } + break; + + default: + return -1; + } + + /* Find rule */ + entry = app_pipeline_routing_find_route(p, key); + if (entry == NULL) + return 0; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL; + memcpy(&req->key, key, sizeof(*key)); + + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response */ + if (rsp->status || !rsp->key_found) { + app_msg_free(app, rsp); + return -1; + } + + /* Remove route */ + TAILQ_REMOVE(&p->routes, entry, node); + p->n_routes--; + rte_free(entry); + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int 
+app_pipeline_routing_add_default_route(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id) +{ + struct pipeline_routing *p; + + struct pipeline_routing_route_add_default_msg_req *req; + struct pipeline_routing_route_add_default_msg_rsp *rsp; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -1; + + if (port_id >= p->n_ports_out) + return -1; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD_DEFAULT; + req->port_id = port_id; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response and write route */ + if (rsp->status || (rsp->entry_ptr == NULL)) { + app_msg_free(app, rsp); + return -1; + } + + p->default_route_port_id = port_id; + p->default_route_entry_ptr = rsp->entry_ptr; + + /* Commit route */ + p->default_route_present = 1; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_routing_delete_default_route(struct app_params *app, + uint32_t pipeline_id) +{ + struct pipeline_routing *p; + + struct pipeline_routing_arp_delete_default_msg_req *req; + struct pipeline_routing_arp_delete_default_msg_rsp *rsp; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -1; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL_DEFAULT; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response and write route */ + if (rsp->status) 
{ + app_msg_free(app, rsp); + return -1; + } + + /* Commit route */ + p->default_route_present = 0; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +static int +app_pipeline_routing_arp_ls(struct app_params *app, uint32_t pipeline_id) +{ + struct pipeline_routing *p; + struct app_pipeline_routing_arp_entry *it; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -EINVAL; + + TAILQ_FOREACH(it, &p->arp_entries, node) + print_arp_entry(it); + + if (p->default_arp_entry_present) + printf("Default entry: port %" PRIu32 " (entry ptr = %p)\n", + p->default_arp_entry_port_id, + p->default_arp_entry_ptr); + else + printf("Default: DROP\n"); + + return 0; +} + +int +app_pipeline_routing_add_arp_entry(struct app_params *app, uint32_t pipeline_id, + struct pipeline_routing_arp_key *key, + struct ether_addr *macaddr) +{ + struct pipeline_routing *p; + + struct pipeline_routing_arp_add_msg_req *req; + struct pipeline_routing_arp_add_msg_rsp *rsp; + + struct app_pipeline_routing_arp_entry *entry; + + int new_entry; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL) || + (macaddr == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -1; + + switch (key->type) { + case PIPELINE_ROUTING_ARP_IPV4: + { + uint32_t port_id = key->key.ipv4.port_id; + + /* key */ + if (port_id >= p->n_ports_out) + return -1; + } + break; + + default: + return -1; + } + + /* Find existing entry or allocate new */ + entry = app_pipeline_routing_find_arp_entry(p, key); + new_entry = (entry == NULL); + if (entry == NULL) { + entry = rte_malloc(NULL, sizeof(*entry), RTE_CACHE_LINE_SIZE); + + if (entry == NULL) + return -1; + } + + /* Message buffer allocation */ + req = app_msg_alloc(app); + if (req == NULL) { + if (new_entry) + rte_free(entry); + return -1; + } + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_ROUTING_MSG_REQ_ARP_ADD; + 
memcpy(&req->key, key, sizeof(*key)); + ether_addr_copy(macaddr, &req->macaddr); + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) { + if (new_entry) + rte_free(entry); + return -1; + } + + /* Read response and write entry */ + if (rsp->status || + (rsp->entry_ptr == NULL) || + ((new_entry == 0) && (rsp->key_found == 0)) || + ((new_entry == 1) && (rsp->key_found == 1))) { + app_msg_free(app, rsp); + if (new_entry) + rte_free(entry); + return -1; + } + + memcpy(&entry->key, key, sizeof(*key)); + ether_addr_copy(macaddr, &entry->macaddr); + entry->entry_ptr = rsp->entry_ptr; + + /* Commit entry */ + if (new_entry) { + TAILQ_INSERT_TAIL(&p->arp_entries, entry, node); + p->n_arp_entries++; + } + + print_arp_entry(entry); + + /* Message buffer free */ + app_msg_free(app, rsp); + return 0; +} + +int +app_pipeline_routing_delete_arp_entry(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_routing_arp_key *key) +{ + struct pipeline_routing *p; + + struct pipeline_routing_arp_delete_msg_req *req; + struct pipeline_routing_arp_delete_msg_rsp *rsp; + + struct app_pipeline_routing_arp_entry *entry; + + /* Check input arguments */ + if ((app == NULL) || + (key == NULL)) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -EINVAL; + + switch (key->type) { + case PIPELINE_ROUTING_ARP_IPV4: + { + uint32_t port_id = key->key.ipv4.port_id; + + /* key */ + if (port_id >= p->n_ports_out) + return -1; + } + break; + + default: + return -1; + } + + /* Find rule */ + entry = app_pipeline_routing_find_arp_entry(p, key); + if (entry == NULL) + return 0; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_ROUTING_MSG_REQ_ARP_DEL; + memcpy(&req->key, key, sizeof(*key)); + + rsp = app_msg_send_recv(app, pipeline_id, req, 
MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response */ + if (rsp->status || !rsp->key_found) { + app_msg_free(app, rsp); + return -1; + } + + /* Remove entry */ + TAILQ_REMOVE(&p->arp_entries, entry, node); + p->n_arp_entries--; + rte_free(entry); + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_routing_add_default_arp_entry(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id) +{ + struct pipeline_routing *p; + + struct pipeline_routing_arp_add_default_msg_req *req; + struct pipeline_routing_arp_add_default_msg_rsp *rsp; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -1; + + if (port_id >= p->n_ports_out) + return -1; + + /* Allocate and write request */ + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_ROUTING_MSG_REQ_ARP_ADD_DEFAULT; + req->port_id = port_id; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + /* Read response and write entry */ + if (rsp->status || rsp->entry_ptr == NULL) { + app_msg_free(app, rsp); + return -1; + } + + p->default_arp_entry_port_id = port_id; + p->default_arp_entry_ptr = rsp->entry_ptr; + + /* Commit entry */ + p->default_arp_entry_present = 1; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +int +app_pipeline_routing_delete_default_arp_entry(struct app_params *app, + uint32_t pipeline_id) +{ + struct pipeline_routing *p; + + struct pipeline_routing_arp_delete_default_msg_req *req; + struct pipeline_routing_arp_delete_default_msg_rsp *rsp; + + /* Check input arguments */ + if (app == NULL) + return -1; + + p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing); + if (p == NULL) + return -EINVAL; + + /* Allocate and write request */ + req = 
app_msg_alloc(app); + if (req == NULL) + return -ENOMEM; + + req->type = PIPELINE_MSG_REQ_CUSTOM; + req->subtype = PIPELINE_ROUTING_MSG_REQ_ARP_DEL_DEFAULT; + + /* Send request and wait for response */ + rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -ETIMEDOUT; + + /* Read response and write entry */ + if (rsp->status) { + app_msg_free(app, rsp); + return rsp->status; + } + + /* Commit entry */ + p->default_arp_entry_present = 0; + + /* Free response */ + app_msg_free(app, rsp); + + return 0; +} + +static int +parse_labels(char *string, uint32_t *labels, uint32_t *n_labels) +{ + uint32_t n_max_labels = *n_labels, count = 0; + + /* Check for void list of labels */ + if (strcmp(string, "<void>") == 0) { + *n_labels = 0; + return 0; + } + + /* At least one label should be present */ + for ( ; (*string != '\0'); ) { + char *next; + int value; + + if (count >= n_max_labels) + return -1; + + if (count > 0) { + if (string[0] != ':') + return -1; + + string++; + } + + value = strtol(string, &next, 10); + if (next == string) + return -1; + string = next; + + labels[count++] = (uint32_t) value; + } + + *n_labels = count; + return 0; +} + +/* + * route add (mpls = no, qinq = no, arp = no) + */ + +struct cmd_route_add1_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t add_string; + cmdline_ipaddr_t ip; + uint32_t depth; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t ether_string; + struct ether_addr macaddr; +}; + +static void +cmd_route_add1_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_add1_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_routing_route_key key; + struct pipeline_routing_route_data route_data; + int status; + + /* Create route */ + key.type = PIPELINE_ROUTING_ROUTE_IPV4; + key.key.ipv4.ip = rte_bswap32((uint32_t) 
params->ip.addr.ipv4.s_addr); + key.key.ipv4.depth = params->depth; + + route_data.flags = 0; + route_data.port_id = params->port; + route_data.ethernet.macaddr = params->macaddr; + + status = app_pipeline_routing_add_route(app, + params->p, + &key, + &route_data); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_add1_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_add1_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_add1_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_add1_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, route_string, + "route"); + +static cmdline_parse_token_string_t cmd_route_add1_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, add_string, + "add"); + +static cmdline_parse_token_ipaddr_t cmd_route_add1_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add1_result, ip); + +static cmdline_parse_token_num_t cmd_route_add1_depth = + TOKEN_NUM_INITIALIZER(struct cmd_route_add1_result, depth, UINT32); + +static cmdline_parse_token_string_t cmd_route_add1_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, port_string, + "port"); + +static cmdline_parse_token_num_t cmd_route_add1_port = + TOKEN_NUM_INITIALIZER(struct cmd_route_add1_result, port, UINT32); + +static cmdline_parse_token_string_t cmd_route_add1_ether_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, ether_string, + "ether"); + +static cmdline_parse_token_etheraddr_t cmd_route_add1_macaddr = + TOKEN_ETHERADDR_INITIALIZER(struct cmd_route_add1_result, macaddr); + +static cmdline_parse_inst_t cmd_route_add1 = { + .f = cmd_route_add1_parsed, + .data = NULL, + .help_str = "Route add (mpls = no, qinq = no, arp = no)", + .tokens = { + (void *)&cmd_route_add1_p_string, + (void *)&cmd_route_add1_p, + (void *)&cmd_route_add1_route_string, + (void 
*)&cmd_route_add1_add_string, + (void *)&cmd_route_add1_ip, + (void *)&cmd_route_add1_depth, + (void *)&cmd_route_add1_port_string, + (void *)&cmd_route_add1_port, + (void *)&cmd_route_add1_ether_string, + (void *)&cmd_route_add1_macaddr, + NULL, + }, +}; + +/* + * route add (mpls = no, qinq = no, arp = yes) + */ + +struct cmd_route_add2_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t add_string; + cmdline_ipaddr_t ip; + uint32_t depth; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t ether_string; + cmdline_ipaddr_t nh_ip; +}; + +static void +cmd_route_add2_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_add2_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_routing_route_key key; + struct pipeline_routing_route_data route_data; + int status; + + /* Create route */ + key.type = PIPELINE_ROUTING_ROUTE_IPV4; + key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr); + key.key.ipv4.depth = params->depth; + + route_data.flags = PIPELINE_ROUTING_ROUTE_ARP; + route_data.port_id = params->port; + route_data.ethernet.ip = + rte_bswap32((uint32_t) params->nh_ip.addr.ipv4.s_addr); + + status = app_pipeline_routing_add_route(app, + params->p, + &key, + &route_data); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_add2_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_add2_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_add2_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_add2_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, route_string, + "route"); + +static cmdline_parse_token_string_t cmd_route_add2_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, add_string, 
+ "add"); + +static cmdline_parse_token_ipaddr_t cmd_route_add2_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add2_result, ip); + +static cmdline_parse_token_num_t cmd_route_add2_depth = + TOKEN_NUM_INITIALIZER(struct cmd_route_add2_result, depth, UINT32); + +static cmdline_parse_token_string_t cmd_route_add2_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, port_string, + "port"); + +static cmdline_parse_token_num_t cmd_route_add2_port = + TOKEN_NUM_INITIALIZER(struct cmd_route_add2_result, port, UINT32); + +static cmdline_parse_token_string_t cmd_route_add2_ether_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, ether_string, + "ether"); + +static cmdline_parse_token_ipaddr_t cmd_route_add2_nh_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add2_result, nh_ip); + +static cmdline_parse_inst_t cmd_route_add2 = { + .f = cmd_route_add2_parsed, + .data = NULL, + .help_str = "Route add (mpls = no, qinq = no, arp = yes)", + .tokens = { + (void *)&cmd_route_add2_p_string, + (void *)&cmd_route_add2_p, + (void *)&cmd_route_add2_route_string, + (void *)&cmd_route_add2_add_string, + (void *)&cmd_route_add2_ip, + (void *)&cmd_route_add2_depth, + (void *)&cmd_route_add2_port_string, + (void *)&cmd_route_add2_port, + (void *)&cmd_route_add2_ether_string, + (void *)&cmd_route_add2_nh_ip, + NULL, + }, +}; + +/* + * route add (mpls = no, qinq = yes, arp = no) + */ + +struct cmd_route_add3_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t add_string; + cmdline_ipaddr_t ip; + uint32_t depth; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t ether_string; + struct ether_addr macaddr; + cmdline_fixed_string_t qinq_string; + uint32_t svlan; + uint32_t cvlan; +}; + +static void +cmd_route_add3_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_add3_result *params = parsed_result; + struct app_params 
*app = data; + struct pipeline_routing_route_key key; + struct pipeline_routing_route_data route_data; + int status; + + /* Create route */ + key.type = PIPELINE_ROUTING_ROUTE_IPV4; + key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr); + key.key.ipv4.depth = params->depth; + + route_data.flags = PIPELINE_ROUTING_ROUTE_QINQ; + route_data.port_id = params->port; + route_data.ethernet.macaddr = params->macaddr; + route_data.l2.qinq.svlan = params->svlan; + route_data.l2.qinq.cvlan = params->cvlan; + + status = app_pipeline_routing_add_route(app, + params->p, + &key, + &route_data); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_add3_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_add3_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_add3_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, route_string, + "route"); + +static cmdline_parse_token_string_t cmd_route_add3_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, add_string, + "add"); + +static cmdline_parse_token_ipaddr_t cmd_route_add3_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add3_result, ip); + +static cmdline_parse_token_num_t cmd_route_add3_depth = + TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, depth, UINT32); + +static cmdline_parse_token_string_t cmd_route_add3_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, port_string, + "port"); + +static cmdline_parse_token_num_t cmd_route_add3_port = + TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, port, UINT32); + +static cmdline_parse_token_string_t cmd_route_add3_ether_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, ether_string, + "ether"); + +static cmdline_parse_token_etheraddr_t cmd_route_add3_macaddr = + 
TOKEN_ETHERADDR_INITIALIZER(struct cmd_route_add3_result, macaddr); + +static cmdline_parse_token_string_t cmd_route_add3_qinq_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, qinq_string, + "qinq"); + +static cmdline_parse_token_num_t cmd_route_add3_svlan = + TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, svlan, UINT32); + +static cmdline_parse_token_num_t cmd_route_add3_cvlan = + TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, cvlan, UINT32); + +static cmdline_parse_inst_t cmd_route_add3 = { + .f = cmd_route_add3_parsed, + .data = NULL, + .help_str = "Route add (qinq = yes, arp = no)", + .tokens = { + (void *)&cmd_route_add3_p_string, + (void *)&cmd_route_add3_p, + (void *)&cmd_route_add3_route_string, + (void *)&cmd_route_add3_add_string, + (void *)&cmd_route_add3_ip, + (void *)&cmd_route_add3_depth, + (void *)&cmd_route_add3_port_string, + (void *)&cmd_route_add3_port, + (void *)&cmd_route_add3_ether_string, + (void *)&cmd_route_add3_macaddr, + (void *)&cmd_route_add3_qinq_string, + (void *)&cmd_route_add3_svlan, + (void *)&cmd_route_add3_cvlan, + NULL, + }, +}; + +/* + * route add (mpls = no, qinq = yes, arp = yes) + */ + +struct cmd_route_add4_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t add_string; + cmdline_ipaddr_t ip; + uint32_t depth; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t ether_string; + cmdline_ipaddr_t nh_ip; + cmdline_fixed_string_t qinq_string; + uint32_t svlan; + uint32_t cvlan; +}; + +static void +cmd_route_add4_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_add4_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_routing_route_key key; + struct pipeline_routing_route_data route_data; + int status; + + /* Create route */ + key.type = PIPELINE_ROUTING_ROUTE_IPV4; + key.key.ipv4.ip = rte_bswap32((uint32_t) 
params->ip.addr.ipv4.s_addr); + key.key.ipv4.depth = params->depth; + + route_data.flags = PIPELINE_ROUTING_ROUTE_QINQ | + PIPELINE_ROUTING_ROUTE_ARP; + route_data.port_id = params->port; + route_data.ethernet.ip = + rte_bswap32((uint32_t) params->nh_ip.addr.ipv4.s_addr); + route_data.l2.qinq.svlan = params->svlan; + route_data.l2.qinq.cvlan = params->cvlan; + + status = app_pipeline_routing_add_route(app, + params->p, + &key, + &route_data); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_add4_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_add4_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_add4_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, route_string, + "route"); + +static cmdline_parse_token_string_t cmd_route_add4_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, add_string, + "add"); + +static cmdline_parse_token_ipaddr_t cmd_route_add4_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add4_result, ip); + +static cmdline_parse_token_num_t cmd_route_add4_depth = + TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, depth, UINT32); + +static cmdline_parse_token_string_t cmd_route_add4_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, port_string, + "port"); + +static cmdline_parse_token_num_t cmd_route_add4_port = + TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, port, UINT32); + +static cmdline_parse_token_string_t cmd_route_add4_ether_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, ether_string, + "ether"); + +static cmdline_parse_token_ipaddr_t cmd_route_add4_nh_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add4_result, nh_ip); + +static cmdline_parse_token_string_t cmd_route_add4_qinq_string = + TOKEN_STRING_INITIALIZER(struct 
cmd_route_add4_result, qinq_string, + "qinq"); + +static cmdline_parse_token_num_t cmd_route_add4_svlan = + TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, svlan, UINT32); + +static cmdline_parse_token_num_t cmd_route_add4_cvlan = + TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, cvlan, UINT32); + +static cmdline_parse_inst_t cmd_route_add4 = { + .f = cmd_route_add4_parsed, + .data = NULL, + .help_str = "Route add (qinq = yes, arp = yes)", + .tokens = { + (void *)&cmd_route_add4_p_string, + (void *)&cmd_route_add4_p, + (void *)&cmd_route_add4_route_string, + (void *)&cmd_route_add4_add_string, + (void *)&cmd_route_add4_ip, + (void *)&cmd_route_add4_depth, + (void *)&cmd_route_add4_port_string, + (void *)&cmd_route_add4_port, + (void *)&cmd_route_add4_ether_string, + (void *)&cmd_route_add4_nh_ip, + (void *)&cmd_route_add4_qinq_string, + (void *)&cmd_route_add4_svlan, + (void *)&cmd_route_add4_cvlan, + NULL, + }, +}; + +/* + * route add (mpls = yes, qinq = no, arp = no) + */ + +struct cmd_route_add5_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t add_string; + cmdline_ipaddr_t ip; + uint32_t depth; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t ether_string; + struct ether_addr macaddr; + cmdline_fixed_string_t mpls_string; + cmdline_fixed_string_t mpls_labels; +}; + +static void +cmd_route_add5_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_add5_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_routing_route_key key; + struct pipeline_routing_route_data route_data; + uint32_t mpls_labels[PIPELINE_ROUTING_MPLS_LABELS_MAX]; + uint32_t n_labels = RTE_DIM(mpls_labels); + uint32_t i; + int status; + + /* Parse MPLS labels */ + status = parse_labels(params->mpls_labels, mpls_labels, &n_labels); + if (status) { + printf("MPLS labels parse error\n"); + return; + } + + /* 
Create route */ + key.type = PIPELINE_ROUTING_ROUTE_IPV4; + key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr); + key.key.ipv4.depth = params->depth; + + route_data.flags = PIPELINE_ROUTING_ROUTE_MPLS; + route_data.port_id = params->port; + route_data.ethernet.macaddr = params->macaddr; + for (i = 0; i < n_labels; i++) + route_data.l2.mpls.labels[i] = mpls_labels[i]; + route_data.l2.mpls.n_labels = n_labels; + + status = app_pipeline_routing_add_route(app, + params->p, + &key, + &route_data); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_add5_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_add5_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_add5_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_add5_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, route_string, + "route"); + +static cmdline_parse_token_string_t cmd_route_add5_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, add_string, + "add"); + +static cmdline_parse_token_ipaddr_t cmd_route_add5_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add5_result, ip); + +static cmdline_parse_token_num_t cmd_route_add5_depth = + TOKEN_NUM_INITIALIZER(struct cmd_route_add5_result, depth, UINT32); + +static cmdline_parse_token_string_t cmd_route_add5_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, port_string, + "port"); + +static cmdline_parse_token_num_t cmd_route_add5_port = + TOKEN_NUM_INITIALIZER(struct cmd_route_add5_result, port, UINT32); + +static cmdline_parse_token_string_t cmd_route_add5_ether_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, ether_string, + "ether"); + +static cmdline_parse_token_etheraddr_t cmd_route_add5_macaddr = + TOKEN_ETHERADDR_INITIALIZER(struct cmd_route_add5_result, macaddr); + +static cmdline_parse_token_string_t 
cmd_route_add5_mpls_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, mpls_string, + "mpls"); + +static cmdline_parse_token_string_t cmd_route_add5_mpls_labels = + TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, mpls_labels, + NULL); + +static cmdline_parse_inst_t cmd_route_add5 = { + .f = cmd_route_add5_parsed, + .data = NULL, + .help_str = "Route add (mpls = yes, arp = no)", + .tokens = { + (void *)&cmd_route_add5_p_string, + (void *)&cmd_route_add5_p, + (void *)&cmd_route_add5_route_string, + (void *)&cmd_route_add5_add_string, + (void *)&cmd_route_add5_ip, + (void *)&cmd_route_add5_depth, + (void *)&cmd_route_add5_port_string, + (void *)&cmd_route_add5_port, + (void *)&cmd_route_add5_ether_string, + (void *)&cmd_route_add5_macaddr, + (void *)&cmd_route_add5_mpls_string, + (void *)&cmd_route_add5_mpls_labels, + NULL, + }, +}; + +/* + * route add (mpls = yes, qinq = no, arp = yes) + */ + +struct cmd_route_add6_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t add_string; + cmdline_ipaddr_t ip; + uint32_t depth; + cmdline_fixed_string_t port_string; + uint32_t port; + cmdline_fixed_string_t ether_string; + cmdline_ipaddr_t nh_ip; + cmdline_fixed_string_t mpls_string; + cmdline_fixed_string_t mpls_labels; +}; + +static void +cmd_route_add6_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_add6_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_routing_route_key key; + struct pipeline_routing_route_data route_data; + uint32_t mpls_labels[PIPELINE_ROUTING_MPLS_LABELS_MAX]; + uint32_t n_labels = RTE_DIM(mpls_labels); + uint32_t i; + int status; + + /* Parse MPLS labels */ + status = parse_labels(params->mpls_labels, mpls_labels, &n_labels); + if (status) { + printf("MPLS labels parse error\n"); + return; + } + + /* Create route */ + key.type = PIPELINE_ROUTING_ROUTE_IPV4; + key.key.ipv4.ip = 
rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr); + key.key.ipv4.depth = params->depth; + + route_data.flags = PIPELINE_ROUTING_ROUTE_MPLS | + PIPELINE_ROUTING_ROUTE_ARP; + route_data.port_id = params->port; + route_data.ethernet.ip = + rte_bswap32((uint32_t) params->nh_ip.addr.ipv4.s_addr); + for (i = 0; i < n_labels; i++) + route_data.l2.mpls.labels[i] = mpls_labels[i]; + route_data.l2.mpls.n_labels = n_labels; + + status = app_pipeline_routing_add_route(app, + params->p, + &key, + &route_data); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_add6_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_add6_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_add6_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_add6_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, route_string, + "route"); + +static cmdline_parse_token_string_t cmd_route_add6_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, add_string, + "add"); + +static cmdline_parse_token_ipaddr_t cmd_route_add6_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add6_result, ip); + +static cmdline_parse_token_num_t cmd_route_add6_depth = + TOKEN_NUM_INITIALIZER(struct cmd_route_add6_result, depth, UINT32); + +static cmdline_parse_token_string_t cmd_route_add6_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, port_string, + "port"); + +static cmdline_parse_token_num_t cmd_route_add6_port = + TOKEN_NUM_INITIALIZER(struct cmd_route_add6_result, port, UINT32); + +static cmdline_parse_token_string_t cmd_route_add6_ether_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, ether_string, + "ether"); + +static cmdline_parse_token_ipaddr_t cmd_route_add6_nh_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_add6_result, nh_ip); + +static cmdline_parse_token_string_t 
cmd_route_add6_mpls_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, mpls_string, + "mpls"); + +static cmdline_parse_token_string_t cmd_route_add6_mpls_labels = + TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, mpls_labels, + NULL); + +static cmdline_parse_inst_t cmd_route_add6 = { + .f = cmd_route_add6_parsed, + .data = NULL, + .help_str = "Route add (mpls = yes, arp = yes)", + .tokens = { + (void *)&cmd_route_add6_p_string, + (void *)&cmd_route_add6_p, + (void *)&cmd_route_add6_route_string, + (void *)&cmd_route_add6_add_string, + (void *)&cmd_route_add6_ip, + (void *)&cmd_route_add6_depth, + (void *)&cmd_route_add6_port_string, + (void *)&cmd_route_add6_port, + (void *)&cmd_route_add6_ether_string, + (void *)&cmd_route_add6_nh_ip, + (void *)&cmd_route_add6_mpls_string, + (void *)&cmd_route_add6_mpls_labels, + NULL, + }, +}; + +/* + * route del + */ + +struct cmd_route_del_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t del_string; + cmdline_ipaddr_t ip; + uint32_t depth; +}; + +static void +cmd_route_del_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_del_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_routing_route_key key; + + int status; + + /* Create route */ + key.type = PIPELINE_ROUTING_ROUTE_IPV4; + key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr); + key.key.ipv4.depth = params->depth; + + status = app_pipeline_routing_delete_route(app, params->p, &key); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_del_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_del_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_del_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_del_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_del_route_string = + 
TOKEN_STRING_INITIALIZER(struct cmd_route_del_result, route_string, + "route"); + +static cmdline_parse_token_string_t cmd_route_del_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_del_result, del_string, + "del"); + +static cmdline_parse_token_ipaddr_t cmd_route_del_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_route_del_result, ip); + +static cmdline_parse_token_num_t cmd_route_del_depth = + TOKEN_NUM_INITIALIZER(struct cmd_route_del_result, depth, UINT32); + +static cmdline_parse_inst_t cmd_route_del = { + .f = cmd_route_del_parsed, + .data = NULL, + .help_str = "Route delete", + .tokens = { + (void *)&cmd_route_del_p_string, + (void *)&cmd_route_del_p, + (void *)&cmd_route_del_route_string, + (void *)&cmd_route_del_del_string, + (void *)&cmd_route_del_ip, + (void *)&cmd_route_del_depth, + NULL, + }, +}; + +/* + * route add default + */ + +struct cmd_route_add_default_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t default_string; + uint32_t port; +}; + +static void +cmd_route_add_default_parsed( + void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + void *data) +{ + struct cmd_route_add_default_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_routing_add_default_route(app, params->p, + params->port); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_add_default_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add_default_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_add_default_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_add_default_result, p, UINT32); + +cmdline_parse_token_string_t cmd_route_add_default_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add_default_result, + route_string, "route"); + +cmdline_parse_token_string_t cmd_route_add_default_add_string = + 
TOKEN_STRING_INITIALIZER(struct cmd_route_add_default_result, + add_string, "add"); + +cmdline_parse_token_string_t cmd_route_add_default_default_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_add_default_result, + default_string, "default"); + +cmdline_parse_token_num_t cmd_route_add_default_port = + TOKEN_NUM_INITIALIZER(struct cmd_route_add_default_result, + port, UINT32); + +cmdline_parse_inst_t cmd_route_add_default = { + .f = cmd_route_add_default_parsed, + .data = NULL, + .help_str = "Route default set", + .tokens = { + (void *)&cmd_route_add_default_p_string, + (void *)&cmd_route_add_default_p, + (void *)&cmd_route_add_default_route_string, + (void *)&cmd_route_add_default_add_string, + (void *)&cmd_route_add_default_default_string, + (void *)&cmd_route_add_default_port, + NULL, + }, +}; + +/* + * route del default + */ + +struct cmd_route_del_default_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t del_string; + cmdline_fixed_string_t default_string; +}; + +static void +cmd_route_del_default_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_del_default_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_routing_delete_default_route(app, params->p); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_del_default_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_del_default_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_route_del_default_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_del_default_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_del_default_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_del_default_result, + route_string, "route"); + +static cmdline_parse_token_string_t cmd_route_del_default_del_string = + TOKEN_STRING_INITIALIZER(struct 
cmd_route_del_default_result, + del_string, "del"); + +static cmdline_parse_token_string_t cmd_route_del_default_default_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_del_default_result, + default_string, "default"); + + +static cmdline_parse_inst_t cmd_route_del_default = { + .f = cmd_route_del_default_parsed, + .data = NULL, + .help_str = "Route default clear", + .tokens = { + (void *)&cmd_route_del_default_p_string, + (void *)&cmd_route_del_default_p, + (void *)&cmd_route_del_default_route_string, + (void *)&cmd_route_del_default_del_string, + (void *)&cmd_route_del_default_default_string, + NULL, + }, +}; + +/* + * route ls + */ + +struct cmd_route_ls_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t route_string; + cmdline_fixed_string_t ls_string; +}; + +static void +cmd_route_ls_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_route_ls_result *params = parsed_result; + struct app_params *app = data; + int status; + + status = app_pipeline_routing_route_ls(app, params->p); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_route_ls_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_ls_result, p_string, "p"); + +static cmdline_parse_token_num_t cmd_route_ls_p = + TOKEN_NUM_INITIALIZER(struct cmd_route_ls_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_route_ls_route_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_ls_result, + route_string, "route"); + +static cmdline_parse_token_string_t cmd_route_ls_ls_string = + TOKEN_STRING_INITIALIZER(struct cmd_route_ls_result, ls_string, + "ls"); + +static cmdline_parse_inst_t cmd_route_ls = { + .f = cmd_route_ls_parsed, + .data = NULL, + .help_str = "Route list", + .tokens = { + (void *)&cmd_route_ls_p_string, + (void *)&cmd_route_ls_p, + (void *)&cmd_route_ls_route_string, + (void *)&cmd_route_ls_ls_string, + NULL, + }, +}; + +/* + * arp add + */ + 
+struct cmd_arp_add_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t arp_string; + cmdline_fixed_string_t add_string; + uint32_t port_id; + cmdline_ipaddr_t ip; + struct ether_addr macaddr; + +}; + +/* + * Handler for the parsed "arp add" command. Builds an IPv4 ARP key from the + * parsed port ID and IP address (the address is passed through + * rte_cpu_to_be_32()), then calls app_pipeline_routing_add_arp_entry() with + * the parsed pipeline ID, that key and the parsed MAC address. Prints + * "Command failed" when the call returns a non-zero status. + */ +static void +cmd_arp_add_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_arp_add_result *params = parsed_result; + struct app_params *app = data; + + struct pipeline_routing_arp_key key; + int status; + + key.type = PIPELINE_ROUTING_ARP_IPV4; + key.key.ipv4.port_id = params->port_id; + key.key.ipv4.ip = rte_cpu_to_be_32(params->ip.addr.ipv4.s_addr); + + status = app_pipeline_routing_add_arp_entry(app, + params->p, + &key, + &params->macaddr); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_arp_add_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_add_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_arp_add_p = + TOKEN_NUM_INITIALIZER(struct cmd_arp_add_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_arp_add_arp_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_add_result, arp_string, "arp"); + +static cmdline_parse_token_string_t cmd_arp_add_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_add_result, add_string, "add"); + +static cmdline_parse_token_num_t cmd_arp_add_port_id = + TOKEN_NUM_INITIALIZER(struct cmd_arp_add_result, port_id, UINT32); + +static cmdline_parse_token_ipaddr_t cmd_arp_add_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_arp_add_result, ip); + +static cmdline_parse_token_etheraddr_t cmd_arp_add_macaddr = + TOKEN_ETHERADDR_INITIALIZER(struct cmd_arp_add_result, macaddr); + +static cmdline_parse_inst_t cmd_arp_add = { + .f = cmd_arp_add_parsed, + .data = NULL, + .help_str = "ARP add", + .tokens = { + (void *)&cmd_arp_add_p_string, + (void *)&cmd_arp_add_p, + (void *)&cmd_arp_add_arp_string, + (void *)&cmd_arp_add_add_string, + (void *)&cmd_arp_add_port_id, + (void
*)&cmd_arp_add_ip, + (void *)&cmd_arp_add_macaddr, + NULL, + }, +}; + +/* + * arp del + */ + +struct cmd_arp_del_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t arp_string; + cmdline_fixed_string_t del_string; + uint32_t port_id; + cmdline_ipaddr_t ip; +}; + +static void +cmd_arp_del_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_arp_del_result *params = parsed_result; + struct app_params *app = data; + + struct pipeline_routing_arp_key key; + int status; + + key.type = PIPELINE_ROUTING_ARP_IPV4; + key.key.ipv4.ip = rte_cpu_to_be_32(params->ip.addr.ipv4.s_addr); + key.key.ipv4.port_id = params->port_id; + + status = app_pipeline_routing_delete_arp_entry(app, params->p, &key); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_arp_del_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_del_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_arp_del_p = + TOKEN_NUM_INITIALIZER(struct cmd_arp_del_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_arp_del_arp_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_del_result, arp_string, "arp"); + +static cmdline_parse_token_string_t cmd_arp_del_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_del_result, del_string, "del"); + +static cmdline_parse_token_num_t cmd_arp_del_port_id = + TOKEN_NUM_INITIALIZER(struct cmd_arp_del_result, port_id, UINT32); + +static cmdline_parse_token_ipaddr_t cmd_arp_del_ip = + TOKEN_IPV4_INITIALIZER(struct cmd_arp_del_result, ip); + +static cmdline_parse_inst_t cmd_arp_del = { + .f = cmd_arp_del_parsed, + .data = NULL, + .help_str = "ARP delete", + .tokens = { + (void *)&cmd_arp_del_p_string, + (void *)&cmd_arp_del_p, + (void *)&cmd_arp_del_arp_string, + (void *)&cmd_arp_del_del_string, + (void *)&cmd_arp_del_port_id, + (void *)&cmd_arp_del_ip, + NULL, + }, +}; + +/* + * arp add default + */ + +struct 
cmd_arp_add_default_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t arp_string; + cmdline_fixed_string_t add_string; + cmdline_fixed_string_t default_string; + uint32_t port_id; +}; + +static void +cmd_arp_add_default_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_arp_add_default_result *params = parsed_result; + struct app_params *app = data; + + int status; + + status = app_pipeline_routing_add_default_arp_entry(app, + params->p, + params->port_id); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_arp_add_default_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_add_default_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_arp_add_default_p = + TOKEN_NUM_INITIALIZER(struct cmd_arp_add_default_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_arp_add_default_arp_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_add_default_result, arp_string, + "arp"); + +static cmdline_parse_token_string_t cmd_arp_add_default_add_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_add_default_result, add_string, + "add"); + +static cmdline_parse_token_string_t cmd_arp_add_default_default_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_add_default_result, + default_string, "default"); + +static cmdline_parse_token_num_t cmd_arp_add_default_port_id = + TOKEN_NUM_INITIALIZER(struct cmd_arp_add_default_result, port_id, + UINT32); + +static cmdline_parse_inst_t cmd_arp_add_default = { + .f = cmd_arp_add_default_parsed, + .data = NULL, + .help_str = "ARP add default", + .tokens = { + (void *)&cmd_arp_add_default_p_string, + (void *)&cmd_arp_add_default_p, + (void *)&cmd_arp_add_default_arp_string, + (void *)&cmd_arp_add_default_add_string, + (void *)&cmd_arp_add_default_default_string, + (void *)&cmd_arp_add_default_port_id, + NULL, + }, +}; + +/* + * arp del default + */ + +struct 
cmd_arp_del_default_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t arp_string; + cmdline_fixed_string_t del_string; + cmdline_fixed_string_t default_string; +}; + +static void +cmd_arp_del_default_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_arp_del_default_result *params = parsed_result; + struct app_params *app = data; + + int status; + + status = app_pipeline_routing_delete_default_arp_entry(app, params->p); + + if (status != 0) { + printf("Command failed\n"); + return; + } +} + +static cmdline_parse_token_string_t cmd_arp_del_default_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_del_default_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_arp_del_default_p = + TOKEN_NUM_INITIALIZER(struct cmd_arp_del_default_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_arp_del_default_arp_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_del_default_result, arp_string, + "arp"); + +static cmdline_parse_token_string_t cmd_arp_del_default_del_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_del_default_result, del_string, + "del"); + +static cmdline_parse_token_string_t cmd_arp_del_default_default_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_del_default_result, + default_string, "default"); + +static cmdline_parse_inst_t cmd_arp_del_default = { + .f = cmd_arp_del_default_parsed, + .data = NULL, + .help_str = "ARP delete default", + .tokens = { + (void *)&cmd_arp_del_default_p_string, + (void *)&cmd_arp_del_default_p, + (void *)&cmd_arp_del_default_arp_string, + (void *)&cmd_arp_del_default_del_string, + (void *)&cmd_arp_del_default_default_string, + NULL, + }, +}; + +/* + * arp ls + */ + +struct cmd_arp_ls_result { + cmdline_fixed_string_t p_string; + uint32_t p; + cmdline_fixed_string_t arp_string; + cmdline_fixed_string_t ls_string; +}; + +static void +cmd_arp_ls_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) 
+{ + struct cmd_arp_ls_result *params = parsed_result; + struct app_params *app = data; + struct pipeline_routing *p; + + p = app_pipeline_data_fe(app, params->p, &pipeline_routing); + if (p == NULL) + return; + + app_pipeline_routing_arp_ls(app, params->p); +} + +static cmdline_parse_token_string_t cmd_arp_ls_p_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_ls_result, p_string, + "p"); + +static cmdline_parse_token_num_t cmd_arp_ls_p = + TOKEN_NUM_INITIALIZER(struct cmd_arp_ls_result, p, UINT32); + +static cmdline_parse_token_string_t cmd_arp_ls_arp_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_ls_result, arp_string, + "arp"); + +static cmdline_parse_token_string_t cmd_arp_ls_ls_string = + TOKEN_STRING_INITIALIZER(struct cmd_arp_ls_result, ls_string, + "ls"); + +static cmdline_parse_inst_t cmd_arp_ls = { + .f = cmd_arp_ls_parsed, + .data = NULL, + .help_str = "ARP list", + .tokens = { + (void *)&cmd_arp_ls_p_string, + (void *)&cmd_arp_ls_p, + (void *)&cmd_arp_ls_arp_string, + (void *)&cmd_arp_ls_ls_string, + NULL, + }, +}; + +static cmdline_parse_ctx_t pipeline_cmds[] = { + (cmdline_parse_inst_t *)&cmd_route_add1, + (cmdline_parse_inst_t *)&cmd_route_add2, + (cmdline_parse_inst_t *)&cmd_route_add3, + (cmdline_parse_inst_t *)&cmd_route_add4, + (cmdline_parse_inst_t *)&cmd_route_add5, + (cmdline_parse_inst_t *)&cmd_route_add6, + (cmdline_parse_inst_t *)&cmd_route_del, + (cmdline_parse_inst_t *)&cmd_route_add_default, + (cmdline_parse_inst_t *)&cmd_route_del_default, + (cmdline_parse_inst_t *)&cmd_route_ls, + (cmdline_parse_inst_t *)&cmd_arp_add, + (cmdline_parse_inst_t *)&cmd_arp_del, + (cmdline_parse_inst_t *)&cmd_arp_add_default, + (cmdline_parse_inst_t *)&cmd_arp_del_default, + (cmdline_parse_inst_t *)&cmd_arp_ls, + NULL, +}; + +static struct pipeline_fe_ops pipeline_routing_fe_ops = { + .f_init = pipeline_routing_init, + .f_free = app_pipeline_routing_free, + .cmds = pipeline_cmds, +}; + +struct pipeline_type pipeline_routing = { + .name = "ROUTING", + 
.be_ops = &pipeline_routing_be_ops, + .fe_ops = &pipeline_routing_fe_ops, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_routing.h b/examples/ip_pipeline/pipeline/pipeline_routing.h new file mode 100644 index 00000000..fa41642b --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_routing.h @@ -0,0 +1,93 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_PIPELINE_ROUTING_H__ +#define __INCLUDE_PIPELINE_ROUTING_H__ + +#include "pipeline.h" +#include "pipeline_routing_be.h" + +/* + * Route + */ + +int +app_pipeline_routing_add_route(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_routing_route_key *key, + struct pipeline_routing_route_data *data); + +int +app_pipeline_routing_delete_route(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_routing_route_key *key); + +int +app_pipeline_routing_add_default_route(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id); + +int +app_pipeline_routing_delete_default_route(struct app_params *app, + uint32_t pipeline_id); + +/* + * ARP + */ + +int +app_pipeline_routing_add_arp_entry(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_routing_arp_key *key, + struct ether_addr *macaddr); + +int +app_pipeline_routing_delete_arp_entry(struct app_params *app, + uint32_t pipeline_id, + struct pipeline_routing_arp_key *key); + +int +app_pipeline_routing_add_default_arp_entry(struct app_params *app, + uint32_t pipeline_id, + uint32_t port_id); + +int +app_pipeline_routing_delete_default_arp_entry(struct app_params *app, + uint32_t pipeline_id); + +/* + * Pipeline type + */ +extern struct pipeline_type pipeline_routing; + +#endif diff --git a/examples/ip_pipeline/pipeline/pipeline_routing_be.c b/examples/ip_pipeline/pipeline/pipeline_routing_be.c new file mode 100644 index 00000000..bc5bf7a5 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_routing_be.c @@ -0,0 +1,1970 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_ip.h> +#include <rte_byteorder.h> +#include <rte_table_lpm.h> +#include <rte_table_hash.h> +#include <rte_pipeline.h> + +#include "pipeline_routing_be.h" +#include "pipeline_actions_common.h" +#include "parser.h" +#include "hash_func.h" + +#define MPLS_LABEL(label, exp, s, ttl) \ + (((((uint64_t) (label)) & 0xFFFFFLLU) << 12) | \ + ((((uint64_t) (exp)) & 0x7LLU) << 9) | \ + ((((uint64_t) (s)) & 0x1LLU) << 8) | \ + (((uint64_t) (ttl)) & 0xFFLU)) + +#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, \ + traffic_class, queue, color) \ + ((((uint64_t) (queue)) & 0x3) | \ + ((((uint64_t) (traffic_class)) & 0x3) << 2) | \ + ((((uint64_t) (color)) & 0x3) << 4) | \ + ((((uint64_t) (subport)) & 0xFFFF) << 16) | \ + ((((uint64_t) (pipe)) & 0xFFFFFFFF) << 32)) + + +#define MAC_SRC_DEFAULT 0x112233445566ULL + +#ifndef PIPELINE_ROUTING_LPM_TABLE_NUMBER_TABLE8s +#define PIPELINE_ROUTING_LPM_TABLE_NUMBER_TABLE8s 256 +#endif + +struct pipeline_routing { + struct pipeline p; + struct pipeline_routing_params params; + pipeline_msg_req_handler custom_handlers[PIPELINE_ROUTING_MSG_REQS]; +} __rte_cache_aligned; + +/* + * Message handlers + */ +static void * +pipeline_routing_msg_req_custom_handler(struct pipeline *p, void *msg); + +static pipeline_msg_req_handler handlers[] = { + [PIPELINE_MSG_REQ_PING] = + pipeline_msg_req_ping_handler, + [PIPELINE_MSG_REQ_STATS_PORT_IN] = + pipeline_msg_req_stats_port_in_handler, + [PIPELINE_MSG_REQ_STATS_PORT_OUT] = + pipeline_msg_req_stats_port_out_handler, + [PIPELINE_MSG_REQ_STATS_TABLE] = + pipeline_msg_req_stats_table_handler, + [PIPELINE_MSG_REQ_PORT_IN_ENABLE] = + pipeline_msg_req_port_in_enable_handler, + [PIPELINE_MSG_REQ_PORT_IN_DISABLE] = + pipeline_msg_req_port_in_disable_handler, + [PIPELINE_MSG_REQ_CUSTOM] = + pipeline_routing_msg_req_custom_handler, +}; + 
+static void * +pipeline_routing_msg_req_route_add_handler(struct pipeline *p, + void *msg); + +static void * +pipeline_routing_msg_req_route_del_handler(struct pipeline *p, + void *msg); + +static void * +pipeline_routing_msg_req_route_add_default_handler(struct pipeline *p, + void *msg); + +static void * +pipeline_routing_msg_req_route_del_default_handler(struct pipeline *p, + void *msg); + +static void * +pipeline_routing_msg_req_arp_add_handler(struct pipeline *p, + void *msg); + +static void * +pipeline_routing_msg_req_arp_del_handler(struct pipeline *p, + void *msg); + +static void * +pipeline_routing_msg_req_arp_add_default_handler(struct pipeline *p, + void *msg); + +static void * +pipeline_routing_msg_req_arp_del_default_handler(struct pipeline *p, + void *msg); + +static pipeline_msg_req_handler custom_handlers[] = { + [PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD] = + pipeline_routing_msg_req_route_add_handler, + [PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL] = + pipeline_routing_msg_req_route_del_handler, + [PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD_DEFAULT] = + pipeline_routing_msg_req_route_add_default_handler, + [PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL_DEFAULT] = + pipeline_routing_msg_req_route_del_default_handler, + [PIPELINE_ROUTING_MSG_REQ_ARP_ADD] = + pipeline_routing_msg_req_arp_add_handler, + [PIPELINE_ROUTING_MSG_REQ_ARP_DEL] = + pipeline_routing_msg_req_arp_del_handler, + [PIPELINE_ROUTING_MSG_REQ_ARP_ADD_DEFAULT] = + pipeline_routing_msg_req_arp_add_default_handler, + [PIPELINE_ROUTING_MSG_REQ_ARP_DEL_DEFAULT] = + pipeline_routing_msg_req_arp_del_default_handler, +}; + +/* + * Routing table + */ +struct routing_table_entry { + struct rte_pipeline_table_entry head; + uint32_t flags; + uint32_t port_id; /* Output port ID */ + uint32_t ip; /* Next hop IP address (only valid for remote routes) */ + + /* ether_l2 */ + uint16_t data_offset; + uint16_t ether_l2_length; + uint64_t slab[4]; + uint16_t slab_offset[4]; +}; + +struct layout { + uint16_t a; + uint32_t b; + uint16_t c; 
+} __attribute__((__packed__)); + +#define MACADDR_DST_WRITE(slab_ptr, slab) \ +{ \ + struct layout *dst = (struct layout *) (slab_ptr); \ + struct layout *src = (struct layout *) &(slab); \ + \ + dst->b = src->b; \ + dst->c = src->c; \ +} + +static inline __attribute__((always_inline)) void +pkt_work_routing( + struct rte_mbuf *pkt, + struct rte_pipeline_table_entry *table_entry, + void *arg, + int arp, + int qinq, + int qinq_sched, + int mpls, + int mpls_color_mark) +{ + struct pipeline_routing *p_rt = arg; + + struct routing_table_entry *entry = + (struct routing_table_entry *) table_entry; + + struct ipv4_hdr *ip = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT8_PTR(pkt, p_rt->params.ip_hdr_offset); + + enum rte_meter_color pkt_color = (enum rte_meter_color) + RTE_MBUF_METADATA_UINT32(pkt, p_rt->params.color_offset); + + struct pipeline_routing_arp_key_ipv4 *arp_key = + (struct pipeline_routing_arp_key_ipv4 *) + RTE_MBUF_METADATA_UINT8_PTR(pkt, p_rt->params.arp_key_offset); + + uint64_t *slab0_ptr, *slab1_ptr, *slab2_ptr, *slab3_ptr, sched; + uint32_t ip_da, nh_ip, port_id; + uint16_t total_length, data_offset, ether_l2_length; + + /* Read */ + total_length = rte_bswap16(ip->total_length); + ip_da = ip->dst_addr; + data_offset = entry->data_offset; + ether_l2_length = entry->ether_l2_length; + slab0_ptr = RTE_MBUF_METADATA_UINT64_PTR(pkt, entry->slab_offset[0]); + slab1_ptr = RTE_MBUF_METADATA_UINT64_PTR(pkt, entry->slab_offset[1]); + slab2_ptr = RTE_MBUF_METADATA_UINT64_PTR(pkt, entry->slab_offset[2]); + slab3_ptr = RTE_MBUF_METADATA_UINT64_PTR(pkt, entry->slab_offset[3]); + + if (arp) { + port_id = entry->port_id; + nh_ip = entry->ip; + if (entry->flags & PIPELINE_ROUTING_ROUTE_LOCAL) + nh_ip = ip_da; + } + + /* Compute */ + total_length += ether_l2_length; + + if (qinq && qinq_sched) { + uint32_t dscp = ip->type_of_service >> 2; + uint32_t svlan, cvlan, tc, tc_q; + + if (qinq_sched == 1) { + uint64_t slab_qinq = rte_bswap64(entry->slab[0]); + + svlan = 
(slab_qinq >> 48) & 0xFFF; + cvlan = (slab_qinq >> 16) & 0xFFF; + tc = (dscp >> 2) & 0x3; + tc_q = dscp & 0x3; + } else { + uint32_t ip_src = rte_bswap32(ip->src_addr); + + svlan = 0; + cvlan = (ip_src >> 16) & 0xFFF; + tc = (ip_src >> 2) & 0x3; + tc_q = ip_src & 0x3; + } + sched = RTE_SCHED_PORT_HIERARCHY(svlan, + cvlan, + tc, + tc_q, + e_RTE_METER_GREEN); + } + + /* Write */ + pkt->data_off = data_offset; + pkt->data_len = total_length; + pkt->pkt_len = total_length; + + if ((qinq == 0) && (mpls == 0)) { + *slab0_ptr = entry->slab[0]; + + if (arp == 0) + MACADDR_DST_WRITE(slab1_ptr, entry->slab[1]); + } + + if (qinq) { + *slab0_ptr = entry->slab[0]; + *slab1_ptr = entry->slab[1]; + + if (arp == 0) + MACADDR_DST_WRITE(slab2_ptr, entry->slab[2]); + + if (qinq_sched) { + pkt->hash.sched.lo = sched & 0xFFFFFFFF; + pkt->hash.sched.hi = sched >> 32; + } + } + + if (mpls) { + if (mpls_color_mark) { + uint64_t mpls_exp = rte_bswap64( + (MPLS_LABEL(0, pkt_color, 0, 0) << 32) | + MPLS_LABEL(0, pkt_color, 0, 0)); + + *slab0_ptr = entry->slab[0] | mpls_exp; + *slab1_ptr = entry->slab[1] | mpls_exp; + *slab2_ptr = entry->slab[2]; + } else { + *slab0_ptr = entry->slab[0]; + *slab1_ptr = entry->slab[1]; + *slab2_ptr = entry->slab[2]; + } + + if (arp == 0) + MACADDR_DST_WRITE(slab3_ptr, entry->slab[3]); + } + + if (arp) { + arp_key->port_id = port_id; + arp_key->ip = nh_ip; + } +} + +static inline __attribute__((always_inline)) void +pkt4_work_routing( + struct rte_mbuf **pkts, + struct rte_pipeline_table_entry **table_entries, + void *arg, + int arp, + int qinq, + int qinq_sched, + int mpls, + int mpls_color_mark) +{ + struct pipeline_routing *p_rt = arg; + + struct routing_table_entry *entry0 = + (struct routing_table_entry *) table_entries[0]; + struct routing_table_entry *entry1 = + (struct routing_table_entry *) table_entries[1]; + struct routing_table_entry *entry2 = + (struct routing_table_entry *) table_entries[2]; + struct routing_table_entry *entry3 = + (struct 
routing_table_entry *) table_entries[3]; + + struct ipv4_hdr *ip0 = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[0], + p_rt->params.ip_hdr_offset); + struct ipv4_hdr *ip1 = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[1], + p_rt->params.ip_hdr_offset); + struct ipv4_hdr *ip2 = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[2], + p_rt->params.ip_hdr_offset); + struct ipv4_hdr *ip3 = (struct ipv4_hdr *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[3], + p_rt->params.ip_hdr_offset); + + enum rte_meter_color pkt0_color = (enum rte_meter_color) + RTE_MBUF_METADATA_UINT32(pkts[0], p_rt->params.color_offset); + enum rte_meter_color pkt1_color = (enum rte_meter_color) + RTE_MBUF_METADATA_UINT32(pkts[1], p_rt->params.color_offset); + enum rte_meter_color pkt2_color = (enum rte_meter_color) + RTE_MBUF_METADATA_UINT32(pkts[2], p_rt->params.color_offset); + enum rte_meter_color pkt3_color = (enum rte_meter_color) + RTE_MBUF_METADATA_UINT32(pkts[3], p_rt->params.color_offset); + + struct pipeline_routing_arp_key_ipv4 *arp_key0 = + (struct pipeline_routing_arp_key_ipv4 *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[0], + p_rt->params.arp_key_offset); + struct pipeline_routing_arp_key_ipv4 *arp_key1 = + (struct pipeline_routing_arp_key_ipv4 *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[1], + p_rt->params.arp_key_offset); + struct pipeline_routing_arp_key_ipv4 *arp_key2 = + (struct pipeline_routing_arp_key_ipv4 *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[2], + p_rt->params.arp_key_offset); + struct pipeline_routing_arp_key_ipv4 *arp_key3 = + (struct pipeline_routing_arp_key_ipv4 *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[3], + p_rt->params.arp_key_offset); + + uint64_t *slab0_ptr0, *slab1_ptr0, *slab2_ptr0, *slab3_ptr0; + uint64_t *slab0_ptr1, *slab1_ptr1, *slab2_ptr1, *slab3_ptr1; + uint64_t *slab0_ptr2, *slab1_ptr2, *slab2_ptr2, *slab3_ptr2; + uint64_t *slab0_ptr3, *slab1_ptr3, *slab2_ptr3, *slab3_ptr3; + uint64_t sched0, sched1, sched2, sched3; + + uint32_t ip_da0, nh_ip0, port_id0; + 
uint32_t ip_da1, nh_ip1, port_id1; + uint32_t ip_da2, nh_ip2, port_id2; + uint32_t ip_da3, nh_ip3, port_id3; + + uint16_t total_length0, data_offset0, ether_l2_length0; + uint16_t total_length1, data_offset1, ether_l2_length1; + uint16_t total_length2, data_offset2, ether_l2_length2; + uint16_t total_length3, data_offset3, ether_l2_length3; + + /* Read */ + total_length0 = rte_bswap16(ip0->total_length); + total_length1 = rte_bswap16(ip1->total_length); + total_length2 = rte_bswap16(ip2->total_length); + total_length3 = rte_bswap16(ip3->total_length); + + ip_da0 = ip0->dst_addr; + ip_da1 = ip1->dst_addr; + ip_da2 = ip2->dst_addr; + ip_da3 = ip3->dst_addr; + + data_offset0 = entry0->data_offset; + data_offset1 = entry1->data_offset; + data_offset2 = entry2->data_offset; + data_offset3 = entry3->data_offset; + + ether_l2_length0 = entry0->ether_l2_length; + ether_l2_length1 = entry1->ether_l2_length; + ether_l2_length2 = entry2->ether_l2_length; + ether_l2_length3 = entry3->ether_l2_length; + + slab0_ptr0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0], + entry0->slab_offset[0]); + slab1_ptr0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0], + entry0->slab_offset[1]); + slab2_ptr0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0], + entry0->slab_offset[2]); + slab3_ptr0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0], + entry0->slab_offset[3]); + + slab0_ptr1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1], + entry1->slab_offset[0]); + slab1_ptr1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1], + entry1->slab_offset[1]); + slab2_ptr1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1], + entry1->slab_offset[2]); + slab3_ptr1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1], + entry1->slab_offset[3]); + + slab0_ptr2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2], + entry2->slab_offset[0]); + slab1_ptr2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2], + entry2->slab_offset[1]); + slab2_ptr2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2], + entry2->slab_offset[2]); + slab3_ptr2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2], + entry2->slab_offset[3]); + + slab0_ptr3 = 
RTE_MBUF_METADATA_UINT64_PTR(pkts[3], + entry3->slab_offset[0]); + slab1_ptr3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3], + entry3->slab_offset[1]); + slab2_ptr3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3], + entry3->slab_offset[2]); + slab3_ptr3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3], + entry3->slab_offset[3]); + + if (arp) { + port_id0 = entry0->port_id; + nh_ip0 = entry0->ip; + if (entry0->flags & PIPELINE_ROUTING_ROUTE_LOCAL) + nh_ip0 = ip_da0; + + port_id1 = entry1->port_id; + nh_ip1 = entry1->ip; + if (entry1->flags & PIPELINE_ROUTING_ROUTE_LOCAL) + nh_ip1 = ip_da1; + + port_id2 = entry2->port_id; + nh_ip2 = entry2->ip; + if (entry2->flags & PIPELINE_ROUTING_ROUTE_LOCAL) + nh_ip2 = ip_da2; + + port_id3 = entry3->port_id; + nh_ip3 = entry3->ip; + if (entry3->flags & PIPELINE_ROUTING_ROUTE_LOCAL) + nh_ip3 = ip_da3; + } + + /* Compute */ + total_length0 += ether_l2_length0; + total_length1 += ether_l2_length1; + total_length2 += ether_l2_length2; + total_length3 += ether_l2_length3; + + if (qinq && qinq_sched) { + uint32_t dscp0 = ip0->type_of_service >> 2; + uint32_t dscp1 = ip1->type_of_service >> 2; + uint32_t dscp2 = ip2->type_of_service >> 2; + uint32_t dscp3 = ip3->type_of_service >> 2; + uint32_t svlan0, cvlan0, tc0, tc_q0; + uint32_t svlan1, cvlan1, tc1, tc_q1; + uint32_t svlan2, cvlan2, tc2, tc_q2; + uint32_t svlan3, cvlan3, tc3, tc_q3; + + if (qinq_sched == 1) { + uint64_t slab_qinq0 = rte_bswap64(entry0->slab[0]); + uint64_t slab_qinq1 = rte_bswap64(entry1->slab[0]); + uint64_t slab_qinq2 = rte_bswap64(entry2->slab[0]); + uint64_t slab_qinq3 = rte_bswap64(entry3->slab[0]); + + svlan0 = (slab_qinq0 >> 48) & 0xFFF; + svlan1 = (slab_qinq1 >> 48) & 0xFFF; + svlan2 = (slab_qinq2 >> 48) & 0xFFF; + svlan3 = (slab_qinq3 >> 48) & 0xFFF; + + cvlan0 = (slab_qinq0 >> 16) & 0xFFF; + cvlan1 = (slab_qinq1 >> 16) & 0xFFF; + cvlan2 = (slab_qinq2 >> 16) & 0xFFF; + cvlan3 = (slab_qinq3 >> 16) & 0xFFF; + + tc0 = (dscp0 >> 2) & 0x3; + tc1 = (dscp1 >> 2) & 0x3; + tc2 = (dscp2 >> 2) 
& 0x3; + tc3 = (dscp3 >> 2) & 0x3; + + tc_q0 = dscp0 & 0x3; + tc_q1 = dscp1 & 0x3; + tc_q2 = dscp2 & 0x3; + tc_q3 = dscp3 & 0x3; + } else { + uint32_t ip_src0 = rte_bswap32(ip0->src_addr); + uint32_t ip_src1 = rte_bswap32(ip1->src_addr); + uint32_t ip_src2 = rte_bswap32(ip2->src_addr); + uint32_t ip_src3 = rte_bswap32(ip3->src_addr); + + svlan0 = 0; + svlan1 = 0; + svlan2 = 0; + svlan3 = 0; + + cvlan0 = (ip_src0 >> 16) & 0xFFF; + cvlan1 = (ip_src1 >> 16) & 0xFFF; + cvlan2 = (ip_src2 >> 16) & 0xFFF; + cvlan3 = (ip_src3 >> 16) & 0xFFF; + + tc0 = (ip_src0 >> 2) & 0x3; + tc1 = (ip_src1 >> 2) & 0x3; + tc2 = (ip_src2 >> 2) & 0x3; + tc3 = (ip_src3 >> 2) & 0x3; + + tc_q0 = ip_src0 & 0x3; + tc_q1 = ip_src1 & 0x3; + tc_q2 = ip_src2 & 0x3; + tc_q3 = ip_src3 & 0x3; + } + + sched0 = RTE_SCHED_PORT_HIERARCHY(svlan0, + cvlan0, + tc0, + tc_q0, + e_RTE_METER_GREEN); + sched1 = RTE_SCHED_PORT_HIERARCHY(svlan1, + cvlan1, + tc1, + tc_q1, + e_RTE_METER_GREEN); + sched2 = RTE_SCHED_PORT_HIERARCHY(svlan2, + cvlan2, + tc2, + tc_q2, + e_RTE_METER_GREEN); + sched3 = RTE_SCHED_PORT_HIERARCHY(svlan3, + cvlan3, + tc3, + tc_q3, + e_RTE_METER_GREEN); + + } + + /* Write */ + pkts[0]->data_off = data_offset0; + pkts[1]->data_off = data_offset1; + pkts[2]->data_off = data_offset2; + pkts[3]->data_off = data_offset3; + + pkts[0]->data_len = total_length0; + pkts[1]->data_len = total_length1; + pkts[2]->data_len = total_length2; + pkts[3]->data_len = total_length3; + + pkts[0]->pkt_len = total_length0; + pkts[1]->pkt_len = total_length1; + pkts[2]->pkt_len = total_length2; + pkts[3]->pkt_len = total_length3; + + if ((qinq == 0) && (mpls == 0)) { + *slab0_ptr0 = entry0->slab[0]; + *slab0_ptr1 = entry1->slab[0]; + *slab0_ptr2 = entry2->slab[0]; + *slab0_ptr3 = entry3->slab[0]; + + if (arp == 0) { + MACADDR_DST_WRITE(slab1_ptr0, entry0->slab[1]); + MACADDR_DST_WRITE(slab1_ptr1, entry1->slab[1]); + MACADDR_DST_WRITE(slab1_ptr2, entry2->slab[1]); + MACADDR_DST_WRITE(slab1_ptr3, entry3->slab[1]); + } + } + 
+ if (qinq) { + *slab0_ptr0 = entry0->slab[0]; + *slab0_ptr1 = entry1->slab[0]; + *slab0_ptr2 = entry2->slab[0]; + *slab0_ptr3 = entry3->slab[0]; + + *slab1_ptr0 = entry0->slab[1]; + *slab1_ptr1 = entry1->slab[1]; + *slab1_ptr2 = entry2->slab[1]; + *slab1_ptr3 = entry3->slab[1]; + + if (arp == 0) { + MACADDR_DST_WRITE(slab2_ptr0, entry0->slab[2]); + MACADDR_DST_WRITE(slab2_ptr1, entry1->slab[2]); + MACADDR_DST_WRITE(slab2_ptr2, entry2->slab[2]); + MACADDR_DST_WRITE(slab2_ptr3, entry3->slab[2]); + } + + if (qinq_sched) { + pkts[0]->hash.sched.lo = sched0 & 0xFFFFFFFF; + pkts[0]->hash.sched.hi = sched0 >> 32; + pkts[1]->hash.sched.lo = sched1 & 0xFFFFFFFF; + pkts[1]->hash.sched.hi = sched1 >> 32; + pkts[2]->hash.sched.lo = sched2 & 0xFFFFFFFF; + pkts[2]->hash.sched.hi = sched2 >> 32; + pkts[3]->hash.sched.lo = sched3 & 0xFFFFFFFF; + pkts[3]->hash.sched.hi = sched3 >> 32; + } + } + + if (mpls) { + if (mpls_color_mark) { + uint64_t mpls_exp0 = rte_bswap64( + (MPLS_LABEL(0, pkt0_color, 0, 0) << 32) | + MPLS_LABEL(0, pkt0_color, 0, 0)); + uint64_t mpls_exp1 = rte_bswap64( + (MPLS_LABEL(0, pkt1_color, 0, 0) << 32) | + MPLS_LABEL(0, pkt1_color, 0, 0)); + uint64_t mpls_exp2 = rte_bswap64( + (MPLS_LABEL(0, pkt2_color, 0, 0) << 32) | + MPLS_LABEL(0, pkt2_color, 0, 0)); + uint64_t mpls_exp3 = rte_bswap64( + (MPLS_LABEL(0, pkt3_color, 0, 0) << 32) | + MPLS_LABEL(0, pkt3_color, 0, 0)); + + *slab0_ptr0 = entry0->slab[0] | mpls_exp0; + *slab0_ptr1 = entry1->slab[0] | mpls_exp1; + *slab0_ptr2 = entry2->slab[0] | mpls_exp2; + *slab0_ptr3 = entry3->slab[0] | mpls_exp3; + + *slab1_ptr0 = entry0->slab[1] | mpls_exp0; + *slab1_ptr1 = entry1->slab[1] | mpls_exp1; + *slab1_ptr2 = entry2->slab[1] | mpls_exp2; + *slab1_ptr3 = entry3->slab[1] | mpls_exp3; + + *slab2_ptr0 = entry0->slab[2]; + *slab2_ptr1 = entry1->slab[2]; + *slab2_ptr2 = entry2->slab[2]; + *slab2_ptr3 = entry3->slab[2]; + } else { + *slab0_ptr0 = entry0->slab[0]; + *slab0_ptr1 = entry1->slab[0]; + *slab0_ptr2 = 
entry2->slab[0]; + *slab0_ptr3 = entry3->slab[0]; + + *slab1_ptr0 = entry0->slab[1]; + *slab1_ptr1 = entry1->slab[1]; + *slab1_ptr2 = entry2->slab[1]; + *slab1_ptr3 = entry3->slab[1]; + + *slab2_ptr0 = entry0->slab[2]; + *slab2_ptr1 = entry1->slab[2]; + *slab2_ptr2 = entry2->slab[2]; + *slab2_ptr3 = entry3->slab[2]; + } + + if (arp == 0) { + MACADDR_DST_WRITE(slab3_ptr0, entry0->slab[3]); + MACADDR_DST_WRITE(slab3_ptr1, entry1->slab[3]); + MACADDR_DST_WRITE(slab3_ptr2, entry2->slab[3]); + MACADDR_DST_WRITE(slab3_ptr3, entry3->slab[3]); + } + } + + if (arp) { + arp_key0->port_id = port_id0; + arp_key1->port_id = port_id1; + arp_key2->port_id = port_id2; + arp_key3->port_id = port_id3; + + arp_key0->ip = nh_ip0; + arp_key1->ip = nh_ip1; + arp_key2->ip = nh_ip2; + arp_key3->ip = nh_ip3; + } +} + +#define PKT_WORK_ROUTING_ETHERNET(arp) \ +static inline void \ +pkt_work_routing_ether_arp##arp( \ + struct rte_mbuf *pkt, \ + struct rte_pipeline_table_entry *table_entry, \ + void *arg) \ +{ \ + pkt_work_routing(pkt, table_entry, arg, arp, 0, 0, 0, 0);\ +} + +#define PKT4_WORK_ROUTING_ETHERNET(arp) \ +static inline void \ +pkt4_work_routing_ether_arp##arp( \ + struct rte_mbuf **pkts, \ + struct rte_pipeline_table_entry **table_entries, \ + void *arg) \ +{ \ + pkt4_work_routing(pkts, table_entries, arg, arp, 0, 0, 0, 0);\ +} + +#define routing_table_ah_hit_ether(arp) \ +PKT_WORK_ROUTING_ETHERNET(arp) \ +PKT4_WORK_ROUTING_ETHERNET(arp) \ +PIPELINE_TABLE_AH_HIT(routing_table_ah_hit_ether_arp##arp, \ + pkt_work_routing_ether_arp##arp, \ + pkt4_work_routing_ether_arp##arp) + +routing_table_ah_hit_ether(0) +routing_table_ah_hit_ether(1) + +#define PKT_WORK_ROUTING_ETHERNET_QINQ(sched, arp) \ +static inline void \ +pkt_work_routing_ether_qinq_sched##sched##_arp##arp( \ + struct rte_mbuf *pkt, \ + struct rte_pipeline_table_entry *table_entry, \ + void *arg) \ +{ \ + pkt_work_routing(pkt, table_entry, arg, arp, 1, sched, 0, 0);\ +} + +#define PKT4_WORK_ROUTING_ETHERNET_QINQ(sched, 
arp) \ +static inline void \ +pkt4_work_routing_ether_qinq_sched##sched##_arp##arp( \ + struct rte_mbuf **pkts, \ + struct rte_pipeline_table_entry **table_entries, \ + void *arg) \ +{ \ + pkt4_work_routing(pkts, table_entries, arg, arp, 1, sched, 0, 0);\ +} + +#define routing_table_ah_hit_ether_qinq(sched, arp) \ +PKT_WORK_ROUTING_ETHERNET_QINQ(sched, arp) \ +PKT4_WORK_ROUTING_ETHERNET_QINQ(sched, arp) \ +PIPELINE_TABLE_AH_HIT(routing_table_ah_hit_ether_qinq_sched##sched##_arp##arp,\ + pkt_work_routing_ether_qinq_sched##sched##_arp##arp, \ + pkt4_work_routing_ether_qinq_sched##sched##_arp##arp) + +routing_table_ah_hit_ether_qinq(0, 0) +routing_table_ah_hit_ether_qinq(1, 0) +routing_table_ah_hit_ether_qinq(2, 0) +routing_table_ah_hit_ether_qinq(0, 1) +routing_table_ah_hit_ether_qinq(1, 1) +routing_table_ah_hit_ether_qinq(2, 1) + +#define PKT_WORK_ROUTING_ETHERNET_MPLS(color, arp) \ +static inline void \ +pkt_work_routing_ether_mpls_color##color##_arp##arp( \ + struct rte_mbuf *pkt, \ + struct rte_pipeline_table_entry *table_entry, \ + void *arg) \ +{ \ + pkt_work_routing(pkt, table_entry, arg, arp, 0, 0, 1, color);\ +} + +#define PKT4_WORK_ROUTING_ETHERNET_MPLS(color, arp) \ +static inline void \ +pkt4_work_routing_ether_mpls_color##color##_arp##arp( \ + struct rte_mbuf **pkts, \ + struct rte_pipeline_table_entry **table_entries, \ + void *arg) \ +{ \ + pkt4_work_routing(pkts, table_entries, arg, arp, 0, 0, 1, color);\ +} + +#define routing_table_ah_hit_ether_mpls(color, arp) \ +PKT_WORK_ROUTING_ETHERNET_MPLS(color, arp) \ +PKT4_WORK_ROUTING_ETHERNET_MPLS(color, arp) \ +PIPELINE_TABLE_AH_HIT(routing_table_ah_hit_ether_mpls_color##color##_arp##arp,\ + pkt_work_routing_ether_mpls_color##color##_arp##arp, \ + pkt4_work_routing_ether_mpls_color##color##_arp##arp) + +routing_table_ah_hit_ether_mpls(0, 0) +routing_table_ah_hit_ether_mpls(1, 0) +routing_table_ah_hit_ether_mpls(0, 1) +routing_table_ah_hit_ether_mpls(1, 1) + +static rte_pipeline_table_action_handler_hit 
+get_routing_table_ah_hit(struct pipeline_routing *p) +{ + if (p->params.dbg_ah_disable) + return NULL; + + switch (p->params.encap) { + case PIPELINE_ROUTING_ENCAP_ETHERNET: + return (p->params.n_arp_entries) ? + routing_table_ah_hit_ether_arp1 : + routing_table_ah_hit_ether_arp0; + + case PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ: + if (p->params.n_arp_entries) + switch (p->params.qinq_sched) { + case 0: + return routing_table_ah_hit_ether_qinq_sched0_arp1; + case 1: + return routing_table_ah_hit_ether_qinq_sched1_arp1; + case 2: + return routing_table_ah_hit_ether_qinq_sched2_arp1; + default: + return NULL; + } + else + switch (p->params.qinq_sched) { + case 0: + return routing_table_ah_hit_ether_qinq_sched0_arp0; + case 1: + return routing_table_ah_hit_ether_qinq_sched1_arp0; + case 2: + return routing_table_ah_hit_ether_qinq_sched2_arp0; + default: + return NULL; + } + + case PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS: + if (p->params.n_arp_entries) + if (p->params.mpls_color_mark) + return routing_table_ah_hit_ether_mpls_color1_arp1; + else + return routing_table_ah_hit_ether_mpls_color0_arp1; + else + if (p->params.mpls_color_mark) + return routing_table_ah_hit_ether_mpls_color1_arp0; + else + return routing_table_ah_hit_ether_mpls_color0_arp0; + + default: + return NULL; + } +} + +/* + * ARP table + */ +struct arp_table_entry { + struct rte_pipeline_table_entry head; + uint64_t macaddr; +}; + +/** + * ARP table AH + */ +static inline void +pkt_work_arp( + struct rte_mbuf *pkt, + struct rte_pipeline_table_entry *table_entry, + __rte_unused void *arg) +{ + struct arp_table_entry *entry = (struct arp_table_entry *) table_entry; + + /* Read */ + uint64_t macaddr_dst = entry->macaddr; + uint64_t *slab_ptr = (uint64_t *) ((char *) pkt->buf_addr + + (pkt->data_off - 2)); + + /* Compute */ + + /* Write */ + MACADDR_DST_WRITE(slab_ptr, macaddr_dst); +} + +static inline void +pkt4_work_arp( + struct rte_mbuf **pkts, + struct rte_pipeline_table_entry **table_entries, + 
__rte_unused void *arg) +{ + struct arp_table_entry *entry0 = + (struct arp_table_entry *) table_entries[0]; + struct arp_table_entry *entry1 = + (struct arp_table_entry *) table_entries[1]; + struct arp_table_entry *entry2 = + (struct arp_table_entry *) table_entries[2]; + struct arp_table_entry *entry3 = + (struct arp_table_entry *) table_entries[3]; + + /* Read */ + uint64_t macaddr_dst0 = entry0->macaddr; + uint64_t macaddr_dst1 = entry1->macaddr; + uint64_t macaddr_dst2 = entry2->macaddr; + uint64_t macaddr_dst3 = entry3->macaddr; + + uint64_t *slab_ptr0 = (uint64_t *) ((char *) pkts[0]->buf_addr + + (pkts[0]->data_off - 2)); + uint64_t *slab_ptr1 = (uint64_t *) ((char *) pkts[1]->buf_addr + + (pkts[1]->data_off - 2)); + uint64_t *slab_ptr2 = (uint64_t *) ((char *) pkts[2]->buf_addr + + (pkts[2]->data_off - 2)); + uint64_t *slab_ptr3 = (uint64_t *) ((char *) pkts[3]->buf_addr + + (pkts[3]->data_off - 2)); + + /* Compute */ + + /* Write */ + MACADDR_DST_WRITE(slab_ptr0, macaddr_dst0); + MACADDR_DST_WRITE(slab_ptr1, macaddr_dst1); + MACADDR_DST_WRITE(slab_ptr2, macaddr_dst2); + MACADDR_DST_WRITE(slab_ptr3, macaddr_dst3); +} + +PIPELINE_TABLE_AH_HIT(arp_table_ah_hit, + pkt_work_arp, + pkt4_work_arp); + +static rte_pipeline_table_action_handler_hit +get_arp_table_ah_hit(struct pipeline_routing *p) +{ + if (p->params.dbg_ah_disable) + return NULL; + + return arp_table_ah_hit; +} + +/* + * Argument parsing + */ +int +pipeline_routing_parse_args(struct pipeline_routing_params *p, + struct pipeline_params *params) +{ + uint32_t n_routes_present = 0; + uint32_t encap_present = 0; + uint32_t qinq_sched_present = 0; + uint32_t mpls_color_mark_present = 0; + uint32_t n_arp_entries_present = 0; + uint32_t ip_hdr_offset_present = 0; + uint32_t arp_key_offset_present = 0; + uint32_t color_offset_present = 0; + uint32_t dbg_ah_disable_present = 0; + uint32_t i; + + /* default values */ + p->n_routes = PIPELINE_ROUTING_N_ROUTES_DEFAULT; + p->encap = 
PIPELINE_ROUTING_ENCAP_ETHERNET; + p->qinq_sched = 0; + p->mpls_color_mark = 0; + p->n_arp_entries = 0; + p->dbg_ah_disable = 0; + + for (i = 0; i < params->n_args; i++) { + char *arg_name = params->args_name[i]; + char *arg_value = params->args_value[i]; + + /* n_routes */ + if (strcmp(arg_name, "n_routes") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + n_routes_present == 0, params->name, + arg_name); + n_routes_present = 1; + + status = parser_read_uint32(&p->n_routes, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) && + (p->n_routes != 0)), params->name, + arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* encap */ + if (strcmp(arg_name, "encap") == 0) { + PIPELINE_PARSE_ERR_DUPLICATE(encap_present == 0, + params->name, arg_name); + encap_present = 1; + + /* ethernet */ + if (strcmp(arg_value, "ethernet") == 0) { + p->encap = PIPELINE_ROUTING_ENCAP_ETHERNET; + continue; + } + + /* ethernet_qinq */ + if (strcmp(arg_value, "ethernet_qinq") == 0) { + p->encap = PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ; + continue; + } + + /* ethernet_mpls */ + if (strcmp(arg_value, "ethernet_mpls") == 0) { + p->encap = PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS; + continue; + } + + /* any other */ + PIPELINE_PARSE_ERR_INV_VAL(0, params->name, + arg_name, arg_value); + } + + /* qinq_sched */ + if (strcmp(arg_name, "qinq_sched") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + qinq_sched_present == 0, params->name, + arg_name); + qinq_sched_present = 1; + + status = parser_read_arg_bool(arg_value); + if (status == -EINVAL) { + if (strcmp(arg_value, "test") == 0) { + p->qinq_sched = 2; + continue; + } + } else { + p->qinq_sched = status; + continue; + } + + PIPELINE_PARSE_ERR_INV_VAL(0, params->name, + arg_name, arg_value); + } + + /* mpls_color_mark */ + if (strcmp(arg_name, "mpls_color_mark") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + mpls_color_mark_present 
== 0, + params->name, arg_name); + mpls_color_mark_present = 1; + + + status = parser_read_arg_bool(arg_value); + if (status >= 0) { + p->mpls_color_mark = status; + continue; + } + + PIPELINE_PARSE_ERR_INV_VAL(0, params->name, + arg_name, arg_value); + } + + /* n_arp_entries */ + if (strcmp(arg_name, "n_arp_entries") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + n_arp_entries_present == 0, params->name, + arg_name); + n_arp_entries_present = 1; + + status = parser_read_uint32(&p->n_arp_entries, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* ip_hdr_offset */ + if (strcmp(arg_name, "ip_hdr_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + ip_hdr_offset_present == 0, params->name, + arg_name); + ip_hdr_offset_present = 1; + + status = parser_read_uint32(&p->ip_hdr_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* arp_key_offset */ + if (strcmp(arg_name, "arp_key_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + arp_key_offset_present == 0, params->name, + arg_name); + arp_key_offset_present = 1; + + status = parser_read_uint32(&p->arp_key_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* color_offset */ + if (strcmp(arg_name, "color_offset") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + color_offset_present == 0, params->name, + arg_name); + color_offset_present = 1; + + status = parser_read_uint32(&p->color_offset, + arg_value); + PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL), + params->name, arg_name, 
arg_value); + PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE), + params->name, arg_name, arg_value); + + continue; + } + + /* debug */ + if (strcmp(arg_name, "dbg_ah_disable") == 0) { + int status; + + PIPELINE_PARSE_ERR_DUPLICATE( + dbg_ah_disable_present == 0, params->name, + arg_name); + dbg_ah_disable_present = 1; + + status = parser_read_arg_bool(arg_value); + if (status >= 0) { + p->dbg_ah_disable = status; + continue; + } + + PIPELINE_PARSE_ERR_INV_VAL(0, params->name, + arg_name, arg_value); + + continue; + } + + /* any other */ + PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name); + } + + /* Check that mandatory arguments are present */ + PIPELINE_PARSE_ERR_MANDATORY(ip_hdr_offset_present, params->name, + "ip_hdr_offset"); + + /* Check relations between arguments */ + switch (p->encap) { + case PIPELINE_ROUTING_ENCAP_ETHERNET: + PIPELINE_ARG_CHECK((!p->qinq_sched), "Parse error in " + "section \"%s\": encap = ethernet, therefore " + "qinq_sched = yes/test is not allowed", + params->name); + PIPELINE_ARG_CHECK((!p->mpls_color_mark), "Parse error " + "in section \"%s\": encap = ethernet, therefore " + "mpls_color_mark = yes is not allowed", + params->name); + PIPELINE_ARG_CHECK((!color_offset_present), "Parse error " + "in section \"%s\": encap = ethernet, therefore " + "color_offset is not allowed", + params->name); + break; + + case PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ: + PIPELINE_ARG_CHECK((!p->mpls_color_mark), "Parse error " + "in section \"%s\": encap = ethernet_qinq, " + "therefore mpls_color_mark = yes is not allowed", + params->name); + PIPELINE_ARG_CHECK((!color_offset_present), "Parse error " + "in section \"%s\": encap = ethernet_qinq, " + "therefore color_offset is not allowed", + params->name); + break; + + case PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS: + PIPELINE_ARG_CHECK((!p->qinq_sched), "Parse error in " + "section \"%s\": encap = ethernet_mpls, therefore " + "qinq_sched = yes/test is not allowed", + params->name); + break; + } + + 
PIPELINE_ARG_CHECK((!(p->n_arp_entries && + (!arp_key_offset_present))), "Parse error in section " + "\"%s\": n_arp_entries is set while " + "arp_key_offset is not set", params->name); + + PIPELINE_ARG_CHECK((!((p->n_arp_entries == 0) && + arp_key_offset_present)), "Parse error in section " + "\"%s\": arp_key_offset present while " + "n_arp_entries is not set", params->name); + + return 0; +} + +static void * +pipeline_routing_init(struct pipeline_params *params, + __rte_unused void *arg) +{ + struct pipeline *p; + struct pipeline_routing *p_rt; + uint32_t size, i; + + /* Check input arguments */ + if ((params == NULL) || + (params->n_ports_in == 0) || + (params->n_ports_out == 0)) + return NULL; + + /* Memory allocation */ + size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_routing)); + p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE); + p_rt = (struct pipeline_routing *) p; + if (p == NULL) + return NULL; + + strcpy(p->name, params->name); + p->log_level = params->log_level; + + PLOG(p, HIGH, "Routing"); + + /* Parse arguments */ + if (pipeline_routing_parse_args(&p_rt->params, params)) + return NULL; + + /* Pipeline */ + { + struct rte_pipeline_params pipeline_params = { + .name = params->name, + .socket_id = params->socket_id, + .offset_port_id = 0, + }; + + p->p = rte_pipeline_create(&pipeline_params); + if (p->p == NULL) { + rte_free(p); + return NULL; + } + } + + /* Input ports */ + p->n_ports_in = params->n_ports_in; + for (i = 0; i < p->n_ports_in; i++) { + struct rte_pipeline_port_in_params port_params = { + .ops = pipeline_port_in_params_get_ops( + ¶ms->port_in[i]), + .arg_create = pipeline_port_in_params_convert( + ¶ms->port_in[i]), + .f_action = NULL, + .arg_ah = NULL, + .burst_size = params->port_in[i].burst_size, + }; + + int status = rte_pipeline_port_in_create(p->p, + &port_params, + &p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Output ports */ + p->n_ports_out = 
params->n_ports_out; + for (i = 0; i < p->n_ports_out; i++) { + struct rte_pipeline_port_out_params port_params = { + .ops = pipeline_port_out_params_get_ops( + ¶ms->port_out[i]), + .arg_create = pipeline_port_out_params_convert( + ¶ms->port_out[i]), + .f_action = NULL, + .arg_ah = NULL, + }; + + int status = rte_pipeline_port_out_create(p->p, + &port_params, + &p->port_out_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Routing table */ + p->n_tables = 1; + { + struct rte_table_lpm_params table_lpm_params = { + .name = p->name, + .n_rules = p_rt->params.n_routes, + .number_tbl8s = PIPELINE_ROUTING_LPM_TABLE_NUMBER_TABLE8s, + .flags = 0, + .entry_unique_size = sizeof(struct routing_table_entry), + .offset = p_rt->params.ip_hdr_offset + + __builtin_offsetof(struct ipv4_hdr, dst_addr), + }; + + struct rte_pipeline_table_params table_params = { + .ops = &rte_table_lpm_ops, + .arg_create = &table_lpm_params, + .f_action_hit = get_routing_table_ah_hit(p_rt), + .f_action_miss = NULL, + .arg_ah = p_rt, + .action_data_size = + sizeof(struct routing_table_entry) - + sizeof(struct rte_pipeline_table_entry), + }; + + int status; + + status = rte_pipeline_table_create(p->p, + &table_params, + &p->table_id[0]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* ARP table configuration */ + if (p_rt->params.n_arp_entries) { + struct rte_table_hash_key8_ext_params table_arp_params = { + .n_entries = p_rt->params.n_arp_entries, + .n_entries_ext = p_rt->params.n_arp_entries, + .f_hash = hash_default_key8, + .seed = 0, + .signature_offset = 0, /* Unused */ + .key_offset = p_rt->params.arp_key_offset, + }; + + struct rte_pipeline_table_params table_params = { + .ops = &rte_table_hash_key8_ext_dosig_ops, + .arg_create = &table_arp_params, + .f_action_hit = get_arp_table_ah_hit(p_rt), + .f_action_miss = NULL, + .arg_ah = p_rt, + .action_data_size = sizeof(struct arp_table_entry) - + sizeof(struct 
rte_pipeline_table_entry), + }; + + int status; + + status = rte_pipeline_table_create(p->p, + &table_params, + &p->table_id[1]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + + p->n_tables++; + } + + /* Connecting input ports to tables */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_connect_to_table(p->p, + p->port_in_id[i], + p->table_id[0]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Enable input ports */ + for (i = 0; i < p->n_ports_in; i++) { + int status = rte_pipeline_port_in_enable(p->p, + p->port_in_id[i]); + + if (status) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + } + + /* Check pipeline consistency */ + if (rte_pipeline_check(p->p) < 0) { + rte_pipeline_free(p->p); + rte_free(p); + return NULL; + } + + /* Message queues */ + p->n_msgq = params->n_msgq; + for (i = 0; i < p->n_msgq; i++) + p->msgq_in[i] = params->msgq_in[i]; + for (i = 0; i < p->n_msgq; i++) + p->msgq_out[i] = params->msgq_out[i]; + + /* Message handlers */ + memcpy(p->handlers, handlers, sizeof(p->handlers)); + memcpy(p_rt->custom_handlers, + custom_handlers, + sizeof(p_rt->custom_handlers)); + + return p; +} + +static int +pipeline_routing_free(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if (p == NULL) + return -1; + + /* Free resources */ + rte_pipeline_free(p->p); + rte_free(p); + return 0; +} + +static int +pipeline_routing_track(void *pipeline, + __rte_unused uint32_t port_in, + uint32_t *port_out) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + /* Check input arguments */ + if ((p == NULL) || + (port_in >= p->n_ports_in) || + (port_out == NULL)) + return -1; + + if (p->n_ports_in == 1) { + *port_out = 0; + return 0; + } + + return -1; +} + +static int +pipeline_routing_timer(void *pipeline) +{ + struct pipeline *p = (struct pipeline *) pipeline; + + pipeline_msg_req_handle(p); 
+ rte_pipeline_flush(p->p); + + return 0; +} + +void * +pipeline_routing_msg_req_custom_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_routing *p_rt = (struct pipeline_routing *) p; + struct pipeline_custom_msg_req *req = msg; + pipeline_msg_req_handler f_handle; + + f_handle = (req->subtype < PIPELINE_ROUTING_MSG_REQS) ? + p_rt->custom_handlers[req->subtype] : + pipeline_msg_req_invalid_handler; + + if (f_handle == NULL) + f_handle = pipeline_msg_req_invalid_handler; + + return f_handle(p, req); +} + +void * +pipeline_routing_msg_req_route_add_handler(struct pipeline *p, void *msg) +{ + struct pipeline_routing *p_rt = (struct pipeline_routing *) p; + struct pipeline_routing_route_add_msg_req *req = msg; + struct pipeline_routing_route_add_msg_rsp *rsp = msg; + + struct rte_table_lpm_key key = { + .ip = req->key.key.ipv4.ip, + .depth = req->key.key.ipv4.depth, + }; + + struct routing_table_entry entry_arp0 = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[req->data.port_id]}, + }, + + .flags = req->data.flags, + .port_id = req->data.port_id, + .ip = 0, + .data_offset = 0, + .ether_l2_length = 0, + .slab = {0}, + .slab_offset = {0}, + }; + + struct routing_table_entry entry_arp1 = { + .head = { + .action = RTE_PIPELINE_ACTION_TABLE, + {.table_id = p->table_id[1]}, + }, + + .flags = req->data.flags, + .port_id = req->data.port_id, + .ip = rte_bswap32(req->data.ethernet.ip), + .data_offset = 0, + .ether_l2_length = 0, + .slab = {0}, + .slab_offset = {0}, + }; + + struct rte_pipeline_table_entry *entry = (p_rt->params.n_arp_entries) ? 
+ (struct rte_pipeline_table_entry *) &entry_arp1 : + (struct rte_pipeline_table_entry *) &entry_arp0; + + if ((req->key.type != PIPELINE_ROUTING_ROUTE_IPV4) || + ((p_rt->params.n_arp_entries == 0) && + (req->data.flags & PIPELINE_ROUTING_ROUTE_ARP)) || + (p_rt->params.n_arp_entries && + ((req->data.flags & PIPELINE_ROUTING_ROUTE_ARP) == 0)) || + ((p_rt->params.encap != PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ) && + (req->data.flags & PIPELINE_ROUTING_ROUTE_QINQ)) || + ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ) && + ((req->data.flags & PIPELINE_ROUTING_ROUTE_QINQ) == 0)) || + ((p_rt->params.encap != PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS) && + (req->data.flags & PIPELINE_ROUTING_ROUTE_MPLS)) || + ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS) && + ((req->data.flags & PIPELINE_ROUTING_ROUTE_MPLS) == 0))) { + rsp->status = -1; + return rsp; + } + + /* Ether - ARP off */ + if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET) && + (p_rt->params.n_arp_entries == 0)) { + uint64_t macaddr_src = MAC_SRC_DEFAULT; + uint64_t macaddr_dst; + uint64_t ethertype = ETHER_TYPE_IPv4; + + macaddr_dst = *((uint64_t *)&(req->data.ethernet.macaddr)); + macaddr_dst = rte_bswap64(macaddr_dst << 16); + + entry_arp0.slab[0] = + rte_bswap64((macaddr_src << 16) | ethertype); + entry_arp0.slab_offset[0] = p_rt->params.ip_hdr_offset - 8; + + entry_arp0.slab[1] = rte_bswap64(macaddr_dst); + entry_arp0.slab_offset[1] = p_rt->params.ip_hdr_offset - 2 * 8; + + entry_arp0.data_offset = entry_arp0.slab_offset[1] + 2 + - sizeof(struct rte_mbuf); + entry_arp0.ether_l2_length = 14; + } + + /* Ether - ARP on */ + if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET) && + p_rt->params.n_arp_entries) { + uint64_t macaddr_src = MAC_SRC_DEFAULT; + uint64_t ethertype = ETHER_TYPE_IPv4; + + entry_arp1.slab[0] = rte_bswap64((macaddr_src << 16) | + ethertype); + entry_arp1.slab_offset[0] = p_rt->params.ip_hdr_offset - 8; + + entry_arp1.data_offset = 
entry_arp1.slab_offset[0] - 6 + - sizeof(struct rte_mbuf); + entry_arp1.ether_l2_length = 14; + } + + /* Ether QinQ - ARP off */ + if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ) && + (p_rt->params.n_arp_entries == 0)) { + uint64_t macaddr_src = MAC_SRC_DEFAULT; + uint64_t macaddr_dst; + uint64_t ethertype_ipv4 = ETHER_TYPE_IPv4; + uint64_t ethertype_vlan = 0x8100; + uint64_t ethertype_qinq = 0x9100; + uint64_t svlan = req->data.l2.qinq.svlan; + uint64_t cvlan = req->data.l2.qinq.cvlan; + + macaddr_dst = *((uint64_t *)&(req->data.ethernet.macaddr)); + macaddr_dst = rte_bswap64(macaddr_dst << 16); + + entry_arp0.slab[0] = rte_bswap64((svlan << 48) | + (ethertype_vlan << 32) | + (cvlan << 16) | + ethertype_ipv4); + entry_arp0.slab_offset[0] = p_rt->params.ip_hdr_offset - 8; + + entry_arp0.slab[1] = rte_bswap64((macaddr_src << 16) | + ethertype_qinq); + entry_arp0.slab_offset[1] = p_rt->params.ip_hdr_offset - 2 * 8; + + entry_arp0.slab[2] = rte_bswap64(macaddr_dst); + entry_arp0.slab_offset[2] = p_rt->params.ip_hdr_offset - 3 * 8; + + entry_arp0.data_offset = entry_arp0.slab_offset[2] + 2 + - sizeof(struct rte_mbuf); + entry_arp0.ether_l2_length = 22; + } + + /* Ether QinQ - ARP on */ + if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ) && + p_rt->params.n_arp_entries) { + uint64_t macaddr_src = MAC_SRC_DEFAULT; + uint64_t ethertype_ipv4 = ETHER_TYPE_IPv4; + uint64_t ethertype_vlan = 0x8100; + uint64_t ethertype_qinq = 0x9100; + uint64_t svlan = req->data.l2.qinq.svlan; + uint64_t cvlan = req->data.l2.qinq.cvlan; + + entry_arp1.slab[0] = rte_bswap64((svlan << 48) | + (ethertype_vlan << 32) | + (cvlan << 16) | + ethertype_ipv4); + entry_arp1.slab_offset[0] = p_rt->params.ip_hdr_offset - 8; + + entry_arp1.slab[1] = rte_bswap64((macaddr_src << 16) | + ethertype_qinq); + entry_arp1.slab_offset[1] = p_rt->params.ip_hdr_offset - 2 * 8; + + entry_arp1.data_offset = entry_arp1.slab_offset[1] - 6 + - sizeof(struct rte_mbuf); + 
entry_arp1.ether_l2_length = 22; + } + + /* Ether MPLS - ARP off */ + if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS) && + (p_rt->params.n_arp_entries == 0)) { + uint64_t macaddr_src = MAC_SRC_DEFAULT; + uint64_t macaddr_dst; + uint64_t ethertype_mpls = 0x8847; + + uint64_t label0 = req->data.l2.mpls.labels[0]; + uint64_t label1 = req->data.l2.mpls.labels[1]; + uint64_t label2 = req->data.l2.mpls.labels[2]; + uint64_t label3 = req->data.l2.mpls.labels[3]; + uint32_t n_labels = req->data.l2.mpls.n_labels; + + macaddr_dst = *((uint64_t *)&(req->data.ethernet.macaddr)); + macaddr_dst = rte_bswap64(macaddr_dst << 16); + + switch (n_labels) { + case 1: + entry_arp0.slab[0] = 0; + entry_arp0.slab_offset[0] = + p_rt->params.ip_hdr_offset - 8; + + entry_arp0.slab[1] = rte_bswap64( + MPLS_LABEL(label0, 0, 1, 0)); + entry_arp0.slab_offset[1] = + p_rt->params.ip_hdr_offset - 8; + break; + + case 2: + entry_arp0.slab[0] = 0; + entry_arp0.slab_offset[0] = + p_rt->params.ip_hdr_offset - 8; + + entry_arp0.slab[1] = rte_bswap64( + (MPLS_LABEL(label0, 0, 0, 0) << 32) | + MPLS_LABEL(label1, 0, 1, 0)); + entry_arp0.slab_offset[1] = + p_rt->params.ip_hdr_offset - 8; + break; + + case 3: + entry_arp0.slab[0] = rte_bswap64( + (MPLS_LABEL(label1, 0, 0, 0) << 32) | + MPLS_LABEL(label2, 0, 1, 0)); + entry_arp0.slab_offset[0] = + p_rt->params.ip_hdr_offset - 8; + + entry_arp0.slab[1] = rte_bswap64( + MPLS_LABEL(label0, 0, 0, 0)); + entry_arp0.slab_offset[1] = + p_rt->params.ip_hdr_offset - 2 * 8; + break; + + case 4: + entry_arp0.slab[0] = rte_bswap64( + (MPLS_LABEL(label2, 0, 0, 0) << 32) | + MPLS_LABEL(label3, 0, 1, 0)); + entry_arp0.slab_offset[0] = + p_rt->params.ip_hdr_offset - 8; + + entry_arp0.slab[1] = rte_bswap64( + (MPLS_LABEL(label0, 0, 0, 0) << 32) | + MPLS_LABEL(label1, 0, 0, 0)); + entry_arp0.slab_offset[1] = + p_rt->params.ip_hdr_offset - 2 * 8; + break; + + default: + rsp->status = -1; + return rsp; + } + + entry_arp0.slab[2] = rte_bswap64((macaddr_src << 
16) | + ethertype_mpls); + entry_arp0.slab_offset[2] = p_rt->params.ip_hdr_offset - + (n_labels * 4 + 8); + + entry_arp0.slab[3] = rte_bswap64(macaddr_dst); + entry_arp0.slab_offset[3] = p_rt->params.ip_hdr_offset - + (n_labels * 4 + 2 * 8); + + entry_arp0.data_offset = entry_arp0.slab_offset[3] + 2 + - sizeof(struct rte_mbuf); + entry_arp0.ether_l2_length = n_labels * 4 + 14; + } + + /* Ether MPLS - ARP on */ + if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS) && + p_rt->params.n_arp_entries) { + uint64_t macaddr_src = MAC_SRC_DEFAULT; + uint64_t ethertype_mpls = 0x8847; + + uint64_t label0 = req->data.l2.mpls.labels[0]; + uint64_t label1 = req->data.l2.mpls.labels[1]; + uint64_t label2 = req->data.l2.mpls.labels[2]; + uint64_t label3 = req->data.l2.mpls.labels[3]; + uint32_t n_labels = req->data.l2.mpls.n_labels; + + switch (n_labels) { + case 1: + entry_arp1.slab[0] = 0; + entry_arp1.slab_offset[0] = + p_rt->params.ip_hdr_offset - 8; + + entry_arp1.slab[1] = rte_bswap64( + MPLS_LABEL(label0, 0, 1, 0)); + entry_arp1.slab_offset[1] = + p_rt->params.ip_hdr_offset - 8; + break; + + case 2: + entry_arp1.slab[0] = 0; + entry_arp1.slab_offset[0] = + p_rt->params.ip_hdr_offset - 8; + + entry_arp1.slab[1] = rte_bswap64( + (MPLS_LABEL(label0, 0, 0, 0) << 32) | + MPLS_LABEL(label1, 0, 1, 0)); + entry_arp1.slab_offset[1] = + p_rt->params.ip_hdr_offset - 8; + break; + + case 3: + entry_arp1.slab[0] = rte_bswap64( + (MPLS_LABEL(label1, 0, 0, 0) << 32) | + MPLS_LABEL(label2, 0, 1, 0)); + entry_arp1.slab_offset[0] = + p_rt->params.ip_hdr_offset - 8; + + entry_arp1.slab[1] = rte_bswap64( + MPLS_LABEL(label0, 0, 0, 0)); + entry_arp1.slab_offset[1] = + p_rt->params.ip_hdr_offset - 2 * 8; + break; + + case 4: + entry_arp1.slab[0] = rte_bswap64( + (MPLS_LABEL(label2, 0, 0, 0) << 32) | + MPLS_LABEL(label3, 0, 1, 0)); + entry_arp1.slab_offset[0] = + p_rt->params.ip_hdr_offset - 8; + + entry_arp1.slab[1] = rte_bswap64( + (MPLS_LABEL(label0, 0, 0, 0) << 32) | + 
MPLS_LABEL(label1, 0, 0, 0)); + entry_arp1.slab_offset[1] = + p_rt->params.ip_hdr_offset - 2 * 8; + break; + + default: + rsp->status = -1; + return rsp; + } + + entry_arp1.slab[2] = rte_bswap64((macaddr_src << 16) | + ethertype_mpls); + entry_arp1.slab_offset[2] = p_rt->params.ip_hdr_offset - + (n_labels * 4 + 8); + + entry_arp1.data_offset = entry_arp1.slab_offset[2] - 6 + - sizeof(struct rte_mbuf); + entry_arp1.ether_l2_length = n_labels * 4 + 14; + } + + rsp->status = rte_pipeline_table_entry_add(p->p, + p->table_id[0], + &key, + entry, + &rsp->key_found, + (struct rte_pipeline_table_entry **) &rsp->entry_ptr); + + return rsp; +} + +void * +pipeline_routing_msg_req_route_del_handler(struct pipeline *p, void *msg) +{ + struct pipeline_routing_route_delete_msg_req *req = msg; + struct pipeline_routing_route_delete_msg_rsp *rsp = msg; + + struct rte_table_lpm_key key = { + .ip = req->key.key.ipv4.ip, + .depth = req->key.key.ipv4.depth, + }; + + if (req->key.type != PIPELINE_ROUTING_ROUTE_IPV4) { + rsp->status = -1; + return rsp; + } + + rsp->status = rte_pipeline_table_entry_delete(p->p, + p->table_id[0], + &key, + &rsp->key_found, + NULL); + + return rsp; +} + +void * +pipeline_routing_msg_req_route_add_default_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_routing_route_add_default_msg_req *req = msg; + struct pipeline_routing_route_add_default_msg_rsp *rsp = msg; + + struct routing_table_entry default_entry = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[req->port_id]}, + }, + + .flags = 0, + .port_id = 0, + .ip = 0, + }; + + rsp->status = rte_pipeline_table_default_entry_add(p->p, + p->table_id[0], + (struct rte_pipeline_table_entry *) &default_entry, + (struct rte_pipeline_table_entry **) &rsp->entry_ptr); + + return rsp; +} + +void * +pipeline_routing_msg_req_route_del_default_handler(struct pipeline *p, + void *msg) +{ + struct pipeline_routing_route_delete_default_msg_rsp *rsp = msg; + + rsp->status = 
rte_pipeline_table_default_entry_delete(p->p, + p->table_id[0], + NULL); + + return rsp; +} + +void * +pipeline_routing_msg_req_arp_add_handler(struct pipeline *p, void *msg) +{ + struct pipeline_routing_arp_add_msg_req *req = msg; + struct pipeline_routing_arp_add_msg_rsp *rsp = msg; + + struct pipeline_routing_arp_key_ipv4 key = { + .port_id = req->key.key.ipv4.port_id, + .ip = rte_bswap32(req->key.key.ipv4.ip), + }; + + struct arp_table_entry entry = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[req->key.key.ipv4.port_id]}, + }, + + .macaddr = 0, /* set below */ + }; + + if (req->key.type != PIPELINE_ROUTING_ARP_IPV4) { + rsp->status = -1; + return rsp; + } + + entry.macaddr = *((uint64_t *)&(req->macaddr)); + entry.macaddr = entry.macaddr << 16; + + rsp->status = rte_pipeline_table_entry_add(p->p, + p->table_id[1], + &key, + (struct rte_pipeline_table_entry *) &entry, + &rsp->key_found, + (struct rte_pipeline_table_entry **) &rsp->entry_ptr); + + return rsp; +} + +void * +pipeline_routing_msg_req_arp_del_handler(struct pipeline *p, void *msg) +{ + struct pipeline_routing_arp_delete_msg_req *req = msg; + struct pipeline_routing_arp_delete_msg_rsp *rsp = msg; + + struct pipeline_routing_arp_key_ipv4 key = { + .port_id = req->key.key.ipv4.port_id, + .ip = rte_bswap32(req->key.key.ipv4.ip), + }; + + if (req->key.type != PIPELINE_ROUTING_ARP_IPV4) { + rsp->status = -1; + return rsp; + } + + rsp->status = rte_pipeline_table_entry_delete(p->p, + p->table_id[1], + &key, + &rsp->key_found, + NULL); + + return rsp; +} + +void * +pipeline_routing_msg_req_arp_add_default_handler(struct pipeline *p, void *msg) +{ + struct pipeline_routing_arp_add_default_msg_req *req = msg; + struct pipeline_routing_arp_add_default_msg_rsp *rsp = msg; + + struct arp_table_entry default_entry = { + .head = { + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = p->port_out_id[req->port_id]}, + }, + + .macaddr = 0, + }; + + rsp->status = 
rte_pipeline_table_default_entry_add(p->p, + p->table_id[1], + (struct rte_pipeline_table_entry *) &default_entry, + (struct rte_pipeline_table_entry **) &rsp->entry_ptr); + + return rsp; +} + +void * +pipeline_routing_msg_req_arp_del_default_handler(struct pipeline *p, void *msg) +{ + struct pipeline_routing_arp_delete_default_msg_rsp *rsp = msg; + + rsp->status = rte_pipeline_table_default_entry_delete(p->p, + p->table_id[1], + NULL); + + return rsp; +} + +struct pipeline_be_ops pipeline_routing_be_ops = { + .f_init = pipeline_routing_init, + .f_free = pipeline_routing_free, + .f_run = NULL, + .f_timer = pipeline_routing_timer, + .f_track = pipeline_routing_track, +}; diff --git a/examples/ip_pipeline/pipeline/pipeline_routing_be.h b/examples/ip_pipeline/pipeline/pipeline_routing_be.h new file mode 100644 index 00000000..ec767b24 --- /dev/null +++ b/examples/ip_pipeline/pipeline/pipeline_routing_be.h @@ -0,0 +1,296 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_PIPELINE_ROUTING_BE_H__ +#define __INCLUDE_PIPELINE_ROUTING_BE_H__ + +#include <rte_ether.h> + +#include "pipeline_common_be.h" + +/* + * Pipeline argument parsing + */ +#ifndef PIPELINE_ROUTING_N_ROUTES_DEFAULT +#define PIPELINE_ROUTING_N_ROUTES_DEFAULT 4096 +#endif + +enum pipeline_routing_encap { + PIPELINE_ROUTING_ENCAP_ETHERNET = 0, + PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ, + PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS, +}; + +struct pipeline_routing_params { + /* routing */ + uint32_t n_routes; + + /* routing packet encapsulation */ + enum pipeline_routing_encap encap; + uint32_t qinq_sched; + uint32_t mpls_color_mark; + + /* arp */ + uint32_t n_arp_entries; + + /* packet buffer offsets */ + uint32_t ip_hdr_offset; + uint32_t arp_key_offset; + uint32_t color_offset; + + /* debug */ + uint32_t dbg_ah_disable; +}; + +int +pipeline_routing_parse_args(struct pipeline_routing_params *p, + struct pipeline_params *params); + +/* + * Route + */ +enum pipeline_routing_route_key_type { + PIPELINE_ROUTING_ROUTE_IPV4, +}; + +struct pipeline_routing_route_key_ipv4 { + uint32_t ip; + uint32_t depth; +}; + +struct pipeline_routing_route_key { + enum 
/*
 * Per-route data carried by a "route add" request.
 */
struct pipeline_routing_route_data {
	/* presumably a mask of enum pipeline_routing_route_flags bits --
	 * TODO confirm against the route-add handler */
	uint32_t flags;
	uint32_t port_id; /* Output port ID */

	union {
		/* Next hop IP (valid only when ARP is enabled) */
		uint32_t ip;

		/* Next hop MAC address (valid only when ARP is disabled) */
		struct ether_addr macaddr;
	} ethernet;

	/* Layer-2 encapsulation parameters */
	union {
		/* Q-in-Q VLAN tags */
		struct {
			uint16_t svlan;
			uint16_t cvlan;
		} qinq;

		/* MPLS label stack; for MPLS encapsulation the route-add
		 * handler rejects n_labels outside 1..4 with status -1 */
		struct {
			uint32_t labels[PIPELINE_ROUTING_MPLS_LABELS_MAX];
			uint32_t n_labels;
		} mpls;
	} l2;
};
+ int status; + int key_found; + void *entry_ptr; +}; + +/* + * MSG ROUTE DELETE + */ +struct pipeline_routing_route_delete_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_routing_msg_req_type subtype; + + /* key */ + struct pipeline_routing_route_key key; +}; + +struct pipeline_routing_route_delete_msg_rsp { + int status; + int key_found; +}; + +/* + * MSG ROUTE ADD DEFAULT + */ +struct pipeline_routing_route_add_default_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_routing_msg_req_type subtype; + + /* data */ + uint32_t port_id; +}; + +struct pipeline_routing_route_add_default_msg_rsp { + int status; + void *entry_ptr; +}; + +/* + * MSG ROUTE DELETE DEFAULT + */ +struct pipeline_routing_route_delete_default_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_routing_msg_req_type subtype; +}; + +struct pipeline_routing_route_delete_default_msg_rsp { + int status; +}; + +/* + * MSG ARP ADD + */ +struct pipeline_routing_arp_add_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_routing_msg_req_type subtype; + + /* key */ + struct pipeline_routing_arp_key key; + + /* data */ + struct ether_addr macaddr; +}; + +struct pipeline_routing_arp_add_msg_rsp { + int status; + int key_found; + void *entry_ptr; +}; + +/* + * MSG ARP DELETE + */ +struct pipeline_routing_arp_delete_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_routing_msg_req_type subtype; + + /* key */ + struct pipeline_routing_arp_key key; +}; + +struct pipeline_routing_arp_delete_msg_rsp { + int status; + int key_found; +}; + +/* + * MSG ARP ADD DEFAULT + */ +struct pipeline_routing_arp_add_default_msg_req { + enum pipeline_msg_req_type type; + enum pipeline_routing_msg_req_type subtype; + + /* data */ + uint32_t port_id; +}; + +struct pipeline_routing_arp_add_default_msg_rsp { + int status; + void *entry_ptr; +}; + +/* + * MSG ARP DELETE DEFAULT + */ +struct pipeline_routing_arp_delete_default_msg_req { + enum pipeline_msg_req_type type; + enum 
pipeline_routing_msg_req_type subtype; +}; + +struct pipeline_routing_arp_delete_default_msg_rsp { + int status; +}; + +extern struct pipeline_be_ops pipeline_routing_be_ops; + +#endif diff --git a/examples/ip_pipeline/pipeline_be.h b/examples/ip_pipeline/pipeline_be.h new file mode 100644 index 00000000..f4ff262e --- /dev/null +++ b/examples/ip_pipeline/pipeline_be.h @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Input port description: a type tag plus the creation parameters for
 * that port type.
 */
struct pipeline_port_in_params {
	/* Selects which member of the params union is in use (see
	 * pipeline_port_in_params_convert()) */
	enum pipeline_port_in_type type;
	union {
		struct rte_port_ethdev_reader_params ethdev;
		struct rte_port_ring_reader_params ring;
		struct rte_port_ring_multi_reader_params ring_multi;
		struct rte_port_ring_reader_ipv4_frag_params ring_ipv4_frag;
		struct rte_port_ring_reader_ipv6_frag_params ring_ipv6_frag;
		struct rte_port_sched_reader_params sched;
		struct rte_port_source_params source;
	} params;
	/* presumably the read burst size handed to the pipeline input
	 * port -- TODO confirm against the pipeline init code */
	uint32_t burst_size;
};
&p->params.ring_ipv4_frag; + case PIPELINE_PORT_IN_RING_READER_IPV6_FRAG: + return (void *) &p->params.ring_ipv6_frag; + case PIPELINE_PORT_IN_SCHED_READER: + return (void *) &p->params.sched; + case PIPELINE_PORT_IN_SOURCE: + return (void *) &p->params.source; + default: + return NULL; + } +} + +static inline struct rte_port_in_ops * +pipeline_port_in_params_get_ops(struct pipeline_port_in_params *p) +{ + switch (p->type) { + case PIPELINE_PORT_IN_ETHDEV_READER: + return &rte_port_ethdev_reader_ops; + case PIPELINE_PORT_IN_RING_READER: + return &rte_port_ring_reader_ops; + case PIPELINE_PORT_IN_RING_MULTI_READER: + return &rte_port_ring_multi_reader_ops; + case PIPELINE_PORT_IN_RING_READER_IPV4_FRAG: + return &rte_port_ring_reader_ipv4_frag_ops; + case PIPELINE_PORT_IN_RING_READER_IPV6_FRAG: + return &rte_port_ring_reader_ipv6_frag_ops; + case PIPELINE_PORT_IN_SCHED_READER: + return &rte_port_sched_reader_ops; + case PIPELINE_PORT_IN_SOURCE: + return &rte_port_source_ops; + default: + return NULL; + } +} + +enum pipeline_port_out_type { + PIPELINE_PORT_OUT_ETHDEV_WRITER, + PIPELINE_PORT_OUT_ETHDEV_WRITER_NODROP, + PIPELINE_PORT_OUT_RING_WRITER, + PIPELINE_PORT_OUT_RING_MULTI_WRITER, + PIPELINE_PORT_OUT_RING_WRITER_NODROP, + PIPELINE_PORT_OUT_RING_MULTI_WRITER_NODROP, + PIPELINE_PORT_OUT_RING_WRITER_IPV4_RAS, + PIPELINE_PORT_OUT_RING_WRITER_IPV6_RAS, + PIPELINE_PORT_OUT_SCHED_WRITER, + PIPELINE_PORT_OUT_SINK, +}; + +struct pipeline_port_out_params { + enum pipeline_port_out_type type; + union { + struct rte_port_ethdev_writer_params ethdev; + struct rte_port_ethdev_writer_nodrop_params ethdev_nodrop; + struct rte_port_ring_writer_params ring; + struct rte_port_ring_multi_writer_params ring_multi; + struct rte_port_ring_writer_nodrop_params ring_nodrop; + struct rte_port_ring_multi_writer_nodrop_params ring_multi_nodrop; + struct rte_port_ring_writer_ipv4_ras_params ring_ipv4_ras; + struct rte_port_ring_writer_ipv6_ras_params ring_ipv6_ras; + struct 
rte_port_sched_writer_params sched; + struct rte_port_sink_params sink; + } params; +}; + +static inline void * +pipeline_port_out_params_convert(struct pipeline_port_out_params *p) +{ + switch (p->type) { + case PIPELINE_PORT_OUT_ETHDEV_WRITER: + return (void *) &p->params.ethdev; + case PIPELINE_PORT_OUT_ETHDEV_WRITER_NODROP: + return (void *) &p->params.ethdev_nodrop; + case PIPELINE_PORT_OUT_RING_WRITER: + return (void *) &p->params.ring; + case PIPELINE_PORT_OUT_RING_MULTI_WRITER: + return (void *) &p->params.ring_multi; + case PIPELINE_PORT_OUT_RING_WRITER_NODROP: + return (void *) &p->params.ring_nodrop; + case PIPELINE_PORT_OUT_RING_MULTI_WRITER_NODROP: + return (void *) &p->params.ring_multi_nodrop; + case PIPELINE_PORT_OUT_RING_WRITER_IPV4_RAS: + return (void *) &p->params.ring_ipv4_ras; + case PIPELINE_PORT_OUT_RING_WRITER_IPV6_RAS: + return (void *) &p->params.ring_ipv6_ras; + case PIPELINE_PORT_OUT_SCHED_WRITER: + return (void *) &p->params.sched; + case PIPELINE_PORT_OUT_SINK: + return (void *) &p->params.sink; + default: + return NULL; + } +} + +static inline void * +pipeline_port_out_params_get_ops(struct pipeline_port_out_params *p) +{ + switch (p->type) { + case PIPELINE_PORT_OUT_ETHDEV_WRITER: + return &rte_port_ethdev_writer_ops; + case PIPELINE_PORT_OUT_ETHDEV_WRITER_NODROP: + return &rte_port_ethdev_writer_nodrop_ops; + case PIPELINE_PORT_OUT_RING_WRITER: + return &rte_port_ring_writer_ops; + case PIPELINE_PORT_OUT_RING_MULTI_WRITER: + return &rte_port_ring_multi_writer_ops; + case PIPELINE_PORT_OUT_RING_WRITER_NODROP: + return &rte_port_ring_writer_nodrop_ops; + case PIPELINE_PORT_OUT_RING_MULTI_WRITER_NODROP: + return &rte_port_ring_multi_writer_nodrop_ops; + case PIPELINE_PORT_OUT_RING_WRITER_IPV4_RAS: + return &rte_port_ring_writer_ipv4_ras_ops; + case PIPELINE_PORT_OUT_RING_WRITER_IPV6_RAS: + return &rte_port_ring_writer_ipv6_ras_ops; + case PIPELINE_PORT_OUT_SCHED_WRITER: + return &rte_port_sched_writer_ops; + case 
PIPELINE_PORT_OUT_SINK: + return &rte_port_sink_ops; + default: + return NULL; + } +} + +#ifndef PIPELINE_NAME_SIZE +#define PIPELINE_NAME_SIZE 32 +#endif + +#ifndef PIPELINE_MAX_PORT_IN +#define PIPELINE_MAX_PORT_IN 16 +#endif + +#ifndef PIPELINE_MAX_PORT_OUT +#define PIPELINE_MAX_PORT_OUT 16 +#endif + +#ifndef PIPELINE_MAX_TABLES +#define PIPELINE_MAX_TABLES 16 +#endif + +#ifndef PIPELINE_MAX_MSGQ_IN +#define PIPELINE_MAX_MSGQ_IN 16 +#endif + +#ifndef PIPELINE_MAX_MSGQ_OUT +#define PIPELINE_MAX_MSGQ_OUT 16 +#endif + +#ifndef PIPELINE_MAX_ARGS +#define PIPELINE_MAX_ARGS 32 +#endif + +struct pipeline_params { + char name[PIPELINE_NAME_SIZE]; + + struct pipeline_port_in_params port_in[PIPELINE_MAX_PORT_IN]; + struct pipeline_port_out_params port_out[PIPELINE_MAX_PORT_OUT]; + struct rte_ring *msgq_in[PIPELINE_MAX_MSGQ_IN]; + struct rte_ring *msgq_out[PIPELINE_MAX_MSGQ_OUT]; + + uint32_t n_ports_in; + uint32_t n_ports_out; + uint32_t n_msgq; + + int socket_id; + + char *args_name[PIPELINE_MAX_ARGS]; + char *args_value[PIPELINE_MAX_ARGS]; + uint32_t n_args; + + uint32_t log_level; +}; + +/* + * Pipeline type back-end operations + */ + +typedef void* (*pipeline_be_op_init)(struct pipeline_params *params, + void *arg); + +typedef int (*pipeline_be_op_free)(void *pipeline); + +typedef int (*pipeline_be_op_run)(void *pipeline); + +typedef int (*pipeline_be_op_timer)(void *pipeline); + +typedef int (*pipeline_be_op_track)(void *pipeline, + uint32_t port_in, + uint32_t *port_out); + +struct pipeline_be_ops { + pipeline_be_op_init f_init; + pipeline_be_op_free f_free; + pipeline_be_op_run f_run; + pipeline_be_op_timer f_timer; + pipeline_be_op_track f_track; +}; + +/* Pipeline specific config parse error messages */ +#define PIPELINE_ARG_CHECK(exp, fmt, ...) 
\ +do { \ + if (!(exp)) { \ + fprintf(stderr, fmt "\n", ## __VA_ARGS__); \ + return -1; \ + } \ +} while (0) + +#define PIPELINE_PARSE_ERR_INV_VAL(exp, section, entry, val) \ +PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": entry \"%s\" " \ + "has invalid value (\"%s\")", section, entry, val) + +#define PIPELINE_PARSE_ERR_OUT_RNG(exp, section, entry, val) \ +PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": entry \"%s\" " \ + "value is out of range (\"%s\")", section, entry, val) + +#define PIPELINE_PARSE_ERR_DUPLICATE(exp, section, entry) \ +PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": duplicated " \ + "entry \"%s\"", section, entry) + +#define PIPELINE_PARSE_ERR_INV_ENT(exp, section, entry) \ +PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": invalid entry " \ + "\"%s\"", section, entry) + +#define PIPELINE_PARSE_ERR_MANDATORY(exp, section, entry) \ +PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": mandatory " \ + "entry \"%s\" is missing", section, entry) + +#endif diff --git a/examples/ip_pipeline/thread.c b/examples/ip_pipeline/thread.c new file mode 100644 index 00000000..a0f1f12f --- /dev/null +++ b/examples/ip_pipeline/thread.c @@ -0,0 +1,322 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * PIPELINE_RUN_REGULAR(thread, pipeline) runs one iteration of a regular
 * pipeline through rte_pipeline_run(); PIPELINE_RUN_CUSTOM(thread, data)
 * runs one iteration of a custom pipeline through its f_run callback.
 *
 * When APP_THREAD_HEADROOM_STATS_COLLECT is non-zero, an iteration that
 * processed no packets has its cycle count added to the thread's
 * headroom_cycles counter.
 *
 * Macro arguments are parenthesized so that any pointer expression can
 * be passed. Note that "data" is evaluated more than once.
 */
#if APP_THREAD_HEADROOM_STATS_COLLECT

#define PIPELINE_RUN_REGULAR(thread, pipeline)		\
do {							\
	uint64_t t0 = rte_rdtsc_precise();		\
	int n_pkts = rte_pipeline_run((pipeline)->p);	\
							\
	if (n_pkts == 0) {				\
		uint64_t t1 = rte_rdtsc_precise();	\
							\
		(thread)->headroom_cycles += t1 - t0;	\
	}						\
} while (0)


#define PIPELINE_RUN_CUSTOM(thread, data)		\
do {							\
	uint64_t t0 = rte_rdtsc_precise();		\
	int n_pkts = (data)->f_run((data)->be);		\
							\
	if (n_pkts == 0) {				\
		uint64_t t1 = rte_rdtsc_precise();	\
							\
		(thread)->headroom_cycles += t1 - t0;	\
	}						\
} while (0)

#else

#define PIPELINE_RUN_REGULAR(thread, pipeline)		\
	rte_pipeline_run((pipeline)->p)

#define PIPELINE_RUN_CUSTOM(thread, data)		\
	(data)->f_run((data)->be)

#endif
+ &t->regular[t->n_regular] : + &t->custom[t->n_custom]; + + p->pipeline_id = req->pipeline_id; + p->be = req->be; + p->f_run = req->f_run; + p->f_timer = req->f_timer; + p->timer_period = req->timer_period; + p->deadline = 0; + + if (req->f_run == NULL) + t->n_regular++; + else + t->n_custom++; + + return 0; +} + +static int +thread_pipeline_disable(struct app_thread_data *t, + struct thread_pipeline_disable_msg_req *req) +{ + uint32_t n_regular = RTE_MIN(t->n_regular, RTE_DIM(t->regular)); + uint32_t n_custom = RTE_MIN(t->n_custom, RTE_DIM(t->custom)); + uint32_t i; + + /* search regular pipelines of current thread */ + for (i = 0; i < n_regular; i++) { + if (t->regular[i].pipeline_id != req->pipeline_id) + continue; + + if (i < n_regular - 1) + memcpy(&t->regular[i], + &t->regular[i+1], + (n_regular - 1 - i) * sizeof(struct app_thread_pipeline_data)); + + n_regular--; + t->n_regular = n_regular; + + return 0; + } + + /* search custom pipelines of current thread */ + for (i = 0; i < n_custom; i++) { + if (t->custom[i].pipeline_id != req->pipeline_id) + continue; + + if (i < n_custom - 1) + memcpy(&t->custom[i], + &t->custom[i+1], + (n_custom - 1 - i) * sizeof(struct app_thread_pipeline_data)); + + n_custom--; + t->n_custom = n_custom; + + return 0; + } + + /* return if pipeline not found */ + return -1; +} + +static int +thread_msg_req_handle(struct app_thread_data *t) +{ + void *msg_ptr; + struct thread_msg_req *req; + struct thread_msg_rsp *rsp; + + msg_ptr = thread_msg_recv(t->msgq_in); + req = msg_ptr; + rsp = msg_ptr; + + if (req != NULL) + switch (req->type) { + case THREAD_MSG_REQ_PIPELINE_ENABLE: { + rsp->status = thread_pipeline_enable(t, + (struct thread_pipeline_enable_msg_req *) req); + thread_msg_send(t->msgq_out, rsp); + break; + } + + case THREAD_MSG_REQ_PIPELINE_DISABLE: { + rsp->status = thread_pipeline_disable(t, + (struct thread_pipeline_disable_msg_req *) req); + thread_msg_send(t->msgq_out, rsp); + break; + } + + case 
THREAD_MSG_REQ_HEADROOM_READ: { + struct thread_headroom_read_msg_rsp *rsp = + (struct thread_headroom_read_msg_rsp *) + req; + + rsp->headroom_ratio = t->headroom_ratio; + rsp->status = 0; + thread_msg_send(t->msgq_out, rsp); + break; + } + default: + break; + } + + return 0; +} + +static void +thread_headroom_update(struct app_thread_data *t, uint64_t time) +{ + uint64_t time_diff = time - t->headroom_time; + + t->headroom_ratio = + ((double) t->headroom_cycles) / ((double) time_diff); + + t->headroom_cycles = 0; + t->headroom_time = rte_rdtsc_precise(); +} + +int +app_thread(void *arg) +{ + struct app_params *app = (struct app_params *) arg; + uint32_t core_id = rte_lcore_id(), i, j; + struct app_thread_data *t = &app->thread_data[core_id]; + + for (i = 0; ; i++) { + uint32_t n_regular = RTE_MIN(t->n_regular, RTE_DIM(t->regular)); + uint32_t n_custom = RTE_MIN(t->n_custom, RTE_DIM(t->custom)); + + /* Run regular pipelines */ + for (j = 0; j < n_regular; j++) { + struct app_thread_pipeline_data *data = &t->regular[j]; + struct pipeline *p = data->be; + + PIPELINE_RUN_REGULAR(t, p); + } + + /* Run custom pipelines */ + for (j = 0; j < n_custom; j++) { + struct app_thread_pipeline_data *data = &t->custom[j]; + + PIPELINE_RUN_CUSTOM(t, data); + } + + /* Timer */ + if ((i & 0xF) == 0) { + uint64_t time = rte_get_tsc_cycles(); + uint64_t t_deadline = UINT64_MAX; + + if (time < t->deadline) + continue; + + /* Timer for regular pipelines */ + for (j = 0; j < n_regular; j++) { + struct app_thread_pipeline_data *data = + &t->regular[j]; + uint64_t p_deadline = data->deadline; + + if (p_deadline <= time) { + data->f_timer(data->be); + p_deadline = time + data->timer_period; + data->deadline = p_deadline; + } + + if (p_deadline < t_deadline) + t_deadline = p_deadline; + } + + /* Timer for custom pipelines */ + for (j = 0; j < n_custom; j++) { + struct app_thread_pipeline_data *data = + &t->custom[j]; + uint64_t p_deadline = data->deadline; + + if (p_deadline <= time) { + 
data->f_timer(data->be); + p_deadline = time + data->timer_period; + data->deadline = p_deadline; + } + + if (p_deadline < t_deadline) + t_deadline = p_deadline; + } + + /* Timer for thread message request */ + { + uint64_t deadline = t->thread_req_deadline; + + if (deadline <= time) { + thread_msg_req_handle(t); + thread_headroom_update(t, time); + deadline = time + t->timer_period; + t->thread_req_deadline = deadline; + } + + if (deadline < t_deadline) + t_deadline = deadline; + } + + + t->deadline = t_deadline; + } + } + + return 0; +} diff --git a/examples/ip_pipeline/thread.h b/examples/ip_pipeline/thread.h new file mode 100644 index 00000000..e52b22e6 --- /dev/null +++ b/examples/ip_pipeline/thread.h @@ -0,0 +1,98 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef THREAD_H_ +#define THREAD_H_ + +#include "app.h" +#include "pipeline_be.h" + +enum thread_msg_req_type { + THREAD_MSG_REQ_PIPELINE_ENABLE = 0, + THREAD_MSG_REQ_PIPELINE_DISABLE, + THREAD_MSG_REQ_HEADROOM_READ, + THREAD_MSG_REQS +}; + +struct thread_msg_req { + enum thread_msg_req_type type; +}; + +struct thread_msg_rsp { + int status; +}; + +/* + * PIPELINE ENABLE + */ +struct thread_pipeline_enable_msg_req { + enum thread_msg_req_type type; + + uint32_t pipeline_id; + void *be; + pipeline_be_op_run f_run; + pipeline_be_op_timer f_timer; + uint64_t timer_period; +}; + +struct thread_pipeline_enable_msg_rsp { + int status; +}; + +/* + * PIPELINE DISABLE + */ +struct thread_pipeline_disable_msg_req { + enum thread_msg_req_type type; + + uint32_t pipeline_id; +}; + +struct thread_pipeline_disable_msg_rsp { + int status; +}; + +/* + * THREAD HEADROOM + */ +struct thread_headroom_read_msg_req { + enum thread_msg_req_type type; +}; + +struct thread_headroom_read_msg_rsp { + int status; + + double headroom_ratio; +}; + +#endif /* THREAD_H_ */ diff --git a/examples/ip_pipeline/thread_fe.c b/examples/ip_pipeline/thread_fe.c new file mode 100644 index 00000000..4a435f7c --- /dev/null +++ b/examples/ip_pipeline/thread_fe.c @@ -0,0 +1,461 @@ +#include <rte_common.h> +#include <rte_ring.h> +#include <rte_malloc.h> +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include 
/*
 * Send a request message to the thread identified by
 * (socket_id, core_id, ht_id) and wait for its response.
 *
 * The enqueue is retried for as long as the request ring reports
 * -ENOBUFS. The response ring is then polled until a message arrives or
 * the timeout expires; timeout_ms == 0 means wait without a time limit.
 *
 * Returns the response message, or NULL on timeout.
 */
static inline void *
thread_msg_send_recv(struct app_params *app,
	uint32_t socket_id, uint32_t core_id, uint32_t ht_id,
	void *msg,
	uint32_t timeout_ms)
{
	struct rte_ring *req_ring = app_thread_msgq_in_get(app,
		socket_id, core_id, ht_id);
	struct rte_ring *rsp_ring = app_thread_msgq_out_get(app,
		socket_id, core_id, ht_id);
	uint64_t tsc_hz = rte_get_tsc_hz();
	uint64_t give_up_at;
	void *reply;

	/* send */
	while (rte_ring_sp_enqueue(req_ring, (void *) msg) == -ENOBUFS)
		;

	/* recv */
	if (timeout_ms)
		give_up_at = rte_rdtsc() + ((tsc_hz * timeout_ms) / 1000);
	else
		give_up_at = UINT64_MAX;

	for ( ; ; ) {
		if (rte_rdtsc() > give_up_at)
			return NULL;

		if (rte_ring_sc_dequeue(rsp_ring, &reply) == 0)
			return reply;
	}
}
THREAD_MSG_REQ_PIPELINE_ENABLE; + req->pipeline_id = pipeline_id; + req->be = p->be; + req->f_run = p_type->be_ops->f_run; + req->f_timer = p_type->be_ops->f_timer; + req->timer_period = p->timer_period; + + rsp = thread_msg_send_recv(app, + socket_id, core_id, hyper_th_id, req, MSG_TIMEOUT_DEFAULT); + if (rsp == NULL) + return -1; + + status = rsp->status; + app_msg_free(app, rsp); + + if (status != 0) + return -1; + + p->enabled = 1; + return 0; +} + +int +app_pipeline_disable(struct app_params *app, + uint32_t socket_id, + uint32_t core_id, + uint32_t hyper_th_id, + uint32_t pipeline_id) +{ + struct thread_pipeline_disable_msg_req *req; + struct thread_pipeline_disable_msg_rsp *rsp; + int thread_id; + struct app_pipeline_data *p; + int status; + + if (app == NULL) + return -1; + + thread_id = cpu_core_map_get_lcore_id(app->core_map, + socket_id, + core_id, + hyper_th_id); + + if ((thread_id < 0) || + ((app->core_mask & (1LLU << thread_id)) == 0)) + return -1; + + if (app_pipeline_data(app, pipeline_id) == NULL) + return -1; + + p = &app->pipeline_data[pipeline_id]; + + if (p->enabled == 0) + return -1; + + req = app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = THREAD_MSG_REQ_PIPELINE_DISABLE; + req->pipeline_id = pipeline_id; + + rsp = thread_msg_send_recv(app, + socket_id, core_id, hyper_th_id, req, MSG_TIMEOUT_DEFAULT); + + if (rsp == NULL) + return -1; + + status = rsp->status; + app_msg_free(app, rsp); + + if (status != 0) + return -1; + + p->enabled = 0; + return 0; +} + +int +app_thread_headroom(struct app_params *app, + uint32_t socket_id, + uint32_t core_id, + uint32_t hyper_th_id) +{ + struct thread_headroom_read_msg_req *req; + struct thread_headroom_read_msg_rsp *rsp; + int thread_id; + int status; + + if (app == NULL) + return -1; + + thread_id = cpu_core_map_get_lcore_id(app->core_map, + socket_id, + core_id, + hyper_th_id); + + if ((thread_id < 0) || + ((app->core_mask & (1LLU << thread_id)) == 0)) + return -1; + + req = 
app_msg_alloc(app); + if (req == NULL) + return -1; + + req->type = THREAD_MSG_REQ_HEADROOM_READ; + + rsp = thread_msg_send_recv(app, + socket_id, core_id, hyper_th_id, req, MSG_TIMEOUT_DEFAULT); + + if (rsp == NULL) + return -1; + + status = rsp->status; + + if (status != 0) + return -1; + + printf("%.3f%%\n", rsp->headroom_ratio * 100); + + + app_msg_free(app, rsp); + + return 0; +} + +/* + * pipeline enable + */ + +struct cmd_pipeline_enable_result { + cmdline_fixed_string_t t_string; + cmdline_fixed_string_t t_id_string; + cmdline_fixed_string_t pipeline_string; + uint32_t pipeline_id; + cmdline_fixed_string_t enable_string; +}; + +static void +cmd_pipeline_enable_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_pipeline_enable_result *params = parsed_result; + struct app_params *app = data; + int status; + uint32_t core_id, socket_id, hyper_th_id; + + if (parse_pipeline_core(&socket_id, + &core_id, + &hyper_th_id, + params->t_id_string) != 0) { + printf("Command failed\n"); + return; + } + + status = app_pipeline_enable(app, + socket_id, + core_id, + hyper_th_id, + params->pipeline_id); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_pipeline_enable_t_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipeline_enable_result, t_string, "t"); + +cmdline_parse_token_string_t cmd_pipeline_enable_t_id_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipeline_enable_result, t_id_string, + NULL); + +cmdline_parse_token_string_t cmd_pipeline_enable_pipeline_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipeline_enable_result, pipeline_string, + "pipeline"); + +cmdline_parse_token_num_t cmd_pipeline_enable_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_pipeline_enable_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_pipeline_enable_enable_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipeline_enable_result, enable_string, + "enable"); + +cmdline_parse_inst_t 
cmd_pipeline_enable = { + .f = cmd_pipeline_enable_parsed, + .data = NULL, + .help_str = "Enable pipeline on specified core", + .tokens = { + (void *)&cmd_pipeline_enable_t_string, + (void *)&cmd_pipeline_enable_t_id_string, + (void *)&cmd_pipeline_enable_pipeline_string, + (void *)&cmd_pipeline_enable_pipeline_id, + (void *)&cmd_pipeline_enable_enable_string, + NULL, + }, +}; + +/* + * pipeline disable + */ + +struct cmd_pipeline_disable_result { + cmdline_fixed_string_t t_string; + cmdline_fixed_string_t t_id_string; + cmdline_fixed_string_t pipeline_string; + uint32_t pipeline_id; + cmdline_fixed_string_t disable_string; +}; + +static void +cmd_pipeline_disable_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_pipeline_disable_result *params = parsed_result; + struct app_params *app = data; + int status; + uint32_t core_id, socket_id, hyper_th_id; + + if (parse_pipeline_core(&socket_id, + &core_id, + &hyper_th_id, + params->t_id_string) != 0) { + printf("Command failed\n"); + return; + } + + status = app_pipeline_disable(app, + socket_id, + core_id, + hyper_th_id, + params->pipeline_id); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_pipeline_disable_t_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipeline_disable_result, t_string, "t"); + +cmdline_parse_token_string_t cmd_pipeline_disable_t_id_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipeline_disable_result, t_id_string, + NULL); + +cmdline_parse_token_string_t cmd_pipeline_disable_pipeline_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipeline_disable_result, + pipeline_string, "pipeline"); + +cmdline_parse_token_num_t cmd_pipeline_disable_pipeline_id = + TOKEN_NUM_INITIALIZER(struct cmd_pipeline_disable_result, pipeline_id, + UINT32); + +cmdline_parse_token_string_t cmd_pipeline_disable_disable_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipeline_disable_result, disable_string, + "disable"); + 
+cmdline_parse_inst_t cmd_pipeline_disable = { + .f = cmd_pipeline_disable_parsed, + .data = NULL, + .help_str = "Disable pipeline on specified core", + .tokens = { + (void *)&cmd_pipeline_disable_t_string, + (void *)&cmd_pipeline_disable_t_id_string, + (void *)&cmd_pipeline_disable_pipeline_string, + (void *)&cmd_pipeline_disable_pipeline_id, + (void *)&cmd_pipeline_disable_disable_string, + NULL, + }, +}; + + +/* + * thread headroom + */ + +struct cmd_thread_headroom_result { + cmdline_fixed_string_t t_string; + cmdline_fixed_string_t t_id_string; + cmdline_fixed_string_t headroom_string; +}; + +static void +cmd_thread_headroom_parsed( + void *parsed_result, + __rte_unused struct cmdline *cl, + void *data) +{ + struct cmd_thread_headroom_result *params = parsed_result; + struct app_params *app = data; + int status; + uint32_t core_id, socket_id, hyper_th_id; + + if (parse_pipeline_core(&socket_id, + &core_id, + &hyper_th_id, + params->t_id_string) != 0) { + printf("Command failed\n"); + return; + } + + status = app_thread_headroom(app, + socket_id, + core_id, + hyper_th_id); + + if (status != 0) + printf("Command failed\n"); +} + +cmdline_parse_token_string_t cmd_thread_headroom_t_string = + TOKEN_STRING_INITIALIZER(struct cmd_thread_headroom_result, + t_string, "t"); + +cmdline_parse_token_string_t cmd_thread_headroom_t_id_string = + TOKEN_STRING_INITIALIZER(struct cmd_thread_headroom_result, + t_id_string, NULL); + +cmdline_parse_token_string_t cmd_thread_headroom_headroom_string = + TOKEN_STRING_INITIALIZER(struct cmd_thread_headroom_result, + headroom_string, "headroom"); + +cmdline_parse_inst_t cmd_thread_headroom = { + .f = cmd_thread_headroom_parsed, + .data = NULL, + .help_str = "Display thread headroom", + .tokens = { + (void *)&cmd_thread_headroom_t_string, + (void *)&cmd_thread_headroom_t_id_string, + (void *)&cmd_thread_headroom_headroom_string, + NULL, + }, +}; + + +static cmdline_parse_ctx_t thread_cmds[] = { + (cmdline_parse_inst_t *) 
&cmd_pipeline_enable, + (cmdline_parse_inst_t *) &cmd_pipeline_disable, + (cmdline_parse_inst_t *) &cmd_thread_headroom, + NULL, +}; + +int +app_pipeline_thread_cmd_push(struct app_params *app) +{ + uint32_t n_cmds, i; + + /* Check for available slots in the application commands array */ + n_cmds = RTE_DIM(thread_cmds) - 1; + if (n_cmds > APP_MAX_CMDS - app->n_cmds) + return -ENOMEM; + + /* Push thread commands into the application */ + memcpy(&app->cmds[app->n_cmds], thread_cmds, + n_cmds * sizeof(cmdline_parse_ctx_t)); + + for (i = 0; i < n_cmds; i++) + app->cmds[app->n_cmds + i]->data = app; + + app->n_cmds += n_cmds; + app->cmds[app->n_cmds] = NULL; + + return 0; +} diff --git a/examples/ip_pipeline/thread_fe.h b/examples/ip_pipeline/thread_fe.h new file mode 100644 index 00000000..2fd4ee8e --- /dev/null +++ b/examples/ip_pipeline/thread_fe.h @@ -0,0 +1,101 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef THREAD_FE_H_ +#define THREAD_FE_H_ + +static inline struct rte_ring * +app_thread_msgq_in_get(struct app_params *app, + uint32_t socket_id, uint32_t core_id, uint32_t ht_id) +{ + char msgq_name[32]; + ssize_t param_idx; + + snprintf(msgq_name, sizeof(msgq_name), + "MSGQ-REQ-CORE-s%" PRIu32 "c%" PRIu32 "%s", + socket_id, + core_id, + (ht_id) ? "h" : ""); + param_idx = APP_PARAM_FIND(app->msgq_params, msgq_name); + + if (param_idx < 0) + return NULL; + + return app->msgq[param_idx]; +} + +static inline struct rte_ring * +app_thread_msgq_out_get(struct app_params *app, + uint32_t socket_id, uint32_t core_id, uint32_t ht_id) +{ + char msgq_name[32]; + ssize_t param_idx; + + snprintf(msgq_name, sizeof(msgq_name), + "MSGQ-RSP-CORE-s%" PRIu32 "c%" PRIu32 "%s", + socket_id, + core_id, + (ht_id) ? 
"h" : ""); + param_idx = APP_PARAM_FIND(app->msgq_params, msgq_name); + + if (param_idx < 0) + return NULL; + + return app->msgq[param_idx]; + +} + +int +app_pipeline_thread_cmd_push(struct app_params *app); + +int +app_pipeline_enable(struct app_params *app, + uint32_t core_id, + uint32_t socket_id, + uint32_t hyper_th_id, + uint32_t pipeline_id); + +int +app_pipeline_disable(struct app_params *app, + uint32_t core_id, + uint32_t socket_id, + uint32_t hyper_th_id, + uint32_t pipeline_id); + +int +app_thread_headroom(struct app_params *app, + uint32_t core_id, + uint32_t socket_id, + uint32_t hyper_th_id); + +#endif /* THREAD_FE_H_ */ diff --git a/examples/ip_reassembly/Makefile b/examples/ip_reassembly/Makefile new file mode 100644 index 00000000..d9539a3a --- /dev/null +++ b/examples/ip_reassembly/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = ip_reassembly + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c new file mode 100644 index 00000000..c27e7353 --- /dev/null +++ b/examples/ip_reassembly/main.c @@ -0,0 +1,1185 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <signal.h> +#include <sys/param.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_string_fns.h> +#include <rte_lpm.h> +#include <rte_lpm6.h> + +#include <rte_ip_frag.h> + +#define MAX_PKT_BURST 32 + + +#define RTE_LOGTYPE_IP_RSMBL RTE_LOGTYPE_USER1 + +#define MAX_JUMBO_PKT_LEN 9600 + +#define BUF_SIZE RTE_MBUF_DEFAULT_DATAROOM +#define MBUF_SIZE \ + (BUF_SIZE + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) + +#define NB_MBUF 8192 + +/* allow max jumbo frame 9.5 KB */ +#define JUMBO_FRAME_MAX_SIZE 0x2600 + +#define MAX_FLOW_NUM UINT16_MAX +#define MIN_FLOW_NUM 1 +#define DEF_FLOW_NUM 0x1000 + +/* TTL numbers are in ms. */ +#define MAX_FLOW_TTL (3600 * MS_PER_S) +#define MIN_FLOW_TTL 1 +#define DEF_FLOW_TTL MS_PER_S + +#define MAX_FRAG_NUM RTE_LIBRTE_IP_FRAG_MAX_FRAG + +/* Should be power of two. 
*/ +#define IP_FRAG_TBL_BUCKET_ENTRIES 16 + +static uint32_t max_flow_num = DEF_FLOW_NUM; +static uint32_t max_flow_ttl = DEF_FLOW_TTL; + +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +#define NB_SOCKETS 8 + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define PREFETCH_OFFSET 3 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +#ifndef IPv4_BYTES +#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8 +#define IPv4_BYTES(addr) \ + (uint8_t) (((addr) >> 24) & 0xFF),\ + (uint8_t) (((addr) >> 16) & 0xFF),\ + (uint8_t) (((addr) >> 8) & 0xFF),\ + (uint8_t) ((addr) & 0xFF) +#endif + +#ifndef IPv6_BYTES +#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ + "%02x%02x:%02x%02x:%02x%02x:%02x%02x" +#define IPv6_BYTES(addr) \ + addr[0], addr[1], addr[2], addr[3], \ + addr[4], addr[5], addr[6], addr[7], \ + addr[8], addr[9], addr[10], addr[11],\ + addr[12], addr[13],addr[14], addr[15] +#endif + +#define IPV6_ADDR_LEN 16 + +/* mask of enabled ports */ +static uint32_t enabled_port_mask = 0; + +static int rx_queue_per_lcore = 1; + +struct mbuf_table { + uint32_t len; + uint32_t head; + uint32_t tail; + struct rte_mbuf *m_table[0]; +}; + +struct rx_queue { + struct rte_ip_frag_tbl *frag_tbl; + struct rte_mempool *pool; + struct rte_lpm *lpm; + struct rte_lpm6 *lpm6; + uint8_t portid; +}; + +struct tx_lcore_stat { + uint64_t call; + uint64_t drop; + uint64_t queue; + uint64_t send; +}; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +#define MAX_RX_QUEUE_PER_PORT 128 + +struct lcore_queue_conf { + uint16_t n_rx_queue; + struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t 
tx_queue_id[RTE_MAX_ETHPORTS]; + struct rte_ip_frag_death_row death_row; + struct mbuf_table *tx_mbufs[RTE_MAX_ETHPORTS]; + struct tx_lcore_stat tx_stat; +} __rte_cache_aligned; +static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 1, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +/* + * IPv4 forwarding table + */ +struct l3fwd_ipv4_route { + uint32_t ip; + uint8_t depth; + uint8_t if_out; +}; + +struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = { + {IPv4(100,10,0,0), 16, 0}, + {IPv4(100,20,0,0), 16, 1}, + {IPv4(100,30,0,0), 16, 2}, + {IPv4(100,40,0,0), 16, 3}, + {IPv4(100,50,0,0), 16, 4}, + {IPv4(100,60,0,0), 16, 5}, + {IPv4(100,70,0,0), 16, 6}, + {IPv4(100,80,0,0), 16, 7}, +}; + +/* + * IPv6 forwarding table + */ + +struct l3fwd_ipv6_route { + uint8_t ip[IPV6_ADDR_LEN]; + uint8_t depth; + uint8_t if_out; +}; + +static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = { + {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0}, + {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1}, + {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2}, + {{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3}, + {{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4}, + {{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5}, + {{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6}, + {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7}, +}; + +#define LPM_MAX_RULES 1024 +#define LPM6_MAX_RULES 1024 +#define LPM6_NUMBER_TBL8S (1 << 16) + +struct rte_lpm6_config lpm6_config = { + .max_rules = LPM6_MAX_RULES, + .number_tbl8s = LPM6_NUMBER_TBL8S, 
+ .flags = 0 +}; + +static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES]; +static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES]; + +#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT +#define TX_LCORE_STAT_UPDATE(s, f, v) ((s)->f += (v)) +#else +#define TX_LCORE_STAT_UPDATE(s, f, v) do {} while (0) +#endif /* RTE_LIBRTE_IP_FRAG_TBL_STAT */ + +/* + * If number of queued packets reached given threahold, then + * send burst of packets on an output interface. + */ +static inline uint32_t +send_burst(struct lcore_queue_conf *qconf, uint32_t thresh, uint8_t port) +{ + uint32_t fill, len, k, n; + struct mbuf_table *txmb; + + txmb = qconf->tx_mbufs[port]; + len = txmb->len; + + if ((int32_t)(fill = txmb->head - txmb->tail) < 0) + fill += len; + + if (fill >= thresh) { + n = RTE_MIN(len - txmb->tail, fill); + + k = rte_eth_tx_burst(port, qconf->tx_queue_id[port], + txmb->m_table + txmb->tail, (uint16_t)n); + + TX_LCORE_STAT_UPDATE(&qconf->tx_stat, call, 1); + TX_LCORE_STAT_UPDATE(&qconf->tx_stat, send, k); + + fill -= k; + if ((txmb->tail += k) == len) + txmb->tail = 0; + } + + return fill; +} + +/* Enqueue a single packet, and send burst if queue is filled */ +static inline int +send_single_packet(struct rte_mbuf *m, uint8_t port) +{ + uint32_t fill, lcore_id, len; + struct lcore_queue_conf *qconf; + struct mbuf_table *txmb; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + txmb = qconf->tx_mbufs[port]; + len = txmb->len; + + fill = send_burst(qconf, MAX_PKT_BURST, port); + + if (fill == len - 1) { + TX_LCORE_STAT_UPDATE(&qconf->tx_stat, drop, 1); + rte_pktmbuf_free(txmb->m_table[txmb->tail]); + if (++txmb->tail == len) + txmb->tail = 0; + } + + TX_LCORE_STAT_UPDATE(&qconf->tx_stat, queue, 1); + txmb->m_table[txmb->head] = m; + if(++txmb->head == len) + txmb->head = 0; + + return 0; +} + +static inline void +reassemble(struct rte_mbuf *m, uint8_t portid, uint32_t queue, + struct lcore_queue_conf *qconf, uint64_t tms) +{ + struct ether_hdr *eth_hdr; + struct 
rte_ip_frag_tbl *tbl; + struct rte_ip_frag_death_row *dr; + struct rx_queue *rxq; + void *d_addr_bytes; + uint32_t next_hop_ipv4; + uint8_t next_hop_ipv6, dst_port; + + rxq = &qconf->rx_queue_list[queue]; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + dst_port = portid; + + /* if packet is IPv4 */ + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + struct ipv4_hdr *ip_hdr; + uint32_t ip_dst; + + ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + + /* if it is a fragmented packet, then try to reassemble. */ + if (rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) { + struct rte_mbuf *mo; + + tbl = rxq->frag_tbl; + dr = &qconf->death_row; + + /* prepare mbuf: setup l2_len/l3_len. */ + m->l2_len = sizeof(*eth_hdr); + m->l3_len = sizeof(*ip_hdr); + + /* process this fragment. */ + mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr); + if (mo == NULL) + /* no packet to send out. */ + return; + + /* we have our packet reassembled. */ + if (mo != m) { + m = mo; + eth_hdr = rte_pktmbuf_mtod(m, + struct ether_hdr *); + ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + } + } + ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr); + + /* Find destination port */ + if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop_ipv4) == 0 && + (enabled_port_mask & 1 << next_hop_ipv4) != 0) { + dst_port = next_hop_ipv4; + } + + eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4); + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + /* if packet is IPv6 */ + struct ipv6_extension_fragment *frag_hdr; + struct ipv6_hdr *ip_hdr; + + ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + + frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr); + + if (frag_hdr != NULL) { + struct rte_mbuf *mo; + + tbl = rxq->frag_tbl; + dr = &qconf->death_row; + + /* prepare mbuf: setup l2_len/l3_len. 
*/ + m->l2_len = sizeof(*eth_hdr); + m->l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr); + + mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, frag_hdr); + if (mo == NULL) + return; + + if (mo != m) { + m = mo; + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + } + } + + /* Find destination port */ + if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr, &next_hop_ipv6) == 0 && + (enabled_port_mask & 1 << next_hop_ipv6) != 0) { + dst_port = next_hop_ipv6; + } + + eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv6); + } + /* if packet wasn't IPv4 or IPv6, it's forwarded to the port it came from */ + + /* 02:00:00:00:00:xx */ + d_addr_bytes = ð_hdr->d_addr.addr_bytes[0]; + *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr); + + send_single_packet(m, dst_port); +} + +/* main processing loop */ +static int +main_loop(__attribute__((unused)) void *dummy) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + uint64_t diff_tsc, cur_tsc, prev_tsc; + int i, j, nb_rx; + uint8_t portid; + struct lcore_queue_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + if (qconf->n_rx_queue == 0) { + RTE_LOG(INFO, IP_RSMBL, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, IP_RSMBL, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i].portid; + RTE_LOG(INFO, IP_RSMBL, " -- lcoreid=%u portid=%hhu\n", lcore_id, + portid); + } + + while (1) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + /* + * This could be optimized (use queueid instead of + * portid), but it is not called 
so often + */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + if ((enabled_port_mask & (1 << portid)) != 0) + send_burst(qconf, 1, portid); + } + + prev_tsc = cur_tsc; + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_queue; ++i) { + + portid = qconf->rx_queue_list[i].portid; + + nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst, + MAX_PKT_BURST); + + /* Prefetch first packets */ + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { + rte_prefetch0(rte_pktmbuf_mtod( + pkts_burst[j], void *)); + } + + /* Prefetch and forward already prefetched packets */ + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ + j + PREFETCH_OFFSET], void *)); + reassemble(pkts_burst[j], portid, + i, qconf, cur_tsc); + } + + /* Forward remaining prefetched packets */ + for (; j < nb_rx; j++) { + reassemble(pkts_burst[j], portid, + i, qconf, cur_tsc); + } + + rte_ip_frag_free_death_row(&qconf->death_row, + PREFETCH_OFFSET); + } + } +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK [-q NQ]" + " [--max-pkt-len PKTLEN]" + " [--maxflows=<flows>] [--flowttl=<ttl>[(s|ms)]]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -q NQ: number of RX queues per lcore\n" + " --maxflows=<flows>: optional, maximum number of flows " + "supported\n" + " --flowttl=<ttl>[(s|ms)]: optional, maximum TTL for each " + "flow\n", + prgname); +} + +static uint32_t +parse_flow_num(const char *str, uint32_t min, uint32_t max, uint32_t *val) +{ + char *end; + uint64_t v; + + /* parse decimal string */ + errno = 0; + v = strtoul(str, &end, 10); + if (errno != 0 || *end != '\0') + return -EINVAL; + + if (v < min || v > max) + return -EINVAL; + + *val = (uint32_t)v; + return 0; +} + +static int +parse_flow_ttl(const char *str, uint32_t min, uint32_t max, uint32_t *val) +{ + char *end; + uint64_t v; + + static const char frmt_sec[] = "s"; + static const char 
frmt_msec[] = "ms"; + + /* parse decimal string */ + errno = 0; + v = strtoul(str, &end, 10); + if (errno != 0) + return -EINVAL; + + if (*end != '\0') { + if (strncmp(frmt_sec, end, sizeof(frmt_sec)) == 0) + v *= MS_PER_S; + else if (strncmp(frmt_msec, end, sizeof (frmt_msec)) != 0) + return -EINVAL; + } + + if (v < min || v > max) + return -EINVAL; + + *val = (uint32_t)v; + return 0; +} + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +static int +parse_nqueue(const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + printf("%p\n", q_arg); + + /* parse hexadecimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n == 0) + return -1; + if (n >= MAX_RX_QUEUE_PER_LCORE) + return -1; + + return n; +} + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {"max-pkt-len", 1, 0, 0}, + {"maxflows", 1, 0, 0}, + {"flowttl", 1, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:q:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + + /* nqueue */ + case 'q': + rx_queue_per_lcore = parse_nqueue(optarg); + if (rx_queue_per_lcore < 0) { + printf("invalid queue number\n"); + print_usage(prgname); + return -1; + } + break; + + /* long options */ + case 0: + if (!strncmp(lgopts[option_index].name, + "maxflows", 8)) { + if ((ret = 
parse_flow_num(optarg, MIN_FLOW_NUM, + MAX_FLOW_NUM, + &max_flow_num)) != 0) { + printf("invalid value: \"%s\" for " + "parameter %s\n", + optarg, + lgopts[option_index].name); + print_usage(prgname); + return ret; + } + } + + if (!strncmp(lgopts[option_index].name, "flowttl", 7)) { + if ((ret = parse_flow_ttl(optarg, MIN_FLOW_TTL, + MAX_FLOW_TTL, + &max_flow_ttl)) != 0) { + printf("invalid value: \"%s\" for " + "parameter %s\n", + optarg, + lgopts[option_index].name); + print_usage(prgname); + return ret; + } + } + + break; + + default: + print_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, const struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("\ndone\n"); + } + } +} + +static int +init_routing_table(void) +{ + struct rte_lpm *lpm; + struct rte_lpm6 *lpm6; + int socket, ret; + unsigned i; + + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + if (socket_lpm[socket]) { + lpm = socket_lpm[socket]; + /* populate the LPM table */ + for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) { + ret = rte_lpm_add(lpm, + l3fwd_ipv4_route_array[i].ip, + l3fwd_ipv4_route_array[i].depth, + l3fwd_ipv4_route_array[i].if_out); + + if (ret < 0) { + RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd " + "LPM table\n", i); + return -1; + } + + RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv4_BYTES_FMT + "/%d (port %d)\n", + socket, + IPv4_BYTES(l3fwd_ipv4_route_array[i].ip), + l3fwd_ipv4_route_array[i].depth, + l3fwd_ipv4_route_array[i].if_out); + } + } + + if (socket_lpm6[socket]) { + lpm6 = socket_lpm6[socket]; + /* populate the LPM6 table */ + for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) { + ret = rte_lpm6_add(lpm6, + l3fwd_ipv6_route_array[i].ip, + l3fwd_ipv6_route_array[i].depth, + l3fwd_ipv6_route_array[i].if_out); + + if (ret < 0) { + RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd " + "LPM6 table\n", i); + return -1; + } + + RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv6_BYTES_FMT + "/%d (port %d)\n", + socket, + IPv6_BYTES(l3fwd_ipv6_route_array[i].ip), + l3fwd_ipv6_route_array[i].depth, + 
l3fwd_ipv6_route_array[i].if_out); + } + } + } + return 0; +} + +static int +setup_port_tbl(struct lcore_queue_conf *qconf, uint32_t lcore, int socket, + uint32_t port) +{ + struct mbuf_table *mtb; + uint32_t n; + size_t sz; + + n = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST); + sz = sizeof (*mtb) + sizeof (mtb->m_table[0]) * n; + + if ((mtb = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE, + socket)) == NULL) { + RTE_LOG(ERR, IP_RSMBL, "%s() for lcore: %u, port: %u " + "failed to allocate %zu bytes\n", + __func__, lcore, port, sz); + return -1; + } + + mtb->len = n; + qconf->tx_mbufs[port] = mtb; + + return 0; +} + +static int +setup_queue_tbl(struct rx_queue *rxq, uint32_t lcore, uint32_t queue) +{ + int socket; + uint32_t nb_mbuf; + uint64_t frag_cycles; + char buf[RTE_MEMPOOL_NAMESIZE]; + + socket = rte_lcore_to_socket_id(lcore); + if (socket == SOCKET_ID_ANY) + socket = 0; + + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * + max_flow_ttl; + + if ((rxq->frag_tbl = rte_ip_frag_table_create(max_flow_num, + IP_FRAG_TBL_BUCKET_ENTRIES, max_flow_num, frag_cycles, + socket)) == NULL) { + RTE_LOG(ERR, IP_RSMBL, "ip_frag_tbl_create(%u) on " + "lcore: %u for queue: %u failed\n", + max_flow_num, lcore, queue); + return -1; + } + + /* + * At any given moment up to <max_flow_num * (MAX_FRAG_NUM)> + * mbufs could be stored in the fragment table. + * Plus, each TX queue can hold up to <max_flow_num> packets. 
+ */ + + nb_mbuf = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM; + nb_mbuf *= (port_conf.rxmode.max_rx_pkt_len + BUF_SIZE - 1) / BUF_SIZE; + nb_mbuf *= 2; /* ipv4 and ipv6 */ + nb_mbuf += RTE_TEST_RX_DESC_DEFAULT + RTE_TEST_TX_DESC_DEFAULT; + + nb_mbuf = RTE_MAX(nb_mbuf, (uint32_t)NB_MBUF); + + snprintf(buf, sizeof(buf), "mbuf_pool_%u_%u", lcore, queue); + + if ((rxq->pool = rte_mempool_create(buf, nb_mbuf, MBUF_SIZE, 0, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL, + socket, MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET)) == NULL) { + RTE_LOG(ERR, IP_RSMBL, "mempool_create(%s) failed", buf); + return -1; + } + + return 0; +} + +static int +init_mem(void) +{ + char buf[PATH_MAX]; + struct rte_lpm *lpm; + struct rte_lpm6 *lpm6; + struct rte_lpm_config lpm_config; + int socket; + unsigned lcore_id; + + /* traverse through lcores and initialize structures on each socket */ + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + socket = rte_lcore_to_socket_id(lcore_id); + + if (socket == SOCKET_ID_ANY) + socket = 0; + + if (socket_lpm[socket] == NULL) { + RTE_LOG(INFO, IP_RSMBL, "Creating LPM table on socket %i\n", socket); + snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket); + + lpm_config.max_rules = LPM_MAX_RULES; + lpm_config.number_tbl8s = 256; + lpm_config.flags = 0; + + lpm = rte_lpm_create(buf, socket, &lpm_config); + if (lpm == NULL) { + RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n"); + return -1; + } + socket_lpm[socket] = lpm; + } + + if (socket_lpm6[socket] == NULL) { + RTE_LOG(INFO, IP_RSMBL, "Creating LPM6 table on socket %i\n", socket); + snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket); + + lpm6 = rte_lpm6_create("IP_RSMBL_LPM6", socket, &lpm6_config); + if (lpm6 == NULL) { + RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n"); + return -1; + } + socket_lpm6[socket] = lpm6; + } + } + + return 0; +} + +static void 
+queue_dump_stat(void) +{ + uint32_t i, lcore; + const struct lcore_queue_conf *qconf; + + for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) { + if (rte_lcore_is_enabled(lcore) == 0) + continue; + + qconf = &lcore_queue_conf[lcore]; + for (i = 0; i < qconf->n_rx_queue; i++) { + + fprintf(stdout, " -- lcoreid=%u portid=%hhu " + "frag tbl stat:\n", + lcore, qconf->rx_queue_list[i].portid); + rte_ip_frag_table_statistics_dump(stdout, + qconf->rx_queue_list[i].frag_tbl); + fprintf(stdout, "TX bursts:\t%" PRIu64 "\n" + "TX packets _queued:\t%" PRIu64 "\n" + "TX packets dropped:\t%" PRIu64 "\n" + "TX packets send:\t%" PRIu64 "\n", + qconf->tx_stat.call, + qconf->tx_stat.queue, + qconf->tx_stat.drop, + qconf->tx_stat.send); + } + } +} + +static void +signal_handler(int signum) +{ + queue_dump_stat(); + if (signum != SIGUSR1) + rte_exit(0, "received signal: %d, exiting\n", signum); +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + struct rx_queue *rxq; + int ret, socket; + unsigned nb_ports; + uint16_t queueid; + unsigned lcore_id = 0, rx_lcore_id = 0; + uint32_t n_tx_queue, nb_lcores; + uint8_t portid; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid IP reassembly parameters\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + else if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No ports found!\n"); + + nb_lcores = rte_lcore_count(); + + /* initialize structures (mempools, lpm etc.) 
*/ + if (init_mem() < 0) + rte_panic("Cannot initialize memory structures!\n"); + + /* check if portmask has non-existent ports */ + if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned))) + rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n"); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + + qconf = &lcore_queue_conf[rx_lcore_id]; + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) { + + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + + qconf = &lcore_queue_conf[rx_lcore_id]; + } + + socket = rte_lcore_to_socket_id(portid); + if (socket == SOCKET_ID_ANY) + socket = 0; + + queueid = qconf->n_rx_queue; + rxq = &qconf->rx_queue_list[queueid]; + rxq->portid = portid; + rxq->lpm = socket_lpm[socket]; + rxq->lpm6 = socket_lpm6[socket]; + if (setup_queue_tbl(rxq, rx_lcore_id, queueid) < 0) + rte_exit(EXIT_FAILURE, "Failed to set up queue table\n"); + qconf->n_rx_queue++; + + /* init port */ + printf("Initializing port %d ... 
", portid ); + fflush(stdout); + + n_tx_queue = nb_lcores; + if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) + n_tx_queue = MAX_TX_QUEUE_PER_PORT; + ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue, + &port_conf); + if (ret < 0) { + printf("\n"); + rte_exit(EXIT_FAILURE, "Cannot configure device: " + "err=%d, port=%d\n", + ret, portid); + } + + /* init one RX queue */ + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + socket, NULL, + rxq->pool); + if (ret < 0) { + printf("\n"); + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: " + "err=%d, port=%d\n", + ret, portid); + } + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + printf("\n"); + + /* init one TX queue per couple (lcore,port) */ + queueid = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + socket = (int) rte_lcore_to_socket_id(lcore_id); + + printf("txq=%u,%d,%d ", lcore_id, queueid, socket); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + txconf->txq_flags = 0; + + ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, + socket, txconf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + + qconf = &lcore_queue_conf[lcore_id]; + qconf->tx_queue_id[portid] = queueid; + setup_port_tbl(qconf, lcore_id, socket, portid); + queueid++; + } + printf("\n"); + } + + printf("\n"); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) { + continue; + } + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + + rte_eth_promiscuous_enable(portid); + } + + if (init_routing_table() < 0) + rte_exit(EXIT_FAILURE, "Cannot init routing table\n"); + + check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); + + 
signal(SIGUSR1, signal_handler); + signal(SIGTERM, signal_handler); + signal(SIGINT, signal_handler); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/ipsec-secgw/Makefile b/examples/ipsec-secgw/Makefile new file mode 100644 index 00000000..f9b59c22 --- /dev/null +++ b/examples/ipsec-secgw/Makefile @@ -0,0 +1,62 @@ +# BSD LICENSE +# +# Copyright(c) 2016 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) + $(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +APP = ipsec-secgw + +CFLAGS += -O3 -gdwarf-2 +CFLAGS += $(WERROR_FLAGS) +ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) +CFLAGS_sa.o += -diag-disable=vec +endif + + +VPATH += $(SRCDIR)/librte_ipsec + +# +# all source are stored in SRCS-y +# +SRCS-y += ipsec.c +SRCS-y += esp.c +SRCS-y += sp.c +SRCS-y += sa.c +SRCS-y += rt.c +SRCS-y += ipsec-secgw.c + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/ipsec-secgw/esp.c b/examples/ipsec-secgw/esp.c new file mode 100644 index 00000000..19273807 --- /dev/null +++ b/examples/ipsec-secgw/esp.c @@ -0,0 +1,251 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <fcntl.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_memcpy.h> +#include <rte_crypto.h> +#include <rte_cryptodev.h> +#include <rte_random.h> + +#include "ipsec.h" +#include "esp.h" +#include "ipip.h" + +#define IP_ESP_HDR_SZ (sizeof(struct ip) + sizeof(struct esp_hdr)) + +static inline void +random_iv_u64(uint64_t *buf, uint16_t n) +{ + unsigned left = n & 0x7; + unsigned i; + + IPSEC_ASSERT((n & 0x3) == 0); + + for (i = 0; i < (n >> 3); i++) + buf[i] = rte_rand(); + + if (left) + *((uint32_t *)&buf[i]) = (uint32_t)lrand48(); +} + +/* IPv4 Tunnel */ +int +esp4_tunnel_inbound_pre_crypto(struct rte_mbuf *m, struct ipsec_sa *sa, + struct rte_crypto_op *cop) +{ + int32_t payload_len; + struct rte_crypto_sym_op *sym_cop; + + IPSEC_ASSERT(m != NULL); + IPSEC_ASSERT(sa != NULL); + IPSEC_ASSERT(cop != NULL); + + payload_len = rte_pktmbuf_pkt_len(m) - IP_ESP_HDR_SZ - sa->iv_len - + sa->digest_len; + + if ((payload_len & (sa->block_size - 1)) || (payload_len <= 0)) { + IPSEC_LOG(DEBUG, IPSEC_ESP, "payload %d not multiple of %u\n", + payload_len, sa->block_size); + return -EINVAL; + } + + sym_cop = (struct rte_crypto_sym_op *)(cop + 1); + + sym_cop->m_src = m; + sym_cop->cipher.data.offset = IP_ESP_HDR_SZ + sa->iv_len; + sym_cop->cipher.data.length = payload_len; + + sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(m, void*, + IP_ESP_HDR_SZ); + sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(m, + IP_ESP_HDR_SZ); + sym_cop->cipher.iv.length = sa->iv_len; + + sym_cop->auth.data.offset = sizeof(struct ip); + if (sa->auth_algo == RTE_CRYPTO_AUTH_AES_GCM) + sym_cop->auth.data.length = sizeof(struct esp_hdr); + else + sym_cop->auth.data.length = sizeof(struct esp_hdr) + + sa->iv_len + payload_len; + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(m, void*, + rte_pktmbuf_pkt_len(m) - 
sa->digest_len); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, + rte_pktmbuf_pkt_len(m) - sa->digest_len); + sym_cop->auth.digest.length = sa->digest_len; + + return 0; +} + +int +esp4_tunnel_inbound_post_crypto(struct rte_mbuf *m, struct ipsec_sa *sa, + struct rte_crypto_op *cop) +{ + uint8_t *nexthdr, *pad_len; + uint8_t *padding; + uint16_t i; + + IPSEC_ASSERT(m != NULL); + IPSEC_ASSERT(sa != NULL); + IPSEC_ASSERT(cop != NULL); + + if (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { + IPSEC_LOG(ERR, IPSEC_ESP, "Failed crypto op\n"); + return -1; + } + + nexthdr = rte_pktmbuf_mtod_offset(m, uint8_t*, + rte_pktmbuf_pkt_len(m) - sa->digest_len - 1); + pad_len = nexthdr - 1; + + padding = pad_len - *pad_len; + for (i = 0; i < *pad_len; i++) { + if (padding[i] != i) { + IPSEC_LOG(ERR, IPSEC_ESP, "invalid pad_len field\n"); + return -EINVAL; + } + } + + if (rte_pktmbuf_trim(m, *pad_len + 2 + sa->digest_len)) { + IPSEC_LOG(ERR, IPSEC_ESP, + "failed to remove pad_len + digest\n"); + return -EINVAL; + } + + return ip4ip_inbound(m, sizeof(struct esp_hdr) + sa->iv_len); +} + +int +esp4_tunnel_outbound_pre_crypto(struct rte_mbuf *m, struct ipsec_sa *sa, + struct rte_crypto_op *cop) +{ + uint16_t pad_payload_len, pad_len; + struct ip *ip; + struct esp_hdr *esp; + int i; + char *padding; + struct rte_crypto_sym_op *sym_cop; + + IPSEC_ASSERT(m != NULL); + IPSEC_ASSERT(sa != NULL); + IPSEC_ASSERT(cop != NULL); + + /* Payload length */ + pad_payload_len = RTE_ALIGN_CEIL(rte_pktmbuf_pkt_len(m) + 2, + sa->block_size); + pad_len = pad_payload_len - rte_pktmbuf_pkt_len(m); + + rte_prefetch0(rte_pktmbuf_mtod_offset(m, void *, + rte_pktmbuf_pkt_len(m))); + + /* Check maximum packet size */ + if (unlikely(IP_ESP_HDR_SZ + sa->iv_len + pad_payload_len + + sa->digest_len > IP_MAXPACKET)) { + IPSEC_LOG(DEBUG, IPSEC_ESP, "ipsec packet is too big\n"); + return -EINVAL; + } + + padding = rte_pktmbuf_append(m, pad_len + sa->digest_len); + + IPSEC_ASSERT(padding != NULL); + + 
ip = ip4ip_outbound(m, sizeof(struct esp_hdr) + sa->iv_len, + sa->src, sa->dst); + + esp = (struct esp_hdr *)(ip + 1); + esp->spi = sa->spi; + esp->seq = htonl(sa->seq++); + + IPSEC_LOG(DEBUG, IPSEC_ESP, "pktlen %u\n", rte_pktmbuf_pkt_len(m)); + + /* Fill pad_len using default sequential scheme */ + for (i = 0; i < pad_len - 2; i++) + padding[i] = i + 1; + + padding[pad_len - 2] = pad_len - 2; + padding[pad_len - 1] = IPPROTO_IPIP; + + sym_cop = (struct rte_crypto_sym_op *)(cop + 1); + + sym_cop->m_src = m; + sym_cop->cipher.data.offset = IP_ESP_HDR_SZ + sa->iv_len; + sym_cop->cipher.data.length = pad_payload_len; + + sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(m, uint8_t *, + IP_ESP_HDR_SZ); + sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(m, + IP_ESP_HDR_SZ); + sym_cop->cipher.iv.length = sa->iv_len; + + sym_cop->auth.data.offset = sizeof(struct ip); + sym_cop->auth.data.length = sizeof(struct esp_hdr) + sa->iv_len + + pad_payload_len; + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(m, uint8_t *, + IP_ESP_HDR_SZ + sa->iv_len + pad_payload_len); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, + IP_ESP_HDR_SZ + sa->iv_len + pad_payload_len); + sym_cop->auth.digest.length = sa->digest_len; + + if (sa->cipher_algo == RTE_CRYPTO_CIPHER_AES_CBC) + random_iv_u64((uint64_t *)sym_cop->cipher.iv.data, + sym_cop->cipher.iv.length); + + return 0; +} + +int +esp4_tunnel_outbound_post_crypto(struct rte_mbuf *m __rte_unused, + struct ipsec_sa *sa __rte_unused, + struct rte_crypto_op *cop) +{ + IPSEC_ASSERT(m != NULL); + IPSEC_ASSERT(sa != NULL); + IPSEC_ASSERT(cop != NULL); + + if (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { + IPSEC_LOG(ERR, IPSEC_ESP, "Failed crypto op\n"); + return -1; + } + + return 0; +} diff --git a/examples/ipsec-secgw/esp.h b/examples/ipsec-secgw/esp.h new file mode 100644 index 00000000..31018823 --- /dev/null +++ b/examples/ipsec-secgw/esp.h @@ -0,0 +1,66 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 
Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef __RTE_IPSEC_XFORM_ESP_H__ +#define __RTE_IPSEC_XFORM_ESP_H__ + +struct mbuf; + +/* RFC4303 */ +struct esp_hdr { + uint32_t spi; + uint32_t seq; + /* Payload */ + /* Padding */ + /* Pad Length */ + /* Next Header */ + /* Integrity Check Value - ICV */ +}; + +/* IPv4 Tunnel */ +int +esp4_tunnel_inbound_pre_crypto(struct rte_mbuf *m, struct ipsec_sa *sa, + struct rte_crypto_op *cop); + +int +esp4_tunnel_inbound_post_crypto(struct rte_mbuf *m, struct ipsec_sa *sa, + struct rte_crypto_op *cop); + +int +esp4_tunnel_outbound_pre_crypto(struct rte_mbuf *m, struct ipsec_sa *sa, + struct rte_crypto_op *cop); + +int +esp4_tunnel_outbound_post_crypto(struct rte_mbuf *m, struct ipsec_sa *sa, + struct rte_crypto_op *cop); + +#endif /* __RTE_IPSEC_XFORM_ESP_H__ */ diff --git a/examples/ipsec-secgw/ipip.h b/examples/ipsec-secgw/ipip.h new file mode 100644 index 00000000..322076ce --- /dev/null +++ b/examples/ipsec-secgw/ipip.h @@ -0,0 +1,103 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IPIP_H__ +#define __IPIP_H__ + +#include <stdint.h> +#include <netinet/in.h> +#include <netinet/ip.h> + +#include <rte_mbuf.h> + +#define IPV6_VERSION (6) + +static inline struct ip * +ip4ip_outbound(struct rte_mbuf *m, uint32_t offset, uint32_t src, uint32_t dst) +{ + struct ip *inip, *outip; + + inip = rte_pktmbuf_mtod(m, struct ip*); + + IPSEC_ASSERT(inip->ip_v == IPVERSION || inip->ip_v == IPV6_VERSION); + + offset += sizeof(struct ip); + + outip = (struct ip *)rte_pktmbuf_prepend(m, offset); + + IPSEC_ASSERT(outip != NULL); + + /* Per RFC4301 5.1.2.1 */ + outip->ip_v = IPVERSION; + outip->ip_hl = 5; + outip->ip_tos = inip->ip_tos; + outip->ip_len = htons(rte_pktmbuf_data_len(m)); + + outip->ip_id = 0; + outip->ip_off = 0; + + outip->ip_ttl = IPDEFTTL; + outip->ip_p = IPPROTO_ESP; + + outip->ip_src.s_addr = src; + outip->ip_dst.s_addr = dst; + + return outip; +} + +static inline int +ip4ip_inbound(struct rte_mbuf *m, uint32_t offset) +{ + struct ip *inip; + struct ip *outip; + + outip = rte_pktmbuf_mtod(m, struct ip*); + + IPSEC_ASSERT(outip->ip_v == IPVERSION); + + offset += sizeof(struct ip); + inip = (struct ip *)rte_pktmbuf_adj(m, offset); + 
IPSEC_ASSERT(inip->ip_v == IPVERSION || inip->ip_v == IPV6_VERSION); + + /* Check packet is still bigger than IP header (inner) */ + IPSEC_ASSERT(rte_pktmbuf_pkt_len(m) > sizeof(struct ip)); + + /* RFC4301 5.1.2.1 Note 6 */ + if ((inip->ip_tos & htons(IPTOS_ECN_ECT0 | IPTOS_ECN_ECT1)) && + ((outip->ip_tos & htons(IPTOS_ECN_CE)) == IPTOS_ECN_CE)) + inip->ip_tos |= htons(IPTOS_ECN_CE); + + return 0; +} + +#endif /* __IPIP_H__ */ diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c new file mode 100644 index 00000000..00ab2d84 --- /dev/null +++ b/examples/ipsec-secgw/ipsec-secgw.c @@ -0,0 +1,1362 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_eal.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_acl.h> +#include <rte_lpm.h> +#include <rte_hash.h> +#include <rte_jhash.h> +#include <rte_cryptodev.h> + +#include "ipsec.h" + +#define RTE_LOGTYPE_IPSEC RTE_LOGTYPE_USER1 + +#define MAX_JUMBO_PKT_LEN 9600 + +#define MEMPOOL_CACHE_SIZE 256 + +#define NB_MBUF (32000) + +#define CDEV_MAP_ENTRIES 1024 +#define CDEV_MP_NB_OBJS 2048 +#define CDEV_MP_CACHE_SZ 64 +#define MAX_QUEUE_PAIRS 1 + +#define OPTION_CONFIG "config" +#define OPTION_SINGLE_SA "single-sa" +#define OPTION_EP0 "ep0" +#define OPTION_EP1 "ep1" + +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +#define NB_SOCKETS 4 + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define 
PREFETCH_OFFSET 3 + +#define MAX_RX_QUEUE_PER_LCORE 16 + +#define MAX_LCORE_PARAMS 1024 + +#define UNPROTECTED_PORT(port) (unprotected_port_mask & (1 << portid)) + +/* + * Configurable number of RX/TX ring descriptors + */ +#define IPSEC_SECGW_RX_DESC_DEFAULT 128 +#define IPSEC_SECGW_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = IPSEC_SECGW_RX_DESC_DEFAULT; +static uint16_t nb_txd = IPSEC_SECGW_TX_DESC_DEFAULT; + +#if RTE_BYTE_ORDER != RTE_LITTLE_ENDIAN +#define __BYTES_TO_UINT64(a, b, c, d, e, f, g, h) \ + (((uint64_t)((a) & 0xff) << 56) | \ + ((uint64_t)((b) & 0xff) << 48) | \ + ((uint64_t)((c) & 0xff) << 40) | \ + ((uint64_t)((d) & 0xff) << 32) | \ + ((uint64_t)((e) & 0xff) << 24) | \ + ((uint64_t)((f) & 0xff) << 16) | \ + ((uint64_t)((g) & 0xff) << 8) | \ + ((uint64_t)(h) & 0xff)) +#else +#define __BYTES_TO_UINT64(a, b, c, d, e, f, g, h) \ + (((uint64_t)((h) & 0xff) << 56) | \ + ((uint64_t)((g) & 0xff) << 48) | \ + ((uint64_t)((f) & 0xff) << 40) | \ + ((uint64_t)((e) & 0xff) << 32) | \ + ((uint64_t)((d) & 0xff) << 24) | \ + ((uint64_t)((c) & 0xff) << 16) | \ + ((uint64_t)((b) & 0xff) << 8) | \ + ((uint64_t)(a) & 0xff)) +#endif +#define ETHADDR(a, b, c, d, e, f) (__BYTES_TO_UINT64(a, b, c, d, e, f, 0, 0)) + +#define ETHADDR_TO_UINT64(addr) __BYTES_TO_UINT64( \ + addr.addr_bytes[0], addr.addr_bytes[1], \ + addr.addr_bytes[2], addr.addr_bytes[3], \ + addr.addr_bytes[4], addr.addr_bytes[5], \ + 0, 0) + +/* port/source ethernet addr and destination ethernet addr */ +struct ethaddr_info { + uint64_t src, dst; +}; + +struct ethaddr_info ethaddr_tbl[RTE_MAX_ETHPORTS] = { + { 0, ETHADDR(0x00, 0x16, 0x3e, 0x7e, 0x94, 0x9a) }, + { 0, ETHADDR(0x00, 0x16, 0x3e, 0x22, 0xa1, 0xd9) }, + { 0, ETHADDR(0x00, 0x16, 0x3e, 0x08, 0x69, 0x26) }, + { 0, ETHADDR(0x00, 0x16, 0x3e, 0x49, 0x9e, 0xdd) } +}; + +/* mask of enabled ports */ +static uint32_t enabled_port_mask; +static uint32_t unprotected_port_mask; +static int32_t promiscuous_on = 1; +static int32_t numa_on = 1; /**< NUMA is 
enabled by default. */ +static int32_t ep = -1; /**< Endpoint configuration (0 or 1) */ +static uint32_t nb_lcores; +static uint32_t single_sa; +static uint32_t single_sa_idx; + +struct lcore_rx_queue { + uint8_t port_id; + uint8_t queue_id; +} __rte_cache_aligned; + +struct lcore_params { + uint8_t port_id; + uint8_t queue_id; + uint8_t lcore_id; +} __rte_cache_aligned; + +static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; + +static struct lcore_params *lcore_params; +static uint16_t nb_lcore_params; + +static struct rte_hash *cdev_map_in; +static struct rte_hash *cdev_map_out; + +struct buffer { + uint16_t len; + struct rte_mbuf *m_table[MAX_PKT_BURST] __rte_aligned(sizeof(void *)); +}; + +struct lcore_conf { + uint16_t nb_rx_queue; + struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; + struct buffer tx_mbufs[RTE_MAX_ETHPORTS]; + struct ipsec_ctx inbound; + struct ipsec_ctx outbound; + struct rt_ctx *rt_ctx; +} __rte_cache_aligned; + +static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | + ETH_RSS_TCP | ETH_RSS_SCTP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static struct socket_ctx socket_ctx[NB_SOCKETS]; + +struct traffic_type { + const uint8_t *data[MAX_PKT_BURST * 2]; + struct rte_mbuf *pkts[MAX_PKT_BURST * 2]; + uint32_t res[MAX_PKT_BURST * 2]; + uint32_t num; +}; + +struct ipsec_traffic { + struct traffic_type ipsec4; + struct traffic_type ipv4; +}; + +static 
inline void +prepare_one_packet(struct rte_mbuf *pkt, struct ipsec_traffic *t) +{ + uint8_t *nlp; + + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { + rte_pktmbuf_adj(pkt, ETHER_HDR_LEN); + nlp = rte_pktmbuf_mtod_offset(pkt, uint8_t *, + offsetof(struct ip, ip_p)); + if (*nlp == IPPROTO_ESP) + t->ipsec4.pkts[(t->ipsec4.num)++] = pkt; + else { + t->ipv4.data[t->ipv4.num] = nlp; + t->ipv4.pkts[(t->ipv4.num)++] = pkt; + } + } else { + /* Unknown/Unsupported type, drop the packet */ + rte_pktmbuf_free(pkt); + } +} + +static inline void +prepare_traffic(struct rte_mbuf **pkts, struct ipsec_traffic *t, + uint16_t nb_pkts) +{ + int32_t i; + + t->ipsec4.num = 0; + t->ipv4.num = 0; + + for (i = 0; i < (nb_pkts - PREFETCH_OFFSET); i++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts[i + PREFETCH_OFFSET], + void *)); + prepare_one_packet(pkts[i], t); + } + /* Process left packets */ + for (; i < nb_pkts; i++) + prepare_one_packet(pkts[i], t); +} + +static inline void +prepare_tx_pkt(struct rte_mbuf *pkt, uint8_t port) +{ + pkt->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_IPV4; + pkt->l3_len = sizeof(struct ip); + pkt->l2_len = ETHER_HDR_LEN; + + struct ether_hdr *ethhdr = (struct ether_hdr *)rte_pktmbuf_prepend(pkt, + ETHER_HDR_LEN); + + ethhdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + memcpy(ðhdr->s_addr, ðaddr_tbl[port].src, + sizeof(struct ether_addr)); + memcpy(ðhdr->d_addr, ðaddr_tbl[port].dst, + sizeof(struct ether_addr)); +} + +static inline void +prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint8_t port) +{ + int32_t i; + const int32_t prefetch_offset = 2; + + for (i = 0; i < (nb_pkts - prefetch_offset); i++) { + rte_prefetch0(pkts[i + prefetch_offset]->cacheline1); + prepare_tx_pkt(pkts[i], port); + } + /* Process left packets */ + for (; i < nb_pkts; i++) + prepare_tx_pkt(pkts[i], port); +} + +/* Send burst of packets on an output interface */ +static inline int32_t +send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port) +{ + struct rte_mbuf 
**m_table; + int32_t ret; + uint16_t queueid; + + queueid = qconf->tx_queue_id[port]; + m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; + + prepare_tx_burst(m_table, n, port); + + ret = rte_eth_tx_burst(port, queueid, m_table, n); + if (unlikely(ret < n)) { + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < n); + } + + return 0; +} + +/* Enqueue a single packet, and send burst if queue is filled */ +static inline int32_t +send_single_packet(struct rte_mbuf *m, uint8_t port) +{ + uint32_t lcore_id; + uint16_t len; + struct lcore_conf *qconf; + + lcore_id = rte_lcore_id(); + + qconf = &lcore_conf[lcore_id]; + len = qconf->tx_mbufs[port].len; + qconf->tx_mbufs[port].m_table[len] = m; + len++; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + send_burst(qconf, MAX_PKT_BURST, port); + len = 0; + } + + qconf->tx_mbufs[port].len = len; + return 0; +} + +static inline void +process_pkts_inbound(struct ipsec_ctx *ipsec_ctx, + struct ipsec_traffic *traffic) +{ + struct rte_mbuf *m; + uint16_t idx, nb_pkts_in, i, j; + uint32_t sa_idx, res; + + nb_pkts_in = ipsec_inbound(ipsec_ctx, traffic->ipsec4.pkts, + traffic->ipsec4.num, MAX_PKT_BURST); + + /* SP/ACL Inbound check ipsec and ipv4 */ + for (i = 0; i < nb_pkts_in; i++) { + idx = traffic->ipv4.num++; + m = traffic->ipsec4.pkts[i]; + traffic->ipv4.pkts[idx] = m; + traffic->ipv4.data[idx] = rte_pktmbuf_mtod_offset(m, + uint8_t *, offsetof(struct ip, ip_p)); + } + + rte_acl_classify((struct rte_acl_ctx *)ipsec_ctx->sp_ctx, + traffic->ipv4.data, traffic->ipv4.res, + traffic->ipv4.num, DEFAULT_MAX_CATEGORIES); + + j = 0; + for (i = 0; i < traffic->ipv4.num - nb_pkts_in; i++) { + m = traffic->ipv4.pkts[i]; + res = traffic->ipv4.res[i]; + if (res & ~BYPASS) { + rte_pktmbuf_free(m); + continue; + } + traffic->ipv4.pkts[j++] = m; + } + /* Check return SA SPI matches pkt SPI */ + for ( ; i < traffic->ipv4.num; i++) { + m = traffic->ipv4.pkts[i]; + sa_idx = traffic->ipv4.res[i] & 
PROTECT_MASK; + if (sa_idx == 0 || !inbound_sa_check(ipsec_ctx->sa_ctx, + m, sa_idx)) { + rte_pktmbuf_free(m); + continue; + } + traffic->ipv4.pkts[j++] = m; + } + traffic->ipv4.num = j; +} + +static inline void +process_pkts_outbound(struct ipsec_ctx *ipsec_ctx, + struct ipsec_traffic *traffic) +{ + struct rte_mbuf *m; + uint16_t idx, nb_pkts_out, i, j; + uint32_t sa_idx, res; + + rte_acl_classify((struct rte_acl_ctx *)ipsec_ctx->sp_ctx, + traffic->ipv4.data, traffic->ipv4.res, + traffic->ipv4.num, DEFAULT_MAX_CATEGORIES); + + /* Drop any IPsec traffic from protected ports */ + for (i = 0; i < traffic->ipsec4.num; i++) + rte_pktmbuf_free(traffic->ipsec4.pkts[i]); + + traffic->ipsec4.num = 0; + + j = 0; + for (i = 0; i < traffic->ipv4.num; i++) { + m = traffic->ipv4.pkts[i]; + res = traffic->ipv4.res[i]; + sa_idx = res & PROTECT_MASK; + if ((res == 0) || (res & DISCARD)) + rte_pktmbuf_free(m); + else if (sa_idx != 0) { + traffic->ipsec4.res[traffic->ipsec4.num] = sa_idx; + traffic->ipsec4.pkts[traffic->ipsec4.num++] = m; + } else /* BYPASS */ + traffic->ipv4.pkts[j++] = m; + } + traffic->ipv4.num = j; + + nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ipsec4.pkts, + traffic->ipsec4.res, traffic->ipsec4.num, + MAX_PKT_BURST); + + for (i = 0; i < nb_pkts_out; i++) { + idx = traffic->ipv4.num++; + m = traffic->ipsec4.pkts[i]; + traffic->ipv4.pkts[idx] = m; + } +} + +static inline void +process_pkts_inbound_nosp(struct ipsec_ctx *ipsec_ctx, + struct ipsec_traffic *traffic) +{ + uint16_t nb_pkts_in, i; + + /* Drop any IPv4 traffic from unprotected ports */ + for (i = 0; i < traffic->ipv4.num; i++) + rte_pktmbuf_free(traffic->ipv4.pkts[i]); + + traffic->ipv4.num = 0; + + nb_pkts_in = ipsec_inbound(ipsec_ctx, traffic->ipsec4.pkts, + traffic->ipsec4.num, MAX_PKT_BURST); + + for (i = 0; i < nb_pkts_in; i++) + traffic->ipv4.pkts[i] = traffic->ipsec4.pkts[i]; + + traffic->ipv4.num = nb_pkts_in; +} + +static inline void +process_pkts_outbound_nosp(struct ipsec_ctx 
*ipsec_ctx, + struct ipsec_traffic *traffic) +{ + uint16_t nb_pkts_out, i; + + /* Drop any IPsec traffic from protected ports */ + for (i = 0; i < traffic->ipsec4.num; i++) + rte_pktmbuf_free(traffic->ipsec4.pkts[i]); + + traffic->ipsec4.num = 0; + + for (i = 0; i < traffic->ipv4.num; i++) + traffic->ipv4.res[i] = single_sa_idx; + + nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ipv4.pkts, + traffic->ipv4.res, traffic->ipv4.num, + MAX_PKT_BURST); + + traffic->ipv4.num = nb_pkts_out; +} + +static inline void +route_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts) +{ + uint32_t hop[MAX_PKT_BURST * 2]; + uint32_t dst_ip[MAX_PKT_BURST * 2]; + uint16_t i, offset; + + if (nb_pkts == 0) + return; + + for (i = 0; i < nb_pkts; i++) { + offset = offsetof(struct ip, ip_dst); + dst_ip[i] = *rte_pktmbuf_mtod_offset(pkts[i], + uint32_t *, offset); + dst_ip[i] = rte_be_to_cpu_32(dst_ip[i]); + } + + rte_lpm_lookup_bulk((struct rte_lpm *)rt_ctx, dst_ip, hop, nb_pkts); + + for (i = 0; i < nb_pkts; i++) { + if ((hop[i] & RTE_LPM_LOOKUP_SUCCESS) == 0) { + rte_pktmbuf_free(pkts[i]); + continue; + } + send_single_packet(pkts[i], hop[i] & 0xff); + } +} + +static inline void +process_pkts(struct lcore_conf *qconf, struct rte_mbuf **pkts, + uint8_t nb_pkts, uint8_t portid) +{ + struct ipsec_traffic traffic; + + prepare_traffic(pkts, &traffic, nb_pkts); + + if (single_sa) { + if (UNPROTECTED_PORT(portid)) + process_pkts_inbound_nosp(&qconf->inbound, &traffic); + else + process_pkts_outbound_nosp(&qconf->outbound, &traffic); + } else { + if (UNPROTECTED_PORT(portid)) + process_pkts_inbound(&qconf->inbound, &traffic); + else + process_pkts_outbound(&qconf->outbound, &traffic); + } + + route_pkts(qconf->rt_ctx, traffic.ipv4.pkts, traffic.ipv4.num); +} + +static inline void +drain_buffers(struct lcore_conf *qconf) +{ + struct buffer *buf; + uint32_t portid; + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + buf = &qconf->tx_mbufs[portid]; + if (buf->len == 0) + 
continue; + send_burst(qconf, buf->len, portid); + buf->len = 0; + } +} + +/* main processing loop */ +static int32_t +main_loop(__attribute__((unused)) void *dummy) +{ + struct rte_mbuf *pkts[MAX_PKT_BURST]; + uint32_t lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + int32_t i, nb_rx; + uint8_t portid, queueid; + struct lcore_conf *qconf; + int32_t socket_id; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) + / US_PER_S * BURST_TX_DRAIN_US; + struct lcore_rx_queue *rxql; + + prev_tsc = 0; + lcore_id = rte_lcore_id(); + qconf = &lcore_conf[lcore_id]; + rxql = qconf->rx_queue_list; + socket_id = rte_lcore_to_socket_id(lcore_id); + + qconf->rt_ctx = socket_ctx[socket_id].rt_ipv4; + qconf->inbound.sp_ctx = socket_ctx[socket_id].sp_ipv4_in; + qconf->inbound.sa_ctx = socket_ctx[socket_id].sa_ipv4_in; + qconf->inbound.cdev_map = cdev_map_in; + qconf->outbound.sp_ctx = socket_ctx[socket_id].sp_ipv4_out; + qconf->outbound.sa_ctx = socket_ctx[socket_id].sa_ipv4_out; + qconf->outbound.cdev_map = cdev_map_out; + + if (qconf->nb_rx_queue == 0) { + RTE_LOG(INFO, IPSEC, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, IPSEC, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->nb_rx_queue; i++) { + portid = rxql[i].port_id; + queueid = rxql[i].queue_id; + RTE_LOG(INFO, IPSEC, + " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", + lcore_id, portid, queueid); + } + + while (1) { + cur_tsc = rte_rdtsc(); + + /* TX queue buffer drain */ + diff_tsc = cur_tsc - prev_tsc; + + if (unlikely(diff_tsc > drain_tsc)) { + drain_buffers(qconf); + prev_tsc = cur_tsc; + } + + /* Read packet from RX queues */ + for (i = 0; i < qconf->nb_rx_queue; ++i) { + portid = rxql[i].port_id; + queueid = rxql[i].queue_id; + nb_rx = rte_eth_rx_burst(portid, queueid, + pkts, MAX_PKT_BURST); + + if (nb_rx > 0) + process_pkts(qconf, pkts, nb_rx, portid); + } + } +} + +static int32_t +check_params(void) +{ + uint8_t lcore, portid, nb_ports; + uint16_t 
i; + int32_t socket_id; + + if (lcore_params == NULL) { + printf("Error: No port/queue/core mappings\n"); + return -1; + } + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + for (i = 0; i < nb_lcore_params; ++i) { + lcore = lcore_params[i].lcore_id; + if (!rte_lcore_is_enabled(lcore)) { + printf("error: lcore %hhu is not enabled in " + "lcore mask\n", lcore); + return -1; + } + socket_id = rte_lcore_to_socket_id(lcore); + if (socket_id != 0 && numa_on == 0) { + printf("warning: lcore %hhu is on socket %d " + "with numa off\n", + lcore, socket_id); + } + portid = lcore_params[i].port_id; + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("port %u is not enabled in port mask\n", portid); + return -1; + } + if (portid >= nb_ports) { + printf("port %u is not present on the board\n", portid); + return -1; + } + } + return 0; +} + +static uint8_t +get_port_nb_rx_queues(const uint8_t port) +{ + int32_t queue = -1; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + if (lcore_params[i].port_id == port && + lcore_params[i].queue_id > queue) + queue = lcore_params[i].queue_id; + } + return (uint8_t)(++queue); +} + +static int32_t +init_lcore_rx_queues(void) +{ + uint16_t i, nb_rx_queue; + uint8_t lcore; + + for (i = 0; i < nb_lcore_params; ++i) { + lcore = lcore_params[i].lcore_id; + nb_rx_queue = lcore_conf[lcore].nb_rx_queue; + if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { + printf("error: too many queues (%u) for lcore: %u\n", + nb_rx_queue + 1, lcore); + return -1; + } + lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = + lcore_params[i].port_id; + lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = + lcore_params[i].queue_id; + lcore_conf[lcore].nb_rx_queue++; + } + return 0; +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK -P -u PORTMASK" + " --"OPTION_CONFIG" (port,queue,lcore)[,(port,queue,lcore]" + " --single-sa SAIDX 
--ep0|--ep1\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -P : enable promiscuous mode\n" + " -u PORTMASK: hexadecimal bitmask of unprotected ports\n" + " --"OPTION_CONFIG": (port,queue,lcore): " + "rx queues configuration\n" + " --single-sa SAIDX: use single SA index for outbound, " + "bypassing the SP\n" + " --ep0: Configure as Endpoint 0\n" + " --ep1: Configure as Endpoint 1\n", prgname); +} + +static int32_t +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if ((pm == 0) && errno) + return -1; + + return pm; +} + +static int32_t +parse_decimal(const char *str) +{ + char *end = NULL; + unsigned long num; + + num = strtoul(str, &end, 10); + if ((str[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + return num; +} + +static int32_t +parse_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + enum fieldnames { + FLD_PORT = 0, + FLD_QUEUE, + FLD_LCORE, + _NUM_FLD + }; + int long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int32_t i; + uint32_t size; + + nb_lcore_params = 0; + + while ((p = strchr(p0, '(')) != NULL) { + ++p; + p0 = strchr(p, ')'); + if (p0 == NULL) + return -1; + + size = p0 - p; + if (size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != + _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++) { + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + if (nb_lcore_params >= MAX_LCORE_PARAMS) { + printf("exceeded max number of lcore params: %hu\n", + nb_lcore_params); + return -1; + } + lcore_params_array[nb_lcore_params].port_id = + (uint8_t)int_fld[FLD_PORT]; + lcore_params_array[nb_lcore_params].queue_id = + 
(uint8_t)int_fld[FLD_QUEUE]; + lcore_params_array[nb_lcore_params].lcore_id = + (uint8_t)int_fld[FLD_LCORE]; + ++nb_lcore_params; + } + lcore_params = lcore_params_array; + return 0; +} + +#define __STRNCMP(name, opt) (!strncmp(name, opt, sizeof(opt))) +static int32_t +parse_args_long_options(struct option *lgopts, int32_t option_index) +{ + int32_t ret = -1; + const char *optname = lgopts[option_index].name; + + if (__STRNCMP(optname, OPTION_CONFIG)) { + ret = parse_config(optarg); + if (ret) + printf("invalid config\n"); + } + + if (__STRNCMP(optname, OPTION_SINGLE_SA)) { + ret = parse_decimal(optarg); + if (ret != -1) { + single_sa = 1; + single_sa_idx = ret; + printf("Configured with single SA index %u\n", + single_sa_idx); + ret = 0; + } + } + + if (__STRNCMP(optname, OPTION_EP0)) { + printf("endpoint 0\n"); + ep = 0; + ret = 0; + } + + if (__STRNCMP(optname, OPTION_EP1)) { + printf("endpoint 1\n"); + ep = 1; + ret = 0; + } + + return ret; +} +#undef __STRNCMP + +static int32_t +parse_args(int32_t argc, char **argv) +{ + int32_t opt, ret; + char **argvopt; + int32_t option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {OPTION_CONFIG, 1, 0, 0}, + {OPTION_SINGLE_SA, 1, 0, 0}, + {OPTION_EP0, 0, 0, 0}, + {OPTION_EP1, 0, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:Pu:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + case 'P': + printf("Promiscuous mode selected\n"); + promiscuous_on = 1; + break; + case 'u': + unprotected_port_mask = parse_portmask(optarg); + if (unprotected_port_mask == 0) { + printf("invalid unprotected portmask\n"); + print_usage(prgname); + return -1; + } + break; + case 0: + if (parse_args_long_options(lgopts, option_index)) { + print_usage(prgname); + return -1; + } + break; + default: 
+ print_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, const struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (uint32_t)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +static int32_t +add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id, + uint16_t qp, struct lcore_params *params, + struct ipsec_ctx *ipsec_ctx, + const struct rte_cryptodev_capabilities *cipher, + const struct rte_cryptodev_capabilities *auth) +{ + int32_t ret = 0; + unsigned long i; + struct cdev_key key = { 0 }; + + key.lcore_id = params->lcore_id; + if (cipher) + key.cipher_algo = cipher->sym.cipher.algo; + if (auth) + key.auth_algo = auth->sym.auth.algo; + + ret = rte_hash_lookup(map, &key); + if (ret != -ENOENT) + return 0; + + for (i = 0; i < ipsec_ctx->nb_qps; i++) + if (ipsec_ctx->tbl[i].id == cdev_id) + break; + + if (i == ipsec_ctx->nb_qps) { + if (ipsec_ctx->nb_qps == MAX_QP_PER_LCORE) { + printf("Maximum number of crypto devices assigned to " + "a core, increase MAX_QP_PER_LCORE value\n"); + return 0; + } + ipsec_ctx->tbl[i].id = cdev_id; + ipsec_ctx->tbl[i].qp = qp; + ipsec_ctx->nb_qps++; + printf("%s cdev mapping: lcore %u using cdev %u qp %u " + "(cdev_id_qp %lu)\n", str, key.lcore_id, + cdev_id, qp, i); + } + + ret = rte_hash_add_key_data(map, &key, (void *)i); + if (ret < 0) { + printf("Faled to insert cdev mapping for (lcore %u, " + "cdev %u, qp %u), errno %d\n", + key.lcore_id, ipsec_ctx->tbl[i].id, + ipsec_ctx->tbl[i].qp, ret); + return 0; + } + + return 1; +} + +static int32_t +add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t 
cdev_id, + uint16_t qp, struct lcore_params *params) +{ + int32_t ret = 0; + const struct rte_cryptodev_capabilities *i, *j; + struct rte_hash *map; + struct lcore_conf *qconf; + struct ipsec_ctx *ipsec_ctx; + const char *str; + + qconf = &lcore_conf[params->lcore_id]; + + if ((unprotected_port_mask & (1 << params->port_id)) == 0) { + map = cdev_map_out; + ipsec_ctx = &qconf->outbound; + str = "Outbound"; + } else { + map = cdev_map_in; + ipsec_ctx = &qconf->inbound; + str = "Inbound"; + } + + /* Required cryptodevs with operation chainning */ + if (!(dev_info->feature_flags & + RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + return ret; + + for (i = dev_info->capabilities; + i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) { + if (i->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) + continue; + + if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) + continue; + + for (j = dev_info->capabilities; + j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; j++) { + if (j->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) + continue; + + if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) + continue; + + ret |= add_mapping(map, str, cdev_id, qp, params, + ipsec_ctx, i, j); + } + } + + return ret; +} + +static int32_t +cryptodevs_init(void) +{ + struct rte_cryptodev_config dev_conf; + struct rte_cryptodev_qp_conf qp_conf; + uint16_t idx, max_nb_qps, qp, i; + int16_t cdev_id; + struct rte_hash_parameters params = { 0 }; + + params.entries = CDEV_MAP_ENTRIES; + params.key_len = sizeof(struct cdev_key); + params.hash_func = rte_jhash; + params.hash_func_init_val = 0; + params.socket_id = rte_socket_id(); + + params.name = "cdev_map_in"; + cdev_map_in = rte_hash_create(¶ms); + if (cdev_map_in == NULL) + rte_panic("Failed to create cdev_map hash table, errno = %d\n", + rte_errno); + + params.name = "cdev_map_out"; + cdev_map_out = rte_hash_create(¶ms); + if (cdev_map_out == NULL) + rte_panic("Failed to create cdev_map hash table, errno = %d\n", + rte_errno); + + printf("lcore/cryptodev/qp mappings:\n"); + + idx = 0; + /* 
Start from last cdev id to give HW priority */ + for (cdev_id = rte_cryptodev_count() - 1; cdev_id >= 0; cdev_id--) { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get(cdev_id, &cdev_info); + + if (nb_lcore_params > cdev_info.max_nb_queue_pairs) + max_nb_qps = cdev_info.max_nb_queue_pairs; + else + max_nb_qps = nb_lcore_params; + + qp = 0; + i = 0; + while (qp < max_nb_qps && i < nb_lcore_params) { + if (add_cdev_mapping(&cdev_info, cdev_id, qp, + &lcore_params[idx])) + qp++; + idx++; + idx = idx % nb_lcore_params; + i++; + } + + if (qp == 0) + continue; + + dev_conf.socket_id = rte_cryptodev_socket_id(cdev_id); + dev_conf.nb_queue_pairs = qp; + dev_conf.session_mp.nb_objs = CDEV_MP_NB_OBJS; + dev_conf.session_mp.cache_size = CDEV_MP_CACHE_SZ; + + if (rte_cryptodev_configure(cdev_id, &dev_conf)) + rte_panic("Failed to initialize crypodev %u\n", + cdev_id); + + qp_conf.nb_descriptors = CDEV_MP_NB_OBJS; + for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) + if (rte_cryptodev_queue_pair_setup(cdev_id, qp, + &qp_conf, dev_conf.socket_id)) + rte_panic("Failed to setup queue %u for " + "cdev_id %u\n", 0, cdev_id); + } + + printf("\n"); + + return 0; +} + +static void +port_init(uint8_t portid) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + uint16_t nb_tx_queue, nb_rx_queue; + uint16_t tx_queueid, rx_queueid, queue, lcore_id; + int32_t ret, socket_id; + struct lcore_conf *qconf; + struct ether_addr ethaddr; + + rte_eth_dev_info_get(portid, &dev_info); + + printf("Configuring device port %u:\n", portid); + + rte_eth_macaddr_get(portid, ðaddr); + ethaddr_tbl[portid].src = ETHADDR_TO_UINT64(ethaddr); + print_ethaddr("Address: ", ðaddr); + printf("\n"); + + nb_rx_queue = get_port_nb_rx_queues(portid); + nb_tx_queue = nb_lcores; + + if (nb_rx_queue > dev_info.max_rx_queues) + rte_exit(EXIT_FAILURE, "Error: queue %u not available " + "(max rx queue is %u)\n", + nb_rx_queue, dev_info.max_rx_queues); + + if (nb_tx_queue > 
dev_info.max_tx_queues) + rte_exit(EXIT_FAILURE, "Error: queue %u not available " + "(max tx queue is %u)\n", + nb_tx_queue, dev_info.max_tx_queues); + + printf("Creating queues: nb_rx_queue=%d nb_tx_queue=%u...\n", + nb_rx_queue, nb_tx_queue); + + ret = rte_eth_dev_configure(portid, nb_rx_queue, nb_tx_queue, + &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: " + "err=%d, port=%d\n", ret, portid); + + /* init one TX queue per lcore */ + tx_queueid = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socket_id = (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socket_id = 0; + + /* init TX queue */ + printf("Setup txq=%u,%d,%d\n", lcore_id, tx_queueid, socket_id); + + txconf = &dev_info.default_txconf; + txconf->txq_flags = 0; + + ret = rte_eth_tx_queue_setup(portid, tx_queueid, nb_txd, + socket_id, txconf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: " + "err=%d, port=%d\n", ret, portid); + + qconf = &lcore_conf[lcore_id]; + qconf->tx_queue_id[portid] = tx_queueid; + tx_queueid++; + + /* init RX queues */ + for (queue = 0; queue < qconf->nb_rx_queue; ++queue) { + if (portid != qconf->rx_queue_list[queue].port_id) + continue; + + rx_queueid = qconf->rx_queue_list[queue].queue_id; + + printf("Setup rxq=%d,%d,%d\n", portid, rx_queueid, + socket_id); + + ret = rte_eth_rx_queue_setup(portid, rx_queueid, + nb_rxd, socket_id, NULL, + socket_ctx[socket_id].mbuf_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_rx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + } + } + printf("\n"); +} + +static void +pool_init(struct socket_ctx *ctx, int32_t socket_id, uint32_t nb_mbuf) +{ + char s[64]; + + snprintf(s, sizeof(s), "mbuf_pool_%d", socket_id); + ctx->mbuf_pool = rte_pktmbuf_pool_create(s, nb_mbuf, + MEMPOOL_CACHE_SIZE, ipsec_metadata_size(), + RTE_MBUF_DEFAULT_BUF_SIZE, + socket_id); + if (ctx->mbuf_pool == NULL) + 
rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n", + socket_id); + else + printf("Allocated mbuf pool on socket %d\n", socket_id); +} + +int32_t +main(int32_t argc, char **argv) +{ + int32_t ret; + uint32_t lcore_id, nb_ports; + uint8_t portid, socket_id; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid parameters\n"); + + if (ep < 0) + rte_exit(EXIT_FAILURE, "need to choose either EP0 or EP1\n"); + + if ((unprotected_port_mask & enabled_port_mask) != + unprotected_port_mask) + rte_exit(EXIT_FAILURE, "Invalid unprotected portmask 0x%x\n", + unprotected_port_mask); + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + if (check_params() < 0) + rte_exit(EXIT_FAILURE, "check_params failed\n"); + + ret = init_lcore_rx_queues(); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); + + nb_lcores = rte_lcore_count(); + + /* Replicate each contex per socket */ + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socket_id = (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socket_id = 0; + + if (socket_ctx[socket_id].mbuf_pool) + continue; + + sa_init(&socket_ctx[socket_id], socket_id, ep); + + sp_init(&socket_ctx[socket_id], socket_id, ep); + + rt_init(&socket_ctx[socket_id], socket_id, ep); + + pool_init(&socket_ctx[socket_id], socket_id, NB_MBUF); + } + + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + + port_init(portid); + } + + cryptodevs_init(); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + + /* Start device */ + 
ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: " + "err=%d, port=%d\n", ret, portid); + /* + * If enabled, put device in promiscuous mode. + * This allows IO forwarding mode to forward packets + * to itself through 2 cross-connected ports of the + * target machine. + */ + if (promiscuous_on) + rte_eth_promiscuous_enable(portid); + } + + check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c new file mode 100644 index 00000000..baf30d4b --- /dev/null +++ b/examples/ipsec-secgw/ipsec.c @@ -0,0 +1,203 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <sys/types.h> +#include <netinet/in.h> +#include <netinet/ip.h> + +#include <rte_branch_prediction.h> +#include <rte_log.h> +#include <rte_crypto.h> +#include <rte_cryptodev.h> +#include <rte_mbuf.h> +#include <rte_hash.h> + +#include "ipsec.h" + +static inline int +create_session(struct ipsec_ctx *ipsec_ctx __rte_unused, struct ipsec_sa *sa) +{ + uint32_t cdev_id_qp = 0; + int32_t ret; + struct cdev_key key = { 0 }; + + key.lcore_id = (uint8_t)rte_lcore_id(); + + key.cipher_algo = (uint8_t)sa->cipher_algo; + key.auth_algo = (uint8_t)sa->auth_algo; + + ret = rte_hash_lookup_data(ipsec_ctx->cdev_map, &key, + (void **)&cdev_id_qp); + if (ret < 0) { + IPSEC_LOG(ERR, IPSEC, "No cryptodev: core %u, cipher_algo %u, " + "auth_algo %u\n", key.lcore_id, key.cipher_algo, + key.auth_algo); + return -1; + } + + IPSEC_LOG(DEBUG, IPSEC, "Create session for SA spi %u on cryptodev " + "%u qp %u\n", sa->spi, ipsec_ctx->tbl[cdev_id_qp].id, + ipsec_ctx->tbl[cdev_id_qp].qp); + + sa->crypto_session = rte_cryptodev_sym_session_create( + ipsec_ctx->tbl[cdev_id_qp].id, sa->xforms); + + sa->cdev_id_qp = cdev_id_qp; + + return 0; +} + +static inline void +enqueue_cop(struct cdev_qp *cqp, struct rte_crypto_op *cop) +{ + int ret, i; + + cqp->buf[cqp->len++] = cop; + + if (cqp->len == MAX_PKT_BURST) { + ret = rte_cryptodev_enqueue_burst(cqp->id, cqp->qp, + cqp->buf, cqp->len); + if (ret < cqp->len) { + IPSEC_LOG(DEBUG, IPSEC, "Cryptodev %u queue 
%u:" + " enqueued %u crypto ops out of %u\n", + cqp->id, cqp->qp, + ret, cqp->len); + for (i = ret; i < cqp->len; i++) + rte_pktmbuf_free(cqp->buf[i]->sym->m_src); + } + cqp->in_flight += ret; + cqp->len = 0; + } +} + +static inline uint16_t +ipsec_processing(struct ipsec_ctx *ipsec_ctx, struct rte_mbuf *pkts[], + struct ipsec_sa *sas[], uint16_t nb_pkts, uint16_t max_pkts) +{ + int ret = 0, i, j, nb_cops; + struct ipsec_mbuf_metadata *priv; + struct rte_crypto_op *cops[max_pkts]; + struct ipsec_sa *sa; + struct rte_mbuf *pkt; + + for (i = 0; i < nb_pkts; i++) { + rte_prefetch0(sas[i]); + rte_prefetch0(pkts[i]); + + priv = get_priv(pkts[i]); + sa = sas[i]; + priv->sa = sa; + + IPSEC_ASSERT(sa != NULL); + + priv->cop.type = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + + rte_prefetch0(&priv->sym_cop); + priv->cop.sym = &priv->sym_cop; + + if ((unlikely(sa->crypto_session == NULL)) && + create_session(ipsec_ctx, sa)) { + rte_pktmbuf_free(pkts[i]); + continue; + } + + rte_crypto_op_attach_sym_session(&priv->cop, + sa->crypto_session); + + ret = sa->pre_crypto(pkts[i], sa, &priv->cop); + if (unlikely(ret)) { + rte_pktmbuf_free(pkts[i]); + continue; + } + + IPSEC_ASSERT(sa->cdev_id_qp < ipsec_ctx->nb_qps); + enqueue_cop(&ipsec_ctx->tbl[sa->cdev_id_qp], &priv->cop); + } + + nb_pkts = 0; + for (i = 0; i < ipsec_ctx->nb_qps && nb_pkts < max_pkts; i++) { + struct cdev_qp *cqp; + + cqp = &ipsec_ctx->tbl[ipsec_ctx->last_qp++]; + if (ipsec_ctx->last_qp == ipsec_ctx->nb_qps) + ipsec_ctx->last_qp %= ipsec_ctx->nb_qps; + + if (cqp->in_flight == 0) + continue; + + nb_cops = rte_cryptodev_dequeue_burst(cqp->id, cqp->qp, + cops, max_pkts - nb_pkts); + + cqp->in_flight -= nb_cops; + + for (j = 0; j < nb_cops; j++) { + pkt = cops[j]->sym->m_src; + rte_prefetch0(pkt); + + priv = get_priv(pkt); + sa = priv->sa; + + IPSEC_ASSERT(sa != NULL); + + ret = sa->post_crypto(pkt, sa, cops[j]); + if (unlikely(ret)) + rte_pktmbuf_free(pkt); + else + pkts[nb_pkts++] = pkt; + } + } + + /* return packets */ + 
return nb_pkts; +} + +uint16_t +ipsec_inbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[], + uint16_t nb_pkts, uint16_t len) +{ + struct ipsec_sa *sas[nb_pkts]; + + inbound_sa_lookup(ctx->sa_ctx, pkts, sas, nb_pkts); + + return ipsec_processing(ctx, pkts, sas, nb_pkts, len); +} + +uint16_t +ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[], + uint32_t sa_idx[], uint16_t nb_pkts, uint16_t len) +{ + struct ipsec_sa *sas[nb_pkts]; + + outbound_sa_lookup(ctx->sa_ctx, sa_idx, sas, nb_pkts); + + return ipsec_processing(ctx, pkts, sas, nb_pkts, len); +} diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h new file mode 100644 index 00000000..a13fdef9 --- /dev/null +++ b/examples/ipsec-secgw/ipsec.h @@ -0,0 +1,190 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IPSEC_H__ +#define __IPSEC_H__ + +#include <stdint.h> + +#include <rte_byteorder.h> +#include <rte_ip.h> +#include <rte_crypto.h> + +#define RTE_LOGTYPE_IPSEC RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_IPSEC_ESP RTE_LOGTYPE_USER2 +#define RTE_LOGTYPE_IPSEC_IPIP RTE_LOGTYPE_USER3 + +#define MAX_PKT_BURST 32 +#define MAX_QP_PER_LCORE 256 + +#ifdef IPSEC_DEBUG +#define IPSEC_ASSERT(exp) \ +if (!(exp)) { \ + rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \ +} + +#define IPSEC_LOG RTE_LOG +#else +#define IPSEC_ASSERT(exp) do {} while (0) +#define IPSEC_LOG(...) 
do {} while (0) +#endif /* IPSEC_DEBUG */ + +#define MAX_DIGEST_SIZE 32 /* Bytes -- 256 bits */ + +#define uint32_t_to_char(ip, a, b, c, d) do {\ + *a = (unsigned char)(ip >> 24 & 0xff);\ + *b = (unsigned char)(ip >> 16 & 0xff);\ + *c = (unsigned char)(ip >> 8 & 0xff);\ + *d = (unsigned char)(ip & 0xff);\ + } while (0) + +#define DEFAULT_MAX_CATEGORIES 1 + +#define IPSEC_SA_MAX_ENTRIES (64) /* must be power of 2, max 2 power 30 */ +#define SPI2IDX(spi) (spi & (IPSEC_SA_MAX_ENTRIES - 1)) +#define INVALID_SPI (0) + +#define DISCARD (0x80000000) +#define BYPASS (0x40000000) +#define PROTECT_MASK (0x3fffffff) +#define PROTECT(sa_idx) (SPI2IDX(sa_idx) & PROTECT_MASK) /* SA idx 30 bits */ + +#define IPSEC_XFORM_MAX 2 + +struct rte_crypto_xform; +struct ipsec_xform; +struct rte_cryptodev_session; +struct rte_mbuf; + +struct ipsec_sa; + +typedef int (*ipsec_xform_fn)(struct rte_mbuf *m, struct ipsec_sa *sa, + struct rte_crypto_op *cop); + +struct ipsec_sa { + uint32_t spi; + uint32_t cdev_id_qp; + uint32_t src; + uint32_t dst; + struct rte_cryptodev_sym_session *crypto_session; + struct rte_crypto_sym_xform *xforms; + ipsec_xform_fn pre_crypto; + ipsec_xform_fn post_crypto; + enum rte_crypto_cipher_algorithm cipher_algo; + enum rte_crypto_auth_algorithm auth_algo; + uint16_t digest_len; + uint16_t iv_len; + uint16_t block_size; + uint16_t flags; + uint32_t seq; +} __rte_cache_aligned; + +struct ipsec_mbuf_metadata { + struct ipsec_sa *sa; + struct rte_crypto_op cop; + struct rte_crypto_sym_op sym_cop; +}; + +struct cdev_qp { + uint16_t id; + uint16_t qp; + uint16_t in_flight; + uint16_t len; + struct rte_crypto_op *buf[MAX_PKT_BURST] __rte_aligned(sizeof(void *)); +}; + +struct ipsec_ctx { + struct rte_hash *cdev_map; + struct sp_ctx *sp_ctx; + struct sa_ctx *sa_ctx; + uint16_t nb_qps; + uint16_t last_qp; + struct cdev_qp tbl[MAX_QP_PER_LCORE]; +}; + +struct cdev_key { + uint16_t lcore_id; + uint8_t cipher_algo; + uint8_t auth_algo; +}; + +struct socket_ctx { + struct 
sa_ctx *sa_ipv4_in; + struct sa_ctx *sa_ipv4_out; + struct sp_ctx *sp_ipv4_in; + struct sp_ctx *sp_ipv4_out; + struct rt_ctx *rt_ipv4; + struct rte_mempool *mbuf_pool; +}; + +uint16_t +ipsec_inbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[], + uint16_t nb_pkts, uint16_t len); + +uint16_t +ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[], + uint32_t sa_idx[], uint16_t nb_pkts, uint16_t len); + +static inline uint16_t +ipsec_metadata_size(void) +{ + return sizeof(struct ipsec_mbuf_metadata); +} + +static inline struct ipsec_mbuf_metadata * +get_priv(struct rte_mbuf *m) +{ + return RTE_PTR_ADD(m, sizeof(struct rte_mbuf)); +} + +int +inbound_sa_check(struct sa_ctx *sa_ctx, struct rte_mbuf *m, uint32_t sa_idx); + +void +inbound_sa_lookup(struct sa_ctx *sa_ctx, struct rte_mbuf *pkts[], + struct ipsec_sa *sa[], uint16_t nb_pkts); + +void +outbound_sa_lookup(struct sa_ctx *sa_ctx, uint32_t sa_idx[], + struct ipsec_sa *sa[], uint16_t nb_pkts); + +void +sp_init(struct socket_ctx *ctx, int socket_id, unsigned ep); + +void +sa_init(struct socket_ctx *ctx, int socket_id, unsigned ep); + +void +rt_init(struct socket_ctx *ctx, int socket_id, unsigned ep); + +#endif /* __IPSEC_H__ */ diff --git a/examples/ipsec-secgw/rt.c b/examples/ipsec-secgw/rt.c new file mode 100644 index 00000000..a6d0866a --- /dev/null +++ b/examples/ipsec-secgw/rt.c @@ -0,0 +1,145 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Routing Table (RT) + */ +#include <sys/types.h> +#include <rte_lpm.h> +#include <rte_errno.h> + +#include "ipsec.h" + +#define RT_IPV4_MAX_RULES 64 + +struct ipv4_route { + uint32_t ip; + uint8_t depth; + uint8_t if_out; +}; + +/* In the default routing table we have: + * ep0 protected ports 0 and 1, and unprotected ports 2 and 3. 
+ */ +static struct ipv4_route rt_ipv4_ep0[] = { + { IPv4(172, 16, 2, 5), 32, 0 }, + { IPv4(172, 16, 2, 6), 32, 0 }, + { IPv4(172, 16, 2, 7), 32, 1 }, + { IPv4(172, 16, 2, 8), 32, 1 }, + + { IPv4(192, 168, 115, 0), 24, 2 }, + { IPv4(192, 168, 116, 0), 24, 2 }, + { IPv4(192, 168, 117, 0), 24, 3 }, + { IPv4(192, 168, 118, 0), 24, 3 }, + + { IPv4(192, 168, 210, 0), 24, 2 }, + + { IPv4(192, 168, 240, 0), 24, 2 }, + { IPv4(192, 168, 250, 0), 24, 0 } +}; + +/* In the default routing table we have: + * ep1 protected ports 0 and 1, and unprotected ports 2 and 3. + */ +static struct ipv4_route rt_ipv4_ep1[] = { + { IPv4(172, 16, 1, 5), 32, 2 }, + { IPv4(172, 16, 1, 6), 32, 2 }, + { IPv4(172, 16, 1, 7), 32, 3 }, + { IPv4(172, 16, 1, 8), 32, 3 }, + + { IPv4(192, 168, 105, 0), 24, 0 }, + { IPv4(192, 168, 106, 0), 24, 0 }, + { IPv4(192, 168, 107, 0), 24, 1 }, + { IPv4(192, 168, 108, 0), 24, 1 }, + + { IPv4(192, 168, 200, 0), 24, 0 }, + + { IPv4(192, 168, 240, 0), 24, 2 }, + { IPv4(192, 168, 250, 0), 24, 0 } +}; + +void +rt_init(struct socket_ctx *ctx, int socket_id, unsigned ep) +{ + char name[PATH_MAX]; + unsigned i; + int ret; + struct rte_lpm *lpm; + struct ipv4_route *rt; + char a, b, c, d; + unsigned nb_routes; + struct rte_lpm_config conf = { 0 }; + + if (ctx == NULL) + rte_exit(EXIT_FAILURE, "NULL context.\n"); + + if (ctx->rt_ipv4 != NULL) + rte_exit(EXIT_FAILURE, "Routing Table for socket %u already " + "initialized\n", socket_id); + + printf("Creating Routing Table (RT) context with %u max routes\n", + RT_IPV4_MAX_RULES); + + if (ep == 0) { + rt = rt_ipv4_ep0; + nb_routes = RTE_DIM(rt_ipv4_ep0); + } else if (ep == 1) { + rt = rt_ipv4_ep1; + nb_routes = RTE_DIM(rt_ipv4_ep1); + } else + rte_exit(EXIT_FAILURE, "Invalid EP value %u. 
Only 0 or 1 " + "supported.\n", ep); + + /* create the LPM table */ + snprintf(name, sizeof(name), "%s_%u", "rt_ipv4", socket_id); + conf.max_rules = RT_IPV4_MAX_RULES; + conf.number_tbl8s = RTE_LPM_TBL8_NUM_ENTRIES; + lpm = rte_lpm_create(name, socket_id, &conf); + if (lpm == NULL) + rte_exit(EXIT_FAILURE, "Unable to create LPM table " + "on socket %d\n", socket_id); + + /* populate the LPM table */ + for (i = 0; i < nb_routes; i++) { + ret = rte_lpm_add(lpm, rt[i].ip, rt[i].depth, rt[i].if_out); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Unable to add entry num %u to " + "LPM table on socket %d\n", i, socket_id); + + uint32_t_to_char(rt[i].ip, &a, &b, &c, &d); + printf("LPM: Adding route %hhu.%hhu.%hhu.%hhu/%hhu (%hhu)\n", + a, b, c, d, rt[i].depth, rt[i].if_out); + } + + ctx->rt_ipv4 = (struct rt_ctx *)lpm; +} diff --git a/examples/ipsec-secgw/sa.c b/examples/ipsec-secgw/sa.c new file mode 100644 index 00000000..b6260ede --- /dev/null +++ b/examples/ipsec-secgw/sa.c @@ -0,0 +1,446 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Security Associations + */ +#include <sys/types.h> +#include <netinet/in.h> +#include <netinet/ip.h> + +#include <rte_memzone.h> +#include <rte_crypto.h> +#include <rte_cryptodev.h> +#include <rte_byteorder.h> +#include <rte_errno.h> + +#include "ipsec.h" +#include "esp.h" + +/* SAs EP0 Outbound */ +const struct ipsec_sa sa_ep0_out[] = { + { 5, 0, IPv4(172, 16, 1, 5), IPv4(172, 16, 2, 5), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 6, 0, IPv4(172, 16, 1, 6), IPv4(172, 16, 2, 6), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 7, 0, IPv4(172, 16, 1, 7), IPv4(172, 16, 2, 7), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 8, 0, IPv4(172, 16, 1, 8), IPv4(172, 16, 2, 8), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, 
RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 9, 0, IPv4(172, 16, 1, 5), IPv4(172, 16, 2, 5), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_NULL, RTE_CRYPTO_AUTH_NULL, + 0, 0, 4, + 0, 0 }, +}; + +/* SAs EP0 Inbound */ +const struct ipsec_sa sa_ep0_in[] = { + { 5, 0, IPv4(172, 16, 2, 5), IPv4(172, 16, 1, 5), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 6, 0, IPv4(172, 16, 2, 6), IPv4(172, 16, 1, 6), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 7, 0, IPv4(172, 16, 2, 7), IPv4(172, 16, 1, 7), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 8, 0, IPv4(172, 16, 2, 8), IPv4(172, 16, 1, 8), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 9, 0, IPv4(172, 16, 2, 5), IPv4(172, 16, 1, 5), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_NULL, RTE_CRYPTO_AUTH_NULL, + 0, 0, 4, + 0, 0 }, +}; + +/* SAs EP1 Outbound */ +const struct ipsec_sa sa_ep1_out[] = { + { 5, 0, IPv4(172, 16, 2, 5), IPv4(172, 16, 1, 5), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 6, 0, IPv4(172, 16, 2, 6), IPv4(172, 16, 1, 6), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 7, 0, IPv4(172, 16, 2, 7), IPv4(172, 16, 1, 7), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + 
esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 8, 0, IPv4(172, 16, 2, 8), IPv4(172, 16, 1, 8), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 9, 0, IPv4(172, 16, 2, 5), IPv4(172, 16, 1, 5), + NULL, NULL, + esp4_tunnel_outbound_pre_crypto, + esp4_tunnel_outbound_post_crypto, + RTE_CRYPTO_CIPHER_NULL, RTE_CRYPTO_AUTH_NULL, + 0, 0, 4, + 0, 0 }, +}; + +/* SAs EP1 Inbound */ +const struct ipsec_sa sa_ep1_in[] = { + { 5, 0, IPv4(172, 16, 1, 5), IPv4(172, 16, 2, 5), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 6, 0, IPv4(172, 16, 1, 6), IPv4(172, 16, 2, 6), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 7, 0, IPv4(172, 16, 1, 7), IPv4(172, 16, 2, 7), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 8, 0, IPv4(172, 16, 1, 8), IPv4(172, 16, 2, 8), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC, + 12, 16, 16, + 0, 0 }, + { 9, 0, IPv4(172, 16, 1, 5), IPv4(172, 16, 2, 5), + NULL, NULL, + esp4_tunnel_inbound_pre_crypto, + esp4_tunnel_inbound_post_crypto, + RTE_CRYPTO_CIPHER_NULL, RTE_CRYPTO_AUTH_NULL, + 0, 0, 4, + 0, 0 }, +}; + +static uint8_t cipher_key[256] = "sixteenbytes key"; + +/* AES CBC xform */ +const struct rte_crypto_sym_xform aescbc_enc_xf = { + NULL, + RTE_CRYPTO_SYM_XFORM_CIPHER, + {.cipher = { RTE_CRYPTO_CIPHER_OP_ENCRYPT, RTE_CRYPTO_CIPHER_AES_CBC, + .key = { cipher_key, 16 } } + } +}; + +const struct rte_crypto_sym_xform 
aescbc_dec_xf = { + NULL, + RTE_CRYPTO_SYM_XFORM_CIPHER, + {.cipher = { RTE_CRYPTO_CIPHER_OP_DECRYPT, RTE_CRYPTO_CIPHER_AES_CBC, + .key = { cipher_key, 16 } } + } +}; + +static uint8_t auth_key[256] = "twentybytes hash key"; + +/* SHA1 HMAC xform */ +const struct rte_crypto_sym_xform sha1hmac_gen_xf = { + NULL, + RTE_CRYPTO_SYM_XFORM_AUTH, + {.auth = { RTE_CRYPTO_AUTH_OP_GENERATE, RTE_CRYPTO_AUTH_SHA1_HMAC, + .key = { auth_key, 20 }, 12, 0 } + } +}; + +const struct rte_crypto_sym_xform sha1hmac_verify_xf = { + NULL, + RTE_CRYPTO_SYM_XFORM_AUTH, + {.auth = { RTE_CRYPTO_AUTH_OP_VERIFY, RTE_CRYPTO_AUTH_SHA1_HMAC, + .key = { auth_key, 20 }, 12, 0 } + } +}; + +/* AES CBC xform */ +const struct rte_crypto_sym_xform null_cipher_xf = { + NULL, + RTE_CRYPTO_SYM_XFORM_CIPHER, + {.cipher = { .algo = RTE_CRYPTO_CIPHER_NULL } + } +}; + +const struct rte_crypto_sym_xform null_auth_xf = { + NULL, + RTE_CRYPTO_SYM_XFORM_AUTH, + {.auth = { .algo = RTE_CRYPTO_AUTH_NULL } + } +}; + +struct sa_ctx { + struct ipsec_sa sa[IPSEC_SA_MAX_ENTRIES]; + struct { + struct rte_crypto_sym_xform a; + struct rte_crypto_sym_xform b; + } xf[IPSEC_SA_MAX_ENTRIES]; +}; + +static struct sa_ctx * +sa_ipv4_create(const char *name, int socket_id) +{ + char s[PATH_MAX]; + struct sa_ctx *sa_ctx; + unsigned mz_size; + const struct rte_memzone *mz; + + snprintf(s, sizeof(s), "%s_%u", name, socket_id); + + /* Create SA array table */ + printf("Creating SA context with %u maximum entries\n", + IPSEC_SA_MAX_ENTRIES); + + mz_size = sizeof(struct sa_ctx); + mz = rte_memzone_reserve(s, mz_size, socket_id, + RTE_MEMZONE_1GB | RTE_MEMZONE_SIZE_HINT_ONLY); + if (mz == NULL) { + printf("Failed to allocate SA DB memory\n"); + rte_errno = -ENOMEM; + return NULL; + } + + sa_ctx = (struct sa_ctx *)mz->addr; + + return sa_ctx; +} + +static int +sa_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[], + unsigned nb_entries, unsigned inbound) +{ + struct ipsec_sa *sa; + unsigned i, idx; + + for (i = 0; i < 
nb_entries; i++) { + idx = SPI2IDX(entries[i].spi); + sa = &sa_ctx->sa[idx]; + if (sa->spi != 0) { + printf("Index %u already in use by SPI %u\n", + idx, sa->spi); + return -EINVAL; + } + *sa = entries[i]; + sa->src = rte_cpu_to_be_32(sa->src); + sa->dst = rte_cpu_to_be_32(sa->dst); + if (inbound) { + if (sa->cipher_algo == RTE_CRYPTO_CIPHER_NULL) { + sa_ctx->xf[idx].a = null_auth_xf; + sa_ctx->xf[idx].b = null_cipher_xf; + } else { + sa_ctx->xf[idx].a = sha1hmac_verify_xf; + sa_ctx->xf[idx].b = aescbc_dec_xf; + } + } else { /* outbound */ + if (sa->cipher_algo == RTE_CRYPTO_CIPHER_NULL) { + sa_ctx->xf[idx].a = null_cipher_xf; + sa_ctx->xf[idx].b = null_auth_xf; + } else { + sa_ctx->xf[idx].a = aescbc_enc_xf; + sa_ctx->xf[idx].b = sha1hmac_gen_xf; + } + } + sa_ctx->xf[idx].a.next = &sa_ctx->xf[idx].b; + sa_ctx->xf[idx].b.next = NULL; + sa->xforms = &sa_ctx->xf[idx].a; + } + + return 0; +} + +static inline int +sa_out_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[], + unsigned nb_entries) +{ + return sa_add_rules(sa_ctx, entries, nb_entries, 0); +} + +static inline int +sa_in_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[], + unsigned nb_entries) +{ + return sa_add_rules(sa_ctx, entries, nb_entries, 1); +} + +void +sa_init(struct socket_ctx *ctx, int socket_id, unsigned ep) +{ + const struct ipsec_sa *sa_out_entries, *sa_in_entries; + unsigned nb_out_entries, nb_in_entries; + const char *name; + + if (ctx == NULL) + rte_exit(EXIT_FAILURE, "NULL context.\n"); + + if (ctx->sa_ipv4_in != NULL) + rte_exit(EXIT_FAILURE, "Inbound SA DB for socket %u already " + "initialized\n", socket_id); + + if (ctx->sa_ipv4_out != NULL) + rte_exit(EXIT_FAILURE, "Outbound SA DB for socket %u already " + "initialized\n", socket_id); + + if (ep == 0) { + sa_out_entries = sa_ep0_out; + nb_out_entries = RTE_DIM(sa_ep0_out); + sa_in_entries = sa_ep0_in; + nb_in_entries = RTE_DIM(sa_ep0_in); + } else if (ep == 1) { + sa_out_entries = sa_ep1_out; + 
nb_out_entries = RTE_DIM(sa_ep1_out); + sa_in_entries = sa_ep1_in; + nb_in_entries = RTE_DIM(sa_ep1_in); + } else + rte_exit(EXIT_FAILURE, "Invalid EP value %u. " + "Only 0 or 1 supported.\n", ep); + + name = "sa_ipv4_in"; + ctx->sa_ipv4_in = sa_ipv4_create(name, socket_id); + if (ctx->sa_ipv4_in == NULL) + rte_exit(EXIT_FAILURE, "Error [%d] creating SA context %s " + "in socket %d\n", rte_errno, name, socket_id); + + name = "sa_ipv4_out"; + ctx->sa_ipv4_out = sa_ipv4_create(name, socket_id); + if (ctx->sa_ipv4_out == NULL) + rte_exit(EXIT_FAILURE, "Error [%d] creating SA context %s " + "in socket %d\n", rte_errno, name, socket_id); + + sa_in_add_rules(ctx->sa_ipv4_in, sa_in_entries, nb_in_entries); + + sa_out_add_rules(ctx->sa_ipv4_out, sa_out_entries, nb_out_entries); +} + +int +inbound_sa_check(struct sa_ctx *sa_ctx, struct rte_mbuf *m, uint32_t sa_idx) +{ + struct ipsec_mbuf_metadata *priv; + + priv = RTE_PTR_ADD(m, sizeof(struct rte_mbuf)); + + return (sa_ctx->sa[sa_idx].spi == priv->sa->spi); +} + +void +inbound_sa_lookup(struct sa_ctx *sa_ctx, struct rte_mbuf *pkts[], + struct ipsec_sa *sa[], uint16_t nb_pkts) +{ + unsigned i; + uint32_t *src, spi; + + for (i = 0; i < nb_pkts; i++) { + spi = rte_pktmbuf_mtod_offset(pkts[i], struct esp_hdr *, + sizeof(struct ip))->spi; + + if (spi == INVALID_SPI) + continue; + + sa[i] = &sa_ctx->sa[SPI2IDX(spi)]; + if (spi != sa[i]->spi) { + sa[i] = NULL; + continue; + } + + src = rte_pktmbuf_mtod_offset(pkts[i], uint32_t *, + offsetof(struct ip, ip_src)); + if ((sa[i]->src != *src) || (sa[i]->dst != *(src + 1))) + sa[i] = NULL; + } +} + +void +outbound_sa_lookup(struct sa_ctx *sa_ctx, uint32_t sa_idx[], + struct ipsec_sa *sa[], uint16_t nb_pkts) +{ + unsigned i; + + for (i = 0; i < nb_pkts; i++) + sa[i] = &sa_ctx->sa[sa_idx[i]]; +} diff --git a/examples/ipsec-secgw/sp.c b/examples/ipsec-secgw/sp.c new file mode 100644 index 00000000..4f167301 --- /dev/null +++ b/examples/ipsec-secgw/sp.c @@ -0,0 +1,366 @@ +/*- + * BSD 
LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Security Policies + */ +#include <sys/types.h> +#include <netinet/in.h> +#include <netinet/ip.h> + +#include <rte_acl.h> + +#include "ipsec.h" + +#define MAX_ACL_RULE_NUM 1000 + +/* + * Rule and trace formats definitions. 
+ */ +enum { + PROTO_FIELD_IPV4, + SRC_FIELD_IPV4, + DST_FIELD_IPV4, + SRCP_FIELD_IPV4, + DSTP_FIELD_IPV4, + NUM_FIELDS_IPV4 +}; + +/* + * That effectively defines order of IPV4 classifications: + * - PROTO + * - SRC IP ADDRESS + * - DST IP ADDRESS + * - PORTS (SRC and DST) + */ +enum { + RTE_ACL_IPV4_PROTO, + RTE_ACL_IPV4_SRC, + RTE_ACL_IPV4_DST, + RTE_ACL_IPV4_PORTS, + RTE_ACL_IPV4_NUM +}; + +struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = { + { + .type = RTE_ACL_FIELD_TYPE_BITMASK, + .size = sizeof(uint8_t), + .field_index = PROTO_FIELD_IPV4, + .input_index = RTE_ACL_IPV4_PROTO, + .offset = 0, + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC_FIELD_IPV4, + .input_index = RTE_ACL_IPV4_SRC, + .offset = offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p) + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST_FIELD_IPV4, + .input_index = RTE_ACL_IPV4_DST, + .offset = offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p) + }, + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = SRCP_FIELD_IPV4, + .input_index = RTE_ACL_IPV4_PORTS, + .offset = sizeof(struct ip) - offsetof(struct ip, ip_p) + }, + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = DSTP_FIELD_IPV4, + .input_index = RTE_ACL_IPV4_PORTS, + .offset = sizeof(struct ip) - offsetof(struct ip, ip_p) + + sizeof(uint16_t) + }, +}; + +RTE_ACL_RULE_DEF(acl4_rules, RTE_DIM(ipv4_defs)); + +const struct acl4_rules acl4_rules_in[] = { + { + .data = {.userdata = PROTECT(5), .category_mask = 1, .priority = 1}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 105, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = PROTECT(6), .category_mask = 1, .priority = 2}, + /* destination 
IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 106, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = PROTECT(7), .category_mask = 1, .priority = 3}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 107, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = PROTECT(8), .category_mask = 1, .priority = 4}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 108, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = PROTECT(9), .category_mask = 1, .priority = 5}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 200, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = BYPASS, .category_mask = 1, .priority = 6}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 250, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + } +}; + +const struct acl4_rules acl4_rules_out[] = { + { + .data = {.userdata = PROTECT(5), .category_mask = 1, .priority = 1}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 115, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, 
.mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = PROTECT(6), .category_mask = 1, .priority = 2}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 116, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = PROTECT(7), .category_mask = 1, .priority = 3}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 117, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = PROTECT(8), .category_mask = 1, .priority = 4}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 118, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = PROTECT(9), .category_mask = 1, .priority = 5}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 210, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + }, + { + .data = {.userdata = BYPASS, .category_mask = 1, .priority = 6}, + /* destination IPv4 */ + .field[2] = {.value.u32 = IPv4(192, 168, 240, 0), + .mask_range.u32 = 24,}, + /* source port */ + .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}, + /* destination port */ + .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,} + } +}; + +static void +print_one_ipv4_rule(const struct acl4_rules *rule, int extra) +{ + unsigned char a, b, c, d; + + uint32_t_to_char(rule->field[SRC_FIELD_IPV4].value.u32, + &a, &b, &c, &d); + printf("%hhu.%hhu.%hhu.%hhu/%u ", a, b, c, d, + 
rule->field[SRC_FIELD_IPV4].mask_range.u32); + uint32_t_to_char(rule->field[DST_FIELD_IPV4].value.u32, + &a, &b, &c, &d); + printf("%hhu.%hhu.%hhu.%hhu/%u ", a, b, c, d, + rule->field[DST_FIELD_IPV4].mask_range.u32); + printf("%hu : %hu %hu : %hu 0x%hhx/0x%hhx ", + rule->field[SRCP_FIELD_IPV4].value.u16, + rule->field[SRCP_FIELD_IPV4].mask_range.u16, + rule->field[DSTP_FIELD_IPV4].value.u16, + rule->field[DSTP_FIELD_IPV4].mask_range.u16, + rule->field[PROTO_FIELD_IPV4].value.u8, + rule->field[PROTO_FIELD_IPV4].mask_range.u8); + if (extra) + printf("0x%x-0x%x-0x%x ", + rule->data.category_mask, + rule->data.priority, + rule->data.userdata); +} + +static inline void +dump_ipv4_rules(const struct acl4_rules *rule, int num, int extra) +{ + int i; + + for (i = 0; i < num; i++, rule++) { + printf("\t%d:", i + 1); + print_one_ipv4_rule(rule, extra); + printf("\n"); + } +} + +static struct rte_acl_ctx * +acl4_init(const char *name, int socketid, const struct acl4_rules *rules, + unsigned rules_nb) +{ + char s[PATH_MAX]; + struct rte_acl_param acl_param; + struct rte_acl_config acl_build_param; + struct rte_acl_ctx *ctx; + + printf("Creating SP context with %u max rules\n", MAX_ACL_RULE_NUM); + + memset(&acl_param, 0, sizeof(acl_param)); + + /* Create ACL contexts */ + snprintf(s, sizeof(s), "%s_%d", name, socketid); + + printf("IPv4 %s entries [%u]:\n", s, rules_nb); + dump_ipv4_rules(rules, rules_nb, 1); + + acl_param.name = s; + acl_param.socket_id = socketid; + acl_param.rule_size = RTE_ACL_RULE_SZ(RTE_DIM(ipv4_defs)); + acl_param.max_rule_num = MAX_ACL_RULE_NUM; + + ctx = rte_acl_create(&acl_param); + if (ctx == NULL) + rte_exit(EXIT_FAILURE, "Failed to create ACL context\n"); + + if (rte_acl_add_rules(ctx, (const struct rte_acl_rule *)rules, + rules_nb) < 0) + rte_exit(EXIT_FAILURE, "add rules failed\n"); + + /* Perform builds */ + memset(&acl_build_param, 0, sizeof(acl_build_param)); + + acl_build_param.num_categories = DEFAULT_MAX_CATEGORIES; + 
acl_build_param.num_fields = RTE_DIM(ipv4_defs); + memcpy(&acl_build_param.defs, ipv4_defs, sizeof(ipv4_defs)); + + if (rte_acl_build(ctx, &acl_build_param) != 0) + rte_exit(EXIT_FAILURE, "Failed to build ACL trie\n"); + + rte_acl_dump(ctx); + + return ctx; +} + +void +sp_init(struct socket_ctx *ctx, int socket_id, unsigned ep) +{ + const char *name; + const struct acl4_rules *rules_out, *rules_in; + unsigned nb_out_rules, nb_in_rules; + + if (ctx == NULL) + rte_exit(EXIT_FAILURE, "NULL context.\n"); + + if (ctx->sp_ipv4_in != NULL) + rte_exit(EXIT_FAILURE, "Inbound SP DB for socket %u already " + "initialized\n", socket_id); + + if (ctx->sp_ipv4_out != NULL) + rte_exit(EXIT_FAILURE, "Outbound SP DB for socket %u already " + "initialized\n", socket_id); + + if (ep == 0) { + rules_out = acl4_rules_in; + nb_out_rules = RTE_DIM(acl4_rules_in); + rules_in = acl4_rules_out; + nb_in_rules = RTE_DIM(acl4_rules_out); + } else if (ep == 1) { + rules_out = acl4_rules_out; + nb_out_rules = RTE_DIM(acl4_rules_out); + rules_in = acl4_rules_in; + nb_in_rules = RTE_DIM(acl4_rules_in); + } else + rte_exit(EXIT_FAILURE, "Invalid EP value %u. " + "Only 0 or 1 supported.\n", ep); + + name = "sp_ipv4_in"; + ctx->sp_ipv4_in = (struct sp_ctx *)acl4_init(name, socket_id, + rules_in, nb_in_rules); + + name = "sp_ipv4_out"; + ctx->sp_ipv4_out = (struct sp_ctx *)acl4_init(name, socket_id, + rules_out, nb_out_rules); +} diff --git a/examples/ipv4_multicast/Makefile b/examples/ipv4_multicast/Makefile new file mode 100644 index 00000000..44f0a3bb --- /dev/null +++ b/examples/ipv4_multicast/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = ipv4_multicast + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c new file mode 100644 index 00000000..96b41578 --- /dev/null +++ b/examples/ipv4_multicast/main.c @@ -0,0 +1,819 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_fbk_hash.h> +#include <rte_ip.h> + +#define RTE_LOGTYPE_IPv4_MULTICAST RTE_LOGTYPE_USER1 + +#define MAX_PORTS 16 + +#define MCAST_CLONE_PORTS 2 +#define MCAST_CLONE_SEGS 2 + +#define PKT_MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE +#define NB_PKT_MBUF 8192 + +#define HDR_MBUF_DATA_SIZE (2 * RTE_PKTMBUF_HEADROOM) +#define NB_HDR_MBUF (NB_PKT_MBUF * MAX_PORTS) + +#define NB_CLONE_MBUF (NB_PKT_MBUF * MCAST_CLONE_PORTS * MCAST_CLONE_SEGS * 2) + +/* allow max jumbo frame 9.5 KB */ +#define JUMBO_FRAME_MAX_SIZE 0x2600 + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX 
drain every ~100us */ + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define PREFETCH_OFFSET 3 + +/* + * Construct Ethernet multicast address from IPv4 multicast address. + * Citing RFC 1112, section 6.4: + * "An IP host group address is mapped to an Ethernet multicast address + * by placing the low-order 23-bits of the IP address into the low-order + * 23 bits of the Ethernet multicast address 01-00-5E-00-00-00 (hex)." + */ +#define ETHER_ADDR_FOR_IPV4_MCAST(x) \ + (rte_cpu_to_be_64(0x01005e000000ULL | ((x) & 0x7fffff)) >> 16) + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr ports_eth_addr[MAX_PORTS]; + +/* mask of enabled ports */ +static uint32_t enabled_port_mask = 0; + +static uint8_t nb_ports = 0; + +static int rx_queue_per_lcore = 1; + +struct mbuf_table { + uint16_t len; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + uint64_t tx_tsc; + uint16_t n_rx_queue; + uint8_t rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t tx_queue_id[MAX_PORTS]; + struct mbuf_table tx_mbufs[MAX_PORTS]; +} __rte_cache_aligned; +static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 1, /**< Jumbo Frame Support enabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static struct rte_mempool *packet_pool, *header_pool, 
*clone_pool; + + +/* Multicast */ +static struct rte_fbk_hash_params mcast_hash_params = { + .name = "MCAST_HASH", + .entries = 1024, + .entries_per_bucket = 4, + .socket_id = 0, + .hash_func = NULL, + .init_val = 0, +}; + +struct rte_fbk_hash_table *mcast_hash = NULL; + +struct mcast_group_params { + uint32_t ip; + uint16_t port_mask; +}; + +static struct mcast_group_params mcast_group_table[] = { + {IPv4(224,0,0,101), 0x1}, + {IPv4(224,0,0,102), 0x2}, + {IPv4(224,0,0,103), 0x3}, + {IPv4(224,0,0,104), 0x4}, + {IPv4(224,0,0,105), 0x5}, + {IPv4(224,0,0,106), 0x6}, + {IPv4(224,0,0,107), 0x7}, + {IPv4(224,0,0,108), 0x8}, + {IPv4(224,0,0,109), 0x9}, + {IPv4(224,0,0,110), 0xA}, + {IPv4(224,0,0,111), 0xB}, + {IPv4(224,0,0,112), 0xC}, + {IPv4(224,0,0,113), 0xD}, + {IPv4(224,0,0,114), 0xE}, + {IPv4(224,0,0,115), 0xF}, +}; + +#define N_MCAST_GROUPS \ + (sizeof (mcast_group_table) / sizeof (mcast_group_table[0])) + + +/* Send burst of packets on an output interface */ +static void +send_burst(struct lcore_queue_conf *qconf, uint8_t port) +{ + struct rte_mbuf **m_table; + uint16_t n, queueid; + int ret; + + queueid = qconf->tx_queue_id[port]; + m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; + n = qconf->tx_mbufs[port].len; + + ret = rte_eth_tx_burst(port, queueid, m_table, n); + while (unlikely (ret < n)) { + rte_pktmbuf_free(m_table[ret]); + ret++; + } + + qconf->tx_mbufs[port].len = 0; +} + +/* Get number of bits set. */ +static inline uint32_t +bitcnt(uint32_t v) +{ + uint32_t n; + + for (n = 0; v != 0; v &= v - 1, n++) + ; + + return n; +} + +/** + * Create the output multicast packet based on the given input packet. 
+ * There are two approaches for creating outgoing packet, though both + * are based on data zero-copy idea, they differ in few details: + * First one creates a clone of the input packet, e.g - walk though all + * segments of the input packet, and for each of them create a new packet + * mbuf and attach that new mbuf to the segment (refer to rte_pktmbuf_clone() + * for more details). Then new mbuf is allocated for the packet header + * and is prepended to the 'clone' mbuf. + * Second approach doesn't make a clone, it just increment refcnt for all + * input packet segments. Then it allocates new mbuf for the packet header + * and prepends it to the input packet. + * Basically first approach reuses only input packet's data, but creates + * it's own copy of packet's metadata. Second approach reuses both input's + * packet data and metadata. + * The advantage of first approach - is that each outgoing packet has it's + * own copy of metadata, so we can safely modify data pointer of the + * input packet. That allows us to skip creation if the output packet for + * the last destination port, but instead modify input packet's header inplace, + * e.g: for N destination ports we need to invoke mcast_out_pkt (N-1) times. + * The advantage of second approach - less work for each outgoing packet, + * e.g: we skip "clone" operation completely. Though it comes with a price - + * input packet's metadata has to be intact. So for N destination ports we + * need to invoke mcast_out_pkt N times. + * So for small number of outgoing ports (and segments in the input packet) + * first approach will be faster. + * As number of outgoing ports (and/or input segments) will grow, + * second way will become more preferable. + * + * @param pkt + * Input packet mbuf. + * @param use_clone + * Control which of the two approaches described above should be used: + * - 0 - use second approach: + * Don't "clone" input packet. 
+ * Prepend new header directly to the input packet + * - 1 - use first approach: + * Make a "clone" of input packet first. + * Prepend new header to the clone of the input packet + * @return + * - The pointer to the new outgoing packet. + * - NULL if operation failed. + */ +static inline struct rte_mbuf * +mcast_out_pkt(struct rte_mbuf *pkt, int use_clone) +{ + struct rte_mbuf *hdr; + + /* Create new mbuf for the header. */ + if (unlikely ((hdr = rte_pktmbuf_alloc(header_pool)) == NULL)) + return NULL; + + /* If requested, then make a new clone packet. */ + if (use_clone != 0 && + unlikely ((pkt = rte_pktmbuf_clone(pkt, clone_pool)) == NULL)) { + rte_pktmbuf_free(hdr); + return NULL; + } + + /* prepend new header */ + hdr->next = pkt; + + + /* update header's fields */ + hdr->pkt_len = (uint16_t)(hdr->data_len + pkt->pkt_len); + hdr->nb_segs = (uint8_t)(pkt->nb_segs + 1); + + /* copy metadata from source packet*/ + hdr->port = pkt->port; + hdr->vlan_tci = pkt->vlan_tci; + hdr->vlan_tci_outer = pkt->vlan_tci_outer; + hdr->tx_offload = pkt->tx_offload; + hdr->hash = pkt->hash; + + hdr->ol_flags = pkt->ol_flags; + + __rte_mbuf_sanity_check(hdr, 1); + return hdr; +} + +/* + * Write new Ethernet header to the outgoing packet, + * and put it into the outgoing queue for the given port. + */ +static inline void +mcast_send_pkt(struct rte_mbuf *pkt, struct ether_addr *dest_addr, + struct lcore_queue_conf *qconf, uint8_t port) +{ + struct ether_hdr *ethdr; + uint16_t len; + + /* Construct Ethernet header. 
*/ + ethdr = (struct ether_hdr *)rte_pktmbuf_prepend(pkt, (uint16_t)sizeof(*ethdr)); + RTE_MBUF_ASSERT(ethdr != NULL); + + ether_addr_copy(dest_addr, ðdr->d_addr); + ether_addr_copy(&ports_eth_addr[port], ðdr->s_addr); + ethdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4); + + /* Put new packet into the output queue */ + len = qconf->tx_mbufs[port].len; + qconf->tx_mbufs[port].m_table[len] = pkt; + qconf->tx_mbufs[port].len = ++len; + + /* Transmit packets */ + if (unlikely(MAX_PKT_BURST == len)) + send_burst(qconf, port); +} + +/* Multicast forward of the input packet */ +static inline void +mcast_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf) +{ + struct rte_mbuf *mc; + struct ipv4_hdr *iphdr; + uint32_t dest_addr, port_mask, port_num, use_clone; + int32_t hash; + uint8_t port; + union { + uint64_t as_int; + struct ether_addr as_addr; + } dst_eth_addr; + + /* Remove the Ethernet header from the input packet */ + iphdr = (struct ipv4_hdr *)rte_pktmbuf_adj(m, (uint16_t)sizeof(struct ether_hdr)); + RTE_MBUF_ASSERT(iphdr != NULL); + + dest_addr = rte_be_to_cpu_32(iphdr->dst_addr); + + /* + * Check that it is a valid multicast address and + * we have some active ports assigned to it. + */ + if(!IS_IPV4_MCAST(dest_addr) || + (hash = rte_fbk_hash_lookup(mcast_hash, dest_addr)) <= 0 || + (port_mask = hash & enabled_port_mask) == 0) { + rte_pktmbuf_free(m); + return; + } + + /* Calculate number of destination ports. */ + port_num = bitcnt(port_mask); + + /* Should we use rte_pktmbuf_clone() or not. */ + use_clone = (port_num <= MCAST_CLONE_PORTS && + m->nb_segs <= MCAST_CLONE_SEGS); + + /* Mark all packet's segments as referenced port_num times */ + if (use_clone == 0) + rte_pktmbuf_refcnt_update(m, (uint16_t)port_num); + + /* construct destination ethernet address */ + dst_eth_addr.as_int = ETHER_ADDR_FOR_IPV4_MCAST(dest_addr); + + for (port = 0; use_clone != port_mask; port_mask >>= 1, port++) { + + /* Prepare output packet and send it out. 
*/ + if ((port_mask & 1) != 0) { + if (likely ((mc = mcast_out_pkt(m, use_clone)) != NULL)) + mcast_send_pkt(mc, &dst_eth_addr.as_addr, + qconf, port); + else if (use_clone == 0) + rte_pktmbuf_free(m); + } + } + + /* + * If we making clone packets, then, for the last destination port, + * we can overwrite input packet's metadata. + */ + if (use_clone != 0) + mcast_send_pkt(m, &dst_eth_addr.as_addr, qconf, port); + else + rte_pktmbuf_free(m); +} + +/* Send burst of outgoing packet, if timeout expires. */ +static inline void +send_timeout_burst(struct lcore_queue_conf *qconf) +{ + uint64_t cur_tsc; + uint8_t portid; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + cur_tsc = rte_rdtsc(); + if (likely (cur_tsc < qconf->tx_tsc + drain_tsc)) + return; + + for (portid = 0; portid < MAX_PORTS; portid++) { + if (qconf->tx_mbufs[portid].len != 0) + send_burst(qconf, portid); + } + qconf->tx_tsc = cur_tsc; +} + +/* main processing loop */ +static int +main_loop(__rte_unused void *dummy) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + int i, j, nb_rx; + uint8_t portid; + struct lcore_queue_conf *qconf; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + + if (qconf->n_rx_queue == 0) { + RTE_LOG(INFO, IPv4_MULTICAST, "lcore %u has nothing to do\n", + lcore_id); + return 0; + } + + RTE_LOG(INFO, IPv4_MULTICAST, "entering main loop on lcore %u\n", + lcore_id); + + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i]; + RTE_LOG(INFO, IPv4_MULTICAST, " -- lcoreid=%u portid=%d\n", + lcore_id, (int) portid); + } + + while (1) { + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i]; + nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst, + MAX_PKT_BURST); + + /* Prefetch first packets */ + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { + rte_prefetch0(rte_pktmbuf_mtod( + pkts_burst[j], void *)); 
+ } + + /* Prefetch and forward already prefetched packets */ + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ + j + PREFETCH_OFFSET], void *)); + mcast_forward(pkts_burst[j], qconf); + } + + /* Forward remaining prefetched packets */ + for (; j < nb_rx; j++) { + mcast_forward(pkts_burst[j], qconf); + } + } + + /* Send out packets from TX queues */ + send_timeout_burst(qconf); + } +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -q NQ: number of queue (=ports) per lcore (default is 1)\n", + prgname); +} + +static uint32_t +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + + return (uint32_t)pm; +} + +static int +parse_nqueue(const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse numerical string */ + errno = 0; + n = strtoul(q_arg, &end, 0); + if (errno != 0 || end == NULL || *end != '\0' || + n == 0 || n >= MAX_RX_QUEUE_PER_LCORE) + return -1; + + return n; +} + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:q:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + + /* nqueue */ + case 'q': + rx_queue_per_lcore = parse_nqueue(optarg); + if (rx_queue_per_lcore < 0) { + printf("invalid queue number\n"); + 
print_usage(prgname); + return -1; + } + break; + + default: + print_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +static int +init_mcast_hash(void) +{ + uint32_t i; + + mcast_hash_params.socket_id = rte_socket_id(); + mcast_hash = rte_fbk_hash_create(&mcast_hash_params); + if (mcast_hash == NULL){ + return -1; + } + + for (i = 0; i < N_MCAST_GROUPS; i ++){ + if (rte_fbk_hash_add_key(mcast_hash, + mcast_group_table[i].ip, + mcast_group_table[i].port_mask) < 0) { + return -1; + } + } + + return 0; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + int ret; + uint16_t queueid; + unsigned lcore_id = 0, rx_lcore_id = 0; + uint32_t n_tx_queue, nb_lcores; + uint8_t portid; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid IPV4_MULTICAST parameters\n"); + + /* create the mbuf pools */ + packet_pool = rte_pktmbuf_pool_create("packet_pool", NB_PKT_MBUF, 32, + 0, PKT_MBUF_DATA_SIZE, rte_socket_id()); + + if (packet_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init packet mbuf pool\n"); + + header_pool = rte_pktmbuf_pool_create("header_pool", NB_HDR_MBUF, 32, + 0, HDR_MBUF_DATA_SIZE, rte_socket_id()); + + if (header_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init header mbuf pool\n"); + + clone_pool = rte_pktmbuf_pool_create("clone_pool", NB_CLONE_MBUF, 32, + 0, 0, rte_socket_id()); + + if (clone_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init clone mbuf pool\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No physical ports!\n"); + if (nb_ports > MAX_PORTS) + nb_ports = MAX_PORTS; + + nb_lcores = 
rte_lcore_count(); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("Skipping disabled port %d\n", portid); + continue; + } + + qconf = &lcore_queue_conf[rx_lcore_id]; + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) { + + rx_lcore_id ++; + qconf = &lcore_queue_conf[rx_lcore_id]; + + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + } + qconf->rx_queue_list[qconf->n_rx_queue] = portid; + qconf->n_rx_queue++; + + /* init port */ + printf("Initializing port %d on lcore %u... ", portid, + rx_lcore_id); + fflush(stdout); + + n_tx_queue = nb_lcores; + if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) + n_tx_queue = MAX_TX_QUEUE_PER_PORT; + ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue, + &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", + ret, portid); + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + printf(", "); + + /* init one RX queue */ + queueid = 0; + printf("rxq=%hu ", queueid); + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, + rte_eth_dev_socket_id(portid), + NULL, + packet_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, port=%d\n", + ret, portid); + + /* init one TX queue per couple (lcore,port) */ + queueid = 0; + + RTE_LCORE_FOREACH(lcore_id) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + printf("txq=%u,%hu ", lcore_id, queueid); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + txconf->txq_flags = 0; + ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, + rte_lcore_to_socket_id(lcore_id), txconf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: 
err=%d, " + "port=%d\n", ret, portid); + + qconf = &lcore_queue_conf[lcore_id]; + qconf->tx_queue_id[portid] = queueid; + queueid++; + } + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + + printf("done:\n"); + } + + check_all_ports_link_status(nb_ports, enabled_port_mask); + + /* initialize the multicast hash */ + int retval = init_mcast_hash(); + if (retval != 0) + rte_exit(EXIT_FAILURE, "Cannot build the multicast hash\n"); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/kni/Makefile b/examples/kni/Makefile new file mode 100644 index 00000000..6800dd5c --- /dev/null +++ b/examples/kni/Makefile @@ -0,0 +1,55 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(error This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +endif + +# binary name +APP = kni + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/kni/main.c b/examples/kni/main.c new file mode 100644 index 00000000..a5297f28 --- /dev/null +++ b/examples/kni/main.c @@ -0,0 +1,928 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <netinet/in.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <unistd.h> +#include <signal.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_log.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> +#include <rte_cycles.h> +#include <rte_malloc.h> +#include <rte_kni.h> + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 + +/* Max size of a single packet */ +#define MAX_PACKET_SZ 2048 + +/* Size of the data buffer in each mbuf */ +#define MBUF_DATA_SZ (MAX_PACKET_SZ + RTE_PKTMBUF_HEADROOM) + +/* Number of mbufs in mempool that is created */ +#define NB_MBUF (8192 * 16) + +/* How many packets to attempt to read from NIC in one go */ +#define PKT_BURST_SZ 32 + +/* How many objects (mbufs) to keep in per-lcore mempool cache */ +#define MEMPOOL_CACHE_SZ PKT_BURST_SZ + +/* Number of RX ring descriptors */ +#define NB_RXD 128 + +/* Number of TX ring descriptors */ +#define NB_TXD 512 + +/* Total octets in ethernet header */ +#define KNI_ENET_HEADER_SIZE 14 + +/* Total octets in the FCS */ +#define KNI_ENET_FCS_SIZE 4 + +#define KNI_US_PER_SECOND 1000000 +#define KNI_SECOND_PER_DAY 86400 + +#define KNI_MAX_KTHREAD 32 +/* + * Structure of port parameters + */ +struct kni_port_params { + uint8_t port_id;/* Port ID */ + unsigned lcore_rx; /* lcore ID for RX */ 
+ unsigned lcore_tx; /* lcore ID for TX */ + uint32_t nb_lcore_k; /* Number of lcores for KNI multi kernel threads */ + uint32_t nb_kni; /* Number of KNI devices to be created */ + unsigned lcore_k[KNI_MAX_KTHREAD]; /* lcore ID list for kthreads */ + struct rte_kni *kni[KNI_MAX_KTHREAD]; /* KNI context pointers */ +} __rte_cache_aligned; + +static struct kni_port_params *kni_port_params_array[RTE_MAX_ETHPORTS]; + + +/* Options for configuring ethernet port */ +static struct rte_eth_conf port_conf = { + .rxmode = { + .header_split = 0, /* Header Split disabled */ + .hw_ip_checksum = 0, /* IP checksum offload disabled */ + .hw_vlan_filter = 0, /* VLAN filtering disabled */ + .jumbo_frame = 0, /* Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /* CRC stripping by hardware disabled */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +/* Mempool for mbufs */ +static struct rte_mempool * pktmbuf_pool = NULL; + +/* Mask of enabled ports */ +static uint32_t ports_mask = 0; +/* Ports set in promiscuous mode off by default. 
*/ +static int promiscuous_on = 0; + +/* Structure type for recording kni interface specific stats */ +struct kni_interface_stats { + /* number of pkts received from NIC, and sent to KNI */ + uint64_t rx_packets; + + /* number of pkts received from NIC, but failed to send to KNI */ + uint64_t rx_dropped; + + /* number of pkts received from KNI, and sent to NIC */ + uint64_t tx_packets; + + /* number of pkts received from KNI, but failed to send to NIC */ + uint64_t tx_dropped; +}; + +/* kni device statistics array */ +static struct kni_interface_stats kni_stats[RTE_MAX_ETHPORTS]; + +static int kni_change_mtu(uint8_t port_id, unsigned new_mtu); +static int kni_config_network_interface(uint8_t port_id, uint8_t if_up); + +static rte_atomic32_t kni_stop = RTE_ATOMIC32_INIT(0); + +/* Print out statistics on packets handled */ +static void +print_stats(void) +{ + uint8_t i; + + printf("\n**KNI example application statistics**\n" + "====== ============== ============ ============ ============ ============\n" + " Port Lcore(RX/TX) rx_packets rx_dropped tx_packets tx_dropped\n" + "------ -------------- ------------ ------------ ------------ ------------\n"); + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (!kni_port_params_array[i]) + continue; + + printf("%7d %10u/%2u %13"PRIu64" %13"PRIu64" %13"PRIu64" " + "%13"PRIu64"\n", i, + kni_port_params_array[i]->lcore_rx, + kni_port_params_array[i]->lcore_tx, + kni_stats[i].rx_packets, + kni_stats[i].rx_dropped, + kni_stats[i].tx_packets, + kni_stats[i].tx_dropped); + } + printf("====== ============== ============ ============ ============ ============\n"); +} + +/* Custom handling of signals to handle stats and kni processing */ +static void +signal_handler(int signum) +{ + /* When we receive a USR1 signal, print stats */ + if (signum == SIGUSR1) { + print_stats(); + } + + /* When we receive a USR2 signal, reset stats */ + if (signum == SIGUSR2) { + memset(&kni_stats, 0, sizeof(kni_stats)); + printf("\n**Statistics have been 
reset**\n"); + return; + } + + /* When we receive a RTMIN or SIGINT signal, stop kni processing */ + if (signum == SIGRTMIN || signum == SIGINT){ + printf("SIGRTMIN is received, and the KNI processing is " + "going to stop\n"); + rte_atomic32_inc(&kni_stop); + return; + } +} + +static void +kni_burst_free_mbufs(struct rte_mbuf **pkts, unsigned num) +{ + unsigned i; + + if (pkts == NULL) + return; + + for (i = 0; i < num; i++) { + rte_pktmbuf_free(pkts[i]); + pkts[i] = NULL; + } +} + +/** + * Interface to burst rx and enqueue mbufs into rx_q + */ +static void +kni_ingress(struct kni_port_params *p) +{ + uint8_t i, port_id; + unsigned nb_rx, num; + uint32_t nb_kni; + struct rte_mbuf *pkts_burst[PKT_BURST_SZ]; + + if (p == NULL) + return; + + nb_kni = p->nb_kni; + port_id = p->port_id; + for (i = 0; i < nb_kni; i++) { + /* Burst rx from eth */ + nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ); + if (unlikely(nb_rx > PKT_BURST_SZ)) { + RTE_LOG(ERR, APP, "Error receiving from eth\n"); + return; + } + /* Burst tx to kni */ + num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx); + kni_stats[port_id].rx_packets += num; + + rte_kni_handle_request(p->kni[i]); + if (unlikely(num < nb_rx)) { + /* Free mbufs not tx to kni interface */ + kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num); + kni_stats[port_id].rx_dropped += nb_rx - num; + } + } +} + +/** + * Interface to dequeue mbufs from tx_q and burst tx + */ +static void +kni_egress(struct kni_port_params *p) +{ + uint8_t i, port_id; + unsigned nb_tx, num; + uint32_t nb_kni; + struct rte_mbuf *pkts_burst[PKT_BURST_SZ]; + + if (p == NULL) + return; + + nb_kni = p->nb_kni; + port_id = p->port_id; + for (i = 0; i < nb_kni; i++) { + /* Burst rx from kni */ + num = rte_kni_rx_burst(p->kni[i], pkts_burst, PKT_BURST_SZ); + if (unlikely(num > PKT_BURST_SZ)) { + RTE_LOG(ERR, APP, "Error receiving from KNI\n"); + return; + } + /* Burst tx to eth */ + nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num); + 
kni_stats[port_id].tx_packets += nb_tx; + if (unlikely(nb_tx < num)) { + /* Free mbufs not tx to NIC */ + kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx); + kni_stats[port_id].tx_dropped += num - nb_tx; + } + } +} + +static int +main_loop(__rte_unused void *arg) +{ + uint8_t i, nb_ports = rte_eth_dev_count(); + int32_t f_stop; + const unsigned lcore_id = rte_lcore_id(); + enum lcore_rxtx { + LCORE_NONE, + LCORE_RX, + LCORE_TX, + LCORE_MAX + }; + enum lcore_rxtx flag = LCORE_NONE; + + nb_ports = (uint8_t)(nb_ports < RTE_MAX_ETHPORTS ? + nb_ports : RTE_MAX_ETHPORTS); + for (i = 0; i < nb_ports; i++) { + if (!kni_port_params_array[i]) + continue; + if (kni_port_params_array[i]->lcore_rx == (uint8_t)lcore_id) { + flag = LCORE_RX; + break; + } else if (kni_port_params_array[i]->lcore_tx == + (uint8_t)lcore_id) { + flag = LCORE_TX; + break; + } + } + + if (flag == LCORE_RX) { + RTE_LOG(INFO, APP, "Lcore %u is reading from port %d\n", + kni_port_params_array[i]->lcore_rx, + kni_port_params_array[i]->port_id); + while (1) { + f_stop = rte_atomic32_read(&kni_stop); + if (f_stop) + break; + kni_ingress(kni_port_params_array[i]); + } + } else if (flag == LCORE_TX) { + RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n", + kni_port_params_array[i]->lcore_tx, + kni_port_params_array[i]->port_id); + while (1) { + f_stop = rte_atomic32_read(&kni_stop); + if (f_stop) + break; + kni_egress(kni_port_params_array[i]); + } + } else + RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id); + + return 0; +} + +/* Display usage instructions */ +static void +print_usage(const char *prgname) +{ + RTE_LOG(INFO, APP, "\nUsage: %s [EAL options] -- -p PORTMASK -P " + "[--config (port,lcore_rx,lcore_tx,lcore_kthread...)" + "[,(port,lcore_rx,lcore_tx,lcore_kthread...)]]\n" + " -p PORTMASK: hex bitmask of ports to use\n" + " -P : enable promiscuous mode\n" + " --config (port,lcore_rx,lcore_tx,lcore_kthread...): " + "port and lcore configurations\n", + prgname); +} + +/* Convert 
string to unsigned number. 0 is returned if error occurs */ +static uint32_t +parse_unsigned(const char *portmask) +{ + char *end = NULL; + unsigned long num; + + num = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + + return (uint32_t)num; +} + +static void +print_config(void) +{ + uint32_t i, j; + struct kni_port_params **p = kni_port_params_array; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (!p[i]) + continue; + RTE_LOG(DEBUG, APP, "Port ID: %d\n", p[i]->port_id); + RTE_LOG(DEBUG, APP, "Rx lcore ID: %u, Tx lcore ID: %u\n", + p[i]->lcore_rx, p[i]->lcore_tx); + for (j = 0; j < p[i]->nb_lcore_k; j++) + RTE_LOG(DEBUG, APP, "Kernel thread lcore ID: %u\n", + p[i]->lcore_k[j]); + } +} + +static int +parse_config(const char *arg) +{ + const char *p, *p0 = arg; + char s[256], *end; + unsigned size; + enum fieldnames { + FLD_PORT = 0, + FLD_LCORE_RX, + FLD_LCORE_TX, + _NUM_FLD = KNI_MAX_KTHREAD + 3, + }; + int i, j, nb_token; + char *str_fld[_NUM_FLD]; + unsigned long int_fld[_NUM_FLD]; + uint8_t port_id, nb_kni_port_params = 0; + + memset(&kni_port_params_array, 0, sizeof(kni_port_params_array)); + while (((p = strchr(p0, '(')) != NULL) && + nb_kni_port_params < RTE_MAX_ETHPORTS) { + p++; + if ((p0 = strchr(p, ')')) == NULL) + goto fail; + size = p0 - p; + if (size >= sizeof(s)) { + printf("Invalid config parameters\n"); + goto fail; + } + snprintf(s, sizeof(s), "%.*s", size, p); + nb_token = rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ','); + if (nb_token <= FLD_LCORE_TX) { + printf("Invalid config parameters\n"); + goto fail; + } + for (i = 0; i < nb_token; i++) { + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i]) { + printf("Invalid config parameters\n"); + goto fail; + } + } + + i = 0; + port_id = (uint8_t)int_fld[i++]; + if (port_id >= RTE_MAX_ETHPORTS) { + printf("Port ID %d could not exceed the maximum %d\n", + port_id, RTE_MAX_ETHPORTS); + goto fail; + 
} + if (kni_port_params_array[port_id]) { + printf("Port %d has been configured\n", port_id); + goto fail; + } + kni_port_params_array[port_id] = + rte_zmalloc("KNI_port_params", + sizeof(struct kni_port_params), RTE_CACHE_LINE_SIZE); + kni_port_params_array[port_id]->port_id = port_id; + kni_port_params_array[port_id]->lcore_rx = + (uint8_t)int_fld[i++]; + kni_port_params_array[port_id]->lcore_tx = + (uint8_t)int_fld[i++]; + if (kni_port_params_array[port_id]->lcore_rx >= RTE_MAX_LCORE || + kni_port_params_array[port_id]->lcore_tx >= RTE_MAX_LCORE) { + printf("lcore_rx %u or lcore_tx %u ID could not " + "exceed the maximum %u\n", + kni_port_params_array[port_id]->lcore_rx, + kni_port_params_array[port_id]->lcore_tx, + (unsigned)RTE_MAX_LCORE); + goto fail; + } + for (j = 0; i < nb_token && j < KNI_MAX_KTHREAD; i++, j++) + kni_port_params_array[port_id]->lcore_k[j] = + (uint8_t)int_fld[i]; + kni_port_params_array[port_id]->nb_lcore_k = j; + } + print_config(); + + return 0; + +fail: + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (kni_port_params_array[i]) { + rte_free(kni_port_params_array[i]); + kni_port_params_array[i] = NULL; + } + } + + return -1; +} + +static int +validate_parameters(uint32_t portmask) +{ + uint32_t i; + + if (!portmask) { + printf("No port configured in port mask\n"); + return -1; + } + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (((portmask & (1 << i)) && !kni_port_params_array[i]) || + (!(portmask & (1 << i)) && kni_port_params_array[i])) + rte_exit(EXIT_FAILURE, "portmask is not consistent " + "to port ids specified in --config\n"); + + if (kni_port_params_array[i] && !rte_lcore_is_enabled(\ + (unsigned)(kni_port_params_array[i]->lcore_rx))) + rte_exit(EXIT_FAILURE, "lcore id %u for " + "port %d receiving not enabled\n", + kni_port_params_array[i]->lcore_rx, + kni_port_params_array[i]->port_id); + + if (kni_port_params_array[i] && !rte_lcore_is_enabled(\ + (unsigned)(kni_port_params_array[i]->lcore_tx))) + rte_exit(EXIT_FAILURE, 
"lcore id %u for " + "port %d transmitting not enabled\n", + kni_port_params_array[i]->lcore_tx, + kni_port_params_array[i]->port_id); + + } + + return 0; +} + +#define CMDLINE_OPT_CONFIG "config" + +/* Parse the arguments given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, longindex, ret = 0; + const char *prgname = argv[0]; + static struct option longopts[] = { + {CMDLINE_OPT_CONFIG, required_argument, NULL, 0}, + {NULL, 0, NULL, 0} + }; + + /* Disable printing messages within getopt() */ + opterr = 0; + + /* Parse command line */ + while ((opt = getopt_long(argc, argv, "p:P", longopts, + &longindex)) != EOF) { + switch (opt) { + case 'p': + ports_mask = parse_unsigned(optarg); + break; + case 'P': + promiscuous_on = 1; + break; + case 0: + if (!strncmp(longopts[longindex].name, + CMDLINE_OPT_CONFIG, + sizeof(CMDLINE_OPT_CONFIG))) { + ret = parse_config(optarg); + if (ret) { + printf("Invalid config\n"); + print_usage(prgname); + return -1; + } + } + break; + default: + print_usage(prgname); + rte_exit(EXIT_FAILURE, "Invalid option specified\n"); + } + } + + /* Check that options were parsed ok */ + if (validate_parameters(ports_mask) < 0) { + print_usage(prgname); + rte_exit(EXIT_FAILURE, "Invalid parameters\n"); + } + + return ret; +} + +/* Initialize KNI subsystem */ +static void +init_kni(void) +{ + unsigned int num_of_kni_ports = 0, i; + struct kni_port_params **params = kni_port_params_array; + + /* Calculate the maximum number of KNI interfaces that will be used */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (kni_port_params_array[i]) { + num_of_kni_ports += (params[i]->nb_lcore_k ? 
+ params[i]->nb_lcore_k : 1); + } + } + + /* Invoke rte KNI init to preallocate the ports */ + rte_kni_init(num_of_kni_ports); +} + +/* Initialise a single port on an Ethernet device */ +static void +init_port(uint8_t port) +{ + int ret; + + /* Initialise device and RX/TX queues */ + RTE_LOG(INFO, APP, "Initialising port %u ...\n", (unsigned)port); + fflush(stdout); + ret = rte_eth_dev_configure(port, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)\n", + (unsigned)port, ret); + + ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, + rte_eth_dev_socket_id(port), NULL, pktmbuf_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not setup up RX queue for " + "port%u (%d)\n", (unsigned)port, ret); + + ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, + rte_eth_dev_socket_id(port), NULL); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not setup up TX queue for " + "port%u (%d)\n", (unsigned)port, ret); + + ret = rte_eth_dev_start(port); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not start port%u (%d)\n", + (unsigned)port, ret); + + if (promiscuous_on) + rte_eth_promiscuous_enable(port); +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status\n"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + 
(link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +/* Callback for request of changing MTU */ +static int +kni_change_mtu(uint8_t port_id, unsigned new_mtu) +{ + int ret; + struct rte_eth_conf conf; + + if (port_id >= rte_eth_dev_count()) { + RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id); + return -EINVAL; + } + + RTE_LOG(INFO, APP, "Change MTU of port %d to %u\n", port_id, new_mtu); + + /* Stop specific port */ + rte_eth_dev_stop(port_id); + + memcpy(&conf, &port_conf, sizeof(conf)); + /* Set new MTU */ + if (new_mtu > ETHER_MAX_LEN) + conf.rxmode.jumbo_frame = 1; + else + conf.rxmode.jumbo_frame = 0; + + /* mtu + length of header + length of FCS = max pkt length */ + conf.rxmode.max_rx_pkt_len = new_mtu + KNI_ENET_HEADER_SIZE + + KNI_ENET_FCS_SIZE; + ret = rte_eth_dev_configure(port_id, 1, 1, &conf); + if (ret < 0) { + RTE_LOG(ERR, APP, "Fail to reconfigure port %d\n", port_id); + return ret; + } + + /* Restart specific port */ + ret = rte_eth_dev_start(port_id); + if (ret < 0) { + RTE_LOG(ERR, APP, "Fail to restart port %d\n", port_id); + return ret; + } + + return 0; +} + +/* Callback for request of configuring network interface up/down */ +static int +kni_config_network_interface(uint8_t port_id, uint8_t if_up) +{ + int ret = 0; + + if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id); + return -EINVAL; + } 
+ + RTE_LOG(INFO, APP, "Configure network interface of %d %s\n", + port_id, if_up ? "up" : "down"); + + if (if_up != 0) { /* Configure network interface up */ + rte_eth_dev_stop(port_id); + ret = rte_eth_dev_start(port_id); + } else /* Configure network interface down */ + rte_eth_dev_stop(port_id); + + if (ret < 0) + RTE_LOG(ERR, APP, "Failed to start port %d\n", port_id); + + return ret; +} + +static int +kni_alloc(uint8_t port_id) +{ + uint8_t i; + struct rte_kni *kni; + struct rte_kni_conf conf; + struct kni_port_params **params = kni_port_params_array; + + if (port_id >= RTE_MAX_ETHPORTS || !params[port_id]) + return -1; + + params[port_id]->nb_kni = params[port_id]->nb_lcore_k ? + params[port_id]->nb_lcore_k : 1; + + for (i = 0; i < params[port_id]->nb_kni; i++) { + /* Clear conf at first */ + memset(&conf, 0, sizeof(conf)); + if (params[port_id]->nb_lcore_k) { + snprintf(conf.name, RTE_KNI_NAMESIZE, + "vEth%u_%u", port_id, i); + conf.core_id = params[port_id]->lcore_k[i]; + conf.force_bind = 1; + } else + snprintf(conf.name, RTE_KNI_NAMESIZE, + "vEth%u", port_id); + conf.group_id = (uint16_t)port_id; + conf.mbuf_size = MAX_PACKET_SZ; + /* + * The first KNI device associated to a port + * is the master, for multiple kernel thread + * environment. 
+ */ + if (i == 0) { + struct rte_kni_ops ops; + struct rte_eth_dev_info dev_info; + + memset(&dev_info, 0, sizeof(dev_info)); + rte_eth_dev_info_get(port_id, &dev_info); + conf.addr = dev_info.pci_dev->addr; + conf.id = dev_info.pci_dev->id; + + memset(&ops, 0, sizeof(ops)); + ops.port_id = port_id; + ops.change_mtu = kni_change_mtu; + ops.config_network_if = kni_config_network_interface; + + kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops); + } else + kni = rte_kni_alloc(pktmbuf_pool, &conf, NULL); + + if (!kni) + rte_exit(EXIT_FAILURE, "Fail to create kni for " + "port: %d\n", port_id); + params[port_id]->kni[i] = kni; + } + + return 0; +} + +static int +kni_free_kni(uint8_t port_id) +{ + uint8_t i; + struct kni_port_params **p = kni_port_params_array; + + if (port_id >= RTE_MAX_ETHPORTS || !p[port_id]) + return -1; + + for (i = 0; i < p[port_id]->nb_kni; i++) { + rte_kni_release(p[port_id]->kni[i]); + p[port_id]->kni[i] = NULL; + } + rte_eth_dev_stop(port_id); + + return 0; +} + +/* Initialise ports/queues etc. 
and start main loop on each core */ +int +main(int argc, char** argv) +{ + int ret; + uint8_t nb_sys_ports, port; + unsigned i; + + /* Associate signal_handler function with the USR1, USR2, RTMIN and INT signals */ + signal(SIGUSR1, signal_handler); + signal(SIGUSR2, signal_handler); + signal(SIGRTMIN, signal_handler); + signal(SIGINT, signal_handler); + + /* Initialise EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)\n", ret); + argc -= ret; + argv += ret; + + /* Parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not parse input parameters\n"); + + /* Create the mbuf pool */ + pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, + MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id()); + if (pktmbuf_pool == NULL) { + rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool\n"); + return -1; + } + + /* Get number of ports found in scan */ + nb_sys_ports = rte_eth_dev_count(); + if (nb_sys_ports == 0) + rte_exit(EXIT_FAILURE, "No supported Ethernet device found\n"); + + /* Check if the configured port ID is valid */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) + if (kni_port_params_array[i] && i >= nb_sys_ports) + rte_exit(EXIT_FAILURE, "Configured invalid " + "port ID %u\n", i); + + /* Initialize KNI subsystem */ + init_kni(); + + /* Initialise each port */ + for (port = 0; port < nb_sys_ports; port++) { + /* Skip ports that are not enabled */ + if (!(ports_mask & (1 << port))) + continue; + init_port(port); + + if (port >= RTE_MAX_ETHPORTS) + rte_exit(EXIT_FAILURE, "Can not use more than " + "%d ports for kni\n", RTE_MAX_ETHPORTS); + + kni_alloc(port); + } + check_all_ports_link_status(nb_sys_ports, ports_mask); + + /* Launch per-lcore function on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(i) { + if (rte_eal_wait_lcore(i) < 0) + return -1; + } + + /* Release resources */ + for (port = 0; port < 
nb_sys_ports; port++) { + if (!(ports_mask & (1 << port))) + continue; + kni_free_kni(port); + } +#ifdef RTE_LIBRTE_XEN_DOM0 + rte_kni_close(); +#endif + for (i = 0; i < RTE_MAX_ETHPORTS; i++) + if (kni_port_params_array[i]) { + rte_free(kni_port_params_array[i]); + kni_port_params_array[i] = NULL; + } + + return 0; +} diff --git a/examples/l2fwd-cat/Makefile b/examples/l2fwd-cat/Makefile new file mode 100644 index 00000000..ae921ade --- /dev/null +++ b/examples/l2fwd-cat/Makefile @@ -0,0 +1,70 @@ +# BSD LICENSE +# +# Copyright(c) 2016 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +ifeq ($(PQOS_INSTALL_PATH),) +$(error "Please define PQOS_INSTALL_PATH environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +# Location of PQoS library and includes, +PQOS_LIBRARY_PATH = $(PQOS_INSTALL_PATH)/libpqos.a + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-cat + +# all source are stored in SRCS-y +SRCS-y := l2fwd-cat.c cat.c + +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +EXTRA_CFLAGS += -O3 -g -Wfatal-errors + +CFLAGS += -I$(PQOS_INSTALL_PATH)/../include +CFLAGS_cat.o := -D_GNU_SOURCE + +LDLIBS += -L$(PQOS_INSTALL_PATH) +LDLIBS += $(PQOS_LIBRARY_PATH) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-cat/cat.c b/examples/l2fwd-cat/cat.c new file mode 100644 index 00000000..bad39305 --- /dev/null +++ b/examples/l2fwd-cat/cat.c @@ -0,0 +1,996 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <getopt.h> +#include <inttypes.h> +#include <limits.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> + +#include <rte_common.h> +#include <rte_memcpy.h> + +#include <pqos.h> + +#include "cat.h" + +#define BITS_PER_HEX 4 +#define PQOS_MAX_SOCKETS 8 +#define PQOS_MAX_SOCKET_CORES 64 +#define PQOS_MAX_CORES (PQOS_MAX_SOCKET_CORES * PQOS_MAX_SOCKETS) + +static const struct pqos_cap *m_cap; +static const struct pqos_cpuinfo *m_cpu; +static const struct pqos_capability *m_cap_l3ca; +static unsigned m_sockets[PQOS_MAX_SOCKETS]; +static unsigned m_sock_count; +static struct cat_config m_config[PQOS_MAX_CORES]; +static unsigned m_config_count; + +static unsigned +bits_count(uint64_t bitmask) +{ + unsigned count = 0; + + for (; bitmask != 0; count++) + bitmask &= bitmask - 1; + + return count; +} + +/* + * Parse elem, the elem could be single number/range or '(' ')' group + * 1) A single number elem, it's just a simple digit. e.g. 9 + * 2) A single range elem, two digits with a '-' between. e.g. 2-6 + * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6) + * Within group elem, '-' used for a range separator; + * ',' used for a single number. 
+ */ +static int +parse_set(const char *input, rte_cpuset_t *cpusetp) +{ + unsigned idx; + const char *str = input; + char *end = NULL; + unsigned min, max; + const unsigned num = PQOS_MAX_CORES; + + CPU_ZERO(cpusetp); + + while (isblank(*str)) + str++; + + /* only digit or left bracket is qualify for start point */ + if ((!isdigit(*str) && *str != '(') || *str == '\0') + return -1; + + /* process single number or single range of number */ + if (*str != '(') { + errno = 0; + idx = strtoul(str, &end, 10); + + if (errno || end == NULL || idx >= num) + return -1; + + while (isblank(*end)) + end++; + + min = idx; + max = idx; + if (*end == '-') { + /* process single <number>-<number> */ + end++; + while (isblank(*end)) + end++; + if (!isdigit(*end)) + return -1; + + errno = 0; + idx = strtoul(end, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + max = idx; + while (isblank(*end)) + end++; + if (*end != ',' && *end != '\0') + return -1; + } + + if (*end != ',' && *end != '\0' && *end != '@') + return -1; + + for (idx = RTE_MIN(min, max); idx <= RTE_MAX(min, max); + idx++) + CPU_SET(idx, cpusetp); + + return end - input; + } + + /* process set within bracket */ + str++; + while (isblank(*str)) + str++; + if (*str == '\0') + return -1; + + min = PQOS_MAX_CORES; + do { + + /* go ahead to the first digit */ + while (isblank(*str)) + str++; + if (!isdigit(*str)) + return -1; + + /* get the digit value */ + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + + /* go ahead to separator '-',',' and ')' */ + while (isblank(*end)) + end++; + if (*end == '-') { + if (min == PQOS_MAX_CORES) + min = idx; + else /* avoid continuous '-' */ + return -1; + } else if ((*end == ',') || (*end == ')')) { + max = idx; + if (min == PQOS_MAX_CORES) + min = idx; + for (idx = RTE_MIN(min, max); idx <= RTE_MAX(min, max); + idx++) + CPU_SET(idx, cpusetp); + + min = PQOS_MAX_CORES; + } else + return -1; + + str = end + 1; + } while 
(*end != '\0' && *end != ')'); + + return str - input; +} + +/* Test if bitmask is contiguous */ +static int +is_contiguous(uint64_t bitmask) +{ + /* check if bitmask is contiguous */ + unsigned i = 0; + unsigned j = 0; + const unsigned max_idx = (sizeof(bitmask) * CHAR_BIT); + + if (bitmask == 0) + return 0; + + for (i = 0; i < max_idx; i++) { + if (((1ULL << i) & bitmask) != 0) + j++; + else if (j > 0) + break; + } + + if (bits_count(bitmask) != j) { + printf("PQOS: mask 0x%llx is not contiguous.\n", + (unsigned long long)bitmask); + return 0; + } + + return 1; +} + +/* + * The format pattern: --l3ca='<cbm@cpus>[,<(ccbm,dcbm)@cpus>...]' + * cbm could be a single mask or for a CDP enabled system, a group of two masks + * ("code cbm" and "data cbm") + * '(' and ')' are necessary if it's a group. + * cpus could be a single digit/range or a group. + * '(' and ')' are necessary if it's a group. + * + * e.g. '0x00F00@(1,3), 0x0FF00@(4-6), 0xF0000@7' + * - CPUs 1 and 3 share its 4 ways with CPUs 4, 5 and 6; + * - CPUs 4,5 and 6 share half (4 out of 8 ways) of its L3 with 1 and 3; + * - CPUs 4,5 and 6 have exclusive access to 4 out of 8 ways; + * - CPU 7 has exclusive access to all of its 4 ways; + * + * e.g. 
'(0x00C00,0x00300)@(1,3)' for a CDP enabled system + * - cpus 1 and 3 have access to 2 ways for code and 2 ways for data, + * code and data ways are not overlapping.; + */ +static int +parse_l3ca(const char *l3ca) +{ + unsigned idx = 0; + const char *cbm_start = NULL; + char *cbm_end = NULL; + const char *end = NULL; + int offset; + rte_cpuset_t cpuset; + uint64_t mask = 0; + uint64_t cmask = 0; + + if (l3ca == NULL) + goto err; + + /* Get cbm */ + do { + CPU_ZERO(&cpuset); + mask = 0; + cmask = 0; + + while (isblank(*l3ca)) + l3ca++; + + if (*l3ca == '\0') + goto err; + + /* record mask_set start point */ + cbm_start = l3ca; + + /* go across a complete bracket */ + if (*cbm_start == '(') { + l3ca += strcspn(l3ca, ")"); + if (*l3ca++ == '\0') + goto err; + } + + /* scan the separator '@', ','(next) or '\0'(finish) */ + l3ca += strcspn(l3ca, "@,"); + + if (*l3ca == '@') { + /* explicit assign cpu_set */ + offset = parse_set(l3ca + 1, &cpuset); + if (offset < 0 || CPU_COUNT(&cpuset) == 0) + goto err; + + end = l3ca + 1 + offset; + } else + goto err; + + if (*end != ',' && *end != '\0') + goto err; + + /* parse mask_set from start point */ + if (*cbm_start == '(') { + cbm_start++; + + while (isblank(*cbm_start)) + cbm_start++; + + if (!isxdigit(*cbm_start)) + goto err; + + errno = 0; + cmask = strtoul(cbm_start, &cbm_end, 16); + if (errno != 0 || cbm_end == NULL || cmask == 0) + goto err; + + while (isblank(*cbm_end)) + cbm_end++; + + if (*cbm_end != ',') + goto err; + + cbm_end++; + + while (isblank(*cbm_end)) + cbm_end++; + + if (!isxdigit(*cbm_end)) + goto err; + + errno = 0; + mask = strtoul(cbm_end, &cbm_end, 16); + if (errno != 0 || cbm_end == NULL || mask == 0) + goto err; + } else { + while (isblank(*cbm_start)) + cbm_start++; + + if (!isxdigit(*cbm_start)) + goto err; + + errno = 0; + mask = strtoul(cbm_start, &cbm_end, 16); + if (errno != 0 || cbm_end == NULL || mask == 0) + goto err; + + } + + if (mask == 0 || is_contiguous(mask) == 0) + goto err; + + if 
(cmask != 0 && is_contiguous(cmask) == 0) + goto err; + + rte_memcpy(&m_config[idx].cpumask, + &cpuset, sizeof(rte_cpuset_t)); + + if (cmask != 0) { + m_config[idx].cdp = 1; + m_config[idx].code_mask = cmask; + m_config[idx].data_mask = mask; + } else + m_config[idx].mask = mask; + + m_config_count++; + + l3ca = end + 1; + idx++; + } while (*end != '\0' && idx < PQOS_MAX_CORES); + + if (m_config_count == 0) + goto err; + + return 0; + +err: + return -EINVAL; +} + +static int +check_cpus_overlapping(void) +{ + unsigned i = 0; + unsigned j = 0; + rte_cpuset_t mask; + + CPU_ZERO(&mask); + + for (i = 0; i < m_config_count; i++) { + for (j = i + 1; j < m_config_count; j++) { + CPU_AND(&mask, + &m_config[i].cpumask, + &m_config[j].cpumask); + + if (CPU_COUNT(&mask) != 0) { + printf("PQOS: Requested CPUs sets are " + "overlapping.\n"); + return -EINVAL; + } + } + } + + return 0; +} + +static int +check_cpus(void) +{ + unsigned i = 0; + unsigned cpu_id = 0; + unsigned cos_id = 0; + int ret = 0; + + for (i = 0; i < m_config_count; i++) { + for (cpu_id = 0; cpu_id < PQOS_MAX_CORES; cpu_id++) { + if (CPU_ISSET(cpu_id, &m_config[i].cpumask) != 0) { + + ret = pqos_cpu_check_core(m_cpu, cpu_id); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: %u is not a valid " + "logical core id.\n", cpu_id); + ret = -ENODEV; + goto exit; + } + + ret = pqos_l3ca_assoc_get(cpu_id, &cos_id); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Failed to read COS " + "associated to cpu %u.\n", + cpu_id); + ret = -EFAULT; + goto exit; + } + + /* + * Check if COS assigned to lcore is different + * then default one (#0) + */ + if (cos_id != 0) { + printf("PQOS: cpu %u has already " + "associated COS#%u. 
" + "Please reset L3CA.\n", + cpu_id, cos_id); + ret = -EBUSY; + goto exit; + } + } + } + } + +exit: + return ret; +} + +static int +check_cdp(void) +{ + unsigned i = 0; + + for (i = 0; i < m_config_count; i++) { + if (m_config[i].cdp == 1 && m_cap_l3ca->u.l3ca->cdp_on == 0) { + if (m_cap_l3ca->u.l3ca->cdp == 0) { + printf("PQOS: CDP requested but not " + "supported.\n"); + } else { + printf("PQOS: CDP requested but not enabled. " + "Please enable CDP.\n"); + } + return -ENOTSUP; + } + } + + return 0; +} + +static int +check_cbm_len_and_contention(void) +{ + unsigned i = 0; + uint64_t mask = 0; + const uint64_t not_cbm = (UINT64_MAX << (m_cap_l3ca->u.l3ca->num_ways)); + const uint64_t cbm_contention_mask = m_cap_l3ca->u.l3ca->way_contention; + int ret = 0; + + for (i = 0; i < m_config_count; i++) { + if (m_config[i].cdp == 1) + mask = m_config[i].code_mask | m_config[i].data_mask; + else + mask = m_config[i].mask; + + if ((mask & not_cbm) != 0) { + printf("PQOS: One or more of requested CBM masks not " + "supported by system (too long).\n"); + ret = -ENOTSUP; + break; + } + + /* Just a warning */ + if ((mask & cbm_contention_mask) != 0) { + printf("PQOS: One or more of requested CBM masks " + "overlap CBM contention mask.\n"); + break; + } + + } + + return ret; +} + +static int +check_and_select_classes(unsigned cos_id_map[][PQOS_MAX_SOCKETS]) +{ + unsigned i = 0; + unsigned j = 0; + unsigned phy_pkg_id = 0; + unsigned cos_id = 0; + unsigned cpu_id = 0; + unsigned phy_pkg_lcores[PQOS_MAX_SOCKETS][m_config_count]; + const unsigned cos_num = m_cap_l3ca->u.l3ca->num_classes; + unsigned used_cos_table[PQOS_MAX_SOCKETS][cos_num]; + int ret = 0; + + memset(phy_pkg_lcores, 0, sizeof(phy_pkg_lcores)); + memset(used_cos_table, 0, sizeof(used_cos_table)); + + /* detect currently used COS */ + for (j = 0; j < m_cpu->num_cores; j++) { + cpu_id = m_cpu->cores[j].lcore; + + ret = pqos_l3ca_assoc_get(cpu_id, &cos_id); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Failed to read 
COS associated to " + "cpu %u on phy_pkg %u.\n", cpu_id, phy_pkg_id); + ret = -EFAULT; + goto exit; + } + + ret = pqos_cpu_get_socketid(m_cpu, cpu_id, &phy_pkg_id); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Failed to get socket for cpu %u\n", + cpu_id); + ret = -EFAULT; + goto exit; + } + + /* Mark COS as used */ + if (used_cos_table[phy_pkg_id][cos_id] == 0) + used_cos_table[phy_pkg_id][cos_id]++; + } + + /* look for avail. COS to fulfill requested config */ + for (i = 0; i < m_config_count; i++) { + for (j = 0; j < m_cpu->num_cores; j++) { + cpu_id = m_cpu->cores[j].lcore; + if (CPU_ISSET(cpu_id, &m_config[i].cpumask) == 0) + continue; + + ret = pqos_cpu_get_socketid(m_cpu, cpu_id, &phy_pkg_id); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Failed to get socket for " + "cpu %u\n", cpu_id); + ret = -EFAULT; + goto exit; + } + + /* + * Check if we already have COS selected + * to be used for that group on that socket + */ + if (phy_pkg_lcores[phy_pkg_id][i] != 0) + continue; + + phy_pkg_lcores[phy_pkg_id][i]++; + + /* Search for avail. 
COS to be used on that socket */ + for (cos_id = 0; cos_id < cos_num; cos_id++) { + if (used_cos_table[phy_pkg_id][cos_id] == 0) { + used_cos_table[phy_pkg_id][cos_id]++; + cos_id_map[i][phy_pkg_id] = cos_id; + break; + } + } + + /* If there is no COS available ...*/ + if (cos_id == cos_num) { + ret = -E2BIG; + goto exit; + } + } + } + +exit: + if (ret != 0) + printf("PQOS: Not enough available COS to configure " + "requested configuration.\n"); + + return ret; +} + +static int +configure_cat(unsigned cos_id_map[][PQOS_MAX_SOCKETS]) +{ + unsigned phy_pkg_id = 0; + unsigned cpu_id = 0; + unsigned cos_id = 0; + unsigned i = 0; + unsigned j = 0; + struct pqos_l3ca l3ca = {0}; + int ret = 0; + + for (i = 0; i < m_config_count; i++) { + memset(&l3ca, 0, sizeof(l3ca)); + + l3ca.cdp = m_config[i].cdp; + if (m_config[i].cdp == 1) { + l3ca.code_mask = m_config[i].code_mask; + l3ca.data_mask = m_config[i].data_mask; + } else + l3ca.ways_mask = m_config[i].mask; + + for (j = 0; j < m_sock_count; j++) { + phy_pkg_id = m_sockets[j]; + if (cos_id_map[i][phy_pkg_id] == 0) + continue; + + l3ca.class_id = cos_id_map[i][phy_pkg_id]; + + ret = pqos_l3ca_set(phy_pkg_id, 1, &l3ca); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Failed to set COS %u on " + "phy_pkg %u.\n", l3ca.class_id, + phy_pkg_id); + ret = -EFAULT; + goto exit; + } + } + } + + for (i = 0; i < m_config_count; i++) { + for (j = 0; j < m_cpu->num_cores; j++) { + cpu_id = m_cpu->cores[j].lcore; + if (CPU_ISSET(cpu_id, &m_config[i].cpumask) == 0) + continue; + + ret = pqos_cpu_get_socketid(m_cpu, cpu_id, &phy_pkg_id); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Failed to get socket for " + "cpu %u\n", cpu_id); + ret = -EFAULT; + goto exit; + } + + cos_id = cos_id_map[i][phy_pkg_id]; + + ret = pqos_l3ca_assoc_set(cpu_id, cos_id); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Failed to associate COS %u to " + "cpu %u\n", cos_id, cpu_id); + ret = -EFAULT; + goto exit; + } + } + } + +exit: + return ret; +} + + +/* Parse 
the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt = 0; + int retval = 0; + int oldopterr = 0; + char **argvopt = argv; + char *prgname = argv[0]; + + static struct option lgopts[] = { + { "l3ca", required_argument, 0, 0 }, + { NULL, 0, 0, 0 } + }; + + /* Disable printing messages within getopt() */ + oldopterr = opterr; + opterr = 0; + + opt = getopt_long(argc, argvopt, "", lgopts, NULL); + if (opt == 0) { + retval = parse_l3ca(optarg); + if (retval != 0) { + printf("PQOS: Invalid L3CA parameters!\n"); + goto exit; + } + + argv[optind - 1] = prgname; + retval = optind - 1; + } else + retval = 0; + +exit: + /* reset getopt lib */ + optind = 0; + + /* Restore opterr value */ + opterr = oldopterr; + + return retval; +} + +static void +print_cmd_line_config(void) +{ + char cpustr[PQOS_MAX_CORES * 3] = {0}; + unsigned i = 0; + unsigned j = 0; + + for (i = 0; i < m_config_count; i++) { + unsigned len = 0; + memset(cpustr, 0, sizeof(cpustr)); + + /* Generate CPU list */ + for (j = 0; j < PQOS_MAX_CORES; j++) { + if (CPU_ISSET(j, &m_config[i].cpumask) != 1) + continue; + + len += snprintf(cpustr + len, sizeof(cpustr) - len - 1, + "%u,", j); + + if (len >= sizeof(cpustr) - 1) + break; + } + + if (m_config[i].cdp == 1) { + printf("PQOS: CPUs: %s cMASK: 0x%llx, dMASK: " + "0x%llx\n", cpustr, + (unsigned long long)m_config[i].code_mask, + (unsigned long long)m_config[i].data_mask); + } else { + printf("PQOS: CPUs: %s MASK: 0x%llx\n", cpustr, + (unsigned long long)m_config[i].mask); + } + } +} + +/** + * @brief Prints CAT configuration + */ +static void +print_cat_config(void) +{ + int ret = PQOS_RETVAL_OK; + unsigned i = 0; + + for (i = 0; i < m_sock_count; i++) { + struct pqos_l3ca tab[PQOS_MAX_L3CA_COS] = {{0} }; + unsigned num = 0; + unsigned n = 0; + + ret = pqos_l3ca_get(m_sockets[i], PQOS_MAX_L3CA_COS, &num, tab); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Error retrieving COS!\n"); + return; + } 
+ + printf("PQOS: COS definitions for Socket %u:\n", m_sockets[i]); + for (n = 0; n < num; n++) { + if (tab[n].cdp == 1) { + printf("PQOS: COS: %u, cMASK: 0x%llx, " + "dMASK: 0x%llx\n", tab[n].class_id, + (unsigned long long)tab[n].code_mask, + (unsigned long long)tab[n].data_mask); + } else { + printf("PQOS: COS: %u, MASK: 0x%llx\n", + tab[n].class_id, + (unsigned long long)tab[n].ways_mask); + } + } + } + + for (i = 0; i < m_sock_count; i++) { + unsigned lcores[PQOS_MAX_SOCKET_CORES] = {0}; + unsigned lcount = 0; + unsigned n = 0; + + ret = pqos_cpu_get_cores(m_cpu, m_sockets[i], + PQOS_MAX_SOCKET_CORES, &lcount, &lcores[0]); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Error retrieving core information!\n"); + return; + } + + printf("PQOS: CPU information for socket %u:\n", m_sockets[i]); + for (n = 0; n < lcount; n++) { + unsigned class_id = 0; + + ret = pqos_l3ca_assoc_get(lcores[n], &class_id); + if (ret == PQOS_RETVAL_OK) + printf("PQOS: CPU: %u, COS: %u\n", lcores[n], + class_id); + else + printf("PQOS: CPU: %u, ERROR\n", lcores[n]); + } + } + +} + +static int +cat_validate(void) +{ + int ret = 0; + + ret = check_cpus(); + if (ret != 0) + return ret; + + ret = check_cdp(); + if (ret != 0) + return ret; + + ret = check_cbm_len_and_contention(); + if (ret != 0) + return ret; + + ret = check_cpus_overlapping(); + if (ret != 0) + return ret; + + return 0; +} + +static int +cat_set(void) +{ + int ret = 0; + unsigned cos_id_map[m_config_count][PQOS_MAX_SOCKETS]; + + memset(cos_id_map, 0, sizeof(cos_id_map)); + + ret = check_and_select_classes(cos_id_map); + if (ret != 0) + return ret; + + ret = configure_cat(cos_id_map); + if (ret != 0) + return ret; + + return 0; +} + +static void +cat_fini(void) +{ + int ret = 0; + + printf("PQOS: Shutting down PQoS library...\n"); + + /* deallocate all the resources */ + ret = pqos_fini(); + if (ret != PQOS_RETVAL_OK && ret != PQOS_RETVAL_INIT) + printf("PQOS: Error shutting down PQoS library!\n"); + + m_cap = NULL; + m_cpu 
= NULL; + m_cap_l3ca = NULL; + memset(m_sockets, 0, sizeof(m_sockets)); + m_sock_count = 0; + memset(m_config, 0, sizeof(m_config)); + m_config_count = 0; +} + +void +cat_exit(void) +{ + unsigned i = 0; + unsigned j = 0; + unsigned cpu_id = 0; + int ret = 0; + + /* if lib is not initialized, do nothing */ + if (m_cap == NULL && m_cpu == NULL) + return; + + printf("PQOS: Reverting CAT configuration...\n"); + + for (i = 0; i < m_config_count; i++) { + for (j = 0; j < m_cpu->num_cores; j++) { + cpu_id = m_cpu->cores[j].lcore; + if (CPU_ISSET(cpu_id, &m_config[i].cpumask) == 0) + continue; + + ret = pqos_l3ca_assoc_set(cpu_id, 0); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Failed to associate COS 0 to " + "cpu %u\n", cpu_id); + } + } + } + + cat_fini(); +} + +static void +signal_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) { + printf("\nPQOS: Signal %d received, preparing to exit...\n", + signum); + + cat_exit(); + + /* exit with the expected status */ + signal(signum, SIG_DFL); + kill(getpid(), signum); + } +} + +int +cat_init(int argc, char **argv) +{ + int ret = 0; + int args_num = 0; + struct pqos_config cfg = {0}; + + if (m_cap != NULL || m_cpu != NULL) { + printf("PQOS: CAT module already initialized!\n"); + return -EEXIST; + } + + /* Parse cmd line args */ + ret = parse_args(argc, argv); + + if (ret <= 0) + goto err; + + args_num = ret; + + /* Print cmd line configuration */ + print_cmd_line_config(); + + /* PQoS Initialization - Check and initialize CAT capability */ + cfg.fd_log = STDOUT_FILENO; + cfg.verbose = 0; + cfg.cdp_cfg = PQOS_REQUIRE_CDP_ANY; + ret = pqos_init(&cfg); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Error initializing PQoS library!\n"); + ret = -EFAULT; + goto err; + } + + /* Get capability and CPU info pointer */ + ret = pqos_cap_get(&m_cap, &m_cpu); + if (ret != PQOS_RETVAL_OK || m_cap == NULL || m_cpu == NULL) { + printf("PQOS: Error retrieving PQoS capabilities!\n"); + ret = -EFAULT; + goto err; + } + + /* 
Get L3CA capabilities */ + ret = pqos_cap_get_type(m_cap, PQOS_CAP_TYPE_L3CA, &m_cap_l3ca); + if (ret != PQOS_RETVAL_OK || m_cap_l3ca == NULL) { + printf("PQOS: Error retrieving PQOS_CAP_TYPE_L3CA " + "capabilities!\n"); + ret = -EFAULT; + goto err; + } + + /* Get CPU socket information */ + ret = pqos_cpu_get_sockets(m_cpu, PQOS_MAX_SOCKETS, &m_sock_count, + m_sockets); + if (ret != PQOS_RETVAL_OK) { + printf("PQOS: Error retrieving CPU socket information!\n"); + ret = -EFAULT; + goto err; + } + + /* Validate cmd line configuration */ + ret = cat_validate(); + if (ret != 0) { + printf("PQOS: Requested CAT configuration is not valid!\n"); + goto err; + } + + /* configure system */ + ret = cat_set(); + if (ret != 0) { + printf("PQOS: Failed to configure CAT!\n"); + goto err; + } + + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + ret = atexit(cat_exit); + if (ret != 0) { + printf("PQOS: Cannot set exit function\n"); + goto err; + } + + /* Print CAT configuration */ + print_cat_config(); + + return args_num; + +err: + /* deallocate all the resources */ + cat_fini(); + return ret; +} diff --git a/examples/l2fwd-cat/cat.h b/examples/l2fwd-cat/cat.h new file mode 100644 index 00000000..aef2b768 --- /dev/null +++ b/examples/l2fwd-cat/cat.h @@ -0,0 +1,72 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CAT_H +#define _CAT_H + +/** + * @file + * PQoS CAT + */ + +#include <stdint.h> +#include <string.h> + +#include <rte_lcore.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* L3 cache allocation class of service data structure: one entry per '<cbm>@<cpus>' group of the --l3ca option */ +struct cat_config { + rte_cpuset_t cpumask; /* CPUs bitmask */ + int cdp; /* data & code masks used if true; set to 1 by the parser when a (code,data) mask pair is given */ + /* mask and the {data_mask, code_mask} pair share storage; read the member selected by cdp */ + union { + uint64_t mask; /* capacity bitmask (CBM) */ + struct { + uint64_t data_mask; /* data capacity bitmask (CBM) */ + uint64_t code_mask; /* code capacity bitmask (CBM) */ + }; + }; +}; + +/* Parses --l3ca and applies the CAT configuration; returns the number of argv entries consumed, 0 when --l3ca is absent, or a negative errno value */ +int cat_init(int argc, char **argv); + +/* Re-associates the configured CPUs with COS 0 and shuts the PQoS library down; no-op when not initialized */ +void cat_exit(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _CAT_H */ diff --git a/examples/l2fwd-cat/l2fwd-cat.c b/examples/l2fwd-cat/l2fwd-cat.c new file mode 100644 index 00000000..8cce33b8 --- /dev/null +++ b/examples/l2fwd-cat/l2fwd-cat.c @@ -0,0 +1,224 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <inttypes.h> +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_cycles.h> +#include <rte_lcore.h> +#include <rte_mbuf.h> + +#include "cat.h" + +#define RX_RING_SIZE 128 +#define TX_RING_SIZE 512 + +#define NUM_MBUFS 8191 +#define MBUF_CACHE_SIZE 250 +#define BURST_SIZE 32 + +static const struct rte_eth_conf port_conf_default = { + .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN } +}; + +/* l2fwd-cat.c: CAT enabled, basic DPDK skeleton forwarding example. */ + +/* + * Initializes a given port using global settings and with the RX buffers + * coming from the mbuf_pool passed as a parameter. + */ +static inline int +port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + struct rte_eth_conf port_conf = port_conf_default; + const uint16_t rx_rings = 1, tx_rings = 1; + int retval; + uint16_t q; + + if (port >= rte_eth_dev_count()) + return -1; + + /* Configure the Ethernet device. */ + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval != 0) + return retval; + + /* Allocate and set up 1 RX queue per Ethernet port. */ + for (q = 0; q < rx_rings; q++) { + retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + rte_eth_dev_socket_id(port), NULL, mbuf_pool); + if (retval < 0) + return retval; + } + + /* Allocate and set up 1 TX queue per Ethernet port. */ + for (q = 0; q < tx_rings; q++) { + retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + rte_eth_dev_socket_id(port), NULL); + if (retval < 0) + return retval; + } + + /* Start the Ethernet port. */ + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + /* Display the port MAC address. 
*/ + struct ether_addr addr; + rte_eth_macaddr_get(port, &addr); + printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 + " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", + (unsigned)port, + addr.addr_bytes[0], addr.addr_bytes[1], + addr.addr_bytes[2], addr.addr_bytes[3], + addr.addr_bytes[4], addr.addr_bytes[5]); + + /* Enable RX in promiscuous mode for the Ethernet device. */ + rte_eth_promiscuous_enable(port); + + return 0; +} + +/* + * The lcore main. This is the main thread that does the work, reading from + * an input port and writing to an output port. + */ +static __attribute__((noreturn)) void +lcore_main(void) +{ + const uint8_t nb_ports = rte_eth_dev_count(); + uint8_t port; + + /* + * Check that the port is on the same NUMA node as the polling thread + * for best performance. + */ + for (port = 0; port < nb_ports; port++) + if (rte_eth_dev_socket_id(port) > 0 && + rte_eth_dev_socket_id(port) != + (int)rte_socket_id()) + printf("WARNING, port %u is on remote NUMA node to " + "polling thread.\n\tPerformance will " + "not be optimal.\n", port); + + printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n", + rte_lcore_id()); + + /* Run until the application is quit or killed. */ + for (;;) { + /* + * Receive packets on a port and forward them on the paired + * port. The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc. + */ + for (port = 0; port < nb_ports; port++) { + + /* Get burst of RX packets, from first port of pair. */ + struct rte_mbuf *bufs[BURST_SIZE]; + const uint16_t nb_rx = rte_eth_rx_burst(port, 0, + bufs, BURST_SIZE); + + if (unlikely(nb_rx == 0)) + continue; + + /* Send burst of TX packets, to second port of pair. */ + const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0, + bufs, nb_rx); + + /* Free any unsent packets. 
*/ + if (unlikely(nb_tx < nb_rx)) { + uint16_t buf; + for (buf = nb_tx; buf < nb_rx; buf++) + rte_pktmbuf_free(bufs[buf]); + } + } + } +} + +/* + * The main function, which does initialization and calls the per-lcore + * functions. + */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool; + unsigned nb_ports; + uint8_t portid; + + /* Initialize the Environment Abstraction Layer (EAL). */ + int ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + + argc -= ret; + argv += ret; + + /* + * Initialize the PQoS library and configure CAT. + * Please see l2fwd-cat documentation for more info. + */ + ret = cat_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "PQOS: L3CA init failed!\n"); + + argc -= ret; + argv += ret; + + /* Check that there is an even number of ports to send/receive on. */ + nb_ports = rte_eth_dev_count(); + if (nb_ports < 2 || (nb_ports & 1)) + rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n"); + + /* Creates a new mempool in memory to hold the mbufs. */ + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports, + MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* Initialize all ports. */ + for (portid = 0; portid < nb_ports; portid++) + if (port_init(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8 "\n", + portid); + + if (rte_lcore_count() > 1) + printf("\nWARNING: Too many lcores enabled. Only 1 used.\n"); + + /* Call lcore_main on the master core only. */ + lcore_main(); + + return 0; +} diff --git a/examples/l2fwd-crypto/Makefile b/examples/l2fwd-crypto/Makefile new file mode 100644 index 00000000..e8224cae --- /dev/null +++ b/examples/l2fwd-crypto/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-crypto + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-crypto/main.c b/examples/l2fwd-crypto/main.c new file mode 100644 index 00000000..d4e2d8de --- /dev/null +++ b/examples/l2fwd-crypto/main.c @@ -0,0 +1,2056 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/queue.h> +#include <netinet/in.h> +#include <setjmp.h> +#include <stdarg.h> +#include <ctype.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_common.h> +#include <rte_cryptodev.h> +#include <rte_cycles.h> +#include <rte_debug.h> +#include <rte_eal.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_interrupts.h> +#include <rte_ip.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_memcpy.h> +#include <rte_memory.h> +#include <rte_mempool.h> +#include <rte_memzone.h> +#include <rte_pci.h> +#include <rte_per_lcore.h> +#include <rte_prefetch.h> +#include <rte_random.h> +#include <rte_ring.h> +#include <rte_hexdump.h> + +enum cdev_type { + CDEV_TYPE_ANY, + CDEV_TYPE_HW, + CDEV_TYPE_SW +}; + +#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1 + +#define NB_MBUF 8192 + +#define MAX_STR_LEN 32 +#define MAX_KEY_SIZE 128 +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t 
nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint64_t l2fwd_enabled_port_mask; +static uint64_t l2fwd_enabled_crypto_mask; + +/* list of enabled ports */ +static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS]; + + +struct pkt_buffer { + unsigned len; + struct rte_mbuf *buffer[MAX_PKT_BURST]; +}; + +struct op_buffer { + unsigned len; + struct rte_crypto_op *buffer[MAX_PKT_BURST]; +}; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 + +enum l2fwd_crypto_xform_chain { + L2FWD_CRYPTO_CIPHER_HASH, + L2FWD_CRYPTO_HASH_CIPHER, + L2FWD_CRYPTO_CIPHER_ONLY, + L2FWD_CRYPTO_HASH_ONLY +}; + +struct l2fwd_key { + uint8_t *data; + uint32_t length; + phys_addr_t phys_addr; +}; + +char supported_auth_algo[RTE_CRYPTO_AUTH_LIST_END][MAX_STR_LEN]; +char supported_cipher_algo[RTE_CRYPTO_CIPHER_LIST_END][MAX_STR_LEN]; + +/** l2fwd crypto application command line options */ +struct l2fwd_crypto_options { + unsigned portmask; + unsigned nb_ports_per_lcore; + unsigned refresh_period; + unsigned single_lcore:1; + + enum cdev_type type; + unsigned sessionless:1; + + enum l2fwd_crypto_xform_chain xform_chain; + + struct rte_crypto_sym_xform cipher_xform; + unsigned ckey_param; + int ckey_random_size; + + struct l2fwd_key iv; + unsigned iv_param; + int iv_random_size; + + struct rte_crypto_sym_xform auth_xform; + uint8_t akey_param; + int akey_random_size; + + struct l2fwd_key aad; + unsigned aad_param; + int aad_random_size; + + int digest_size; + + uint16_t block_size; + char string_type[MAX_STR_LEN]; +}; + +/** l2fwd crypto lcore params */ +struct l2fwd_crypto_params { + uint8_t dev_id; + uint8_t qp_id; + + unsigned digest_length; + unsigned block_size; + + struct l2fwd_key iv; + struct l2fwd_key aad; + struct rte_cryptodev_sym_session *session; + + uint8_t do_cipher; + uint8_t do_hash; + uint8_t hash_verify; + + enum 
rte_crypto_cipher_algorithm cipher_algo; + enum rte_crypto_auth_algorithm auth_algo; +}; + +/** lcore configuration */ +struct lcore_queue_conf { + unsigned nb_rx_ports; + unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; + + unsigned nb_crypto_devs; + unsigned cryptodev_list[MAX_RX_QUEUE_PER_LCORE]; + + struct op_buffer op_buf[RTE_MAX_ETHPORTS]; + struct pkt_buffer pkt_buf[RTE_MAX_ETHPORTS]; +} __rte_cache_aligned; + +struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_NONE, + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +struct rte_mempool *l2fwd_pktmbuf_pool; +struct rte_mempool *l2fwd_crypto_op_pool; + +/* Per-port statistics struct */ +struct l2fwd_port_statistics { + uint64_t tx; + uint64_t rx; + + uint64_t crypto_enqueued; + uint64_t crypto_dequeued; + + uint64_t dropped; +} __rte_cache_aligned; + +struct l2fwd_crypto_statistics { + uint64_t enqueued; + uint64_t dequeued; + + uint64_t errors; +} __rte_cache_aligned; + +struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS]; +struct l2fwd_crypto_statistics crypto_statistics[RTE_MAX_ETHPORTS]; + +/* A tsc-based timer responsible for triggering statistics printout */ +#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 Ghz */ +#define MAX_TIMER_PERIOD 86400UL /* 1 day max */ + +/* default period is 10 seconds */ +static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; + +/* Print out statistics on packets dropped */ +static void +print_stats(void) +{ + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; + uint64_t total_packets_enqueued, 
total_packets_dequeued, + total_packets_errors; + unsigned portid; + uint64_t cdevid; + + total_packets_dropped = 0; + total_packets_tx = 0; + total_packets_rx = 0; + total_packets_enqueued = 0; + total_packets_dequeued = 0; + total_packets_errors = 0; + + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' }; + + /* Clear screen and move to top left */ + printf("%s%s", clr, topLeft); + + printf("\nPort statistics ===================================="); + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + /* skip disabled ports */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + printf("\nStatistics for port %u ------------------------------" + "\nPackets sent: %32"PRIu64 + "\nPackets received: %28"PRIu64 + "\nPackets dropped: %29"PRIu64, + portid, + port_statistics[portid].tx, + port_statistics[portid].rx, + port_statistics[portid].dropped); + + total_packets_dropped += port_statistics[portid].dropped; + total_packets_tx += port_statistics[portid].tx; + total_packets_rx += port_statistics[portid].rx; + } + printf("\nCrypto statistics =================================="); + + for (cdevid = 0; cdevid < RTE_CRYPTO_MAX_DEVS; cdevid++) { + /* skip disabled ports */ + if ((l2fwd_enabled_crypto_mask & (1lu << cdevid)) == 0) + continue; + printf("\nStatistics for cryptodev %"PRIu64 + " -------------------------" + "\nPackets enqueued: %28"PRIu64 + "\nPackets dequeued: %28"PRIu64 + "\nPackets errors: %30"PRIu64, + cdevid, + crypto_statistics[cdevid].enqueued, + crypto_statistics[cdevid].dequeued, + crypto_statistics[cdevid].errors); + + total_packets_enqueued += crypto_statistics[cdevid].enqueued; + total_packets_dequeued += crypto_statistics[cdevid].dequeued; + total_packets_errors += crypto_statistics[cdevid].errors; + } + printf("\nAggregate statistics ===============================" + "\nTotal packets received: %22"PRIu64 + "\nTotal packets enqueued: %22"PRIu64 + "\nTotal packets 
dequeued: %22"PRIu64 + "\nTotal packets sent: %26"PRIu64 + "\nTotal packets dropped: %23"PRIu64 + "\nTotal packets crypto errors: %17"PRIu64, + total_packets_rx, + total_packets_enqueued, + total_packets_dequeued, + total_packets_tx, + total_packets_dropped, + total_packets_errors); + printf("\n====================================================\n"); +} + +static void +fill_supported_algorithm_tables(void) +{ + unsigned i; + + for (i = 0; i < RTE_CRYPTO_AUTH_LIST_END; i++) + strcpy(supported_auth_algo[i], "NOT_SUPPORTED"); + + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_AES_GCM], "AES_GCM"); + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_MD5_HMAC], "MD5_HMAC"); + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_NULL], "NULL"); + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA1_HMAC], "SHA1_HMAC"); + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA224_HMAC], "SHA224_HMAC"); + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA256_HMAC], "SHA256_HMAC"); + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA384_HMAC], "SHA384_HMAC"); + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA512_HMAC], "SHA512_HMAC"); + strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SNOW3G_UIA2], "SNOW3G_UIA2"); + + for (i = 0; i < RTE_CRYPTO_CIPHER_LIST_END; i++) + strcpy(supported_cipher_algo[i], "NOT_SUPPORTED"); + + strcpy(supported_cipher_algo[RTE_CRYPTO_CIPHER_AES_CBC], "AES_CBC"); + strcpy(supported_cipher_algo[RTE_CRYPTO_CIPHER_AES_GCM], "AES_GCM"); + strcpy(supported_cipher_algo[RTE_CRYPTO_CIPHER_NULL], "NULL"); + strcpy(supported_cipher_algo[RTE_CRYPTO_CIPHER_SNOW3G_UEA2], "SNOW3G_UEA2"); +} + + +static int +l2fwd_crypto_send_burst(struct lcore_queue_conf *qconf, unsigned n, + struct l2fwd_crypto_params *cparams) +{ + struct rte_crypto_op **op_buffer; + unsigned ret; + + op_buffer = (struct rte_crypto_op **) + qconf->op_buf[cparams->dev_id].buffer; + + ret = rte_cryptodev_enqueue_burst(cparams->dev_id, + cparams->qp_id, op_buffer, (uint16_t) n); + + crypto_statistics[cparams->dev_id].enqueued += ret; + if 
(unlikely(ret < n)) { + crypto_statistics[cparams->dev_id].errors += (n - ret); + do { + rte_pktmbuf_free(op_buffer[ret]->sym->m_src); + rte_crypto_op_free(op_buffer[ret]); + } while (++ret < n); + } + + return 0; +} + +static int +l2fwd_crypto_enqueue(struct rte_crypto_op *op, + struct l2fwd_crypto_params *cparams) +{ + unsigned lcore_id, len; + struct lcore_queue_conf *qconf; + + lcore_id = rte_lcore_id(); + + qconf = &lcore_queue_conf[lcore_id]; + len = qconf->op_buf[cparams->dev_id].len; + qconf->op_buf[cparams->dev_id].buffer[len] = op; + len++; + + /* enough ops to be sent */ + if (len == MAX_PKT_BURST) { + l2fwd_crypto_send_burst(qconf, MAX_PKT_BURST, cparams); + len = 0; + } + + qconf->op_buf[cparams->dev_id].len = len; + return 0; +} + +static int +l2fwd_simple_crypto_enqueue(struct rte_mbuf *m, + struct rte_crypto_op *op, + struct l2fwd_crypto_params *cparams) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ip_hdr; + + unsigned ipdata_offset, pad_len, data_len; + char *padding; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + if (eth_hdr->ether_type != rte_cpu_to_be_16(ETHER_TYPE_IPv4)) + return -1; + + ipdata_offset = sizeof(struct ether_hdr); + + ip_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, char *) + + ipdata_offset); + + ipdata_offset += (ip_hdr->version_ihl & IPV4_HDR_IHL_MASK) + * IPV4_IHL_MULTIPLIER; + + + /* Zero pad data to be crypto'd so it is block aligned */ + data_len = rte_pktmbuf_data_len(m) - ipdata_offset; + pad_len = data_len % cparams->block_size ? 
cparams->block_size - + (data_len % cparams->block_size) : 0; + + if (pad_len) { + padding = rte_pktmbuf_append(m, pad_len); + if (unlikely(!padding)) + return -1; + + data_len += pad_len; + memset(padding, 0, pad_len); + } + + /* Set crypto operation data parameters */ + rte_crypto_op_attach_sym_session(op, cparams->session); + + if (cparams->do_hash) { + if (!cparams->hash_verify) { + /* Append space for digest to end of packet */ + op->sym->auth.digest.data = (uint8_t *)rte_pktmbuf_append(m, + cparams->digest_length); + } else { + op->sym->auth.digest.data = (uint8_t *)rte_pktmbuf_append(m, + cparams->digest_length); + } + + op->sym->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, + rte_pktmbuf_pkt_len(m) - cparams->digest_length); + op->sym->auth.digest.length = cparams->digest_length; + + /* For SNOW3G algorithms, offset/length must be in bits */ + if (cparams->auth_algo == RTE_CRYPTO_AUTH_SNOW3G_UIA2) { + op->sym->auth.data.offset = ipdata_offset << 3; + op->sym->auth.data.length = data_len << 3; + } else { + op->sym->auth.data.offset = ipdata_offset; + op->sym->auth.data.length = data_len; + } + + if (cparams->aad.length) { + op->sym->auth.aad.data = cparams->aad.data; + op->sym->auth.aad.phys_addr = cparams->aad.phys_addr; + op->sym->auth.aad.length = cparams->aad.length; + } + } + + if (cparams->do_cipher) { + op->sym->cipher.iv.data = cparams->iv.data; + op->sym->cipher.iv.phys_addr = cparams->iv.phys_addr; + op->sym->cipher.iv.length = cparams->iv.length; + + /* For SNOW3G algorithms, offset/length must be in bits */ + if (cparams->cipher_algo == RTE_CRYPTO_CIPHER_SNOW3G_UEA2) { + op->sym->cipher.data.offset = ipdata_offset << 3; + if (cparams->do_hash && cparams->hash_verify) + /* Do not cipher the hash tag */ + op->sym->cipher.data.length = (data_len - + cparams->digest_length) << 3; + else + op->sym->cipher.data.length = data_len << 3; + + } else { + op->sym->cipher.data.offset = ipdata_offset; + if (cparams->do_hash && cparams->hash_verify) + 
/* Do not cipher the hash tag */ + op->sym->cipher.data.length = data_len - + cparams->digest_length; + else + op->sym->cipher.data.length = data_len; + } + } + + op->sym->m_src = m; + + return l2fwd_crypto_enqueue(op, cparams); +} + + +/* Send the burst of packets on an output interface */ +static int +l2fwd_send_burst(struct lcore_queue_conf *qconf, unsigned n, + uint8_t port) +{ + struct rte_mbuf **pkt_buffer; + unsigned ret; + + pkt_buffer = (struct rte_mbuf **)qconf->pkt_buf[port].buffer; + + ret = rte_eth_tx_burst(port, 0, pkt_buffer, (uint16_t)n); + port_statistics[port].tx += ret; + if (unlikely(ret < n)) { + port_statistics[port].dropped += (n - ret); + do { + rte_pktmbuf_free(pkt_buffer[ret]); + } while (++ret < n); + } + + return 0; +} + +/* Enqueue packets for TX and prepare them to be sent */ +static int +l2fwd_send_packet(struct rte_mbuf *m, uint8_t port) +{ + unsigned lcore_id, len; + struct lcore_queue_conf *qconf; + + lcore_id = rte_lcore_id(); + + qconf = &lcore_queue_conf[lcore_id]; + len = qconf->pkt_buf[port].len; + qconf->pkt_buf[port].buffer[len] = m; + len++; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + l2fwd_send_burst(qconf, MAX_PKT_BURST, port); + len = 0; + } + + qconf->pkt_buf[port].len = len; + return 0; +} + +static void +l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid) +{ + struct ether_hdr *eth; + void *tmp; + unsigned dst_port; + + dst_port = l2fwd_dst_ports[portid]; + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = ð->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], ð->s_addr); + + l2fwd_send_packet(m, (uint8_t) dst_port); +} + +/** Generate random key */ +static void +generate_random_key(uint8_t *key, unsigned length) +{ + unsigned i; + + for (i = 0; i < length; i++) + key[i] = rand() % 0xff; +} + +static struct rte_cryptodev_sym_session * 
+initialize_crypto_session(struct l2fwd_crypto_options *options, + uint8_t cdev_id) +{ + struct rte_crypto_sym_xform *first_xform; + + if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH) { + first_xform = &options->cipher_xform; + first_xform->next = &options->auth_xform; + } else if (options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER) { + first_xform = &options->auth_xform; + first_xform->next = &options->cipher_xform; + } else if (options->xform_chain == L2FWD_CRYPTO_CIPHER_ONLY) { + first_xform = &options->cipher_xform; + } else { + first_xform = &options->auth_xform; + } + + /* Setup Cipher Parameters */ + return rte_cryptodev_sym_session_create(cdev_id, first_xform); +} + +static void +l2fwd_crypto_options_print(struct l2fwd_crypto_options *options); + +/* main processing loop */ +static void +l2fwd_main_loop(struct l2fwd_crypto_options *options) +{ + struct rte_mbuf *m, *pkts_burst[MAX_PKT_BURST]; + struct rte_crypto_op *ops_burst[MAX_PKT_BURST]; + + unsigned lcore_id = rte_lcore_id(); + uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0; + unsigned i, j, portid, nb_rx; + struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id]; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / + US_PER_S * BURST_TX_DRAIN_US; + struct l2fwd_crypto_params *cparams; + struct l2fwd_crypto_params port_cparams[qconf->nb_crypto_devs]; + + if (qconf->nb_rx_ports == 0) { + RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id); + return; + } + + RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->nb_rx_ports; i++) { + + portid = qconf->rx_port_list[i]; + RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id, + portid); + } + + for (i = 0; i < qconf->nb_crypto_devs; i++) { + port_cparams[i].do_cipher = 0; + port_cparams[i].do_hash = 0; + + switch (options->xform_chain) { + case L2FWD_CRYPTO_CIPHER_HASH: + case L2FWD_CRYPTO_HASH_CIPHER: + port_cparams[i].do_cipher = 1; + port_cparams[i].do_hash = 1; + 
break; + case L2FWD_CRYPTO_HASH_ONLY: + port_cparams[i].do_hash = 1; + break; + case L2FWD_CRYPTO_CIPHER_ONLY: + port_cparams[i].do_cipher = 1; + break; + } + + port_cparams[i].dev_id = qconf->cryptodev_list[i]; + port_cparams[i].qp_id = 0; + + port_cparams[i].block_size = options->block_size; + + if (port_cparams[i].do_hash) { + port_cparams[i].digest_length = + options->auth_xform.auth.digest_length; + if (options->auth_xform.auth.add_auth_data_length) { + port_cparams[i].aad.data = options->aad.data; + port_cparams[i].aad.length = + options->auth_xform.auth.add_auth_data_length; + port_cparams[i].aad.phys_addr = options->aad.phys_addr; + if (!options->aad_param) + generate_random_key(port_cparams[i].aad.data, + port_cparams[i].aad.length); + + } + + if (options->auth_xform.auth.op == RTE_CRYPTO_AUTH_OP_VERIFY) + port_cparams[i].hash_verify = 1; + else + port_cparams[i].hash_verify = 0; + + port_cparams[i].auth_algo = options->auth_xform.auth.algo; + } + + if (port_cparams[i].do_cipher) { + port_cparams[i].iv.data = options->iv.data; + port_cparams[i].iv.length = options->iv.length; + port_cparams[i].iv.phys_addr = options->iv.phys_addr; + if (!options->iv_param) + generate_random_key(port_cparams[i].iv.data, + port_cparams[i].iv.length); + + port_cparams[i].cipher_algo = options->cipher_xform.cipher.algo; + } + + port_cparams[i].session = initialize_crypto_session(options, + port_cparams[i].dev_id); + + if (port_cparams[i].session == NULL) + return; + RTE_LOG(INFO, L2FWD, " -- lcoreid=%u cryptoid=%u\n", lcore_id, + port_cparams[i].dev_id); + } + + l2fwd_crypto_options_print(options); + + /* + * Initialize previous tsc timestamp before the loop, + * to avoid showing the port statistics immediately, + * so user can see the crypto information. 
+ */ + prev_tsc = rte_rdtsc(); + while (1) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + if (qconf->pkt_buf[portid].len == 0) + continue; + l2fwd_send_burst(&lcore_queue_conf[lcore_id], + qconf->pkt_buf[portid].len, + (uint8_t) portid); + qconf->pkt_buf[portid].len = 0; + } + + /* if timer is enabled */ + if (timer_period > 0) { + + /* advance the timer */ + timer_tsc += diff_tsc; + + /* if timer has reached its timeout */ + if (unlikely(timer_tsc >= + (uint64_t)timer_period)) { + + /* do this only on master core */ + if (lcore_id == rte_get_master_lcore() + && options->refresh_period) { + print_stats(); + timer_tsc = 0; + } + } + } + + prev_tsc = cur_tsc; + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->nb_rx_ports; i++) { + portid = qconf->rx_port_list[i]; + + cparams = &port_cparams[i]; + + nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, + pkts_burst, MAX_PKT_BURST); + + port_statistics[portid].rx += nb_rx; + + if (nb_rx) { + /* + * If we can't allocate a crypto_ops, then drop + * the rest of the burst and dequeue and + * process the packets to free offload structs + */ + if (rte_crypto_op_bulk_alloc( + l2fwd_crypto_op_pool, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + ops_burst, nb_rx) != + nb_rx) { + for (j = 0; j < nb_rx; j++) + rte_pktmbuf_free(pkts_burst[i]); + + nb_rx = 0; + } + + /* Enqueue packets from Crypto device*/ + for (j = 0; j < nb_rx; j++) { + m = pkts_burst[j]; + + l2fwd_simple_crypto_enqueue(m, + ops_burst[j], cparams); + } + } + + /* Dequeue packets from Crypto device */ + do { + nb_rx = rte_cryptodev_dequeue_burst( + cparams->dev_id, cparams->qp_id, + ops_burst, MAX_PKT_BURST); + + crypto_statistics[cparams->dev_id].dequeued += + nb_rx; + + /* Forward crypto'd packets */ + for (j = 0; j < nb_rx; j++) { + m = ops_burst[j]->sym->m_src; + + rte_crypto_op_free(ops_burst[j]); + 
l2fwd_simple_forward(m, portid); + } + } while (nb_rx == MAX_PKT_BURST); + } + } +} + +static int +l2fwd_launch_one_lcore(void *arg) +{ + l2fwd_main_loop((struct l2fwd_crypto_options *)arg); + return 0; +} + +/* Display command line arguments usage */ +static void +l2fwd_crypto_usage(const char *prgname) +{ + printf("%s [EAL options] --\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -q NQ: number of queue (=ports) per lcore (default is 1)\n" + " -s manage all ports from single lcore\n" + " -T PERIOD: statistics will be refreshed each PERIOD seconds" + " (0 to disable, 10 default, 86400 maximum)\n" + + " --cdev_type HW / SW / ANY\n" + " --chain HASH_CIPHER / CIPHER_HASH\n" + + " --cipher_algo ALGO\n" + " --cipher_op ENCRYPT / DECRYPT\n" + " --cipher_key KEY (bytes separated with \":\")\n" + " --cipher_key_random_size SIZE: size of cipher key when generated randomly\n" + " --iv IV (bytes separated with \":\")\n" + " --iv_random_size SIZE: size of IV when generated randomly\n" + + " --auth_algo ALGO\n" + " --auth_op GENERATE / VERIFY\n" + " --auth_key KEY (bytes separated with \":\")\n" + " --auth_key_random_size SIZE: size of auth key when generated randomly\n" + " --aad AAD (bytes separated with \":\")\n" + " --aad_random_size SIZE: size of AAD when generated randomly\n" + " --digest_size SIZE: size of digest to be generated/verified\n" + + " --sessionless\n", + prgname); +} + +/** Parse crypto device type command line argument */ +static int +parse_cryptodev_type(enum cdev_type *type, char *optarg) +{ + if (strcmp("HW", optarg) == 0) { + *type = CDEV_TYPE_HW; + return 0; + } else if (strcmp("SW", optarg) == 0) { + *type = CDEV_TYPE_SW; + return 0; + } else if (strcmp("ANY", optarg) == 0) { + *type = CDEV_TYPE_ANY; + return 0; + } + + return -1; +} + +/** Parse crypto chain xform command line argument */ +static int +parse_crypto_opt_chain(struct l2fwd_crypto_options *options, char *optarg) +{ + if (strcmp("CIPHER_HASH", optarg) == 0) { + 
options->xform_chain = L2FWD_CRYPTO_CIPHER_HASH; + return 0; + } else if (strcmp("HASH_CIPHER", optarg) == 0) { + options->xform_chain = L2FWD_CRYPTO_HASH_CIPHER; + return 0; + } else if (strcmp("CIPHER_ONLY", optarg) == 0) { + options->xform_chain = L2FWD_CRYPTO_CIPHER_ONLY; + return 0; + } else if (strcmp("HASH_ONLY", optarg) == 0) { + options->xform_chain = L2FWD_CRYPTO_HASH_ONLY; + return 0; + } + + return -1; +} + +/** Parse crypto cipher algo option command line argument */ +static int +parse_cipher_algo(enum rte_crypto_cipher_algorithm *algo, char *optarg) +{ + unsigned i; + + for (i = 0; i < RTE_CRYPTO_CIPHER_LIST_END; i++) { + if (!strcmp(supported_cipher_algo[i], optarg)) { + *algo = (enum rte_crypto_cipher_algorithm)i; + return 0; + } + } + + printf("Cipher algorithm not supported!\n"); + return -1; +} + +/** Parse crypto cipher operation command line argument */ +static int +parse_cipher_op(enum rte_crypto_cipher_operation *op, char *optarg) +{ + if (strcmp("ENCRYPT", optarg) == 0) { + *op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + return 0; + } else if (strcmp("DECRYPT", optarg) == 0) { + *op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + return 0; + } + + printf("Cipher operation not supported!\n"); + return -1; +} + +/** Parse crypto key command line argument */ +static int +parse_key(uint8_t *data, char *input_arg) +{ + unsigned byte_count; + char *token; + + for (byte_count = 0, token = strtok(input_arg, ":"); + (byte_count < MAX_KEY_SIZE) && (token != NULL); + token = strtok(NULL, ":")) { + + int number = (int)strtol(token, NULL, 16); + + if (errno == EINVAL || errno == ERANGE || number > 0xFF) + return -1; + + data[byte_count++] = (uint8_t)number; + } + + return byte_count; +} + +/** Parse size param*/ +static int +parse_size(int *size, const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse hexadecimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + n = 0; + + if (n == 0) { + printf("invalid 
size\n"); + return -1; + } + + *size = n; + return 0; +} + +/** Parse crypto cipher operation command line argument */ +static int +parse_auth_algo(enum rte_crypto_auth_algorithm *algo, char *optarg) +{ + unsigned i; + + for (i = 0; i < RTE_CRYPTO_AUTH_LIST_END; i++) { + if (!strcmp(supported_auth_algo[i], optarg)) { + *algo = (enum rte_crypto_auth_algorithm)i; + return 0; + } + } + + printf("Authentication algorithm specified not supported!\n"); + return -1; +} + +static int +parse_auth_op(enum rte_crypto_auth_operation *op, char *optarg) +{ + if (strcmp("VERIFY", optarg) == 0) { + *op = RTE_CRYPTO_AUTH_OP_VERIFY; + return 0; + } else if (strcmp("GENERATE", optarg) == 0) { + *op = RTE_CRYPTO_AUTH_OP_GENERATE; + return 0; + } + + printf("Authentication operation specified not supported!\n"); + return -1; +} + +/** Parse long options */ +static int +l2fwd_crypto_parse_args_long_options(struct l2fwd_crypto_options *options, + struct option *lgopts, int option_index) +{ + int retval; + + if (strcmp(lgopts[option_index].name, "cdev_type") == 0) { + retval = parse_cryptodev_type(&options->type, optarg); + if (retval == 0) + snprintf(options->string_type, MAX_STR_LEN, + "%s", optarg); + return retval; + } + + else if (strcmp(lgopts[option_index].name, "chain") == 0) + return parse_crypto_opt_chain(options, optarg); + + /* Cipher options */ + else if (strcmp(lgopts[option_index].name, "cipher_algo") == 0) + return parse_cipher_algo(&options->cipher_xform.cipher.algo, + optarg); + + else if (strcmp(lgopts[option_index].name, "cipher_op") == 0) + return parse_cipher_op(&options->cipher_xform.cipher.op, + optarg); + + else if (strcmp(lgopts[option_index].name, "cipher_key") == 0) { + options->ckey_param = 1; + options->cipher_xform.cipher.key.length = + parse_key(options->cipher_xform.cipher.key.data, optarg); + if (options->cipher_xform.cipher.key.length > 0) + return 0; + else + return -1; + } + + else if (strcmp(lgopts[option_index].name, "cipher_key_random_size") == 0) + 
return parse_size(&options->ckey_random_size, optarg); + + else if (strcmp(lgopts[option_index].name, "iv") == 0) { + options->iv_param = 1; + options->iv.length = + parse_key(options->iv.data, optarg); + if (options->iv.length > 0) + return 0; + else + return -1; + } + + else if (strcmp(lgopts[option_index].name, "iv_random_size") == 0) + return parse_size(&options->iv_random_size, optarg); + + /* Authentication options */ + else if (strcmp(lgopts[option_index].name, "auth_algo") == 0) { + return parse_auth_algo(&options->auth_xform.auth.algo, + optarg); + } + + else if (strcmp(lgopts[option_index].name, "auth_op") == 0) + return parse_auth_op(&options->auth_xform.auth.op, + optarg); + + else if (strcmp(lgopts[option_index].name, "auth_key") == 0) { + options->akey_param = 1; + options->auth_xform.auth.key.length = + parse_key(options->auth_xform.auth.key.data, optarg); + if (options->auth_xform.auth.key.length > 0) + return 0; + else + return -1; + } + + else if (strcmp(lgopts[option_index].name, "auth_key_random_size") == 0) { + return parse_size(&options->akey_random_size, optarg); + } + + else if (strcmp(lgopts[option_index].name, "aad") == 0) { + options->aad_param = 1; + options->aad.length = + parse_key(options->aad.data, optarg); + if (options->aad.length > 0) + return 0; + else + return -1; + } + + else if (strcmp(lgopts[option_index].name, "aad_random_size") == 0) { + return parse_size(&options->aad_random_size, optarg); + } + + else if (strcmp(lgopts[option_index].name, "digest_size") == 0) { + return parse_size(&options->digest_size, optarg); + } + + else if (strcmp(lgopts[option_index].name, "sessionless") == 0) { + options->sessionless = 1; + return 0; + } + + return -1; +} + +/** Parse port mask */ +static int +l2fwd_crypto_parse_portmask(struct l2fwd_crypto_options *options, + const char *q_arg) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(q_arg, &end, 16); + if ((pm == '\0') || (end == NULL) || (*end 
!= '\0')) + pm = 0; + + options->portmask = pm; + if (options->portmask == 0) { + printf("invalid portmask specified\n"); + return -1; + } + + return pm; +} + +/** Parse number of queues */ +static int +l2fwd_crypto_parse_nqueue(struct l2fwd_crypto_options *options, + const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse hexadecimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + n = 0; + else if (n >= MAX_RX_QUEUE_PER_LCORE) + n = 0; + + options->nb_ports_per_lcore = n; + if (options->nb_ports_per_lcore == 0) { + printf("invalid number of ports selected\n"); + return -1; + } + + return 0; +} + +/** Parse timer period */ +static int +l2fwd_crypto_parse_timer_period(struct l2fwd_crypto_options *options, + const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse number string */ + n = (unsigned)strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + n = 0; + + if (n >= MAX_TIMER_PERIOD) { + printf("Warning refresh period specified %lu is greater than " + "max value %lu! 
using max value", + n, MAX_TIMER_PERIOD); + n = MAX_TIMER_PERIOD; + } + + options->refresh_period = n * 1000 * TIMER_MILLISECOND; + + return 0; +} + +/** Generate default options for application */ +static void +l2fwd_crypto_default_options(struct l2fwd_crypto_options *options) +{ + srand(time(NULL)); + + options->portmask = 0xffffffff; + options->nb_ports_per_lcore = 1; + options->refresh_period = 10000; + options->single_lcore = 0; + options->sessionless = 0; + + options->xform_chain = L2FWD_CRYPTO_CIPHER_HASH; + + /* Cipher Data */ + options->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + options->cipher_xform.next = NULL; + options->ckey_param = 0; + options->ckey_random_size = -1; + options->cipher_xform.cipher.key.length = 0; + options->iv_param = 0; + options->iv_random_size = -1; + options->iv.length = 0; + + options->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; + options->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + + /* Authentication Data */ + options->auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; + options->auth_xform.next = NULL; + options->akey_param = 0; + options->akey_random_size = -1; + options->auth_xform.auth.key.length = 0; + options->aad_param = 0; + options->aad_random_size = -1; + options->aad.length = 0; + options->digest_size = -1; + + options->auth_xform.auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + options->auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + + options->type = CDEV_TYPE_ANY; +} + +static void +display_cipher_info(struct l2fwd_crypto_options *options) +{ + printf("\n---- Cipher information ---\n"); + printf("Algorithm: %s\n", + supported_cipher_algo[options->cipher_xform.cipher.algo]); + rte_hexdump(stdout, "Cipher key:", + options->cipher_xform.cipher.key.data, + options->cipher_xform.cipher.key.length); + rte_hexdump(stdout, "IV:", options->iv.data, options->iv.length); +} + +static void +display_auth_info(struct l2fwd_crypto_options *options) +{ + printf("\n---- Authentication information ---\n"); + 
printf("Algorithm: %s\n", + supported_auth_algo[options->auth_xform.auth.algo]); + rte_hexdump(stdout, "Auth key:", + options->auth_xform.auth.key.data, + options->auth_xform.auth.key.length); + rte_hexdump(stdout, "AAD:", options->aad.data, options->aad.length); +} + +static void +l2fwd_crypto_options_print(struct l2fwd_crypto_options *options) +{ + char string_cipher_op[MAX_STR_LEN]; + char string_auth_op[MAX_STR_LEN]; + + if (options->cipher_xform.cipher.op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) + strcpy(string_cipher_op, "Encrypt"); + else + strcpy(string_cipher_op, "Decrypt"); + + if (options->auth_xform.auth.op == RTE_CRYPTO_AUTH_OP_GENERATE) + strcpy(string_auth_op, "Auth generate"); + else + strcpy(string_auth_op, "Auth verify"); + + printf("Options:-\nn"); + printf("portmask: %x\n", options->portmask); + printf("ports per lcore: %u\n", options->nb_ports_per_lcore); + printf("refresh period : %u\n", options->refresh_period); + printf("single lcore mode: %s\n", + options->single_lcore ? "enabled" : "disabled"); + printf("stats_printing: %s\n", + options->refresh_period == 0 ? "disabled" : "enabled"); + + printf("sessionless crypto: %s\n", + options->sessionless ? 
"enabled" : "disabled"); + + if (options->ckey_param && (options->ckey_random_size != -1)) + printf("Cipher key already parsed, ignoring size of random key\n"); + + if (options->akey_param && (options->akey_random_size != -1)) + printf("Auth key already parsed, ignoring size of random key\n"); + + if (options->iv_param && (options->iv_random_size != -1)) + printf("IV already parsed, ignoring size of random IV\n"); + + if (options->aad_param && (options->aad_random_size != -1)) + printf("AAD already parsed, ignoring size of random AAD\n"); + + printf("\nCrypto chain: "); + switch (options->xform_chain) { + case L2FWD_CRYPTO_CIPHER_HASH: + printf("Input --> %s --> %s --> Output\n", + string_cipher_op, string_auth_op); + display_cipher_info(options); + display_auth_info(options); + break; + case L2FWD_CRYPTO_HASH_CIPHER: + printf("Input --> %s --> %s --> Output\n", + string_auth_op, string_cipher_op); + display_cipher_info(options); + display_auth_info(options); + break; + case L2FWD_CRYPTO_HASH_ONLY: + printf("Input --> %s --> Output\n", string_auth_op); + display_auth_info(options); + break; + case L2FWD_CRYPTO_CIPHER_ONLY: + printf("Input --> %s --> Output\n", string_cipher_op); + display_cipher_info(options); + break; + } +} + +/* Parse the argument given in the command line of the application */ +static int +l2fwd_crypto_parse_args(struct l2fwd_crypto_options *options, + int argc, char **argv) +{ + int opt, retval, option_index; + char **argvopt = argv, *prgname = argv[0]; + + static struct option lgopts[] = { + { "sessionless", no_argument, 0, 0 }, + + { "cdev_type", required_argument, 0, 0 }, + { "chain", required_argument, 0, 0 }, + + { "cipher_algo", required_argument, 0, 0 }, + { "cipher_op", required_argument, 0, 0 }, + { "cipher_key", required_argument, 0, 0 }, + { "cipher_key_random_size", required_argument, 0, 0 }, + + { "auth_algo", required_argument, 0, 0 }, + { "auth_op", required_argument, 0, 0 }, + { "auth_key", required_argument, 0, 0 }, + { 
"auth_key_random_size", required_argument, 0, 0 }, + + { "iv", required_argument, 0, 0 }, + { "iv_random_size", required_argument, 0, 0 }, + { "aad", required_argument, 0, 0 }, + { "aad_random_size", required_argument, 0, 0 }, + { "digest_size", required_argument, 0, 0 }, + + { "sessionless", no_argument, 0, 0 }, + + { NULL, 0, 0, 0 } + }; + + l2fwd_crypto_default_options(options); + + while ((opt = getopt_long(argc, argvopt, "p:q:st:", lgopts, + &option_index)) != EOF) { + switch (opt) { + /* long options */ + case 0: + retval = l2fwd_crypto_parse_args_long_options(options, + lgopts, option_index); + if (retval < 0) { + l2fwd_crypto_usage(prgname); + return -1; + } + break; + + /* portmask */ + case 'p': + retval = l2fwd_crypto_parse_portmask(options, optarg); + if (retval < 0) { + l2fwd_crypto_usage(prgname); + return -1; + } + break; + + /* nqueue */ + case 'q': + retval = l2fwd_crypto_parse_nqueue(options, optarg); + if (retval < 0) { + l2fwd_crypto_usage(prgname); + return -1; + } + break; + + /* single */ + case 's': + options->single_lcore = 1; + + break; + + /* timer period */ + case 'T': + retval = l2fwd_crypto_parse_timer_period(options, + optarg); + if (retval < 0) { + l2fwd_crypto_usage(prgname); + return -1; + } + break; + + default: + l2fwd_crypto_usage(prgname); + return -1; + } + } + + + if (optind >= 0) + argv[optind-1] = prgname; + + retval = optind-1; + optind = 0; /* reset getopt lib */ + + return retval; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 
<< portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +/* Check if device has to be HW/SW or any */ +static int +check_type(struct l2fwd_crypto_options *options, struct rte_cryptodev_info *dev_info) +{ + if (options->type == CDEV_TYPE_HW && + (dev_info->feature_flags & RTE_CRYPTODEV_FF_HW_ACCELERATED)) + return 0; + if (options->type == CDEV_TYPE_SW && + !(dev_info->feature_flags & RTE_CRYPTODEV_FF_HW_ACCELERATED)) + return 0; + if (options->type == CDEV_TYPE_ANY) + return 0; + + return -1; +} + +static inline int +check_supported_size(uint16_t length, uint16_t min, uint16_t max, + uint16_t increment) +{ + uint16_t supp_size; + + for (supp_size = min; supp_size <= max; supp_size += increment) { + if (length == supp_size) + return 0; + } + + return -1; +} +static int +initialize_cryptodevs(struct l2fwd_crypto_options *options, unsigned nb_ports, + uint8_t *enabled_cdevs) +{ + unsigned i, cdev_id, cdev_count, enabled_cdev_count = 0; + const struct rte_cryptodev_capabilities *cap; + enum rte_crypto_auth_algorithm cap_auth_algo; + enum rte_crypto_auth_algorithm opt_auth_algo; + enum 
rte_crypto_cipher_algorithm cap_cipher_algo; + enum rte_crypto_cipher_algorithm opt_cipher_algo; + int retval; + + cdev_count = rte_cryptodev_count(); + if (cdev_count == 0) { + printf("No crypto devices available\n"); + return -1; + } + + for (cdev_id = 0; cdev_id < cdev_count && enabled_cdev_count < nb_ports; + cdev_id++) { + struct rte_cryptodev_qp_conf qp_conf; + struct rte_cryptodev_info dev_info; + + struct rte_cryptodev_config conf = { + .nb_queue_pairs = 1, + .socket_id = SOCKET_ID_ANY, + .session_mp = { + .nb_objs = 2048, + .cache_size = 64 + } + }; + + rte_cryptodev_info_get(cdev_id, &dev_info); + + /* Set cipher parameters */ + if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH || + options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER || + options->xform_chain == L2FWD_CRYPTO_CIPHER_ONLY) { + /* Check if device supports cipher algo */ + i = 0; + opt_cipher_algo = options->cipher_xform.cipher.algo; + cap = &dev_info.capabilities[i]; + while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) { + cap_cipher_algo = cap->sym.cipher.algo; + if (cap->sym.xform_type == + RTE_CRYPTO_SYM_XFORM_CIPHER) { + if (cap_cipher_algo == opt_cipher_algo) { + if (check_type(options, &dev_info) == 0) + break; + } + } + cap = &dev_info.capabilities[++i]; + } + + if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) { + printf("Algorithm %s not supported by cryptodev %u" + " or device not of preferred type (%s)\n", + supported_cipher_algo[opt_cipher_algo], + cdev_id, + options->string_type); + continue; + } + + options->block_size = cap->sym.cipher.block_size; + /* + * Check if length of provided IV is supported + * by the algorithm chosen. + */ + if (options->iv_param) { + if (check_supported_size(options->iv.length, + cap->sym.cipher.iv_size.min, + cap->sym.cipher.iv_size.max, + cap->sym.cipher.iv_size.increment) + != 0) { + printf("Unsupported IV length\n"); + return -1; + } + /* + * Check if length of IV to be randomly generated + * is supported by the algorithm chosen. 
+ */ + } else if (options->iv_random_size != -1) { + if (check_supported_size(options->iv_random_size, + cap->sym.cipher.iv_size.min, + cap->sym.cipher.iv_size.max, + cap->sym.cipher.iv_size.increment) + != 0) { + printf("Unsupported IV length\n"); + return -1; + } + options->iv.length = options->iv_random_size; + /* No size provided, use minimum size. */ + } else + options->iv.length = cap->sym.cipher.iv_size.min; + + /* + * Check if length of provided cipher key is supported + * by the algorithm chosen. + */ + if (options->ckey_param) { + if (check_supported_size( + options->cipher_xform.cipher.key.length, + cap->sym.cipher.key_size.min, + cap->sym.cipher.key_size.max, + cap->sym.cipher.key_size.increment) + != 0) { + printf("Unsupported cipher key length\n"); + return -1; + } + /* + * Check if length of the cipher key to be randomly generated + * is supported by the algorithm chosen. + */ + } else if (options->ckey_random_size != -1) { + if (check_supported_size(options->ckey_random_size, + cap->sym.cipher.key_size.min, + cap->sym.cipher.key_size.max, + cap->sym.cipher.key_size.increment) + != 0) { + printf("Unsupported cipher key length\n"); + return -1; + } + options->cipher_xform.cipher.key.length = + options->ckey_random_size; + /* No size provided, use minimum size. 
*/ + } else + options->cipher_xform.cipher.key.length = + cap->sym.cipher.key_size.min; + + if (!options->ckey_param) + generate_random_key( + options->cipher_xform.cipher.key.data, + options->cipher_xform.cipher.key.length); + + } + + /* Set auth parameters */ + if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH || + options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER || + options->xform_chain == L2FWD_CRYPTO_HASH_ONLY) { + /* Check if device supports auth algo */ + i = 0; + opt_auth_algo = options->auth_xform.auth.algo; + cap = &dev_info.capabilities[i]; + while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) { + cap_auth_algo = cap->sym.auth.algo; + if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH) && + (cap_auth_algo == opt_auth_algo) && + (check_type(options, &dev_info) == 0)) { + break; + } + cap = &dev_info.capabilities[++i]; + } + + if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) { + printf("Algorithm %s not supported by cryptodev %u" + " or device not of preferred type (%s)\n", + supported_auth_algo[opt_auth_algo], + cdev_id, + options->string_type); + continue; + } + + options->block_size = cap->sym.auth.block_size; + /* + * Check if length of provided AAD is supported + * by the algorithm chosen. + */ + if (options->aad_param) { + if (check_supported_size(options->aad.length, + cap->sym.auth.aad_size.min, + cap->sym.auth.aad_size.max, + cap->sym.auth.aad_size.increment) + != 0) { + printf("Unsupported AAD length\n"); + return -1; + } + /* + * Check if length of AAD to be randomly generated + * is supported by the algorithm chosen. + */ + } else if (options->aad_random_size != -1) { + if (check_supported_size(options->aad_random_size, + cap->sym.auth.aad_size.min, + cap->sym.auth.aad_size.max, + cap->sym.auth.aad_size.increment) + != 0) { + printf("Unsupported AAD length\n"); + return -1; + } + options->aad.length = options->aad_random_size; + /* No size provided, use minimum size. 
*/ + } else + options->aad.length = cap->sym.auth.aad_size.min; + + options->auth_xform.auth.add_auth_data_length = + options->aad.length; + + /* + * Check if length of provided auth key is supported + * by the algorithm chosen. + */ + if (options->akey_param) { + if (check_supported_size( + options->auth_xform.auth.key.length, + cap->sym.auth.key_size.min, + cap->sym.auth.key_size.max, + cap->sym.auth.key_size.increment) + != 0) { + printf("Unsupported auth key length\n"); + return -1; + } + /* + * Check if length of the auth key to be randomly generated + * is supported by the algorithm chosen. + */ + } else if (options->akey_random_size != -1) { + if (check_supported_size(options->akey_random_size, + cap->sym.auth.key_size.min, + cap->sym.auth.key_size.max, + cap->sym.auth.key_size.increment) + != 0) { + printf("Unsupported auth key length\n"); + return -1; + } + options->auth_xform.auth.key.length = + options->akey_random_size; + /* No size provided, use minimum size. */ + } else + options->auth_xform.auth.key.length = + cap->sym.auth.key_size.min; + + if (!options->akey_param) + generate_random_key( + options->auth_xform.auth.key.data, + options->auth_xform.auth.key.length); + + /* Check if digest size is supported by the algorithm. */ + if (options->digest_size != -1) { + if (check_supported_size(options->digest_size, + cap->sym.auth.digest_size.min, + cap->sym.auth.digest_size.max, + cap->sym.auth.digest_size.increment) + != 0) { + printf("Unsupported digest length\n"); + return -1; + } + options->auth_xform.auth.digest_length = + options->digest_size; + /* No size provided, use minimum size. 
*/ + } else + options->auth_xform.auth.digest_length = + cap->sym.auth.digest_size.min; + } + + retval = rte_cryptodev_configure(cdev_id, &conf); + if (retval < 0) { + printf("Failed to configure cryptodev %u", cdev_id); + return -1; + } + + qp_conf.nb_descriptors = 2048; + + retval = rte_cryptodev_queue_pair_setup(cdev_id, 0, &qp_conf, + SOCKET_ID_ANY); + if (retval < 0) { + printf("Failed to setup queue pair %u on cryptodev %u", + 0, cdev_id); + return -1; + } + + l2fwd_enabled_crypto_mask |= (1 << cdev_id); + + enabled_cdevs[cdev_id] = 1; + enabled_cdev_count++; + } + + return enabled_cdev_count; +} + +static int +initialize_ports(struct l2fwd_crypto_options *options) +{ + uint8_t last_portid, portid; + unsigned enabled_portcount = 0; + unsigned nb_ports = rte_eth_dev_count(); + + if (nb_ports == 0) { + printf("No Ethernet ports - bye\n"); + return -1; + } + + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* Reset l2fwd_dst_ports */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) + l2fwd_dst_ports[portid] = 0; + + for (last_portid = 0, portid = 0; portid < nb_ports; portid++) { + int retval; + + /* Skip ports that are not enabled */ + if ((options->portmask & (1 << portid)) == 0) + continue; + + /* init port */ + printf("Initializing port %u... 
", (unsigned) portid); + fflush(stdout); + retval = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (retval < 0) { + printf("Cannot configure device: err=%d, port=%u\n", + retval, (unsigned) portid); + return -1; + } + + /* init one RX queue */ + fflush(stdout); + retval = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + rte_eth_dev_socket_id(portid), + NULL, l2fwd_pktmbuf_pool); + if (retval < 0) { + printf("rte_eth_rx_queue_setup:err=%d, port=%u\n", + retval, (unsigned) portid); + return -1; + } + + /* init one TX queue on each port */ + fflush(stdout); + retval = rte_eth_tx_queue_setup(portid, 0, nb_txd, + rte_eth_dev_socket_id(portid), + NULL); + if (retval < 0) { + printf("rte_eth_tx_queue_setup:err=%d, port=%u\n", + retval, (unsigned) portid); + + return -1; + } + + /* Start device */ + retval = rte_eth_dev_start(portid); + if (retval < 0) { + printf("rte_eth_dev_start:err=%d, port=%u\n", + retval, (unsigned) portid); + return -1; + } + + rte_eth_promiscuous_enable(portid); + + rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]); + + printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", + (unsigned) portid, + l2fwd_ports_eth_addr[portid].addr_bytes[0], + l2fwd_ports_eth_addr[portid].addr_bytes[1], + l2fwd_ports_eth_addr[portid].addr_bytes[2], + l2fwd_ports_eth_addr[portid].addr_bytes[3], + l2fwd_ports_eth_addr[portid].addr_bytes[4], + l2fwd_ports_eth_addr[portid].addr_bytes[5]); + + /* initialize port stats */ + memset(&port_statistics, 0, sizeof(port_statistics)); + + /* Setup port forwarding table */ + if (enabled_portcount % 2) { + l2fwd_dst_ports[portid] = last_portid; + l2fwd_dst_ports[last_portid] = portid; + } else { + last_portid = portid; + } + + l2fwd_enabled_port_mask |= (1 << portid); + enabled_portcount++; + } + + if (enabled_portcount == 1) { + l2fwd_dst_ports[last_portid] = last_portid; + } else if (enabled_portcount % 2) { + printf("odd number of ports in portmask- bye\n"); + return -1; + } + + 
check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask); + + return enabled_portcount; +} + +static void +reserve_key_memory(struct l2fwd_crypto_options *options) +{ + options->cipher_xform.cipher.key.data = rte_malloc("crypto key", + MAX_KEY_SIZE, 0); + if (options->cipher_xform.cipher.key.data == NULL) + rte_exit(EXIT_FAILURE, "Failed to allocate memory for cipher key"); + + + options->auth_xform.auth.key.data = rte_malloc("auth key", + MAX_KEY_SIZE, 0); + if (options->auth_xform.auth.key.data == NULL) + rte_exit(EXIT_FAILURE, "Failed to allocate memory for auth key"); + + options->iv.data = rte_malloc("iv", MAX_KEY_SIZE, 0); + if (options->iv.data == NULL) + rte_exit(EXIT_FAILURE, "Failed to allocate memory for IV"); + options->iv.phys_addr = rte_malloc_virt2phy(options->iv.data); + + options->aad.data = rte_malloc("aad", MAX_KEY_SIZE, 0); + if (options->aad.data == NULL) + rte_exit(EXIT_FAILURE, "Failed to allocate memory for AAD"); + options->aad.phys_addr = rte_malloc_virt2phy(options->aad.data); +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + struct l2fwd_crypto_options options; + + uint8_t nb_ports, nb_cryptodevs, portid, cdev_id; + unsigned lcore_id, rx_lcore_id; + int ret, enabled_cdevcount, enabled_portcount; + uint8_t enabled_cdevs[RTE_CRYPTO_MAX_DEVS] = {0}; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n"); + argc -= ret; + argv += ret; + + /* reserve memory for Cipher/Auth key and IV */ + reserve_key_memory(&options); + + /* fill out the supported algorithm tables */ + fill_supported_algorithm_tables(); + + /* parse application arguments (after the EAL ones) */ + ret = l2fwd_crypto_parse_args(&options, argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L2FWD-CRYPTO arguments\n"); + + /* create the mbuf pool */ + l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 512, + sizeof(struct rte_crypto_op), + 
RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (l2fwd_pktmbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* create crypto op pool */ + l2fwd_crypto_op_pool = rte_crypto_op_pool_create("crypto_op_pool", + RTE_CRYPTO_OP_TYPE_SYMMETRIC, NB_MBUF, 128, 0, + rte_socket_id()); + if (l2fwd_crypto_op_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create crypto op pool\n"); + + /* Enable Ethernet ports */ + enabled_portcount = initialize_ports(&options); + if (enabled_portcount < 1) + rte_exit(EXIT_FAILURE, "Failed to initial Ethernet ports\n"); + + nb_ports = rte_eth_dev_count(); + /* Initialize the port/queue configuration of each logical core */ + for (rx_lcore_id = 0, qconf = NULL, portid = 0; + portid < nb_ports; portid++) { + + /* skip ports that are not enabled */ + if ((options.portmask & (1 << portid)) == 0) + continue; + + if (options.single_lcore && qconf == NULL) { + while (rte_lcore_is_enabled(rx_lcore_id) == 0) { + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, + "Not enough cores\n"); + } + } else if (!options.single_lcore) { + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + lcore_queue_conf[rx_lcore_id].nb_rx_ports == + options.nb_ports_per_lcore) { + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, + "Not enough cores\n"); + } + } + + /* Assigned a new logical core in the loop above. 
*/ + if (qconf != &lcore_queue_conf[rx_lcore_id]) + qconf = &lcore_queue_conf[rx_lcore_id]; + + qconf->rx_port_list[qconf->nb_rx_ports] = portid; + qconf->nb_rx_ports++; + + printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned)portid); + } + + /* Enable Crypto devices */ + enabled_cdevcount = initialize_cryptodevs(&options, enabled_portcount, + enabled_cdevs); + if (enabled_cdevcount < 0) + rte_exit(EXIT_FAILURE, "Failed to initialize crypto devices\n"); + + if (enabled_cdevcount < enabled_portcount) + rte_exit(EXIT_FAILURE, "Number of capable crypto devices (%d) " + "has to be more or equal to number of ports (%d)\n", + enabled_cdevcount, enabled_portcount); + + nb_cryptodevs = rte_cryptodev_count(); + + /* Initialize the port/cryptodev configuration of each logical core */ + for (rx_lcore_id = 0, qconf = NULL, cdev_id = 0; + cdev_id < nb_cryptodevs && enabled_cdevcount; + cdev_id++) { + /* Crypto op not supported by crypto device */ + if (!enabled_cdevs[cdev_id]) + continue; + + if (options.single_lcore && qconf == NULL) { + while (rte_lcore_is_enabled(rx_lcore_id) == 0) { + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, + "Not enough cores\n"); + } + } else if (!options.single_lcore) { + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + lcore_queue_conf[rx_lcore_id].nb_crypto_devs == + options.nb_ports_per_lcore) { + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, + "Not enough cores\n"); + } + } + + /* Assigned a new logical core in the loop above. 
*/ + if (qconf != &lcore_queue_conf[rx_lcore_id]) + qconf = &lcore_queue_conf[rx_lcore_id]; + + qconf->cryptodev_list[qconf->nb_crypto_devs] = cdev_id; + qconf->nb_crypto_devs++; + + enabled_cdevcount--; + + printf("Lcore %u: cryptodev %u\n", rx_lcore_id, + (unsigned)cdev_id); + } + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, (void *)&options, + CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/l2fwd-ivshmem/Makefile b/examples/l2fwd-ivshmem/Makefile new file mode 100644 index 00000000..5f1d1728 --- /dev/null +++ b/examples/l2fwd-ivshmem/Makefile @@ -0,0 +1,43 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-ivshmem-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += host guest + +include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/l2fwd-ivshmem/guest/Makefile b/examples/l2fwd-ivshmem/guest/Makefile new file mode 100644 index 00000000..3ca73b43 --- /dev/null +++ b/examples/l2fwd-ivshmem/guest/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-ivshmem-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = guest + +# all source are stored in SRCS-y +SRCS-y := guest.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-ivshmem/guest/guest.c b/examples/l2fwd-ivshmem/guest/guest.c new file mode 100644 index 00000000..7c49521b --- /dev/null +++ b/examples/l2fwd-ivshmem/guest/guest.c @@ -0,0 +1,452 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <getopt.h> +#include <signal.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/queue.h> +#include <sys/file.h> +#include <unistd.h> +#include <limits.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <sys/time.h> + +#include <rte_common.h> +#include <rte_eal_memconfig.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ivshmem.h> + +#include "../include/common.h" + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + unsigned n_rx_port; + unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; + struct mbuf_table rx_mbufs[RTE_MAX_ETHPORTS]; + struct vm_port_param * port_param[MAX_RX_QUEUE_PER_LCORE]; +} __rte_cache_aligned; +static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +/* Print out statistics on packets dropped */ +static void +print_stats(void) +{ + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; + unsigned portid; + + total_packets_dropped = 0; + total_packets_tx = 0; + total_packets_rx = 0; + + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' }; + + /* Clear screen and move to top left */ + printf("%s%s", clr, topLeft); + + printf("\nPort statistics ===================================="); + + for (portid = 0; portid < ctrl->nb_ports; portid++) { + /* 
skip ports that are not enabled */ + printf("\nStatistics for port %u ------------------------------" + "\nPackets sent: %24"PRIu64 + "\nPackets received: %20"PRIu64 + "\nPackets dropped: %21"PRIu64, + portid, + ctrl->vm_ports[portid].stats.tx, + ctrl->vm_ports[portid].stats.rx, + ctrl->vm_ports[portid].stats.dropped); + + total_packets_dropped += ctrl->vm_ports[portid].stats.dropped; + total_packets_tx += ctrl->vm_ports[portid].stats.tx; + total_packets_rx += ctrl->vm_ports[portid].stats.rx; + } + printf("\nAggregate statistics ===============================" + "\nTotal packets sent: %18"PRIu64 + "\nTotal packets received: %14"PRIu64 + "\nTotal packets dropped: %15"PRIu64, + total_packets_tx, + total_packets_rx, + total_packets_dropped); + printf("\n====================================================\n"); +} + +/* display usage */ +static void +l2fwd_ivshmem_usage(const char *prgname) +{ + printf("%s [EAL options] -- [-q NQ -T PERIOD]\n" + " -q NQ: number of queue (=ports) per lcore (default is 1)\n" + " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n", + prgname); +} + +static unsigned int +l2fwd_ivshmem_parse_nqueue(const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse hexadecimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + if (n == 0) + return 0; + if (n >= MAX_RX_QUEUE_PER_LCORE) + return 0; + + return n; +} + +static int +l2fwd_ivshmem_parse_timer_period(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n >= MAX_TIMER_PERIOD) + return -1; + + return n; +} + +/* Parse the argument given in the command line of the application */ +static int +l2fwd_ivshmem_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + 
static struct option lgopts[] = { + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "q:p:T:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + + /* nqueue */ + case 'q': + l2fwd_ivshmem_rx_queue_per_lcore = l2fwd_ivshmem_parse_nqueue(optarg); + if (l2fwd_ivshmem_rx_queue_per_lcore == 0) { + printf("invalid queue number\n"); + l2fwd_ivshmem_usage(prgname); + return -1; + } + break; + + /* timer period */ + case 'T': + timer_period = l2fwd_ivshmem_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND; + if (timer_period < 0) { + printf("invalid timer period\n"); + l2fwd_ivshmem_usage(prgname); + return -1; + } + break; + + /* long options */ + case 0: + l2fwd_ivshmem_usage(prgname); + return -1; + + default: + l2fwd_ivshmem_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +/* + * this loop is getting packets from RX rings of each port, and puts them + * into TX rings of destination ports. 
+ */ +static void +fwd_loop(void) +{ + + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct rte_mbuf **m_table; + struct rte_mbuf *m; + struct rte_ring *rx, *tx; + unsigned lcore_id, len; + uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc; + unsigned i, j, portid, nb_rx; + struct lcore_queue_conf *qconf; + struct ether_hdr *eth; + void *tmp; + + prev_tsc = 0; + timer_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + if (qconf->n_rx_port == 0) { + RTE_LOG(INFO, L2FWD_IVSHMEM, "lcore %u has nothing to do\n", lcore_id); + return; + } + + RTE_LOG(INFO, L2FWD_IVSHMEM, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_port; i++) { + portid = qconf->rx_port_list[i]; + RTE_LOG(INFO, L2FWD_IVSHMEM, " -- lcoreid=%u portid=%u\n", lcore_id, + portid); + } + + while (ctrl->state == STATE_FWD) { + cur_tsc = rte_rdtsc(); + + diff_tsc = cur_tsc - prev_tsc; + + /* + * Read packet from RX queues and send it to TX queues + */ + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = qconf->rx_port_list[i]; + + len = qconf->rx_mbufs[portid].len; + + rx = ctrl->vm_ports[portid].rx_ring; + tx = ctrl->vm_ports[portid].dst->tx_ring; + + m_table = qconf->rx_mbufs[portid].m_table; + + /* if we have something in the queue, try and transmit it down */ + if (len != 0) { + + /* if we succeed in sending the packets down, mark queue as free */ + if (rte_ring_enqueue_bulk(tx, (void**) m_table, len) == 0) { + ctrl->vm_ports[portid].stats.tx += len; + qconf->rx_mbufs[portid].len = 0; + len = 0; + } + } + + nb_rx = rte_ring_count(rx); + + nb_rx = RTE_MIN(nb_rx, (unsigned) MAX_PKT_BURST); + + if (nb_rx == 0) + continue; + + /* if we can get packets into the m_table */ + if (nb_rx < (RTE_DIM(qconf->rx_mbufs[portid].m_table) - len)) { + + /* this situation cannot exist, so if we fail to dequeue, that + * means something went horribly wrong, hence the failure. 
*/ + if (rte_ring_dequeue_bulk(rx, (void**) pkts_burst, nb_rx) < 0) { + ctrl->state = STATE_FAIL; + return; + } + + ctrl->vm_ports[portid].stats.rx += nb_rx; + + /* put packets into the queue */ + for (j = 0; j < nb_rx; j++) { + m = pkts_burst[j]; + + rte_prefetch0(rte_pktmbuf_mtod(m, void *)); + + m_table[len + j] = m; + + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = &eth->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)portid << 40); + + /* src addr */ + ether_addr_copy(&ctrl->vm_ports[portid].dst->ethaddr, + &eth->s_addr); + } + qconf->rx_mbufs[portid].len += nb_rx; + + } + + } + + /* if timer is enabled */ + if (timer_period > 0) { + + /* advance the timer */ + timer_tsc += diff_tsc; + + /* if timer has reached its timeout */ + if (unlikely(timer_tsc >= (uint64_t) timer_period)) { + + /* do this only on master core */ + if (lcore_id == rte_get_master_lcore()) { + print_stats(); + /* reset the timer */ + timer_tsc = 0; + } + } + } + + prev_tsc = cur_tsc; + } +} + +static int +l2fwd_ivshmem_launch_one_lcore(__attribute__((unused)) void *dummy) +{ + fwd_loop(); + return 0; +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + const struct rte_memzone * mz; + int ret; + uint8_t portid; + unsigned rx_lcore_id, lcore_id; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = l2fwd_ivshmem_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid l2fwd-ivshmem arguments\n"); + + /* find control structure */ + mz = rte_memzone_lookup(CTRL_MZ_NAME); + if (mz == NULL) + rte_exit(EXIT_FAILURE, "Cannot find control memzone\n"); + + ctrl = (struct ivshmem_ctrl*) mz->addr; + + /* lock the ctrl so that we don't have conflicts with anything else */ + rte_spinlock_lock(&ctrl->lock); + + if (ctrl->state == STATE_FWD) +
rte_exit(EXIT_FAILURE, "Forwarding already started!\n"); + + rx_lcore_id = 0; + qconf = NULL; + + /* Initialize the port/queue configuration of each logical core */ + for (portid = 0; portid < ctrl->nb_ports; portid++) { + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + lcore_queue_conf[rx_lcore_id].n_rx_port == + l2fwd_ivshmem_rx_queue_per_lcore) { + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + } + + if (qconf != &lcore_queue_conf[rx_lcore_id]) + /* Assigned a new logical core in the loop above. */ + qconf = &lcore_queue_conf[rx_lcore_id]; + + qconf->rx_port_list[qconf->n_rx_port] = portid; + qconf->port_param[qconf->n_rx_port] = &ctrl->vm_ports[portid]; + qconf->n_rx_port++; + + printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid); + } + + sigsetup(); + + /* indicate that we are ready to forward */ + ctrl->state = STATE_FWD; + + /* unlock */ + rte_spinlock_unlock(&ctrl->lock); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(l2fwd_ivshmem_launch_one_lcore, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/l2fwd-ivshmem/host/Makefile b/examples/l2fwd-ivshmem/host/Makefile new file mode 100644 index 00000000..f91419e9 --- /dev/null +++ b/examples/l2fwd-ivshmem/host/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-ivshmem-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = host + +# all source are stored in SRCS-y +SRCS-y := host.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-ivshmem/host/host.c b/examples/l2fwd-ivshmem/host/host.c new file mode 100644 index 00000000..4bd7c41d --- /dev/null +++ b/examples/l2fwd-ivshmem/host/host.c @@ -0,0 +1,897 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <inttypes.h> +#include <getopt.h> +#include <signal.h> + +#include <rte_eal.h> +#include <rte_cycles.h> +#include <rte_eal_memconfig.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_string_fns.h> +#include <rte_ivshmem.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> + +#include "../include/common.h" + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* mask of enabled ports */ +static uint32_t l2fwd_ivshmem_enabled_port_mask = 0; + +static struct ether_addr l2fwd_ivshmem_ports_eth_addr[RTE_MAX_ETHPORTS]; + +#define NB_MBUF 8192 + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + unsigned n_rx_port; + unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; + struct vm_port_param * port_param[MAX_RX_QUEUE_PER_LCORE]; + struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; + struct mbuf_table rx_mbufs[RTE_MAX_ETHPORTS]; +} __rte_cache_aligned; +static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +#define METADATA_NAME "l2fwd_ivshmem" +#define CMDLINE_OPT_FWD_CONF "fwd-conf" + +#define QEMU_CMD_FMT "/tmp/ivshmem_qemu_cmdline_%s" + +struct port_statistics 
port_statistics[RTE_MAX_ETHPORTS]; + +struct rte_mempool * l2fwd_ivshmem_pktmbuf_pool = NULL; + +/* Print out per-port and aggregate packet statistics for the host ports and the VM ports */ +static void +print_stats(void) +{ + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; + uint64_t total_vm_packets_dropped, total_vm_packets_tx, total_vm_packets_rx; + unsigned portid; + + total_packets_dropped = 0; + total_packets_tx = 0; + total_packets_rx = 0; + /* every accumulator below is updated with += in the VM loop, so each one must start at zero */ + total_vm_packets_dropped = 0; + total_vm_packets_tx = 0; + total_vm_packets_rx = 0; + + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' }; + + /* Clear screen and move to top left */ + printf("%s%s", clr, topLeft); + + printf("\nPort statistics ===================================="); + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + /* skip disabled ports */ + if ((l2fwd_ivshmem_enabled_port_mask & (1 << portid)) == 0) + continue; + printf("\nStatistics for port %u ------------------------------" + "\nPackets sent: %24"PRIu64 + "\nPackets received: %20"PRIu64 + "\nPackets dropped: %21"PRIu64, + portid, + port_statistics[portid].tx, + port_statistics[portid].rx, + port_statistics[portid].dropped); + + total_packets_dropped += port_statistics[portid].dropped; + total_packets_tx += port_statistics[portid].tx; + total_packets_rx += port_statistics[portid].rx; + } + + printf("\nVM statistics ======================================"); + for (portid = 0; portid < ctrl->nb_ports; portid++) { + printf("\nStatistics for port %u ------------------------------" + "\nPackets sent: %24"PRIu64 + "\nPackets received: %20"PRIu64, + portid, + ctrl->vm_ports[portid].stats.tx, + ctrl->vm_ports[portid].stats.rx); + + total_vm_packets_dropped += ctrl->vm_ports[portid].stats.dropped; + total_vm_packets_tx += ctrl->vm_ports[portid].stats.tx; + total_vm_packets_rx += ctrl->vm_ports[portid].stats.rx; + } + printf("\nAggregate statistics ===============================" + "\nTotal packets sent: %18"PRIu64 + "\nTotal packets received: 
%14"PRIu64 + "\nTotal packets dropped: %15"PRIu64 + "\nTotal VM packets sent: %15"PRIu64 + "\nTotal VM packets received: %11"PRIu64, + total_packets_tx, + total_packets_rx, + total_packets_dropped, + total_vm_packets_tx, + total_vm_packets_rx); + printf("\n====================================================\n"); +} + +static int +print_to_file(const char *cmdline, const char *config_name) +{ + FILE *file; + char path[PATH_MAX]; + + snprintf(path, sizeof(path), QEMU_CMD_FMT, config_name); + file = fopen(path, "w"); + if (file == NULL) { + RTE_LOG(ERR, L2FWD_IVSHMEM, "Could not open '%s' \n", path); + return -1; + } + + RTE_LOG(DEBUG, L2FWD_IVSHMEM, "QEMU command line for config '%s': %s \n", + config_name, cmdline); + + fprintf(file, "%s\n", cmdline); + fclose(file); + return 0; +} + +static int +generate_ivshmem_cmdline(const char *config_name) +{ + char cmdline[PATH_MAX]; + if (rte_ivshmem_metadata_cmdline_generate(cmdline, sizeof(cmdline), + config_name) < 0) + return -1; + + if (print_to_file(cmdline, config_name) < 0) + return -1; + + rte_ivshmem_metadata_dump(stdout, config_name); + return 0; +} + +/* display usage */ +static void +l2fwd_ivshmem_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK [-q NQ -T PERIOD]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -q NQ: number of queue (=ports) per lcore (default is 1)\n" + " -T PERIOD: statistics will be refreshed each PERIOD seconds " + "(0 to disable, 10 default, 86400 maximum)\n", + prgname); +} + +static unsigned int +l2fwd_ivshmem_parse_nqueue(const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse hexadecimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + if (n == 0) + return 0; + if (n >= MAX_RX_QUEUE_PER_LCORE) + return 0; + + return n; +} + +static int +l2fwd_ivshmem_parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal 
string */ + pm = strtoul(portmask, &end, 16); + /* failures are reported as 0: the caller stores the result in the uint32_t port mask and rejects only 0, so -1 would be taken as "all ports enabled" */ + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + + if (pm == 0) + return 0; + + return pm; +} + +static int +l2fwd_ivshmem_parse_timer_period(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n >= MAX_TIMER_PERIOD) + return -1; + + return n; +} + +/* Parse the argument given in the command line of the application */ +static int +l2fwd_ivshmem_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {CMDLINE_OPT_FWD_CONF, 1, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "q:p:T:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + l2fwd_ivshmem_enabled_port_mask = l2fwd_ivshmem_parse_portmask(optarg); + if (l2fwd_ivshmem_enabled_port_mask == 0) { + printf("invalid portmask\n"); + l2fwd_ivshmem_usage(prgname); + return -1; + } + break; + + /* nqueue */ + case 'q': + l2fwd_ivshmem_rx_queue_per_lcore = l2fwd_ivshmem_parse_nqueue(optarg); + if (l2fwd_ivshmem_rx_queue_per_lcore == 0) { + printf("invalid queue number\n"); + l2fwd_ivshmem_usage(prgname); + return -1; + } + break; + + /* timer period */ + case 'T': + timer_period = l2fwd_ivshmem_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND; + if (timer_period < 0) { + printf("invalid timer period\n"); + l2fwd_ivshmem_usage(prgname); + return -1; + } + break; + + /* long options */ + case 0: + l2fwd_ivshmem_usage(prgname); + return -1; + + default: + l2fwd_ivshmem_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void 
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +/* Send the burst of packets on an output interface */ +static int +l2fwd_ivshmem_send_burst(struct lcore_queue_conf *qconf, unsigned n, uint8_t port) +{ + struct rte_mbuf **m_table; + unsigned ret; + unsigned queueid =0; + + m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; + + ret = rte_eth_tx_burst(port, (uint16_t) queueid, m_table, (uint16_t) n); + port_statistics[port].tx += ret; + if (unlikely(ret < n)) { + port_statistics[port].dropped += (n - ret); + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < n); + } + + return 0; +} + +/* Enqueue packets for TX and prepare them to 
be sent on the network */ +static int +l2fwd_ivshmem_send_packet(struct rte_mbuf *m, uint8_t port) +{ + unsigned lcore_id, len; + struct lcore_queue_conf *qconf; + + lcore_id = rte_lcore_id(); + + qconf = &lcore_queue_conf[lcore_id]; + len = qconf->tx_mbufs[port].len; + qconf->tx_mbufs[port].m_table[len] = m; + len++; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + l2fwd_ivshmem_send_burst(qconf, MAX_PKT_BURST, port); + len = 0; + } + + qconf->tx_mbufs[port].len = len; + return 0; +} + +static int +l2fwd_ivshmem_receive_burst(struct lcore_queue_conf *qconf, unsigned portid, + unsigned vm_port) +{ + struct rte_mbuf ** m; + struct rte_ring * rx; + unsigned len, pkt_idx; + + m = qconf->rx_mbufs[portid].m_table; + len = qconf->rx_mbufs[portid].len; + rx = qconf->port_param[vm_port]->rx_ring; + + /* if enqueueing failed, ring is probably full, so drop the packets */ + if (rte_ring_enqueue_bulk(rx, (void**) m, len) < 0) { + port_statistics[portid].dropped += len; + + pkt_idx = 0; + do { + rte_pktmbuf_free(m[pkt_idx]); + } while (++pkt_idx < len); + } + else + /* increment rx stats by however many packets we managed to receive */ + port_statistics[portid].rx += len; + + return 0; +} + +/* Enqueue packets for RX and prepare them to be sent to VM */ +static int +l2fwd_ivshmem_receive_packets(struct rte_mbuf ** m, unsigned n, unsigned portid, + unsigned vm_port) +{ + unsigned lcore_id, len, pkt_idx; + struct lcore_queue_conf *qconf; + + lcore_id = rte_lcore_id(); + + qconf = &lcore_queue_conf[lcore_id]; + + len = qconf->rx_mbufs[portid].len; + pkt_idx = 0; + + /* enqueue packets */ + while (pkt_idx < n && len < MAX_PKT_BURST * 2) { + qconf->rx_mbufs[portid].m_table[len++] = m[pkt_idx++]; + } + + /* increment queue len by however many packets we managed to receive */ + qconf->rx_mbufs[portid].len += pkt_idx; + + /* drop the unreceived packets */ + if (unlikely(pkt_idx < n)) { + port_statistics[portid].dropped += n - pkt_idx; + do { + 
rte_pktmbuf_free(m[pkt_idx]); + } while (++pkt_idx < n); + } + + /* drain the queue halfway through the maximum capacity */ + if (unlikely(qconf->rx_mbufs[portid].len >= MAX_PKT_BURST)) { + l2fwd_ivshmem_receive_burst(qconf, portid, vm_port); + /* the burst either enqueued or freed every staged mbuf, so empty the table; otherwise the same pointers would be handed over (or freed) again on the next call */ + qconf->rx_mbufs[portid].len = 0; + } + + return 0; +} + +/* loop for host forwarding mode. + * the data flow is as follows: + * 1) get packets from TX queue and send it out from a given port + * 2) RX packets from given port and enqueue them on RX ring + * 3) dequeue packets from TX ring and put them on TX queue for a given port + */ +static void +fwd_loop(void) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST * 2]; + struct rte_mbuf *m; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc; + unsigned i, j, portid, nb_rx; + struct lcore_queue_conf *qconf; + struct rte_ring *tx; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + timer_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + if (qconf->n_rx_port == 0) { + RTE_LOG(INFO, L2FWD_IVSHMEM, "lcore %u has nothing to do\n", lcore_id); + return; + } + + RTE_LOG(INFO, L2FWD_IVSHMEM, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = qconf->rx_port_list[i]; + RTE_LOG(INFO, L2FWD_IVSHMEM, " -- lcoreid=%u portid=%u\n", lcore_id, + portid); + } + + while (ctrl->state == STATE_FWD) { + + cur_tsc = rte_rdtsc(); + + /* + * Burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + /* + * TX + */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + if (qconf->tx_mbufs[portid].len == 0) + continue; + l2fwd_ivshmem_send_burst(qconf, + qconf->tx_mbufs[portid].len, + (uint8_t) portid); + qconf->tx_mbufs[portid].len = 0; + } + + /* + * RX + */ + for (i = 0; i < qconf->n_rx_port; i++) { + portid = qconf->rx_port_list[i]; + if (qconf->rx_mbufs[portid].len == 0) + continue; + l2fwd_ivshmem_receive_burst(qconf, portid, i); 
+ qconf->rx_mbufs[portid].len = 0; + } + + /* if timer is enabled */ + if (timer_period > 0) { + + /* advance the timer */ + timer_tsc += diff_tsc; + + /* if timer has reached its timeout */ + if (unlikely(timer_tsc >= (uint64_t) timer_period)) { + + /* do this only on master core */ + if (lcore_id == rte_get_master_lcore()) { + print_stats(); + /* reset the timer */ + timer_tsc = 0; + } + } + } + + prev_tsc = cur_tsc; + } + + /* + * packet RX and forwarding + */ + for (i = 0; i < qconf->n_rx_port; i++) { + + /* RX packets from port and put them on RX ring */ + portid = qconf->rx_port_list[i]; + nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, + pkts_burst, MAX_PKT_BURST); + + if (nb_rx != 0) + l2fwd_ivshmem_receive_packets(pkts_burst, nb_rx, portid, i); + + /* dequeue packets from TX ring and send them to TX queue */ + tx = qconf->port_param[i]->tx_ring; + + nb_rx = rte_ring_count(tx); + + nb_rx = RTE_MIN(nb_rx, (unsigned) MAX_PKT_BURST); + + if (nb_rx == 0) + continue; + + /* should not happen */ + if (unlikely(rte_ring_dequeue_bulk(tx, (void**) pkts_burst, nb_rx) < 0)) { + ctrl->state = STATE_FAIL; + return; + } + + for (j = 0; j < nb_rx; j++) { + m = pkts_burst[j]; + l2fwd_ivshmem_send_packet(m, portid); + } + } + } +} + +static int +l2fwd_ivshmem_launch_one_lcore(__attribute__((unused)) void *dummy) +{ + fwd_loop(); + return 0; +} + +int main(int argc, char **argv) +{ + char name[RTE_RING_NAMESIZE]; + struct rte_ring *r; + struct lcore_queue_conf *qconf; + struct rte_eth_dev_info dev_info; + uint8_t portid, port_nr; + uint8_t nb_ports, nb_ports_available; + uint8_t nb_ports_in_mask; + int ret; + unsigned lcore_id, rx_lcore_id; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = l2fwd_ivshmem_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid l2fwd-ivshmem arguments\n"); + + /* 
create a shared mbuf pool */ + l2fwd_ivshmem_pktmbuf_pool = + rte_pktmbuf_pool_create(MBUF_MP_NAME, NB_MBUF, 32, + 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (l2fwd_ivshmem_pktmbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* + * reserve memzone to communicate with VMs - we cannot use rte_malloc here + * because while it is technically possible, it is a very bad idea to share + * the heap between two primary processes. + */ + ctrl_mz = rte_memzone_reserve(CTRL_MZ_NAME, sizeof(struct ivshmem_ctrl), + SOCKET_ID_ANY, 0); + if (ctrl_mz == NULL) + rte_exit(EXIT_FAILURE, "Cannot reserve control memzone\n"); + ctrl = (struct ivshmem_ctrl*) ctrl_mz->addr; + + memset(ctrl, 0, sizeof(struct ivshmem_ctrl)); + + /* + * Each port is assigned an output port. + */ + nb_ports_in_mask = 0; + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_ivshmem_enabled_port_mask & (1 << portid)) == 0) + continue; + /* pair on the count of enabled ports, not on portid: vm_ports is indexed by that count, so testing portid parity would index vm_ports[-1] when the first enabled port is odd */ + if (nb_ports_in_mask % 2) { + ctrl->vm_ports[nb_ports_in_mask].dst = &ctrl->vm_ports[nb_ports_in_mask-1]; + ctrl->vm_ports[nb_ports_in_mask-1].dst = &ctrl->vm_ports[nb_ports_in_mask]; + } + + nb_ports_in_mask++; + + rte_eth_dev_info_get(portid, &dev_info); + } + if (nb_ports_in_mask % 2) { + printf("Notice: odd number of ports in portmask.\n"); + ctrl->vm_ports[nb_ports_in_mask-1].dst = + &ctrl->vm_ports[nb_ports_in_mask-1]; + } + + rx_lcore_id = 0; + qconf = NULL; + + printf("Initializing ports configuration...\n"); + + nb_ports_available = nb_ports; + + /* Initialise each port */ + for (portid = 0; portid < nb_ports; portid++) { + + /* skip ports that are not enabled */ + if ((l2fwd_ivshmem_enabled_port_mask & (1 << portid)) == 0) { + printf("Skipping disabled port %u\n", (unsigned) portid); + nb_ports_available--; + continue; 
+ } + + /* init port */ + printf("Initializing port %u... ", (unsigned) portid); + fflush(stdout); + ret = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", + ret, (unsigned) portid); + + rte_eth_macaddr_get(portid,&l2fwd_ivshmem_ports_eth_addr[portid]); + + /* init one RX queue */ + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + rte_eth_dev_socket_id(portid), + NULL, + l2fwd_ivshmem_pktmbuf_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n", + ret, (unsigned) portid); + + /* init one TX queue on each port */ + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, + rte_eth_dev_socket_id(portid), + NULL); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n", + ret, (unsigned) portid); + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", + ret, (unsigned) portid); + + printf("done: \n"); + + rte_eth_promiscuous_enable(portid); + + printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", + (unsigned) portid, + l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[0], + l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[1], + l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[2], + l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[3], + l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[4], + l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[5]); + + /* initialize port stats */ + memset(&port_statistics, 0, sizeof(port_statistics)); + } + + if (!nb_ports_available) { + rte_exit(EXIT_FAILURE, + "All available ports are disabled. 
Please set portmask.\n"); + } + port_nr = 0; + + /* Initialize the port/queue configuration of each logical core */ + for (portid = 0; portid < nb_ports; portid++) { + if ((l2fwd_ivshmem_enabled_port_mask & (1 << portid)) == 0) + continue; + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + lcore_queue_conf[rx_lcore_id].n_rx_port == + l2fwd_ivshmem_rx_queue_per_lcore) { + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + } + + if (qconf != &lcore_queue_conf[rx_lcore_id]) + /* Assigned a new logical core in the loop above. */ + qconf = &lcore_queue_conf[rx_lcore_id]; + + + rte_eth_macaddr_get(portid, &ctrl->vm_ports[port_nr].ethaddr); + + qconf->rx_port_list[qconf->n_rx_port] = portid; + qconf->port_param[qconf->n_rx_port] = &ctrl->vm_ports[port_nr]; + qconf->n_rx_port++; + port_nr++; + printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid); + } + + check_all_ports_link_status(nb_ports_available, l2fwd_ivshmem_enabled_port_mask); + + /* create rings for each VM port (several ports can be on the same VM). + * note that we store the pointers in ctrl - that way, they are the same + * and valid across all VMs because ctrl is also in DPDK memory */ + for (portid = 0; portid < nb_ports_available; portid++) { + + /* RX ring. SP/SC because it's only used by host and a single VM */ + snprintf(name, sizeof(name), "%s%i", RX_RING_PREFIX, portid); + r = rte_ring_create(name, NB_MBUF, + SOCKET_ID_ANY, RING_F_SP_ENQ | RING_F_SC_DEQ); + if (r == NULL) + rte_exit(EXIT_FAILURE, "Cannot create ring %s\n", name); + + ctrl->vm_ports[portid].rx_ring = r; + + /* TX ring. 
SP/SC because it's only used by host and a single VM */ + snprintf(name, sizeof(name), "%s%i", TX_RING_PREFIX, portid); + r = rte_ring_create(name, NB_MBUF, + SOCKET_ID_ANY, RING_F_SP_ENQ | RING_F_SC_DEQ); + if (r == NULL) + rte_exit(EXIT_FAILURE, "Cannot create ring %s\n", name); + + ctrl->vm_ports[portid].tx_ring = r; + } + + /* create metadata, output cmdline */ + if (rte_ivshmem_metadata_create(METADATA_NAME) < 0) + rte_exit(EXIT_FAILURE, "Cannot create IVSHMEM metadata\n"); + + if (rte_ivshmem_metadata_add_memzone(ctrl_mz, METADATA_NAME)) + rte_exit(EXIT_FAILURE, "Cannot add memzone to IVSHMEM metadata\n"); + + if (rte_ivshmem_metadata_add_mempool(l2fwd_ivshmem_pktmbuf_pool, METADATA_NAME)) + rte_exit(EXIT_FAILURE, "Cannot add mbuf mempool to IVSHMEM metadata\n"); + + for (portid = 0; portid < nb_ports_available; portid++) { + if (rte_ivshmem_metadata_add_ring(ctrl->vm_ports[portid].rx_ring, + METADATA_NAME) < 0) + rte_exit(EXIT_FAILURE, "Cannot add ring %s to IVSHMEM metadata\n", + ctrl->vm_ports[portid].rx_ring->name); + if (rte_ivshmem_metadata_add_ring(ctrl->vm_ports[portid].tx_ring, + METADATA_NAME) < 0) + rte_exit(EXIT_FAILURE, "Cannot add ring %s to IVSHMEM metadata\n", + ctrl->vm_ports[portid].tx_ring->name); + } + generate_ivshmem_cmdline(METADATA_NAME); + + ctrl->nb_ports = nb_ports_available; + + printf("Waiting for VM to initialize...\n"); + + /* wait for VM to initialize */ + while (ctrl->state != STATE_FWD) { + if (ctrl->state == STATE_FAIL) + rte_exit(EXIT_FAILURE, "VM reported failure\n"); + + sleep(1); + } + + printf("Done!\n"); + + sigsetup(); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(l2fwd_ivshmem_launch_one_lcore, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + if (ctrl->state == STATE_FAIL) + rte_exit(EXIT_FAILURE, "VM reported failure\n"); + + return 0; +} diff --git a/examples/l2fwd-ivshmem/include/common.h 
b/examples/l2fwd-ivshmem/include/common.h new file mode 100644 index 00000000..8564d32b --- /dev/null +++ b/examples/l2fwd-ivshmem/include/common.h @@ -0,0 +1,111 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _IVSHMEM_COMMON_H_ +#define _IVSHMEM_COMMON_H_ + +#define RTE_LOGTYPE_L2FWD_IVSHMEM RTE_LOGTYPE_USER1 + +#define CTRL_MZ_NAME "CTRL_MEMZONE" +#define MBUF_MP_NAME "MBUF_MEMPOOL" +#define RX_RING_PREFIX "RX_" +#define TX_RING_PREFIX "TX_" + +/* A tsc-based timer responsible for triggering statistics printout */ +#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 Ghz */ +#define MAX_TIMER_PERIOD 86400 /* 1 day max */ +static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */ + +#define DIM(x)\ + (sizeof(x)/sizeof(x)[0]) + +#define MAX_PKT_BURST 32 + +const struct rte_memzone * ctrl_mz; + +enum l2fwd_state { + STATE_NONE = 0, + STATE_FWD, + STATE_EXIT, + STATE_FAIL +}; + +/* Per-port statistics struct */ +struct port_statistics { + uint64_t tx; + uint64_t rx; + uint64_t dropped; +} __rte_cache_aligned; + +struct mbuf_table { + unsigned len; + struct rte_mbuf *m_table[MAX_PKT_BURST * 2]; /**< allow up to two bursts */ +}; + +struct vm_port_param { + struct rte_ring * rx_ring; /**< receiving ring for current port */ + struct rte_ring * tx_ring; /**< transmitting ring for current port */ + struct vm_port_param * dst; /**< current port's destination port */ + volatile struct port_statistics stats; /**< statistics for current port */ + struct ether_addr ethaddr; /**< Ethernet address of the port */ +}; + +/* control structure, to synchronize host and VM */ +struct ivshmem_ctrl { + rte_spinlock_t lock; + uint8_t nb_ports; /**< total nr of ports */ + volatile enum l2fwd_state state; /**< report state */ + struct vm_port_param vm_ports[RTE_MAX_ETHPORTS]; +}; + +struct ivshmem_ctrl * ctrl; + +static unsigned int l2fwd_ivshmem_rx_queue_per_lcore = 1; + +static void sighandler(int __rte_unused s) +{ + ctrl->state = STATE_EXIT; +} + +static void sigsetup(void) +{ + struct sigaction sigIntHandler; + + sigIntHandler.sa_handler = sighandler; + sigemptyset(&sigIntHandler.sa_mask); + sigIntHandler.sa_flags = 0; + + 
sigaction(SIGINT, &sigIntHandler, NULL); +} + +#endif /* _IVSHMEM_COMMON_H_ */ diff --git a/examples/l2fwd-jobstats/Makefile b/examples/l2fwd-jobstats/Makefile new file mode 100644 index 00000000..ab089f66 --- /dev/null +++ b/examples/l2fwd-jobstats/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-jobstats + +# all source are stored in SRCS-y +SRCS-y := main.c + + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c new file mode 100644 index 00000000..9f3a77d2 --- /dev/null +++ b/examples/l2fwd-jobstats/main.c @@ -0,0 +1,1026 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <ctype.h> +#include <getopt.h> + +#include <rte_alarm.h> +#include <rte_common.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_spinlock.h> + +#include <rte_errno.h> +#include <rte_jobstats.h> +#include <rte_timer.h> +#include <rte_alarm.h> + +#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1 + +#define NB_MBUF 8192 + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint32_t l2fwd_enabled_port_mask; + +/* list 
of enabled ports */ +static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS]; + +#define UPDATE_STEP_UP 1 +#define UPDATE_STEP_DOWN 32 + +static unsigned int l2fwd_rx_queue_per_lcore = 1; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + unsigned n_rx_port; + unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; + uint64_t next_flush_time[RTE_MAX_ETHPORTS]; + + struct rte_timer rx_timers[MAX_RX_QUEUE_PER_LCORE]; + struct rte_jobstats port_fwd_jobs[MAX_RX_QUEUE_PER_LCORE]; + + struct rte_timer flush_timer; + struct rte_jobstats flush_job; + struct rte_jobstats idle_job; + struct rte_jobstats_context jobs_context; + + rte_atomic16_t stats_read_pending; + rte_spinlock_t lock; +} __rte_cache_aligned; +struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +struct rte_mempool *l2fwd_pktmbuf_pool = NULL; + +/* Per-port statistics struct */ +struct l2fwd_port_statistics { + uint64_t tx; + uint64_t rx; + uint64_t dropped; +} __rte_cache_aligned; +struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS]; + +/* 1 day max */ +#define MAX_TIMER_PERIOD 86400 +/* default period is 10 seconds */ +static int64_t timer_period = 10; +/* default timer frequency */ +static double hz; +/* BURST_TX_DRAIN_US converted to cycles */ +uint64_t drain_tsc; +/* Convert cycles to ns */ +static inline double +cycles_to_ns(uint64_t cycles) +{ + double t = cycles; + + t *= (double)NS_PER_S; + t /= hz; + return t; +} + +static void +show_lcore_stats(unsigned lcore_id) +{ + 
struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id]; + struct rte_jobstats_context *ctx = &qconf->jobs_context; + struct rte_jobstats *job; + uint8_t i; + + /* LCore statistics. */ + uint64_t stats_period, loop_count; + uint64_t exec, exec_min, exec_max; + uint64_t management, management_min, management_max; + uint64_t busy, busy_min, busy_max; + + /* Jobs statistics. */ + const uint8_t port_cnt = qconf->n_rx_port; + uint64_t jobs_exec_cnt[port_cnt], jobs_period[port_cnt]; + uint64_t jobs_exec[port_cnt], jobs_exec_min[port_cnt], + jobs_exec_max[port_cnt]; + + uint64_t flush_exec_cnt, flush_period; + uint64_t flush_exec, flush_exec_min, flush_exec_max; + + uint64_t idle_exec_cnt; + uint64_t idle_exec, idle_exec_min, idle_exec_max; + uint64_t collection_time = rte_get_timer_cycles(); + + /* Ask forwarding thread to give us stats. */ + rte_atomic16_set(&qconf->stats_read_pending, 1); + rte_spinlock_lock(&qconf->lock); + rte_atomic16_set(&qconf->stats_read_pending, 0); + + /* Collect context statistics. 
*/ + stats_period = ctx->state_time - ctx->start_time; + loop_count = ctx->loop_cnt; + + exec = ctx->exec_time; + exec_min = ctx->min_exec_time; + exec_max = ctx->max_exec_time; + + management = ctx->management_time; + management_min = ctx->min_management_time; + management_max = ctx->max_management_time; + + rte_jobstats_context_reset(ctx); + + for (i = 0; i < port_cnt; i++) { + job = &qconf->port_fwd_jobs[i]; + + jobs_exec_cnt[i] = job->exec_cnt; + jobs_period[i] = job->period; + + jobs_exec[i] = job->exec_time; + jobs_exec_min[i] = job->min_exec_time; + jobs_exec_max[i] = job->max_exec_time; + + rte_jobstats_reset(job); + } + + flush_exec_cnt = qconf->flush_job.exec_cnt; + flush_period = qconf->flush_job.period; + flush_exec = qconf->flush_job.exec_time; + flush_exec_min = qconf->flush_job.min_exec_time; + flush_exec_max = qconf->flush_job.max_exec_time; + rte_jobstats_reset(&qconf->flush_job); + + idle_exec_cnt = qconf->idle_job.exec_cnt; + idle_exec = qconf->idle_job.exec_time; + idle_exec_min = qconf->idle_job.min_exec_time; + idle_exec_max = qconf->idle_job.max_exec_time; + rte_jobstats_reset(&qconf->idle_job); + + rte_spinlock_unlock(&qconf->lock); + + exec -= idle_exec; + busy = exec + management; + busy_min = exec_min + management_min; + busy_max = exec_max + management_max; + + + collection_time = rte_get_timer_cycles() - collection_time; + +#define STAT_FMT "\n%-18s %'14.0f %6.1f%% %'10.0f %'10.0f %'10.0f" + + printf("\n----------------" + "\nLCore %3u: statistics (time in ns, collected in %'9.0f)" + "\n%-18s %14s %7s %10s %10s %10s " + "\n%-18s %'14.0f" + "\n%-18s %'14" PRIu64 + STAT_FMT /* Exec */ + STAT_FMT /* Management */ + STAT_FMT /* Busy */ + STAT_FMT, /* Idle */ + lcore_id, cycles_to_ns(collection_time), + "Stat type", "total", "%total", "avg", "min", "max", + "Stats duration:", cycles_to_ns(stats_period), + "Loop count:", loop_count, + "Exec time", + cycles_to_ns(exec), exec * 100.0 / stats_period, + cycles_to_ns(loop_count ? 
exec / loop_count : 0), + cycles_to_ns(exec_min), + cycles_to_ns(exec_max), + "Management time", + cycles_to_ns(management), management * 100.0 / stats_period, + cycles_to_ns(loop_count ? management / loop_count : 0), + cycles_to_ns(management_min), + cycles_to_ns(management_max), + "Exec + management", + cycles_to_ns(busy), busy * 100.0 / stats_period, + cycles_to_ns(loop_count ? busy / loop_count : 0), + cycles_to_ns(busy_min), + cycles_to_ns(busy_max), + "Idle (job)", + cycles_to_ns(idle_exec), idle_exec * 100.0 / stats_period, + cycles_to_ns(idle_exec_cnt ? idle_exec / idle_exec_cnt : 0), + cycles_to_ns(idle_exec_min), + cycles_to_ns(idle_exec_max)); + + for (i = 0; i < qconf->n_rx_port; i++) { + job = &qconf->port_fwd_jobs[i]; + printf("\n\nJob %" PRIu32 ": %-20s " + "\n%-18s %'14" PRIu64 + "\n%-18s %'14.0f" + STAT_FMT, + i, job->name, + "Exec count:", jobs_exec_cnt[i], + "Exec period: ", cycles_to_ns(jobs_period[i]), + "Exec time", + cycles_to_ns(jobs_exec[i]), jobs_exec[i] * 100.0 / stats_period, + cycles_to_ns(jobs_exec_cnt[i] ? jobs_exec[i] / jobs_exec_cnt[i] + : 0), + cycles_to_ns(jobs_exec_min[i]), + cycles_to_ns(jobs_exec_max[i])); + } + + if (qconf->n_rx_port > 0) { + job = &qconf->flush_job; + printf("\n\nJob %" PRIu32 ": %-20s " + "\n%-18s %'14" PRIu64 + "\n%-18s %'14.0f" + STAT_FMT, + i, job->name, + "Exec count:", flush_exec_cnt, + "Exec period: ", cycles_to_ns(flush_period), + "Exec time", + cycles_to_ns(flush_exec), flush_exec * 100.0 / stats_period, + cycles_to_ns(flush_exec_cnt ? 
flush_exec / flush_exec_cnt : 0), + cycles_to_ns(flush_exec_min), + cycles_to_ns(flush_exec_max)); + } +} + +/* Print out statistics on packets dropped */ +static void +show_stats_cb(__rte_unused void *param) +{ + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; + unsigned portid, lcore_id; + + total_packets_dropped = 0; + total_packets_tx = 0; + total_packets_rx = 0; + + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' }; + + /* Clear screen and move to top left */ + printf("%s%s" + "\nPort statistics ===================================", + clr, topLeft); + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + /* skip disabled ports */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + printf("\nStatistics for port %u ------------------------------" + "\nPackets sent: %24"PRIu64 + "\nPackets received: %20"PRIu64 + "\nPackets dropped: %21"PRIu64, + portid, + port_statistics[portid].tx, + port_statistics[portid].rx, + port_statistics[portid].dropped); + + total_packets_dropped += port_statistics[portid].dropped; + total_packets_tx += port_statistics[portid].tx; + total_packets_rx += port_statistics[portid].rx; + } + + printf("\nAggregate statistics ===============================" + "\nTotal packets sent: %18"PRIu64 + "\nTotal packets received: %14"PRIu64 + "\nTotal packets dropped: %15"PRIu64 + "\n====================================================", + total_packets_tx, + total_packets_rx, + total_packets_dropped); + + RTE_LCORE_FOREACH(lcore_id) { + if (lcore_queue_conf[lcore_id].n_rx_port > 0) + show_lcore_stats(lcore_id); + } + + printf("\n====================================================\n"); + rte_eal_alarm_set(timer_period * US_PER_S, show_stats_cb, NULL); +} + +static void +l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid) +{ + struct ether_hdr *eth; + void *tmp; + int sent; + unsigned dst_port; + struct rte_eth_dev_tx_buffer *buffer; + + 
dst_port = l2fwd_dst_ports[portid]; + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = ð->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], ð->s_addr); + + buffer = tx_buffer[dst_port]; + sent = rte_eth_tx_buffer(dst_port, 0, buffer, m); + if (sent) + port_statistics[dst_port].tx += sent; +} + +static void +l2fwd_job_update_cb(struct rte_jobstats *job, int64_t result) +{ + int64_t err = job->target - result; + int64_t histeresis = job->target / 8; + + if (err < -histeresis) { + if (job->min_period + UPDATE_STEP_DOWN < job->period) + job->period -= UPDATE_STEP_DOWN; + } else if (err > histeresis) { + if (job->period + UPDATE_STEP_UP < job->max_period) + job->period += UPDATE_STEP_UP; + } +} + +static void +l2fwd_fwd_job(__rte_unused struct rte_timer *timer, void *arg) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct rte_mbuf *m; + + const uint8_t port_idx = (uintptr_t) arg; + const unsigned lcore_id = rte_lcore_id(); + struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id]; + struct rte_jobstats *job = &qconf->port_fwd_jobs[port_idx]; + const uint8_t portid = qconf->rx_port_list[port_idx]; + + uint8_t j; + uint16_t total_nb_rx; + + rte_jobstats_start(&qconf->jobs_context, job); + + /* Call rx burst 2 times. This allow rte_jobstats logic to see if this + * function must be called more frequently. 
*/ + + total_nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, pkts_burst, + MAX_PKT_BURST); + + for (j = 0; j < total_nb_rx; j++) { + m = pkts_burst[j]; + rte_prefetch0(rte_pktmbuf_mtod(m, void *)); + l2fwd_simple_forward(m, portid); + } + + if (total_nb_rx == MAX_PKT_BURST) { + const uint16_t nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, pkts_burst, + MAX_PKT_BURST); + + total_nb_rx += nb_rx; + for (j = 0; j < nb_rx; j++) { + m = pkts_burst[j]; + rte_prefetch0(rte_pktmbuf_mtod(m, void *)); + l2fwd_simple_forward(m, portid); + } + } + + port_statistics[portid].rx += total_nb_rx; + + /* Adjust period time in which we are running here. */ + if (rte_jobstats_finish(job, total_nb_rx) != 0) { + rte_timer_reset(&qconf->rx_timers[port_idx], job->period, PERIODICAL, + lcore_id, l2fwd_fwd_job, arg); + } +} + +static void +l2fwd_flush_job(__rte_unused struct rte_timer *timer, __rte_unused void *arg) +{ + uint64_t now; + unsigned lcore_id; + struct lcore_queue_conf *qconf; + uint8_t portid; + unsigned i; + uint32_t sent; + struct rte_eth_dev_tx_buffer *buffer; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + rte_jobstats_start(&qconf->jobs_context, &qconf->flush_job); + + now = rte_get_timer_cycles(); + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + for (i = 0; i < qconf->n_rx_port; i++) { + portid = l2fwd_dst_ports[qconf->rx_port_list[i]]; + + if (qconf->next_flush_time[portid] <= now) + continue; + + buffer = tx_buffer[portid]; + sent = rte_eth_tx_buffer_flush(portid, 0, buffer); + if (sent) + port_statistics[portid].tx += sent; + + qconf->next_flush_time[portid] = rte_get_timer_cycles() + drain_tsc; + } + + /* Pass target to indicate that this job is happy of time interwal + * in which it was called. 
*/ + rte_jobstats_finish(&qconf->flush_job, qconf->flush_job.target); +} + +/* main processing loop */ +static void +l2fwd_main_loop(void) +{ + unsigned lcore_id; + unsigned i, portid; + struct lcore_queue_conf *qconf; + uint8_t stats_read_pending = 0; + uint8_t need_manage; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + if (qconf->n_rx_port == 0) { + RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id); + return; + } + + RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = qconf->rx_port_list[i]; + RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id, + portid); + } + + rte_jobstats_init(&qconf->idle_job, "idle", 0, 0, 0, 0); + + for (;;) { + rte_spinlock_lock(&qconf->lock); + + do { + rte_jobstats_context_start(&qconf->jobs_context); + + /* Do the Idle job: + * - Read stats_read_pending flag + * - check if some real job need to be executed + */ + rte_jobstats_start(&qconf->jobs_context, &qconf->idle_job); + + uint64_t repeats = 0; + + do { + uint8_t i; + uint64_t now = rte_get_timer_cycles(); + + repeats++; + need_manage = qconf->flush_timer.expire < now; + /* Check if we was esked to give a stats. 
/*
 * Parse a hexadecimal port mask given on the command line.
 *
 * @param portmask
 *   NUL-terminated string holding the mask in base 16.
 * @return
 *   The parsed mask, or 0 when the string is empty, contains trailing
 *   non-hexadecimal characters, or parses to zero.
 *
 * Errors are reported as 0 rather than -1: the caller stores the result in
 * an unsigned mask and treats 0 as "invalid", so a -1 return would be
 * accepted as an all-ports mask instead of being rejected.
 */
static int
l2fwd_parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	/* An all-zero mask enables nothing; pm == 0 is reported as invalid. */
	return (int)pm;
}
(*end != '\0')) + return -1; + if (n >= MAX_TIMER_PERIOD) + return -1; + + return n; +} + +/* Parse the argument given in the command line of the application */ +static int +l2fwd_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:q:T:l", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg); + if (l2fwd_enabled_port_mask == 0) { + printf("invalid portmask\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* nqueue */ + case 'q': + l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg); + if (l2fwd_rx_queue_per_lcore == 0) { + printf("invalid queue number\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* timer period */ + case 'T': + timer_period = l2fwd_parse_timer_period(optarg); + if (timer_period < 0) { + printf("invalid timer period\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* For thousands separator in printf. 
*/ + case 'l': + setlocale(LC_ALL, ""); + break; + + /* long options */ + case 0: + l2fwd_usage(prgname); + return -1; + + default: + l2fwd_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
/*
 * Entry point of the l2fwd-jobstats example.
 *
 * In order: initialises the EAL, parses the application options, sets up the
 * timer subsystem and mbuf pool, pairs the enabled ports, distributes RX
 * ports over the enabled lcores, configures and starts every enabled port,
 * creates the per-lcore flush and forward jobs with their timers, arms the
 * statistics alarm and finally launches the main loop on every lcore.
 *
 * Returns 0 after all slave lcores have finished, or -1 if waiting for one
 * of them fails.  Most set-up failures terminate the process via rte_exit()
 * or rte_panic() instead of returning.
 */
int
main(int argc, char **argv)
{
	struct lcore_queue_conf *qconf;
	struct rte_eth_dev_info dev_info;
	unsigned lcore_id, rx_lcore_id;
	unsigned nb_ports_in_mask = 0;
	int ret;
	char name[RTE_JOBSTATS_NAMESIZE];
	uint8_t nb_ports;
	uint8_t nb_ports_available;
	uint8_t portid, last_port;
	uint8_t i;

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
	/* skip the arguments consumed by the EAL */
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = l2fwd_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n");

	rte_timer_subsystem_init();

	/* fetch default timer frequency. */
	hz = rte_get_timer_hz();

	/* create the mbuf pool */
	l2fwd_pktmbuf_pool =
		rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32,
			0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (l2fwd_pktmbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");

	nb_ports = rte_eth_dev_count();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");

	if (nb_ports > RTE_MAX_ETHPORTS)
		nb_ports = RTE_MAX_ETHPORTS;

	/* reset l2fwd_dst_ports */
	for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
		l2fwd_dst_ports[portid] = 0;
	last_port = 0;

	/*
	 * Pair up the enabled ports: every second enabled port is paired with
	 * the previously seen one, each becoming the other's destination.
	 * NOTE(review): the original comment here read "Each logical core is
	 * assigned a dedicated TX queue on each port", which does not describe
	 * this loop.
	 */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
			continue;

		if (nb_ports_in_mask % 2) {
			l2fwd_dst_ports[portid] = last_port;
			l2fwd_dst_ports[last_port] = portid;
		} else
			last_port = portid;

		nb_ports_in_mask++;

		/* NOTE(review): dev_info is filled in but not read afterwards. */
		rte_eth_dev_info_get(portid, &dev_info);
	}
	/* with an odd count, the last unpaired port forwards to itself */
	if (nb_ports_in_mask % 2) {
		printf("Notice: odd number of ports in portmask.\n");
		l2fwd_dst_ports[last_port] = last_port;
	}

	rx_lcore_id = 0;
	qconf = NULL;

	/* Initialize the port/queue configuration of each logical core */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
			continue;

		/* get the lcore_id for this port: advance past lcores that are
		 * disabled or already hold l2fwd_rx_queue_per_lcore ports */
		while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
		       lcore_queue_conf[rx_lcore_id].n_rx_port ==
		       l2fwd_rx_queue_per_lcore) {
			rx_lcore_id++;
			if (rx_lcore_id >= RTE_MAX_LCORE)
				rte_exit(EXIT_FAILURE, "Not enough cores\n");
		}

		if (qconf != &lcore_queue_conf[rx_lcore_id])
			/* Assigned a new logical core in the loop above. */
			qconf = &lcore_queue_conf[rx_lcore_id];

		qconf->rx_port_list[qconf->n_rx_port] = portid;
		qconf->n_rx_port++;
		printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid);
	}

	/* start from the total and subtract each disabled port below */
	nb_ports_available = nb_ports;

	/* Initialise each port */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) {
			printf("Skipping disabled port %u\n", (unsigned) portid);
			nb_ports_available--;
			continue;
		}
		/* init port: one RX queue and one TX queue */
		printf("Initializing port %u... ", (unsigned) portid);
		fflush(stdout);
		ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
				  ret, (unsigned) portid);

		rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]);

		/* init one RX queue */
		fflush(stdout);
		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
					     rte_eth_dev_socket_id(portid),
					     NULL,
					     l2fwd_pktmbuf_pool);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
				  ret, (unsigned) portid);

		/* init one TX queue on each port */
		fflush(stdout);
		ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
				rte_eth_dev_socket_id(portid),
				NULL);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
				ret, (unsigned) portid);

		/* Initialize TX buffers */
		tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
		if (tx_buffer[portid] == NULL)
			rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
					(unsigned) portid);

		rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);

		/* the callback is given this port's dropped counter as its argument */
		ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
				rte_eth_tx_buffer_count_callback,
				&port_statistics[portid].dropped);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot set error callback for "
					"tx buffer on port %u\n", (unsigned) portid);

		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
				  ret, (unsigned) portid);

		printf("done:\n");

		rte_eth_promiscuous_enable(portid);

		printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
				(unsigned) portid,
				l2fwd_ports_eth_addr[portid].addr_bytes[0],
				l2fwd_ports_eth_addr[portid].addr_bytes[1],
				l2fwd_ports_eth_addr[portid].addr_bytes[2],
				l2fwd_ports_eth_addr[portid].addr_bytes[3],
				l2fwd_ports_eth_addr[portid].addr_bytes[4],
				l2fwd_ports_eth_addr[portid].addr_bytes[5]);

		/* initialize port stats */
		/* NOTE(review): this clears the whole port_statistics array on
		 * every loop iteration, not only the entry for portid. */
		memset(&port_statistics, 0, sizeof(port_statistics));
	}

	if (!nb_ports_available) {
		rte_exit(EXIT_FAILURE,
			"All available ports are disabled. Please set portmask.\n");
	}

	check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask);

	/* BURST_TX_DRAIN_US expressed in timer cycles; cycles-per-us is
	 * rounded up */
	drain_tsc = (hz + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;

	RTE_LCORE_FOREACH(lcore_id) {
		qconf = &lcore_queue_conf[lcore_id];

		rte_spinlock_init(&qconf->lock);

		if (rte_jobstats_context_init(&qconf->jobs_context) != 0)
			rte_panic("Jobs stats context for core %u init failed\n", lcore_id);

		/* lcores without RX ports get neither jobs nor timers */
		if (qconf->n_rx_port == 0) {
			RTE_LOG(INFO, L2FWD,
				"lcore %u: no ports so no jobs stats context initialization\n",
				lcore_id);
			continue;
		}
		/* Add flush job.
		 * Set fixed period by setting min = max = initial period. Set target to
		 * zero as it is irrelevant for this job. */
		rte_jobstats_init(&qconf->flush_job, "flush", drain_tsc, drain_tsc,
				drain_tsc, 0);

		rte_timer_init(&qconf->flush_timer);
		ret = rte_timer_reset(&qconf->flush_timer, drain_tsc, PERIODICAL,
				lcore_id, &l2fwd_flush_job, NULL);

		if (ret < 0) {
			rte_exit(1, "Failed to reset flush job timer for lcore %u: %s",
					lcore_id, rte_strerror(-ret));
		}

		/* one forward job and one timer per RX port of this lcore */
		for (i = 0; i < qconf->n_rx_port; i++) {
			struct rte_jobstats *job = &qconf->port_fwd_jobs[i];

			portid = qconf->rx_port_list[i];
			printf("Setting forward jon for port %u\n", portid);

			snprintf(name, RTE_DIM(name), "port %u fwd", portid);
			/* Setup forward job.
			 * Set min, max and initial period. Set target to MAX_PKT_BURST as
			 * this is desired optimal RX/TX burst size. */
			rte_jobstats_init(job, name, 0, drain_tsc, 0, MAX_PKT_BURST);
			rte_jobstats_set_update_period_function(job, l2fwd_job_update_cb);

			/* the index into rx_port_list (not the port id) is passed
			 * to the job as its argument */
			rte_timer_init(&qconf->rx_timers[i]);
			ret = rte_timer_reset(&qconf->rx_timers[i], 0, PERIODICAL, lcore_id,
					&l2fwd_fwd_job, (void *)(uintptr_t)i);

			if (ret < 0) {
				rte_exit(1, "Failed to reset lcore %u port %u job timer: %s",
						lcore_id, qconf->rx_port_list[i], rte_strerror(-ret));
			}
		}
	}

	/* NOTE(review): the first alarm is scaled by MS_PER_S while
	 * show_stats_cb() re-arms itself with timer_period * US_PER_S, so the
	 * first statistics printout presumably comes much earlier than the
	 * later ones - confirm whether this is intended. */
	if (timer_period)
		rte_eal_alarm_set(timer_period * MS_PER_S, show_stats_cb, NULL);
	else
		RTE_LOG(INFO, L2FWD, "Stats display disabled\n");

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-keepalive + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-keepalive/main.c b/examples/l2fwd-keepalive/main.c new file mode 100644 index 00000000..8da89aa1 --- /dev/null +++ b/examples/l2fwd-keepalive/main.c @@ -0,0 +1,782 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/queue.h> +#include <netinet/in.h> +#include <setjmp.h> +#include <stdarg.h> +#include <ctype.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_timer.h> +#include <rte_keepalive.h> + +#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1 + +#define NB_MBUF 8192 + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint32_t l2fwd_enabled_port_mask; + +/* list of enabled ports */ +static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS]; + +static unsigned int l2fwd_rx_queue_per_lcore = 1; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + unsigned n_rx_port; + unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; +} __rte_cache_aligned; +struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + +static const struct 
rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +struct rte_mempool *l2fwd_pktmbuf_pool = NULL; + +/* Per-port statistics struct */ +struct l2fwd_port_statistics { + uint64_t tx; + uint64_t rx; + uint64_t dropped; +} __rte_cache_aligned; +struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS]; + +/* A tsc-based timer responsible for triggering statistics printout */ +#define TIMER_MILLISECOND 1 +#define MAX_TIMER_PERIOD 86400 /* 1 day max */ +static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* 10 seconds */ +static int64_t check_period = 5; /* default check cycle is 5ms */ + +/* Keepalive structure */ +struct rte_keepalive *rte_global_keepalive_info; + +/* Print out statistics on packets dropped */ +static void +print_stats(__attribute__((unused)) struct rte_timer *ptr_timer, + __attribute__((unused)) void *ptr_data) +{ + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; + unsigned portid; + + total_packets_dropped = 0; + total_packets_tx = 0; + total_packets_rx = 0; + + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' }; + + /* Clear screen and move to top left */ + printf("%s%s", clr, topLeft); + + printf("\nPort statistics ===================================="); + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + /* skip disabled ports */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + printf("\nStatistics for port %u ------------------------------" + "\nPackets sent: %24"PRIu64 + "\nPackets received: %20"PRIu64 + "\nPackets dropped: %21"PRIu64, + portid, + port_statistics[portid].tx, + 
port_statistics[portid].rx, + port_statistics[portid].dropped); + + total_packets_dropped += port_statistics[portid].dropped; + total_packets_tx += port_statistics[portid].tx; + total_packets_rx += port_statistics[portid].rx; + } + printf("\nAggregate statistics ===============================" + "\nTotal packets sent: %18"PRIu64 + "\nTotal packets received: %14"PRIu64 + "\nTotal packets dropped: %15"PRIu64, + total_packets_tx, + total_packets_rx, + total_packets_dropped); + printf("\n====================================================\n"); +} + +static void +l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid) +{ + struct ether_hdr *eth; + void *tmp; + int sent; + unsigned dst_port; + struct rte_eth_dev_tx_buffer *buffer; + + dst_port = l2fwd_dst_ports[portid]; + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = &eth->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr); + + buffer = tx_buffer[dst_port]; + sent = rte_eth_tx_buffer(dst_port, 0, buffer, m); + if (sent) + port_statistics[dst_port].tx += sent; +} + +/* main processing loop */ +static void +l2fwd_main_loop(void) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct rte_mbuf *m; + int sent; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + unsigned i, j, portid, nb_rx; + struct lcore_queue_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) + / US_PER_S * BURST_TX_DRAIN_US; + struct rte_eth_dev_tx_buffer *buffer; + + prev_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + if (qconf->n_rx_port == 0) { + RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id); + return; + } + + RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = qconf->rx_port_list[i]; + RTE_LOG(INFO, L2FWD, " -- lcoreid=%u 
portid=%u\n", lcore_id, + portid); + } + + uint64_t tsc_initial = rte_rdtsc(); + uint64_t tsc_lifetime = (rand()&0x07) * rte_get_tsc_hz(); + + while (1) { + /* Keepalive heartbeat */ + rte_keepalive_mark_alive(rte_global_keepalive_info); + + cur_tsc = rte_rdtsc(); + + /* + * Die randomly within 7 secs for demo purposes if + * keepalive enabled + */ + if (check_period > 0 && cur_tsc - tsc_initial > tsc_lifetime) + break; + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = l2fwd_dst_ports[qconf->rx_port_list[i]]; + buffer = tx_buffer[portid]; + + sent = rte_eth_tx_buffer_flush(portid, 0, buffer); + if (sent) + port_statistics[portid].tx += sent; + + } + + prev_tsc = cur_tsc; + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = qconf->rx_port_list[i]; + nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, + pkts_burst, MAX_PKT_BURST); + + port_statistics[portid].rx += nb_rx; + + for (j = 0; j < nb_rx; j++) { + m = pkts_burst[j]; + rte_prefetch0(rte_pktmbuf_mtod(m, void *)); + l2fwd_simple_forward(m, portid); + } + } + } +} + +static int +l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy) +{ + l2fwd_main_loop(); + return 0; +} + +/* display usage */ +static void +l2fwd_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -q NQ: number of queue (=ports) per lcore (default is 1)\n" + " -K PERIOD: Keepalive check period (5 default; 86400 max)\n" + " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n", + prgname); +} + +static int +l2fwd_parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; 
+ + if (pm == 0) + return -1; + + return pm; +} + +static unsigned int +l2fwd_parse_nqueue(const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse decimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + if (n == 0) + return 0; + if (n >= MAX_RX_QUEUE_PER_LCORE) + return 0; + + return n; +} + +static int +l2fwd_parse_timer_period(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n >= MAX_TIMER_PERIOD) + return -1; + + return n; +} + +static int +l2fwd_parse_check_period(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n >= MAX_TIMER_PERIOD) + return -1; + + return n; +} + +/* Parse the argument given in the command line of the application */ +static int +l2fwd_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:q:T:K:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg); + if (l2fwd_enabled_port_mask == 0) { + printf("invalid portmask\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* nqueue */ + case 'q': + l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg); + if (l2fwd_rx_queue_per_lcore == 0) { + printf("invalid queue number\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* timer period */ + case 'T': + timer_period = l2fwd_parse_timer_period(optarg) + * (int64_t)(1000 * TIMER_MILLISECOND); + if (timer_period < 0) { + printf("invalid timer period\n"); + l2fwd_usage(prgname); + return -1; + } 
+ break; + + /* Check period */ + case 'K': + check_period = l2fwd_parse_check_period(optarg); + if (check_period < 0) { + printf("invalid check period\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* long options */ + case 0: + l2fwd_usage(prgname); + return -1; + + default: + l2fwd_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +static void +dead_core(__attribute__((unused)) void *ptr_data, const int id_core) +{ + printf("Dead core %i - restarting..\n", id_core); + if (rte_eal_get_lcore_state(id_core) == FINISHED) { + rte_eal_wait_lcore(id_core); + rte_eal_remote_launch(l2fwd_launch_one_lcore, NULL, id_core); + } else { + printf("..false positive!\n"); + } +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + struct rte_eth_dev_info dev_info; + int ret; + uint8_t nb_ports; + uint8_t nb_ports_available; + uint8_t portid, last_port; + unsigned lcore_id, rx_lcore_id; + unsigned nb_ports_in_mask = 0; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n"); + argc -= ret; + argv += ret; + + l2fwd_enabled_port_mask = 0; + + /* parse application arguments (after the EAL ones) */ + ret = l2fwd_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n"); + + /* create the mbuf pool */ + l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32, + 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (l2fwd_pktmbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* reset 
l2fwd_dst_ports */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) + l2fwd_dst_ports[portid] = 0; + last_port = 0; + + /* + * Each logical core is assigned a dedicated TX queue on each port. + */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + + if (nb_ports_in_mask % 2) { + l2fwd_dst_ports[portid] = last_port; + l2fwd_dst_ports[last_port] = portid; + } else + last_port = portid; + + nb_ports_in_mask++; + + rte_eth_dev_info_get(portid, &dev_info); + } + if (nb_ports_in_mask % 2) { + printf("Notice: odd number of ports in portmask.\n"); + l2fwd_dst_ports[last_port] = last_port; + } + + rx_lcore_id = 1; + qconf = NULL; + + /* Initialize the port/queue configuration of each logical core */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + lcore_queue_conf[rx_lcore_id].n_rx_port == + l2fwd_rx_queue_per_lcore) { + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + } + + if (qconf != &lcore_queue_conf[rx_lcore_id]) + /* Assigned a new logical core in the loop above. */ + qconf = &lcore_queue_conf[rx_lcore_id]; + + qconf->rx_port_list[qconf->n_rx_port] = portid; + qconf->n_rx_port++; + printf("Lcore %u: RX port %u\n", + rx_lcore_id, (unsigned) portid); + } + + nb_ports_available = nb_ports; + + /* Initialise each port */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) { + printf("Skipping disabled port %u\n", + (unsigned) portid); + nb_ports_available--; + continue; + } + /* init port */ + printf("Initializing port %u... 
", (unsigned) portid); + fflush(stdout); + ret = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot configure device: err=%d, port=%u\n", + ret, (unsigned) portid); + + rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]); + + /* init one RX queue */ + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + rte_eth_dev_socket_id(portid), + NULL, + l2fwd_pktmbuf_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_rx_queue_setup:err=%d, port=%u\n", + ret, (unsigned) portid); + + /* init one TX queue on each port */ + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, + rte_eth_dev_socket_id(portid), + NULL); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_tx_queue_setup:err=%d, port=%u\n", + ret, (unsigned) portid); + + /* Initialize TX buffers */ + tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", + RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, + rte_eth_dev_socket_id(portid)); + if (tx_buffer[portid] == NULL) + rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n", + (unsigned) portid); + + rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST); + + ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid], + rte_eth_tx_buffer_count_callback, + &port_statistics[portid].dropped); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot set error callback for " + "tx buffer on port %u\n", (unsigned) portid); + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_dev_start:err=%d, port=%u\n", + ret, (unsigned) portid); + + rte_eth_promiscuous_enable(portid); + + printf("Port %u, MAC address: " + "%02X:%02X:%02X:%02X:%02X:%02X\n\n", + (unsigned) portid, + l2fwd_ports_eth_addr[portid].addr_bytes[0], + l2fwd_ports_eth_addr[portid].addr_bytes[1], + l2fwd_ports_eth_addr[portid].addr_bytes[2], + l2fwd_ports_eth_addr[portid].addr_bytes[3], + l2fwd_ports_eth_addr[portid].addr_bytes[4], + 
l2fwd_ports_eth_addr[portid].addr_bytes[5]); + + /* initialize port stats */ + memset(&port_statistics, 0, sizeof(port_statistics)); + } + + if (!nb_ports_available) { + rte_exit(EXIT_FAILURE, + "All available ports are disabled. Please set portmask.\n"); + } + + check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask); + + struct rte_timer hb_timer, stats_timer; + + rte_timer_subsystem_init(); + rte_timer_init(&stats_timer); + + if (check_period > 0) { + rte_global_keepalive_info = + rte_keepalive_create(&dead_core, NULL); + if (rte_global_keepalive_info == NULL) + rte_exit(EXIT_FAILURE, "init_keep_alive() failed"); + rte_timer_init(&hb_timer); + if (rte_timer_reset(&hb_timer, + (check_period * rte_get_timer_hz()) / 1000, + PERIODICAL, + rte_lcore_id(), + (void(*)(struct rte_timer*, void*)) + &rte_keepalive_dispatch_pings, + rte_global_keepalive_info + ) != 0 ) + rte_exit(EXIT_FAILURE, "Keepalive setup failure.\n"); + } + if (timer_period > 0) { + if (rte_timer_reset(&stats_timer, + (timer_period * rte_get_timer_hz()) / 1000, + PERIODICAL, + rte_lcore_id(), + &print_stats, NULL + ) != 0 ) + rte_exit(EXIT_FAILURE, "Stats setup failure.\n"); + } + /* launch per-lcore init on every slave lcore */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id]; + + if (qconf->n_rx_port == 0) + RTE_LOG(INFO, L2FWD, + "lcore %u has nothing to do\n", + lcore_id + ); + else { + rte_eal_remote_launch( + l2fwd_launch_one_lcore, + NULL, + lcore_id + ); + rte_keepalive_register_core(rte_global_keepalive_info, + lcore_id); + } + } + for (;;) { + rte_timer_manage(); + rte_delay_ms(5); + } + + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/l2fwd/Makefile b/examples/l2fwd/Makefile new file mode 100644 index 00000000..78feeeb8 --- /dev/null +++ b/examples/l2fwd/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd + +# all sources are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd/main.c b/examples/l2fwd/main.c new file mode 100644 index 00000000..1ad94887 --- /dev/null +++ b/examples/l2fwd/main.c @@ -0,0 +1,720 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/queue.h> +#include <netinet/in.h> +#include <setjmp.h> +#include <stdarg.h> +#include <ctype.h> +#include <errno.h> +#include <getopt.h> +#include <signal.h> +#include <stdbool.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> + +static volatile bool force_quit; + +#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1 + +#define NB_MBUF 8192 + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr 
l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint32_t l2fwd_enabled_port_mask = 0; + +/* list of enabled ports */ +static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS]; + +static unsigned int l2fwd_rx_queue_per_lcore = 1; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + unsigned n_rx_port; + unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; +} __rte_cache_aligned; +struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +struct rte_mempool * l2fwd_pktmbuf_pool = NULL; + +/* Per-port statistics struct */ +struct l2fwd_port_statistics { + uint64_t tx; + uint64_t rx; + uint64_t dropped; +} __rte_cache_aligned; +struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS]; + +/* A tsc-based timer responsible for triggering statistics printout */ +#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 Ghz */ +#define MAX_TIMER_PERIOD 86400 /* 1 day max */ +static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */ + +/* Print out statistics on packets dropped */ +static void +print_stats(void) +{ + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; + unsigned portid; + + total_packets_dropped = 0; + total_packets_tx = 0; + total_packets_rx = 0; + + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' }; + + /* Clear screen and move to top left */ + printf("%s%s", clr, topLeft); + + printf("\nPort 
statistics ===================================="); + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + /* skip disabled ports */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + printf("\nStatistics for port %u ------------------------------" + "\nPackets sent: %24"PRIu64 + "\nPackets received: %20"PRIu64 + "\nPackets dropped: %21"PRIu64, + portid, + port_statistics[portid].tx, + port_statistics[portid].rx, + port_statistics[portid].dropped); + + total_packets_dropped += port_statistics[portid].dropped; + total_packets_tx += port_statistics[portid].tx; + total_packets_rx += port_statistics[portid].rx; + } + printf("\nAggregate statistics ===============================" + "\nTotal packets sent: %18"PRIu64 + "\nTotal packets received: %14"PRIu64 + "\nTotal packets dropped: %15"PRIu64, + total_packets_tx, + total_packets_rx, + total_packets_dropped); + printf("\n====================================================\n"); +} + +static void +l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid) +{ + struct ether_hdr *eth; + void *tmp; + unsigned dst_port; + int sent; + struct rte_eth_dev_tx_buffer *buffer; + + dst_port = l2fwd_dst_ports[portid]; + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = ð->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], ð->s_addr); + + buffer = tx_buffer[dst_port]; + sent = rte_eth_tx_buffer(dst_port, 0, buffer, m); + if (sent) + port_statistics[dst_port].tx += sent; +} + +/* main processing loop */ +static void +l2fwd_main_loop(void) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct rte_mbuf *m; + int sent; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc; + unsigned i, j, portid, nb_rx; + struct lcore_queue_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * + BURST_TX_DRAIN_US; + struct 
/*
 * Per-lcore entry point given to rte_eal_mp_remote_launch(): runs the
 * forwarding loop on the calling lcore and returns 0 once that loop ends.
 */
static int
l2fwd_launch_one_lcore(void *dummy)
{
	(void)dummy;	/* the launch argument is not used */

	l2fwd_main_loop();
	return 0;
}
/*
 * Parse the hexadecimal port mask given with -p.
 *
 * Returns the parsed mask, or 0 when the string is empty, contains
 * characters that are not part of a hexadecimal number, or encodes an
 * empty mask.
 *
 * Failure is reported as 0 rather than -1 on purpose: the caller stores
 * the result in the unsigned 32-bit l2fwd_enabled_port_mask and rejects
 * the option only when that value is 0, so a -1 would be accepted as
 * "all ports enabled" instead of being flagged as invalid.
 */
static int
l2fwd_parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	return pm;
}
TIMER_MILLISECOND; + if (timer_period < 0) { + printf("invalid timer period\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* long options */ + case 0: + l2fwd_usage(prgname); + return -1; + + default: + l2fwd_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + if (force_quit) + return; + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if (force_quit) + return; + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +static void +signal_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) { + printf("\n\nSignal %d received, preparing to exit...\n", + signum); + force_quit = true; + } +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + struct rte_eth_dev_info dev_info; + int ret; + uint8_t nb_ports; + uint8_t nb_ports_available; + uint8_t portid, last_port; + unsigned lcore_id, rx_lcore_id; + unsigned nb_ports_in_mask = 0; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n"); + argc -= ret; + argv += ret; + + force_quit = false; + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + /* parse application arguments (after the EAL ones) */ + ret = l2fwd_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n"); + + /* create the mbuf pool */ + l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32, + 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (l2fwd_pktmbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* reset l2fwd_dst_ports */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) + l2fwd_dst_ports[portid] = 
0; + last_port = 0; + + /* + * Each logical core is assigned a dedicated TX queue on each port. + */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + + if (nb_ports_in_mask % 2) { + l2fwd_dst_ports[portid] = last_port; + l2fwd_dst_ports[last_port] = portid; + } + else + last_port = portid; + + nb_ports_in_mask++; + + rte_eth_dev_info_get(portid, &dev_info); + } + if (nb_ports_in_mask % 2) { + printf("Notice: odd number of ports in portmask.\n"); + l2fwd_dst_ports[last_port] = last_port; + } + + rx_lcore_id = 0; + qconf = NULL; + + /* Initialize the port/queue configuration of each logical core */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + lcore_queue_conf[rx_lcore_id].n_rx_port == + l2fwd_rx_queue_per_lcore) { + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + } + + if (qconf != &lcore_queue_conf[rx_lcore_id]) + /* Assigned a new logical core in the loop above. */ + qconf = &lcore_queue_conf[rx_lcore_id]; + + qconf->rx_port_list[qconf->n_rx_port] = portid; + qconf->n_rx_port++; + printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid); + } + + nb_ports_available = nb_ports; + + /* Initialise each port */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) { + printf("Skipping disabled port %u\n", (unsigned) portid); + nb_ports_available--; + continue; + } + /* init port */ + printf("Initializing port %u... 
", (unsigned) portid); + fflush(stdout); + ret = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", + ret, (unsigned) portid); + + rte_eth_macaddr_get(portid,&l2fwd_ports_eth_addr[portid]); + + /* init one RX queue */ + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + rte_eth_dev_socket_id(portid), + NULL, + l2fwd_pktmbuf_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n", + ret, (unsigned) portid); + + /* init one TX queue on each port */ + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, + rte_eth_dev_socket_id(portid), + NULL); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n", + ret, (unsigned) portid); + + /* Initialize TX buffers */ + tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", + RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, + rte_eth_dev_socket_id(portid)); + if (tx_buffer[portid] == NULL) + rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n", + (unsigned) portid); + + rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST); + + ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid], + rte_eth_tx_buffer_count_callback, + &port_statistics[portid].dropped); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot set error callback for " + "tx buffer on port %u\n", (unsigned) portid); + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", + ret, (unsigned) portid); + + printf("done: \n"); + + rte_eth_promiscuous_enable(portid); + + printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", + (unsigned) portid, + l2fwd_ports_eth_addr[portid].addr_bytes[0], + l2fwd_ports_eth_addr[portid].addr_bytes[1], + l2fwd_ports_eth_addr[portid].addr_bytes[2], + l2fwd_ports_eth_addr[portid].addr_bytes[3], + l2fwd_ports_eth_addr[portid].addr_bytes[4], + 
l2fwd_ports_eth_addr[portid].addr_bytes[5]); + + /* initialize port stats */ + memset(&port_statistics, 0, sizeof(port_statistics)); + } + + if (!nb_ports_available) { + rte_exit(EXIT_FAILURE, + "All available ports are disabled. Please set portmask.\n"); + } + + check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask); + + ret = 0; + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) { + ret = -1; + break; + } + } + + for (portid = 0; portid < nb_ports; portid++) { + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + printf("Closing port %d...", portid); + rte_eth_dev_stop(portid); + rte_eth_dev_close(portid); + printf(" Done\n"); + } + printf("Bye...\n"); + + return ret; +} diff --git a/examples/l3fwd-acl/Makefile b/examples/l3fwd-acl/Makefile new file mode 100644 index 00000000..a3473a83 --- /dev/null +++ b/examples/l3fwd-acl/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l3fwd-acl + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c new file mode 100644 index 00000000..26d9f5eb --- /dev/null +++ b/examples/l3fwd-acl/main.c @@ -0,0 +1,2079 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_string_fns.h> +#include <rte_acl.h> + +#define DO_RFC_1812_CHECKS + +#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 + +#define MAX_JUMBO_PKT_LEN 9600 + +#define MEMPOOL_CACHE_SIZE 256 + +/* + * This expression is used to calculate the number of mbufs needed + * depending on user input, taking into account memory for rx and tx hardware + * rings, cache per lcore and mtable per port per lcore. 
+ * RTE_MAX is used to ensure that NB_MBUF never goes below a + * minimum value of 8192 + */ + +#define NB_MBUF RTE_MAX(\ + (nb_ports * nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ + nb_ports * nb_lcores * MAX_PKT_BURST + \ + nb_ports * n_tx_queue * RTE_TEST_TX_DESC_DEFAULT + \ + nb_lcores * MEMPOOL_CACHE_SIZE), \ + (unsigned)8192) + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +#define NB_SOCKETS 8 + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define PREFETCH_OFFSET 3 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint32_t enabled_port_mask; +static int promiscuous_on; /**< Ports set in promiscuous mode off by default. */ +static int numa_on = 1; /**< NUMA is enabled by default. 
*/ + +struct lcore_rx_queue { + uint8_t port_id; + uint8_t queue_id; +} __rte_cache_aligned; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS +#define MAX_RX_QUEUE_PER_PORT 128 + +#define MAX_LCORE_PARAMS 1024 +struct lcore_params { + uint8_t port_id; + uint8_t queue_id; + uint8_t lcore_id; +} __rte_cache_aligned; + +static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; +static struct lcore_params lcore_params_array_default[] = { + {0, 0, 2}, + {0, 1, 2}, + {0, 2, 2}, + {1, 0, 2}, + {1, 1, 2}, + {1, 2, 2}, + {2, 0, 2}, + {3, 0, 3}, + {3, 1, 3}, +}; + +static struct lcore_params *lcore_params = lcore_params_array_default; +static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / + sizeof(lcore_params_array_default[0]); + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | + ETH_RSS_TCP | ETH_RSS_SCTP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; + +/***********************start of ACL part******************************/ +#ifdef DO_RFC_1812_CHECKS +static inline int +is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len); +#endif +static inline void +send_single_packet(struct rte_mbuf *m, uint8_t port); + +#define MAX_ACL_RULE_NUM 100000 +#define DEFAULT_MAX_CATEGORIES 1 +#define L3FWD_ACL_IPV4_NAME "l3fwd-acl-ipv4" +#define L3FWD_ACL_IPV6_NAME "l3fwd-acl-ipv6" +#define ACL_LEAD_CHAR ('@') +#define ROUTE_LEAD_CHAR ('R') +#define COMMENT_LEAD_CHAR ('#') +#define OPTION_CONFIG 
/*
 * Split the 32-bit value ip into its four bytes, most significant first,
 * and store them through the pointers a, b, c and d.
 *
 * Every argument is parenthesised so that an expression such as `x | y`
 * or `p + 1` is taken as a whole. Note that ip is evaluated four times, so
 * it must not have side effects.
 */
#define uint32_t_to_char(ip, a, b, c, d) do {\
		*(a) = (unsigned char)((ip) >> 24 & 0xff);\
		*(b) = (unsigned char)((ip) >> 16 & 0xff);\
		*(c) = (unsigned char)((ip) >> 8 & 0xff);\
		*(d) = (unsigned char)((ip) & 0xff);\
	} while (0)
+ */ + +enum { + PROTO_FIELD_IPV4, + SRC_FIELD_IPV4, + DST_FIELD_IPV4, + SRCP_FIELD_IPV4, + DSTP_FIELD_IPV4, + NUM_FIELDS_IPV4 +}; + +/* + * That effectively defines order of IPV4VLAN classifications: + * - PROTO + * - VLAN (TAG and DOMAIN) + * - SRC IP ADDRESS + * - DST IP ADDRESS + * - PORTS (SRC and DST) + */ +enum { + RTE_ACL_IPV4VLAN_PROTO, + RTE_ACL_IPV4VLAN_VLAN, + RTE_ACL_IPV4VLAN_SRC, + RTE_ACL_IPV4VLAN_DST, + RTE_ACL_IPV4VLAN_PORTS, + RTE_ACL_IPV4VLAN_NUM +}; + +struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = { + { + .type = RTE_ACL_FIELD_TYPE_BITMASK, + .size = sizeof(uint8_t), + .field_index = PROTO_FIELD_IPV4, + .input_index = RTE_ACL_IPV4VLAN_PROTO, + .offset = 0, + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC_FIELD_IPV4, + .input_index = RTE_ACL_IPV4VLAN_SRC, + .offset = offsetof(struct ipv4_hdr, src_addr) - + offsetof(struct ipv4_hdr, next_proto_id), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST_FIELD_IPV4, + .input_index = RTE_ACL_IPV4VLAN_DST, + .offset = offsetof(struct ipv4_hdr, dst_addr) - + offsetof(struct ipv4_hdr, next_proto_id), + }, + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = SRCP_FIELD_IPV4, + .input_index = RTE_ACL_IPV4VLAN_PORTS, + .offset = sizeof(struct ipv4_hdr) - + offsetof(struct ipv4_hdr, next_proto_id), + }, + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = DSTP_FIELD_IPV4, + .input_index = RTE_ACL_IPV4VLAN_PORTS, + .offset = sizeof(struct ipv4_hdr) - + offsetof(struct ipv4_hdr, next_proto_id) + + sizeof(uint16_t), + }, +}; + +#define IPV6_ADDR_LEN 16 +#define IPV6_ADDR_U16 (IPV6_ADDR_LEN / sizeof(uint16_t)) +#define IPV6_ADDR_U32 (IPV6_ADDR_LEN / sizeof(uint32_t)) + +enum { + PROTO_FIELD_IPV6, + SRC1_FIELD_IPV6, + SRC2_FIELD_IPV6, + SRC3_FIELD_IPV6, + SRC4_FIELD_IPV6, + DST1_FIELD_IPV6, + DST2_FIELD_IPV6, + DST3_FIELD_IPV6, + DST4_FIELD_IPV6, + 
SRCP_FIELD_IPV6, + DSTP_FIELD_IPV6, + NUM_FIELDS_IPV6 +}; + +struct rte_acl_field_def ipv6_defs[NUM_FIELDS_IPV6] = { + { + .type = RTE_ACL_FIELD_TYPE_BITMASK, + .size = sizeof(uint8_t), + .field_index = PROTO_FIELD_IPV6, + .input_index = PROTO_FIELD_IPV6, + .offset = 0, + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC1_FIELD_IPV6, + .input_index = SRC1_FIELD_IPV6, + .offset = offsetof(struct ipv6_hdr, src_addr) - + offsetof(struct ipv6_hdr, proto), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC2_FIELD_IPV6, + .input_index = SRC2_FIELD_IPV6, + .offset = offsetof(struct ipv6_hdr, src_addr) - + offsetof(struct ipv6_hdr, proto) + sizeof(uint32_t), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC3_FIELD_IPV6, + .input_index = SRC3_FIELD_IPV6, + .offset = offsetof(struct ipv6_hdr, src_addr) - + offsetof(struct ipv6_hdr, proto) + 2 * sizeof(uint32_t), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC4_FIELD_IPV6, + .input_index = SRC4_FIELD_IPV6, + .offset = offsetof(struct ipv6_hdr, src_addr) - + offsetof(struct ipv6_hdr, proto) + 3 * sizeof(uint32_t), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST1_FIELD_IPV6, + .input_index = DST1_FIELD_IPV6, + .offset = offsetof(struct ipv6_hdr, dst_addr) + - offsetof(struct ipv6_hdr, proto), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST2_FIELD_IPV6, + .input_index = DST2_FIELD_IPV6, + .offset = offsetof(struct ipv6_hdr, dst_addr) - + offsetof(struct ipv6_hdr, proto) + sizeof(uint32_t), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST3_FIELD_IPV6, + .input_index = DST3_FIELD_IPV6, + .offset = offsetof(struct ipv6_hdr, dst_addr) - + offsetof(struct ipv6_hdr, proto) + 2 * sizeof(uint32_t), + }, + { + .type = 
RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST4_FIELD_IPV6, + .input_index = DST4_FIELD_IPV6, + .offset = offsetof(struct ipv6_hdr, dst_addr) - + offsetof(struct ipv6_hdr, proto) + 3 * sizeof(uint32_t), + }, + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = SRCP_FIELD_IPV6, + .input_index = SRCP_FIELD_IPV6, + .offset = sizeof(struct ipv6_hdr) - + offsetof(struct ipv6_hdr, proto), + }, + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = DSTP_FIELD_IPV6, + .input_index = SRCP_FIELD_IPV6, + .offset = sizeof(struct ipv6_hdr) - + offsetof(struct ipv6_hdr, proto) + sizeof(uint16_t), + }, +}; + +enum { + CB_FLD_SRC_ADDR, + CB_FLD_DST_ADDR, + CB_FLD_SRC_PORT_LOW, + CB_FLD_SRC_PORT_DLM, + CB_FLD_SRC_PORT_HIGH, + CB_FLD_DST_PORT_LOW, + CB_FLD_DST_PORT_DLM, + CB_FLD_DST_PORT_HIGH, + CB_FLD_PROTO, + CB_FLD_USERDATA, + CB_FLD_NUM, +}; + +RTE_ACL_RULE_DEF(acl4_rule, RTE_DIM(ipv4_defs)); +RTE_ACL_RULE_DEF(acl6_rule, RTE_DIM(ipv6_defs)); + +struct acl_search_t { + const uint8_t *data_ipv4[MAX_PKT_BURST]; + struct rte_mbuf *m_ipv4[MAX_PKT_BURST]; + uint32_t res_ipv4[MAX_PKT_BURST]; + int num_ipv4; + + const uint8_t *data_ipv6[MAX_PKT_BURST]; + struct rte_mbuf *m_ipv6[MAX_PKT_BURST]; + uint32_t res_ipv6[MAX_PKT_BURST]; + int num_ipv6; +}; + +static struct { + char mapped[NB_SOCKETS]; + struct rte_acl_ctx *acx_ipv4[NB_SOCKETS]; + struct rte_acl_ctx *acx_ipv6[NB_SOCKETS]; +#ifdef L3FWDACL_DEBUG + struct acl4_rule *rule_ipv4; + struct acl6_rule *rule_ipv6; +#endif +} acl_config; + +static struct{ + const char *rule_ipv4_name; + const char *rule_ipv6_name; + int scalar; +} parm_config; + +const char cb_port_delim[] = ":"; + +static inline void +print_one_ipv4_rule(struct acl4_rule *rule, int extra) +{ + unsigned char a, b, c, d; + + uint32_t_to_char(rule->field[SRC_FIELD_IPV4].value.u32, + &a, &b, &c, &d); + printf("%hhu.%hhu.%hhu.%hhu/%u ", a, b, c, d, + 
rule->field[SRC_FIELD_IPV4].mask_range.u32); + uint32_t_to_char(rule->field[DST_FIELD_IPV4].value.u32, + &a, &b, &c, &d); + printf("%hhu.%hhu.%hhu.%hhu/%u ", a, b, c, d, + rule->field[DST_FIELD_IPV4].mask_range.u32); + printf("%hu : %hu %hu : %hu 0x%hhx/0x%hhx ", + rule->field[SRCP_FIELD_IPV4].value.u16, + rule->field[SRCP_FIELD_IPV4].mask_range.u16, + rule->field[DSTP_FIELD_IPV4].value.u16, + rule->field[DSTP_FIELD_IPV4].mask_range.u16, + rule->field[PROTO_FIELD_IPV4].value.u8, + rule->field[PROTO_FIELD_IPV4].mask_range.u8); + if (extra) + printf("0x%x-0x%x-0x%x ", + rule->data.category_mask, + rule->data.priority, + rule->data.userdata); +} + +static inline void +print_one_ipv6_rule(struct acl6_rule *rule, int extra) +{ + unsigned char a, b, c, d; + + uint32_t_to_char(rule->field[SRC1_FIELD_IPV6].value.u32, + &a, &b, &c, &d); + printf("%.2x%.2x:%.2x%.2x", a, b, c, d); + uint32_t_to_char(rule->field[SRC2_FIELD_IPV6].value.u32, + &a, &b, &c, &d); + printf(":%.2x%.2x:%.2x%.2x", a, b, c, d); + uint32_t_to_char(rule->field[SRC3_FIELD_IPV6].value.u32, + &a, &b, &c, &d); + printf(":%.2x%.2x:%.2x%.2x", a, b, c, d); + uint32_t_to_char(rule->field[SRC4_FIELD_IPV6].value.u32, + &a, &b, &c, &d); + printf(":%.2x%.2x:%.2x%.2x/%u ", a, b, c, d, + rule->field[SRC1_FIELD_IPV6].mask_range.u32 + + rule->field[SRC2_FIELD_IPV6].mask_range.u32 + + rule->field[SRC3_FIELD_IPV6].mask_range.u32 + + rule->field[SRC4_FIELD_IPV6].mask_range.u32); + + uint32_t_to_char(rule->field[DST1_FIELD_IPV6].value.u32, + &a, &b, &c, &d); + printf("%.2x%.2x:%.2x%.2x", a, b, c, d); + uint32_t_to_char(rule->field[DST2_FIELD_IPV6].value.u32, + &a, &b, &c, &d); + printf(":%.2x%.2x:%.2x%.2x", a, b, c, d); + uint32_t_to_char(rule->field[DST3_FIELD_IPV6].value.u32, + &a, &b, &c, &d); + printf(":%.2x%.2x:%.2x%.2x", a, b, c, d); + uint32_t_to_char(rule->field[DST4_FIELD_IPV6].value.u32, + &a, &b, &c, &d); + printf(":%.2x%.2x:%.2x%.2x/%u ", a, b, c, d, + rule->field[DST1_FIELD_IPV6].mask_range.u32 + + 
rule->field[DST2_FIELD_IPV6].mask_range.u32 + + rule->field[DST3_FIELD_IPV6].mask_range.u32 + + rule->field[DST4_FIELD_IPV6].mask_range.u32); + + printf("%hu : %hu %hu : %hu 0x%hhx/0x%hhx ", + rule->field[SRCP_FIELD_IPV6].value.u16, + rule->field[SRCP_FIELD_IPV6].mask_range.u16, + rule->field[DSTP_FIELD_IPV6].value.u16, + rule->field[DSTP_FIELD_IPV6].mask_range.u16, + rule->field[PROTO_FIELD_IPV6].value.u8, + rule->field[PROTO_FIELD_IPV6].mask_range.u8); + if (extra) + printf("0x%x-0x%x-0x%x ", + rule->data.category_mask, + rule->data.priority, + rule->data.userdata); +} + +/* Bypass comment and empty lines */ +static inline int +is_bypass_line(char *buff) +{ + int i = 0; + + /* comment line */ + if (buff[0] == COMMENT_LEAD_CHAR) + return 1; + /* empty line */ + while (buff[i] != '\0') { + if (!isspace(buff[i])) + return 0; + i++; + } + return 1; +} + +#ifdef L3FWDACL_DEBUG +static inline void +dump_acl4_rule(struct rte_mbuf *m, uint32_t sig) +{ + uint32_t offset = sig & ~ACL_DENY_SIGNATURE; + unsigned char a, b, c, d; + struct ipv4_hdr *ipv4_hdr = rte_pktmbuf_mtod_offset(m, + struct ipv4_hdr *, + sizeof(struct ether_hdr)); + + uint32_t_to_char(rte_bswap32(ipv4_hdr->src_addr), &a, &b, &c, &d); + printf("Packet Src:%hhu.%hhu.%hhu.%hhu ", a, b, c, d); + uint32_t_to_char(rte_bswap32(ipv4_hdr->dst_addr), &a, &b, &c, &d); + printf("Dst:%hhu.%hhu.%hhu.%hhu ", a, b, c, d); + + printf("Src port:%hu,Dst port:%hu ", + rte_bswap16(*(uint16_t *)(ipv4_hdr + 1)), + rte_bswap16(*((uint16_t *)(ipv4_hdr + 1) + 1))); + printf("hit ACL %d - ", offset); + + print_one_ipv4_rule(acl_config.rule_ipv4 + offset, 1); + + printf("\n\n"); +} + +static inline void +dump_acl6_rule(struct rte_mbuf *m, uint32_t sig) +{ + unsigned i; + uint32_t offset = sig & ~ACL_DENY_SIGNATURE; + struct ipv6_hdr *ipv6_hdr = rte_pktmbuf_mtod_offset(m, + struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + printf("Packet Src"); + for (i = 0; i < RTE_DIM(ipv6_hdr->src_addr); i += sizeof(uint16_t)) + 
printf(":%.2x%.2x", + ipv6_hdr->src_addr[i], ipv6_hdr->src_addr[i + 1]); + + printf("\nDst"); + for (i = 0; i < RTE_DIM(ipv6_hdr->dst_addr); i += sizeof(uint16_t)) + printf(":%.2x%.2x", + ipv6_hdr->dst_addr[i], ipv6_hdr->dst_addr[i + 1]); + + printf("\nSrc port:%hu,Dst port:%hu ", + rte_bswap16(*(uint16_t *)(ipv6_hdr + 1)), + rte_bswap16(*((uint16_t *)(ipv6_hdr + 1) + 1))); + printf("hit ACL %d - ", offset); + + print_one_ipv6_rule(acl_config.rule_ipv6 + offset, 1); + + printf("\n\n"); +} +#endif /* L3FWDACL_DEBUG */ + +static inline void +dump_ipv4_rules(struct acl4_rule *rule, int num, int extra) +{ + int i; + + for (i = 0; i < num; i++, rule++) { + printf("\t%d:", i + 1); + print_one_ipv4_rule(rule, extra); + printf("\n"); + } +} + +static inline void +dump_ipv6_rules(struct acl6_rule *rule, int num, int extra) +{ + int i; + + for (i = 0; i < num; i++, rule++) { + printf("\t%d:", i + 1); + print_one_ipv6_rule(rule, extra); + printf("\n"); + } +} + +#ifdef DO_RFC_1812_CHECKS +static inline void +prepare_one_packet(struct rte_mbuf **pkts_in, struct acl_search_t *acl, + int index) +{ + struct ipv4_hdr *ipv4_hdr; + struct rte_mbuf *pkt = pkts_in[index]; + + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { + ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + + /* Check to make sure the packet is valid (RFC1812) */ + if (is_valid_ipv4_pkt(ipv4_hdr, pkt->pkt_len) >= 0) { + + /* Update time to live and header checksum */ + --(ipv4_hdr->time_to_live); + ++(ipv4_hdr->hdr_checksum); + + /* Fill acl structure */ + acl->data_ipv4[acl->num_ipv4] = MBUF_IPV4_2PROTO(pkt); + acl->m_ipv4[(acl->num_ipv4)++] = pkt; + + } else { + /* Not a valid IPv4 packet */ + rte_pktmbuf_free(pkt); + } + } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { + /* Fill acl structure */ + acl->data_ipv6[acl->num_ipv6] = MBUF_IPV6_2PROTO(pkt); + acl->m_ipv6[(acl->num_ipv6)++] = pkt; + + } else { + /* Unknown type, drop the packet */ + rte_pktmbuf_free(pkt); + } +} + 
+#else +static inline void +prepare_one_packet(struct rte_mbuf **pkts_in, struct acl_search_t *acl, + int index) +{ + struct rte_mbuf *pkt = pkts_in[index]; + + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { + /* Fill acl structure */ + acl->data_ipv4[acl->num_ipv4] = MBUF_IPV4_2PROTO(pkt); + acl->m_ipv4[(acl->num_ipv4)++] = pkt; + + } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { + /* Fill acl structure */ + acl->data_ipv6[acl->num_ipv6] = MBUF_IPV6_2PROTO(pkt); + acl->m_ipv6[(acl->num_ipv6)++] = pkt; + } else { + /* Unknown type, drop the packet */ + rte_pktmbuf_free(pkt); + } +} +#endif /* DO_RFC_1812_CHECKS */ + +static inline void +prepare_acl_parameter(struct rte_mbuf **pkts_in, struct acl_search_t *acl, + int nb_rx) +{ + int i; + + acl->num_ipv4 = 0; + acl->num_ipv6 = 0; + + /* Prefetch first packets */ + for (i = 0; i < PREFETCH_OFFSET && i < nb_rx; i++) { + rte_prefetch0(rte_pktmbuf_mtod( + pkts_in[i], void *)); + } + + for (i = 0; i < (nb_rx - PREFETCH_OFFSET); i++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_in[ + i + PREFETCH_OFFSET], void *)); + prepare_one_packet(pkts_in, acl, i); + } + + /* Process left packets */ + for (; i < nb_rx; i++) + prepare_one_packet(pkts_in, acl, i); +} + +static inline void +send_one_packet(struct rte_mbuf *m, uint32_t res) +{ + if (likely((res & ACL_DENY_SIGNATURE) == 0 && res != 0)) { + /* forward packets */ + send_single_packet(m, + (uint8_t)(res - FWD_PORT_SHIFT)); + } else{ + /* in the ACL list, drop it */ +#ifdef L3FWDACL_DEBUG + if ((res & ACL_DENY_SIGNATURE) != 0) { + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) + dump_acl4_rule(m, res); + else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) + dump_acl6_rule(m, res); + } +#endif + rte_pktmbuf_free(m); + } +} + + + +static inline void +send_packets(struct rte_mbuf **m, uint32_t *res, int num) +{ + int i; + + /* Prefetch first packets */ + for (i = 0; i < PREFETCH_OFFSET && i < num; i++) { + rte_prefetch0(rte_pktmbuf_mtod( + m[i], void *)); + } + + for (i = 0; i < (num - 
PREFETCH_OFFSET); i++) { + rte_prefetch0(rte_pktmbuf_mtod(m[ + i + PREFETCH_OFFSET], void *)); + send_one_packet(m[i], res[i]); + } + + /* Process left packets */ + for (; i < num; i++) + send_one_packet(m[i], res[i]); +} + +/* + * Parses IPV6 address, exepcts the following format: + * XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX (where X - is a hexedecimal digit). + */ +static int +parse_ipv6_addr(const char *in, const char **end, uint32_t v[IPV6_ADDR_U32], + char dlm) +{ + uint32_t addr[IPV6_ADDR_U16]; + + GET_CB_FIELD(in, addr[0], 16, UINT16_MAX, ':'); + GET_CB_FIELD(in, addr[1], 16, UINT16_MAX, ':'); + GET_CB_FIELD(in, addr[2], 16, UINT16_MAX, ':'); + GET_CB_FIELD(in, addr[3], 16, UINT16_MAX, ':'); + GET_CB_FIELD(in, addr[4], 16, UINT16_MAX, ':'); + GET_CB_FIELD(in, addr[5], 16, UINT16_MAX, ':'); + GET_CB_FIELD(in, addr[6], 16, UINT16_MAX, ':'); + GET_CB_FIELD(in, addr[7], 16, UINT16_MAX, dlm); + + *end = in; + + v[0] = (addr[0] << 16) + addr[1]; + v[1] = (addr[2] << 16) + addr[3]; + v[2] = (addr[4] << 16) + addr[5]; + v[3] = (addr[6] << 16) + addr[7]; + + return 0; +} + +static int +parse_ipv6_net(const char *in, struct rte_acl_field field[4]) +{ + int32_t rc; + const char *mp; + uint32_t i, m, v[4]; + const uint32_t nbu32 = sizeof(uint32_t) * CHAR_BIT; + + /* get address. */ + rc = parse_ipv6_addr(in, &mp, v, '/'); + if (rc != 0) + return rc; + + /* get mask. */ + GET_CB_FIELD(mp, m, 0, CHAR_BIT * sizeof(v), 0); + + /* put all together. */ + for (i = 0; i != RTE_DIM(v); i++) { + if (m >= (i + 1) * nbu32) + field[i].mask_range.u32 = nbu32; + else + field[i].mask_range.u32 = m > (i * nbu32) ? + m - (i * 32) : 0; + + field[i].value.u32 = v[i]; + } + + return 0; +} + +static int +parse_cb_ipv6_rule(char *str, struct rte_acl_rule *v, int has_userdata) +{ + int i, rc; + char *s, *sp, *in[CB_FLD_NUM]; + static const char *dlm = " \t\n"; + int dim = has_userdata ? 
CB_FLD_NUM : CB_FLD_USERDATA; + s = str; + + for (i = 0; i != dim; i++, s = NULL) { + in[i] = strtok_r(s, dlm, &sp); + if (in[i] == NULL) + return -EINVAL; + } + + rc = parse_ipv6_net(in[CB_FLD_SRC_ADDR], v->field + SRC1_FIELD_IPV6); + if (rc != 0) { + acl_log("failed to read source address/mask: %s\n", + in[CB_FLD_SRC_ADDR]); + return rc; + } + + rc = parse_ipv6_net(in[CB_FLD_DST_ADDR], v->field + DST1_FIELD_IPV6); + if (rc != 0) { + acl_log("failed to read destination address/mask: %s\n", + in[CB_FLD_DST_ADDR]); + return rc; + } + + /* source port. */ + GET_CB_FIELD(in[CB_FLD_SRC_PORT_LOW], + v->field[SRCP_FIELD_IPV6].value.u16, + 0, UINT16_MAX, 0); + GET_CB_FIELD(in[CB_FLD_SRC_PORT_HIGH], + v->field[SRCP_FIELD_IPV6].mask_range.u16, + 0, UINT16_MAX, 0); + + if (strncmp(in[CB_FLD_SRC_PORT_DLM], cb_port_delim, + sizeof(cb_port_delim)) != 0) + return -EINVAL; + + /* destination port. */ + GET_CB_FIELD(in[CB_FLD_DST_PORT_LOW], + v->field[DSTP_FIELD_IPV6].value.u16, + 0, UINT16_MAX, 0); + GET_CB_FIELD(in[CB_FLD_DST_PORT_HIGH], + v->field[DSTP_FIELD_IPV6].mask_range.u16, + 0, UINT16_MAX, 0); + + if (strncmp(in[CB_FLD_DST_PORT_DLM], cb_port_delim, + sizeof(cb_port_delim)) != 0) + return -EINVAL; + + if (v->field[SRCP_FIELD_IPV6].mask_range.u16 + < v->field[SRCP_FIELD_IPV6].value.u16 + || v->field[DSTP_FIELD_IPV6].mask_range.u16 + < v->field[DSTP_FIELD_IPV6].value.u16) + return -EINVAL; + + GET_CB_FIELD(in[CB_FLD_PROTO], v->field[PROTO_FIELD_IPV6].value.u8, + 0, UINT8_MAX, '/'); + GET_CB_FIELD(in[CB_FLD_PROTO], v->field[PROTO_FIELD_IPV6].mask_range.u8, + 0, UINT8_MAX, 0); + + if (has_userdata) + GET_CB_FIELD(in[CB_FLD_USERDATA], v->data.userdata, + 0, UINT32_MAX, 0); + + return 0; +} + +/* + * Parse ClassBench rules file. 
+ * Expected format: + * '@'<src_ipv4_addr>'/'<masklen> <space> \ + * <dst_ipv4_addr>'/'<masklen> <space> \ + * <src_port_low> <space> ":" <src_port_high> <space> \ + * <dst_port_low> <space> ":" <dst_port_high> <space> \ + * <proto>'/'<mask> + */ +static int +parse_ipv4_net(const char *in, uint32_t *addr, uint32_t *mask_len) +{ + uint8_t a, b, c, d, m; + + GET_CB_FIELD(in, a, 0, UINT8_MAX, '.'); + GET_CB_FIELD(in, b, 0, UINT8_MAX, '.'); + GET_CB_FIELD(in, c, 0, UINT8_MAX, '.'); + GET_CB_FIELD(in, d, 0, UINT8_MAX, '/'); + GET_CB_FIELD(in, m, 0, sizeof(uint32_t) * CHAR_BIT, 0); + + addr[0] = IPv4(a, b, c, d); + mask_len[0] = m; + + return 0; +} + +static int +parse_cb_ipv4vlan_rule(char *str, struct rte_acl_rule *v, int has_userdata) +{ + int i, rc; + char *s, *sp, *in[CB_FLD_NUM]; + static const char *dlm = " \t\n"; + int dim = has_userdata ? CB_FLD_NUM : CB_FLD_USERDATA; + s = str; + + for (i = 0; i != dim; i++, s = NULL) { + in[i] = strtok_r(s, dlm, &sp); + if (in[i] == NULL) + return -EINVAL; + } + + rc = parse_ipv4_net(in[CB_FLD_SRC_ADDR], + &v->field[SRC_FIELD_IPV4].value.u32, + &v->field[SRC_FIELD_IPV4].mask_range.u32); + if (rc != 0) { + acl_log("failed to read source address/mask: %s\n", + in[CB_FLD_SRC_ADDR]); + return rc; + } + + rc = parse_ipv4_net(in[CB_FLD_DST_ADDR], + &v->field[DST_FIELD_IPV4].value.u32, + &v->field[DST_FIELD_IPV4].mask_range.u32); + if (rc != 0) { + acl_log("failed to read destination address/mask: %s\n", + in[CB_FLD_DST_ADDR]); + return rc; + } + + GET_CB_FIELD(in[CB_FLD_SRC_PORT_LOW], + v->field[SRCP_FIELD_IPV4].value.u16, + 0, UINT16_MAX, 0); + GET_CB_FIELD(in[CB_FLD_SRC_PORT_HIGH], + v->field[SRCP_FIELD_IPV4].mask_range.u16, + 0, UINT16_MAX, 0); + + if (strncmp(in[CB_FLD_SRC_PORT_DLM], cb_port_delim, + sizeof(cb_port_delim)) != 0) + return -EINVAL; + + GET_CB_FIELD(in[CB_FLD_DST_PORT_LOW], + v->field[DSTP_FIELD_IPV4].value.u16, + 0, UINT16_MAX, 0); + GET_CB_FIELD(in[CB_FLD_DST_PORT_HIGH], + 
v->field[DSTP_FIELD_IPV4].mask_range.u16, + 0, UINT16_MAX, 0); + + if (strncmp(in[CB_FLD_DST_PORT_DLM], cb_port_delim, + sizeof(cb_port_delim)) != 0) + return -EINVAL; + + if (v->field[SRCP_FIELD_IPV4].mask_range.u16 + < v->field[SRCP_FIELD_IPV4].value.u16 + || v->field[DSTP_FIELD_IPV4].mask_range.u16 + < v->field[DSTP_FIELD_IPV4].value.u16) + return -EINVAL; + + GET_CB_FIELD(in[CB_FLD_PROTO], v->field[PROTO_FIELD_IPV4].value.u8, + 0, UINT8_MAX, '/'); + GET_CB_FIELD(in[CB_FLD_PROTO], v->field[PROTO_FIELD_IPV4].mask_range.u8, + 0, UINT8_MAX, 0); + + if (has_userdata) + GET_CB_FIELD(in[CB_FLD_USERDATA], v->data.userdata, 0, + UINT32_MAX, 0); + + return 0; +} + +static int +add_rules(const char *rule_path, + struct rte_acl_rule **proute_base, + unsigned int *proute_num, + struct rte_acl_rule **pacl_base, + unsigned int *pacl_num, uint32_t rule_size, + int (*parser)(char *, struct rte_acl_rule*, int)) +{ + uint8_t *acl_rules, *route_rules; + struct rte_acl_rule *next; + unsigned int acl_num = 0, route_num = 0, total_num = 0; + unsigned int acl_cnt = 0, route_cnt = 0; + char buff[LINE_MAX]; + FILE *fh = fopen(rule_path, "rb"); + unsigned int i = 0; + + if (fh == NULL) + rte_exit(EXIT_FAILURE, "%s: Open %s failed\n", __func__, + rule_path); + + while ((fgets(buff, LINE_MAX, fh) != NULL)) { + if (buff[0] == ROUTE_LEAD_CHAR) + route_num++; + else if (buff[0] == ACL_LEAD_CHAR) + acl_num++; + } + + if (0 == route_num) + rte_exit(EXIT_FAILURE, "Not find any route entries in %s!\n", + rule_path); + + fseek(fh, 0, SEEK_SET); + + acl_rules = calloc(acl_num, rule_size); + + if (NULL == acl_rules) + rte_exit(EXIT_FAILURE, "%s: failed to malloc memory\n", + __func__); + + route_rules = calloc(route_num, rule_size); + + if (NULL == route_rules) + rte_exit(EXIT_FAILURE, "%s: failed to malloc memory\n", + __func__); + + i = 0; + while (fgets(buff, LINE_MAX, fh) != NULL) { + i++; + + if (is_bypass_line(buff)) + continue; + + char s = buff[0]; + + /* Route entry */ + if (s == 
ROUTE_LEAD_CHAR) + next = (struct rte_acl_rule *)(route_rules + + route_cnt * rule_size); + + /* ACL entry */ + else if (s == ACL_LEAD_CHAR) + next = (struct rte_acl_rule *)(acl_rules + + acl_cnt * rule_size); + + /* Illegal line */ + else + rte_exit(EXIT_FAILURE, + "%s Line %u: should start with leading " + "char %c or %c\n", + rule_path, i, ROUTE_LEAD_CHAR, ACL_LEAD_CHAR); + + if (parser(buff + 1, next, s == ROUTE_LEAD_CHAR) != 0) + rte_exit(EXIT_FAILURE, + "%s Line %u: parse rules error\n", + rule_path, i); + + if (s == ROUTE_LEAD_CHAR) { + /* Check the forwarding port number */ + if ((enabled_port_mask & (1 << next->data.userdata)) == + 0) + rte_exit(EXIT_FAILURE, + "%s Line %u: fwd number illegal:%u\n", + rule_path, i, next->data.userdata); + next->data.userdata += FWD_PORT_SHIFT; + route_cnt++; + } else { + next->data.userdata = ACL_DENY_SIGNATURE + acl_cnt; + acl_cnt++; + } + + next->data.priority = RTE_ACL_MAX_PRIORITY - total_num; + next->data.category_mask = -1; + total_num++; + } + + fclose(fh); + + *pacl_base = (struct rte_acl_rule *)acl_rules; + *pacl_num = acl_num; + *proute_base = (struct rte_acl_rule *)route_rules; + *proute_num = route_cnt; + + return 0; +} + +static void +dump_acl_config(void) +{ + printf("ACL option are:\n"); + printf(OPTION_RULE_IPV4": %s\n", parm_config.rule_ipv4_name); + printf(OPTION_RULE_IPV6": %s\n", parm_config.rule_ipv6_name); + printf(OPTION_SCALAR": %d\n", parm_config.scalar); +} + +static int +check_acl_config(void) +{ + if (parm_config.rule_ipv4_name == NULL) { + acl_log("ACL IPv4 rule file not specified\n"); + return -1; + } else if (parm_config.rule_ipv6_name == NULL) { + acl_log("ACL IPv6 rule file not specified\n"); + return -1; + } + + return 0; +} + +static struct rte_acl_ctx* +setup_acl(struct rte_acl_rule *route_base, + struct rte_acl_rule *acl_base, unsigned int route_num, + unsigned int acl_num, int ipv6, int socketid) +{ + char name[PATH_MAX]; + struct rte_acl_param acl_param; + struct rte_acl_config 
acl_build_param; + struct rte_acl_ctx *context; + int dim = ipv6 ? RTE_DIM(ipv6_defs) : RTE_DIM(ipv4_defs); + + /* Create ACL contexts */ + snprintf(name, sizeof(name), "%s%d", + ipv6 ? L3FWD_ACL_IPV6_NAME : L3FWD_ACL_IPV4_NAME, + socketid); + + acl_param.name = name; + acl_param.socket_id = socketid; + acl_param.rule_size = RTE_ACL_RULE_SZ(dim); + acl_param.max_rule_num = MAX_ACL_RULE_NUM; + + if ((context = rte_acl_create(&acl_param)) == NULL) + rte_exit(EXIT_FAILURE, "Failed to create ACL context\n"); + + if (parm_config.scalar && rte_acl_set_ctx_classify(context, + RTE_ACL_CLASSIFY_SCALAR) != 0) + rte_exit(EXIT_FAILURE, + "Failed to setup classify method for ACL context\n"); + + if (rte_acl_add_rules(context, route_base, route_num) < 0) + rte_exit(EXIT_FAILURE, "add rules failed\n"); + + if (rte_acl_add_rules(context, acl_base, acl_num) < 0) + rte_exit(EXIT_FAILURE, "add rules failed\n"); + + /* Perform builds */ + memset(&acl_build_param, 0, sizeof(acl_build_param)); + + acl_build_param.num_categories = DEFAULT_MAX_CATEGORIES; + acl_build_param.num_fields = dim; + memcpy(&acl_build_param.defs, ipv6 ? ipv6_defs : ipv4_defs, + ipv6 ? 
sizeof(ipv6_defs) : sizeof(ipv4_defs)); + + if (rte_acl_build(context, &acl_build_param) != 0) + rte_exit(EXIT_FAILURE, "Failed to build ACL trie\n"); + + rte_acl_dump(context); + + return context; +} + +static int +app_acl_init(void) +{ + unsigned lcore_id; + unsigned int i; + int socketid; + struct rte_acl_rule *acl_base_ipv4, *route_base_ipv4, + *acl_base_ipv6, *route_base_ipv6; + unsigned int acl_num_ipv4 = 0, route_num_ipv4 = 0, + acl_num_ipv6 = 0, route_num_ipv6 = 0; + + if (check_acl_config() != 0) + rte_exit(EXIT_FAILURE, "Failed to get valid ACL options\n"); + + dump_acl_config(); + + /* Load rules from the input file */ + if (add_rules(parm_config.rule_ipv4_name, &route_base_ipv4, + &route_num_ipv4, &acl_base_ipv4, &acl_num_ipv4, + sizeof(struct acl4_rule), &parse_cb_ipv4vlan_rule) < 0) + rte_exit(EXIT_FAILURE, "Failed to add rules\n"); + + acl_log("IPv4 Route entries %u:\n", route_num_ipv4); + dump_ipv4_rules((struct acl4_rule *)route_base_ipv4, route_num_ipv4, 1); + + acl_log("IPv4 ACL entries %u:\n", acl_num_ipv4); + dump_ipv4_rules((struct acl4_rule *)acl_base_ipv4, acl_num_ipv4, 1); + + if (add_rules(parm_config.rule_ipv6_name, &route_base_ipv6, + &route_num_ipv6, + &acl_base_ipv6, &acl_num_ipv6, + sizeof(struct acl6_rule), &parse_cb_ipv6_rule) < 0) + rte_exit(EXIT_FAILURE, "Failed to add rules\n"); + + acl_log("IPv6 Route entries %u:\n", route_num_ipv6); + dump_ipv6_rules((struct acl6_rule *)route_base_ipv6, route_num_ipv6, 1); + + acl_log("IPv6 ACL entries %u:\n", acl_num_ipv6); + dump_ipv6_rules((struct acl6_rule *)acl_base_ipv6, acl_num_ipv6, 1); + + memset(&acl_config, 0, sizeof(acl_config)); + + /* Check sockets a context should be created on */ + if (!numa_on) + acl_config.mapped[0] = 1; + else { + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + socketid = rte_lcore_to_socket_id(lcore_id); + if (socketid >= NB_SOCKETS) { + acl_log("Socket %d of lcore %u is out " + "of range 
%d\n", + socketid, lcore_id, NB_SOCKETS); + free(route_base_ipv4); + free(route_base_ipv6); + free(acl_base_ipv4); + free(acl_base_ipv6); + return -1; + } + + acl_config.mapped[socketid] = 1; + } + } + + for (i = 0; i < NB_SOCKETS; i++) { + if (acl_config.mapped[i]) { + acl_config.acx_ipv4[i] = setup_acl(route_base_ipv4, + acl_base_ipv4, route_num_ipv4, acl_num_ipv4, + 0, i); + + acl_config.acx_ipv6[i] = setup_acl(route_base_ipv6, + acl_base_ipv6, route_num_ipv6, acl_num_ipv6, + 1, i); + } + } + + free(route_base_ipv4); + free(route_base_ipv6); + +#ifdef L3FWDACL_DEBUG + acl_config.rule_ipv4 = (struct acl4_rule *)acl_base_ipv4; + acl_config.rule_ipv6 = (struct acl6_rule *)acl_base_ipv6; +#else + free(acl_base_ipv4); + free(acl_base_ipv6); +#endif + + return 0; +} + +/***********************end of ACL part******************************/ + +struct lcore_conf { + uint16_t n_rx_queue; + struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t n_tx_port; + uint16_t tx_port_id[RTE_MAX_ETHPORTS]; + uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; + struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; +} __rte_cache_aligned; + +static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; + +/* Enqueue a single packet, and send burst if queue is filled */ +static inline void +send_single_packet(struct rte_mbuf *m, uint8_t port) +{ + uint32_t lcore_id; + struct lcore_conf *qconf; + + lcore_id = rte_lcore_id(); + + qconf = &lcore_conf[lcore_id]; + rte_eth_tx_buffer(port, qconf->tx_queue_id[port], + qconf->tx_buffer[port], m); +} + +#ifdef DO_RFC_1812_CHECKS +static inline int +is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len) +{ + /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ + /* + * 1. The packet length reported by the Link Layer must be large + * enough to hold the minimum length legal IP datagram (20 bytes). + */ + if (link_len < sizeof(struct ipv4_hdr)) + return -1; + + /* 2. The IP checksum must be correct. 
*/ + /* this is checked in H/W */ + + /* + * 3. The IP version number must be 4. If the version number is not 4 + * then the packet may be another version of IP, such as IPng or + * ST-II. + */ + if (((pkt->version_ihl) >> 4) != 4) + return -3; + /* + * 4. The IP header length field must be large enough to hold the + * minimum length legal IP datagram (20 bytes = 5 words). + */ + if ((pkt->version_ihl & 0xf) < 5) + return -4; + + /* + * 5. The IP total length field must be large enough to hold the IP + * datagram header, whose length is specified in the IP header length + * field. + */ + if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) + return -5; + + return 0; +} +#endif + +/* main processing loop */ +static int +main_loop(__attribute__((unused)) void *dummy) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + int i, nb_rx; + uint8_t portid, queueid; + struct lcore_conf *qconf; + int socketid; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) + / US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + lcore_id = rte_lcore_id(); + qconf = &lcore_conf[lcore_id]; + socketid = rte_lcore_to_socket_id(lcore_id); + + if (qconf->n_rx_queue == 0) { + RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + RTE_LOG(INFO, L3FWD, + " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", + lcore_id, portid, queueid); + } + + while (1) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + for (i = 0; i < qconf->n_tx_port; ++i) { + portid = qconf->tx_port_id[i]; + rte_eth_tx_buffer_flush(portid, + qconf->tx_queue_id[portid], + qconf->tx_buffer[portid]); + } + prev_tsc = cur_tsc; + } + + 
/* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_queue; ++i) { + + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + nb_rx = rte_eth_rx_burst(portid, queueid, + pkts_burst, MAX_PKT_BURST); + + if (nb_rx > 0) { + struct acl_search_t acl_search; + + prepare_acl_parameter(pkts_burst, &acl_search, + nb_rx); + + if (acl_search.num_ipv4) { + rte_acl_classify( + acl_config.acx_ipv4[socketid], + acl_search.data_ipv4, + acl_search.res_ipv4, + acl_search.num_ipv4, + DEFAULT_MAX_CATEGORIES); + + send_packets(acl_search.m_ipv4, + acl_search.res_ipv4, + acl_search.num_ipv4); + } + + if (acl_search.num_ipv6) { + rte_acl_classify( + acl_config.acx_ipv6[socketid], + acl_search.data_ipv6, + acl_search.res_ipv6, + acl_search.num_ipv6, + DEFAULT_MAX_CATEGORIES); + + send_packets(acl_search.m_ipv6, + acl_search.res_ipv6, + acl_search.num_ipv6); + } + } + } + } +} + +static int +check_lcore_params(void) +{ + uint8_t queue, lcore; + uint16_t i; + int socketid; + + for (i = 0; i < nb_lcore_params; ++i) { + queue = lcore_params[i].queue_id; + if (queue >= MAX_RX_QUEUE_PER_PORT) { + printf("invalid queue number: %hhu\n", queue); + return -1; + } + lcore = lcore_params[i].lcore_id; + if (!rte_lcore_is_enabled(lcore)) { + printf("error: lcore %hhu is not enabled in " + "lcore mask\n", lcore); + return -1; + } + socketid = rte_lcore_to_socket_id(lcore); + if (socketid != 0 && numa_on == 0) { + printf("warning: lcore %hhu is on socket %d " + "with numa off\n", + lcore, socketid); + } + } + return 0; +} + +static int +check_port_config(const unsigned nb_ports) +{ + unsigned portid; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + portid = lcore_params[i].port_id; + + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("port %u is not enabled in port mask\n", portid); + return -1; + } + if (portid >= nb_ports) { + printf("port %u is not present on the board\n", portid); + return -1; + } + } + return 0; +} + +static 
uint8_t +get_port_n_rx_queues(const uint8_t port) +{ + int queue = -1; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + if (lcore_params[i].port_id == port && + lcore_params[i].queue_id > queue) + queue = lcore_params[i].queue_id; + } + return (uint8_t)(++queue); +} + +static int +init_lcore_rx_queues(void) +{ + uint16_t i, nb_rx_queue; + uint8_t lcore; + + for (i = 0; i < nb_lcore_params; ++i) { + lcore = lcore_params[i].lcore_id; + nb_rx_queue = lcore_conf[lcore].n_rx_queue; + if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { + printf("error: too many queues (%u) for lcore: %u\n", + (unsigned)nb_rx_queue + 1, (unsigned)lcore); + return -1; + } else { + lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = + lcore_params[i].port_id; + lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = + lcore_params[i].queue_id; + lcore_conf[lcore].n_rx_queue++; + } + } + return 0; +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK -P" + "--"OPTION_RULE_IPV4"=FILE" + "--"OPTION_RULE_IPV6"=FILE" + " [--"OPTION_CONFIG" (port,queue,lcore)[,(port,queue,lcore]]" + " [--"OPTION_ENBJMO" [--max-pkt-len PKTLEN]]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -P : enable promiscuous mode\n" + " --"OPTION_CONFIG": (port,queue,lcore): " + "rx queues configuration\n" + " --"OPTION_NONUMA": optional, disable numa awareness\n" + " --"OPTION_ENBJMO": enable jumbo frame" + " which max packet len is PKTLEN in decimal (64-9600)\n" + " --"OPTION_RULE_IPV4"=FILE: specify the ipv4 rules entries " + "file. " + "Each rule occupy one line. " + "2 kinds of rules are supported. 
" + "One is ACL entry at while line leads with character '%c', " + "another is route entry at while line leads with " + "character '%c'.\n" + " --"OPTION_RULE_IPV6"=FILE: specify the ipv6 rules " + "entries file.\n" + " --"OPTION_SCALAR": Use scalar function to do lookup\n", + prgname, ACL_LEAD_CHAR, ROUTE_LEAD_CHAR); +} + +static int +parse_max_pkt_len(const char *pktlen) +{ + char *end = NULL; + unsigned long len; + + /* parse decimal string */ + len = strtoul(pktlen, &end, 10); + if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (len == 0) + return -1; + + return len; +} + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +static int +parse_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + enum fieldnames { + FLD_PORT = 0, + FLD_QUEUE, + FLD_LCORE, + _NUM_FLD + }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int i; + unsigned size; + + nb_lcore_params = 0; + + while ((p = strchr(p0, '(')) != NULL) { + ++p; + if ((p0 = strchr(p, ')')) == NULL) + return -1; + + size = p0 - p; + if (size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != + _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++) { + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + if (nb_lcore_params >= MAX_LCORE_PARAMS) { + printf("exceeded max number of lcore params: %hu\n", + nb_lcore_params); + return -1; + } + lcore_params_array[nb_lcore_params].port_id = + (uint8_t)int_fld[FLD_PORT]; + lcore_params_array[nb_lcore_params].queue_id = + (uint8_t)int_fld[FLD_QUEUE]; + lcore_params_array[nb_lcore_params].lcore_id 
= + (uint8_t)int_fld[FLD_LCORE]; + ++nb_lcore_params; + } + lcore_params = lcore_params_array; + return 0; +} + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {OPTION_CONFIG, 1, 0, 0}, + {OPTION_NONUMA, 0, 0, 0}, + {OPTION_ENBJMO, 0, 0, 0}, + {OPTION_RULE_IPV4, 1, 0, 0}, + {OPTION_RULE_IPV6, 1, 0, 0}, + {OPTION_SCALAR, 0, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:P", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + case 'P': + printf("Promiscuous mode selected\n"); + promiscuous_on = 1; + break; + + /* long options */ + case 0: + if (!strncmp(lgopts[option_index].name, + OPTION_CONFIG, + sizeof(OPTION_CONFIG))) { + ret = parse_config(optarg); + if (ret) { + printf("invalid config\n"); + print_usage(prgname); + return -1; + } + } + + if (!strncmp(lgopts[option_index].name, + OPTION_NONUMA, + sizeof(OPTION_NONUMA))) { + printf("numa is disabled\n"); + numa_on = 0; + } + + if (!strncmp(lgopts[option_index].name, + OPTION_ENBJMO, sizeof(OPTION_ENBJMO))) { + struct option lenopts = { + "max-pkt-len", + required_argument, + 0, + 0 + }; + + printf("jumbo frame is enabled\n"); + port_conf.rxmode.jumbo_frame = 1; + + /* + * if no max-pkt-len set, then use the + * default value ETHER_MAX_LEN + */ + if (0 == getopt_long(argc, argvopt, "", + &lenopts, &option_index)) { + ret = parse_max_pkt_len(optarg); + if ((ret < 64) || + (ret > MAX_JUMBO_PKT_LEN)) { + printf("invalid packet " + "length\n"); + print_usage(prgname); + return -1; + } + port_conf.rxmode.max_rx_pkt_len = ret; + } + printf("set jumbo frame max packet length " + "to %u\n", + 
(unsigned int) + port_conf.rxmode.max_rx_pkt_len); + } + + if (!strncmp(lgopts[option_index].name, + OPTION_RULE_IPV4, + sizeof(OPTION_RULE_IPV4))) + parm_config.rule_ipv4_name = optarg; + + if (!strncmp(lgopts[option_index].name, + OPTION_RULE_IPV6, + sizeof(OPTION_RULE_IPV6))) { + parm_config.rule_ipv6_name = optarg; + } + + if (!strncmp(lgopts[option_index].name, + OPTION_SCALAR, sizeof(OPTION_SCALAR))) + parm_config.scalar = 1; + + + break; + + default: + print_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, const struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +static int +init_mem(unsigned nb_mbuf) +{ + int socketid; + unsigned lcore_id; + char s[64]; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socketid = rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + if (socketid >= NB_SOCKETS) { + rte_exit(EXIT_FAILURE, + "Socket %d of lcore %u is out of range %d\n", + socketid, lcore_id, NB_SOCKETS); + } + if (pktmbuf_pool[socketid] == NULL) { + snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); + pktmbuf_pool[socketid] = + rte_pktmbuf_pool_create(s, nb_mbuf, + MEMPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, + socketid); + if (pktmbuf_pool[socketid] == NULL) + rte_exit(EXIT_FAILURE, + "Cannot init mbuf pool on socket %d\n", + socketid); + else + printf("Allocated mbuf pool on socket %d\n", + socketid); + } + } + return 0; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, 
count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +int +main(int argc, char **argv) +{ + struct lcore_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + int ret; + unsigned nb_ports; + uint16_t queueid; + unsigned lcore_id; + uint32_t n_tx_queue, nb_lcores; + uint8_t portid, nb_rx_queue, queue, socketid; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); + + if (check_lcore_params() < 0) + rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); + + ret = init_lcore_rx_queues(); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_lcore_rx_queues 
failed\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + if (check_port_config(nb_ports) < 0) + rte_exit(EXIT_FAILURE, "check_port_config failed\n"); + + /* Add ACL rules and route entries, build trie */ + if (app_acl_init() < 0) + rte_exit(EXIT_FAILURE, "app_acl_init failed\n"); + + nb_lcores = rte_lcore_count(); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + + /* init port */ + printf("Initializing port %d ... ", portid); + fflush(stdout); + + nb_rx_queue = get_port_n_rx_queues(portid); + n_tx_queue = nb_lcores; + if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) + n_tx_queue = MAX_TX_QUEUE_PER_PORT; + printf("Creating queues: nb_rxq=%d nb_txq=%u... ", + nb_rx_queue, (unsigned)n_tx_queue); + ret = rte_eth_dev_configure(portid, nb_rx_queue, + (uint16_t)n_tx_queue, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot configure device: err=%d, port=%d\n", + ret, portid); + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + printf(", "); + + /* init memory */ + ret = init_mem(NB_MBUF); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_mem failed\n"); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + /* Initialize TX buffers */ + qconf = &lcore_conf[lcore_id]; + qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", + RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, + rte_eth_dev_socket_id(portid)); + if (qconf->tx_buffer[portid] == NULL) + rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n", + (unsigned) portid); + + rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST); + } + + /* init one TX queue per couple (lcore,port) */ + queueid = 0; + for (lcore_id = 0; lcore_id < 
RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socketid = (uint8_t) + rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + if (port_conf.rxmode.jumbo_frame) + txconf->txq_flags = 0; + ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, + socketid, txconf); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_tx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + + qconf = &lcore_conf[lcore_id]; + qconf->tx_queue_id[portid] = queueid; + queueid++; + + qconf->tx_port_id[qconf->n_tx_port] = portid; + qconf->n_tx_port++; + } + printf("\n"); + } + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + qconf = &lcore_conf[lcore_id]; + printf("\nInitializing rx queues on lcore %u ... ", lcore_id); + fflush(stdout); + /* init RX queues */ + for (queue = 0; queue < qconf->n_rx_queue; ++queue) { + portid = qconf->rx_queue_list[queue].port_id; + queueid = qconf->rx_queue_list[queue].queue_id; + + if (numa_on) + socketid = (uint8_t) + rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("rxq=%d,%d,%d ", portid, queueid, socketid); + fflush(stdout); + + ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, + socketid, NULL, + pktmbuf_pool[socketid]); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_rx_queue_setup: err=%d," + "port=%d\n", ret, portid); + } + } + + printf("\n"); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + + /* + * If enabled, put device in promiscuous mode. 
+ * This allows IO forwarding mode to forward packets + * to itself through 2 cross-connected ports of the + * target machine. + */ + if (promiscuous_on) + rte_eth_promiscuous_enable(portid); + } + + check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/l3fwd-power/Makefile b/examples/l3fwd-power/Makefile new file mode 100644 index 00000000..783772a7 --- /dev/null +++ b/examples/l3fwd-power/Makefile @@ -0,0 +1,63 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(info This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +all: +else + +# binary name +APP = l3fwd-power + +# all sources are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk +endif diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c new file mode 100644 index 00000000..cb42bfb9 --- /dev/null +++ b/examples/l3fwd-power/main.c @@ -0,0 +1,1760 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <unistd.h> +#include <signal.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_string_fns.h> +#include <rte_timer.h> +#include <rte_power.h> +#include <rte_eal.h> +#include <rte_spinlock.h> + +#define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 + +#define MAX_PKT_BURST 32 + +#define MIN_ZERO_POLL_COUNT 10 + +/* around 100ms at 2 Ghz */ +#define TIMER_RESOLUTION_CYCLES 200000000ULL +/* 100 ms interval */ +#define TIMER_NUMBER_PER_SECOND 10 +/* 100000 us */ +#define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) +#define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 + +#define APP_LOOKUP_EXACT_MATCH 0 +#define APP_LOOKUP_LPM 1 +#define DO_RFC_1812_CHECKS + +#ifndef APP_LOOKUP_METHOD +#define APP_LOOKUP_METHOD APP_LOOKUP_LPM +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +#include <rte_hash.h> +#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +#include <rte_lpm.h> +#else +#error "APP_LOOKUP_METHOD set to incorrect value" +#endif + +#ifndef IPv6_BYTES +#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ + "%02x%02x:%02x%02x:%02x%02x:%02x%02x" +#define IPv6_BYTES(addr) 
\ + addr[0], addr[1], addr[2], addr[3], \ + addr[4], addr[5], addr[6], addr[7], \ + addr[8], addr[9], addr[10], addr[11],\ + addr[12], addr[13],addr[14], addr[15] +#endif + +#define MAX_JUMBO_PKT_LEN 9600 + +#define IPV6_ADDR_LEN 16 + +#define MEMPOOL_CACHE_SIZE 256 + +/* + * This expression is used to calculate the number of mbufs needed depending on + * user input, taking into account memory for rx and tx hardware rings, cache + * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that + * NB_MBUF never goes below a minimum value of 8192. + */ + +#define NB_MBUF RTE_MAX ( \ + (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ + nb_ports*nb_lcores*MAX_PKT_BURST + \ + nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ + nb_lcores*MEMPOOL_CACHE_SIZE), \ + (unsigned)8192) + +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +#define NB_SOCKETS 8 + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define PREFETCH_OFFSET 3 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* ethernet addresses of ports */ +static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint32_t enabled_port_mask = 0; +/* Ports set in promiscuous mode off by default. */ +static int promiscuous_on = 0; +/* NUMA is enabled by default. 
*/ +static int numa_on = 1; + +enum freq_scale_hint_t +{ + FREQ_LOWER = -1, + FREQ_CURRENT = 0, + FREQ_HIGHER = 1, + FREQ_HIGHEST = 2 +}; + +struct lcore_rx_queue { + uint8_t port_id; + uint8_t queue_id; + enum freq_scale_hint_t freq_up_hint; + uint32_t zero_rx_packet_count; + uint32_t idle_hint; +} __rte_cache_aligned; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS +#define MAX_RX_QUEUE_PER_PORT 128 + +#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 + + +#define MAX_LCORE_PARAMS 1024 +struct lcore_params { + uint8_t port_id; + uint8_t queue_id; + uint8_t lcore_id; +} __rte_cache_aligned; + +static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; +static struct lcore_params lcore_params_array_default[] = { + {0, 0, 2}, + {0, 1, 2}, + {0, 2, 2}, + {1, 0, 2}, + {1, 1, 2}, + {1, 2, 2}, + {2, 0, 2}, + {3, 0, 3}, + {3, 1, 3}, +}; + +static struct lcore_params * lcore_params = lcore_params_array_default; +static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / + sizeof(lcore_params_array_default[0]); + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_UDP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, + .intr_conf = { + .lsc = 1, + .rxq = 1, + }, +}; + +static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; + + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#include <rte_hash_crc.h> +#define DEFAULT_HASH_FUNC rte_hash_crc +#else +#include <rte_jhash.h> +#define DEFAULT_HASH_FUNC rte_jhash +#endif + +struct ipv4_5tuple { + 
uint32_t ip_dst; + uint32_t ip_src; + uint16_t port_dst; + uint16_t port_src; + uint8_t proto; +} __attribute__((__packed__)); + +struct ipv6_5tuple { + uint8_t ip_dst[IPV6_ADDR_LEN]; + uint8_t ip_src[IPV6_ADDR_LEN]; + uint16_t port_dst; + uint16_t port_src; + uint8_t proto; +} __attribute__((__packed__)); + +struct ipv4_l3fwd_route { + struct ipv4_5tuple key; + uint8_t if_out; +}; + +struct ipv6_l3fwd_route { + struct ipv6_5tuple key; + uint8_t if_out; +}; + +static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { + {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, + {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, + {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, + {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, +}; + +static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { + { + { + {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, + {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, + 1, 10, IPPROTO_UDP + }, 4 + }, +}; + +typedef struct rte_hash lookup_struct_t; +static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; +static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; + +#define L3FWD_HASH_ENTRIES 1024 + +#define IPV4_L3FWD_NUM_ROUTES \ + (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) + +#define IPV6_L3FWD_NUM_ROUTES \ + (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0])) + +static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; +static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +struct ipv4_l3fwd_route { + uint32_t ip; + uint8_t depth; + uint8_t if_out; +}; + +static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { + {IPv4(1,1,1,0), 24, 0}, + {IPv4(2,1,1,0), 24, 1}, + {IPv4(3,1,1,0), 24, 2}, + {IPv4(4,1,1,0), 24, 3}, + {IPv4(5,1,1,0), 
24, 4}, + {IPv4(6,1,1,0), 24, 5}, + {IPv4(7,1,1,0), 24, 6}, + {IPv4(8,1,1,0), 24, 7}, +}; + +#define IPV4_L3FWD_NUM_ROUTES \ + (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) + +#define IPV4_L3FWD_LPM_MAX_RULES 1024 + +typedef struct rte_lpm lookup_struct_t; +static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; +#endif + +struct lcore_conf { + uint16_t n_rx_queue; + struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t n_tx_port; + uint16_t tx_port_id[RTE_MAX_ETHPORTS]; + uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; + struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + lookup_struct_t * ipv4_lookup_struct; + lookup_struct_t * ipv6_lookup_struct; +} __rte_cache_aligned; + +struct lcore_stats { + /* total sleep time in ms since last frequency scaling down */ + uint32_t sleep_time; + /* number of long sleep recently */ + uint32_t nb_long_sleep; + /* freq. scaling up trend */ + uint32_t trend; + /* total packet processed recently */ + uint64_t nb_rx_processed; + /* total iterations looped recently */ + uint64_t nb_iteration_looped; + uint32_t padding[9]; +} __rte_cache_aligned; + +static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned; +static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned; +static struct rte_timer power_timers[RTE_MAX_LCORE]; + +static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count); +static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \ + unsigned lcore_id, uint8_t port_id, uint16_t queue_id); + +/* exit signal handler */ +static void +signal_exit_now(int sigtype) +{ + unsigned lcore_id; + int ret; + + if (sigtype == SIGINT) { + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + /* init power management library */ + ret = rte_power_exit(lcore_id); + if (ret) + rte_exit(EXIT_FAILURE, "Power management " + "library de-initialization failed on " + "core%u\n", lcore_id); + } 
+ } + + rte_exit(EXIT_SUCCESS, "User forced exit\n"); +} + +/* Frequency scale down timer callback */ +static void +power_timer_cb(__attribute__((unused)) struct rte_timer *tim, + __attribute__((unused)) void *arg) +{ + uint64_t hz; + float sleep_time_ratio; + unsigned lcore_id = rte_lcore_id(); + + /* accumulate total execution time in us when callback is invoked */ + sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / + (float)SCALING_PERIOD; + /** + * check whether need to scale down frequency a step if it sleep a lot. + */ + if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) { + if (rte_power_freq_down) + rte_power_freq_down(lcore_id); + } + else if ( (unsigned)(stats[lcore_id].nb_rx_processed / + stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { + /** + * scale down a step if average packet per iteration less + * than expectation. + */ + if (rte_power_freq_down) + rte_power_freq_down(lcore_id); + } + + /** + * initialize another timer according to current frequency to ensure + * timer interval is relatively fixed. + */ + hz = rte_get_timer_hz(); + rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND, + SINGLE, lcore_id, power_timer_cb, NULL); + + stats[lcore_id].nb_rx_processed = 0; + stats[lcore_id].nb_iteration_looped = 0; + + stats[lcore_id].sleep_time = 0; +} + +/* Enqueue a single packet, and send burst if queue is filled */ +static inline int +send_single_packet(struct rte_mbuf *m, uint8_t port) +{ + uint32_t lcore_id; + struct lcore_conf *qconf; + + lcore_id = rte_lcore_id(); + qconf = &lcore_conf[lcore_id]; + + rte_eth_tx_buffer(port, qconf->tx_queue_id[port], + qconf->tx_buffer[port], m); + + return 0; +} + +#ifdef DO_RFC_1812_CHECKS +static inline int +is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len) +{ + /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ + /* + * 1. The packet length reported by the Link Layer must be large + * enough to hold the minimum length legal IP datagram (20 bytes). 
+ */ + if (link_len < sizeof(struct ipv4_hdr)) + return -1; + + /* 2. The IP checksum must be correct. */ + /* this is checked in H/W */ + + /* + * 3. The IP version number must be 4. If the version number is not 4 + * then the packet may be another version of IP, such as IPng or + * ST-II. + */ + if (((pkt->version_ihl) >> 4) != 4) + return -3; + /* + * 4. The IP header length field must be large enough to hold the + * minimum length legal IP datagram (20 bytes = 5 words). + */ + if ((pkt->version_ihl & 0xf) < 5) + return -4; + + /* + * 5. The IP total length field must be large enough to hold the IP + * datagram header, whose length is specified in the IP header length + * field. + */ + if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) + return -5; + + return 0; +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +static void +print_ipv4_key(struct ipv4_5tuple key) +{ + printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " + "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, + key.port_dst, key.port_src, key.proto); +} +static void +print_ipv6_key(struct ipv6_5tuple key) +{ + printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " + "port dst = %d, port src = %d, proto = %d\n", + IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), + key.port_dst, key.port_src, key.proto); +} + +static inline uint8_t +get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, + lookup_struct_t * ipv4_l3fwd_lookup_struct) +{ + struct ipv4_5tuple key; + struct tcp_hdr *tcp; + struct udp_hdr *udp; + int ret = 0; + + key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); + key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); + key.proto = ipv4_hdr->next_proto_id; + + switch (ipv4_hdr->next_proto_id) { + case IPPROTO_TCP: + tcp = (struct tcp_hdr *)((unsigned char *)ipv4_hdr + + sizeof(struct ipv4_hdr)); + key.port_dst = rte_be_to_cpu_16(tcp->dst_port); + key.port_src = rte_be_to_cpu_16(tcp->src_port); + break; + + case 
IPPROTO_UDP: + udp = (struct udp_hdr *)((unsigned char *)ipv4_hdr + + sizeof(struct ipv4_hdr)); + key.port_dst = rte_be_to_cpu_16(udp->dst_port); + key.port_src = rte_be_to_cpu_16(udp->src_port); + break; + + default: + key.port_dst = 0; + key.port_src = 0; + break; + } + + /* Find destination port */ + ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); + return (uint8_t)((ret < 0)? portid : ipv4_l3fwd_out_if[ret]); +} + +static inline uint8_t +get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint8_t portid, + lookup_struct_t *ipv6_l3fwd_lookup_struct) +{ + struct ipv6_5tuple key; + struct tcp_hdr *tcp; + struct udp_hdr *udp; + int ret = 0; + + memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); + memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); + + key.proto = ipv6_hdr->proto; + + switch (ipv6_hdr->proto) { + case IPPROTO_TCP: + tcp = (struct tcp_hdr *)((unsigned char *) ipv6_hdr + + sizeof(struct ipv6_hdr)); + key.port_dst = rte_be_to_cpu_16(tcp->dst_port); + key.port_src = rte_be_to_cpu_16(tcp->src_port); + break; + + case IPPROTO_UDP: + udp = (struct udp_hdr *)((unsigned char *) ipv6_hdr + + sizeof(struct ipv6_hdr)); + key.port_dst = rte_be_to_cpu_16(udp->dst_port); + key.port_src = rte_be_to_cpu_16(udp->src_port); + break; + + default: + key.port_dst = 0; + key.port_src = 0; + break; + } + + /* Find destination port */ + ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); + return (uint8_t)((ret < 0)? portid : ipv6_l3fwd_out_if[ret]); +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +static inline uint8_t +get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, + lookup_struct_t *ipv4_l3fwd_lookup_struct) +{ + uint32_t next_hop; + + return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, + rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 
+ next_hop : portid); +} +#endif + +static inline void +l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, + struct lcore_conf *qconf) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ipv4_hdr; + void *d_addr_bytes; + uint8_t dst_port; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + /* Handle IPv4 headers.*/ + ipv4_hdr = + rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + +#ifdef DO_RFC_1812_CHECKS + /* Check to make sure the packet is valid (RFC1812) */ + if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { + rte_pktmbuf_free(m); + return; + } +#endif + + dst_port = get_ipv4_dst_port(ipv4_hdr, portid, + qconf->ipv4_lookup_struct); + if (dst_port >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + + /* 02:00:00:00:00:xx */ + d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0]; + *((uint64_t *)d_addr_bytes) = + 0x000000000002 + ((uint64_t)dst_port << 40); + +#ifdef DO_RFC_1812_CHECKS + /* Update time to live and header checksum */ + --(ipv4_hdr->time_to_live); + ++(ipv4_hdr->hdr_checksum); +#endif + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(m, dst_port); + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + /* Handle IPv6 headers.*/ +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + struct ipv6_hdr *ipv6_hdr; + + ipv6_hdr = + rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + dst_port = get_ipv6_dst_port(ipv6_hdr, portid, + qconf->ipv6_lookup_struct); + + if (dst_port >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + + /* 02:00:00:00:00:xx */ + d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0]; + *((uint64_t *)d_addr_bytes) = + 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(m, dst_port); +#else + /* We don't currently handle IPv6 
packets in LPM mode. */ + rte_pktmbuf_free(m); +#endif + } else + rte_pktmbuf_free(m); + +} + +#define MINIMUM_SLEEP_TIME 1 +#define SUSPEND_THRESHOLD 300 + +static inline uint32_t +power_idle_heuristic(uint32_t zero_rx_packet_count) +{ + /* If zero count is less than 100, sleep 1us */ + if (zero_rx_packet_count < SUSPEND_THRESHOLD) + return MINIMUM_SLEEP_TIME; + /* If zero count is less than 1000, sleep 100 us which is the + minimum latency switching from C3/C6 to C0 + */ + else + return SUSPEND_THRESHOLD; + + return 0; +} + +static inline enum freq_scale_hint_t +power_freq_scaleup_heuristic(unsigned lcore_id, + uint8_t port_id, + uint16_t queue_id) +{ +/** + * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries + * per iteration + */ +#define FREQ_GEAR1_RX_PACKET_THRESHOLD MAX_PKT_BURST +#define FREQ_GEAR2_RX_PACKET_THRESHOLD (MAX_PKT_BURST*2) +#define FREQ_GEAR3_RX_PACKET_THRESHOLD (MAX_PKT_BURST*3) +#define FREQ_UP_TREND1_ACC 1 +#define FREQ_UP_TREND2_ACC 100 +#define FREQ_UP_THRESHOLD 10000 + + if (likely(rte_eth_rx_descriptor_done(port_id, queue_id, + FREQ_GEAR3_RX_PACKET_THRESHOLD) > 0)) { + stats[lcore_id].trend = 0; + return FREQ_HIGHEST; + } else if (likely(rte_eth_rx_descriptor_done(port_id, queue_id, + FREQ_GEAR2_RX_PACKET_THRESHOLD) > 0)) + stats[lcore_id].trend += FREQ_UP_TREND2_ACC; + else if (likely(rte_eth_rx_descriptor_done(port_id, queue_id, + FREQ_GEAR1_RX_PACKET_THRESHOLD) > 0)) + stats[lcore_id].trend += FREQ_UP_TREND1_ACC; + + if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) { + stats[lcore_id].trend = 0; + return FREQ_HIGHER; + } + + return FREQ_CURRENT; +} + +/** + * force polling thread sleep until one-shot rx interrupt triggers + * @param port_id + * Port id. + * @param queue_id + * Rx queue id. 
+ * @return + * 0 on success + */ +static int +sleep_until_rx_interrupt(int num) +{ + struct rte_epoll_event event[num]; + int n, i; + uint8_t port_id, queue_id; + void *data; + + RTE_LOG(INFO, L3FWD_POWER, + "lcore %u sleeps until interrupt triggers\n", + rte_lcore_id()); + + n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1); + for (i = 0; i < n; i++) { + data = event[i].epdata.data; + port_id = ((uintptr_t)data) >> CHAR_BIT; + queue_id = ((uintptr_t)data) & + RTE_LEN2MASK(CHAR_BIT, uint8_t); + rte_eth_dev_rx_intr_disable(port_id, queue_id); + RTE_LOG(INFO, L3FWD_POWER, + "lcore %u is waked up from rx interrupt on" + " port %d queue %d\n", + rte_lcore_id(), port_id, queue_id); + } + + return 0; +} + +static void turn_on_intr(struct lcore_conf *qconf) +{ + int i; + struct lcore_rx_queue *rx_queue; + uint8_t port_id, queue_id; + + for (i = 0; i < qconf->n_rx_queue; ++i) { + rx_queue = &(qconf->rx_queue_list[i]); + port_id = rx_queue->port_id; + queue_id = rx_queue->queue_id; + + rte_spinlock_lock(&(locks[port_id])); + rte_eth_dev_rx_intr_enable(port_id, queue_id); + rte_spinlock_unlock(&(locks[port_id])); + } +} + +static int event_register(struct lcore_conf *qconf) +{ + struct lcore_rx_queue *rx_queue; + uint8_t portid, queueid; + uint32_t data; + int ret; + int i; + + for (i = 0; i < qconf->n_rx_queue; ++i) { + rx_queue = &(qconf->rx_queue_list[i]); + portid = rx_queue->port_id; + queueid = rx_queue->queue_id; + data = portid << CHAR_BIT | queueid; + + ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, + RTE_EPOLL_PER_THREAD, + RTE_INTR_EVENT_ADD, + (void *)((uintptr_t)data)); + if (ret) + return ret; + } + + return 0; +} + +/* main processing loop */ +static int +main_loop(__attribute__((unused)) void *dummy) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; + int i, j, nb_rx; + uint8_t portid, queueid; + struct lcore_conf *qconf; + 
struct lcore_rx_queue *rx_queue; + enum freq_scale_hint_t lcore_scaleup_hint; + uint32_t lcore_rx_idle_count = 0; + uint32_t lcore_idle_hint = 0; + int intr_en = 0; + + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_conf[lcore_id]; + + if (qconf->n_rx_queue == 0) { + RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_queue; i++) { + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%hhu " + "rxqueueid=%hhu\n", lcore_id, portid, queueid); + } + + /* add into event wait list */ + if (event_register(qconf) == 0) + intr_en = 1; + else + RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); + + while (1) { + stats[lcore_id].nb_iteration_looped++; + + cur_tsc = rte_rdtsc(); + cur_tsc_power = cur_tsc; + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + for (i = 0; i < qconf->n_tx_port; ++i) { + portid = qconf->tx_port_id[i]; + rte_eth_tx_buffer_flush(portid, + qconf->tx_queue_id[portid], + qconf->tx_buffer[portid]); + } + prev_tsc = cur_tsc; + } + + diff_tsc_power = cur_tsc_power - prev_tsc_power; + if (diff_tsc_power > TIMER_RESOLUTION_CYCLES) { + rte_timer_manage(); + prev_tsc_power = cur_tsc_power; + } + +start_rx: + /* + * Read packet from RX queues + */ + lcore_scaleup_hint = FREQ_CURRENT; + lcore_rx_idle_count = 0; + for (i = 0; i < qconf->n_rx_queue; ++i) { + rx_queue = &(qconf->rx_queue_list[i]); + rx_queue->idle_hint = 0; + portid = rx_queue->port_id; + queueid = rx_queue->queue_id; + + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, + MAX_PKT_BURST); + + stats[lcore_id].nb_rx_processed += nb_rx; + if (unlikely(nb_rx == 0)) { + /** + * no packet 
received from rx queue, try to + * sleep for a while forcing CPU enter deeper + * C states. + */ + rx_queue->zero_rx_packet_count++; + + if (rx_queue->zero_rx_packet_count <= + MIN_ZERO_POLL_COUNT) + continue; + + rx_queue->idle_hint = power_idle_heuristic(\ + rx_queue->zero_rx_packet_count); + lcore_rx_idle_count++; + } else { + rx_queue->zero_rx_packet_count = 0; + + /** + * do not scale up frequency immediately as + * user to kernel space communication is costly + * which might impact packet I/O for received + * packets. + */ + rx_queue->freq_up_hint = + power_freq_scaleup_heuristic(lcore_id, + portid, queueid); + } + + /* Prefetch first packets */ + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { + rte_prefetch0(rte_pktmbuf_mtod( + pkts_burst[j], void *)); + } + + /* Prefetch and forward already prefetched packets */ + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ + j + PREFETCH_OFFSET], void *)); + l3fwd_simple_forward(pkts_burst[j], portid, + qconf); + } + + /* Forward remaining prefetched packets */ + for (; j < nb_rx; j++) { + l3fwd_simple_forward(pkts_burst[j], portid, + qconf); + } + } + + if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { + for (i = 1, lcore_scaleup_hint = + qconf->rx_queue_list[0].freq_up_hint; + i < qconf->n_rx_queue; ++i) { + rx_queue = &(qconf->rx_queue_list[i]); + if (rx_queue->freq_up_hint > + lcore_scaleup_hint) + lcore_scaleup_hint = + rx_queue->freq_up_hint; + } + + if (lcore_scaleup_hint == FREQ_HIGHEST) { + if (rte_power_freq_max) + rte_power_freq_max(lcore_id); + } else if (lcore_scaleup_hint == FREQ_HIGHER) { + if (rte_power_freq_up) + rte_power_freq_up(lcore_id); + } + } else { + /** + * All Rx queues empty in recent consecutive polls, + * sleep in a conservative manner, meaning sleep as + * less as possible. 
+ */ + for (i = 1, lcore_idle_hint = + qconf->rx_queue_list[0].idle_hint; + i < qconf->n_rx_queue; ++i) { + rx_queue = &(qconf->rx_queue_list[i]); + if (rx_queue->idle_hint < lcore_idle_hint) + lcore_idle_hint = rx_queue->idle_hint; + } + + if (lcore_idle_hint < SUSPEND_THRESHOLD) + /** + * execute "pause" instruction to avoid context + * switch which generally takes hundreds of + * microseconds for short sleep. + */ + rte_delay_us(lcore_idle_hint); + else { + /* suspend until rx interrupt triggers */ + if (intr_en) { + turn_on_intr(qconf); + sleep_until_rx_interrupt( + qconf->n_rx_queue); + } + /* start receiving packets immediately */ + goto start_rx; + } + stats[lcore_id].sleep_time += lcore_idle_hint; + } + } +} + +static int +check_lcore_params(void) +{ + uint8_t queue, lcore; + uint16_t i; + int socketid; + + for (i = 0; i < nb_lcore_params; ++i) { + queue = lcore_params[i].queue_id; + if (queue >= MAX_RX_QUEUE_PER_PORT) { + printf("invalid queue number: %hhu\n", queue); + return -1; + } + lcore = lcore_params[i].lcore_id; + if (!rte_lcore_is_enabled(lcore)) { + printf("error: lcore %hhu is not enabled in lcore " + "mask\n", lcore); + return -1; + } + if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && + (numa_on == 0)) { + printf("warning: lcore %hhu is on socket %d with numa " + "off\n", lcore, socketid); + } + } + return 0; +} + +static int +check_port_config(const unsigned nb_ports) +{ + unsigned portid; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + portid = lcore_params[i].port_id; + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("port %u is not enabled in port mask\n", + portid); + return -1; + } + if (portid >= nb_ports) { + printf("port %u is not present on the board\n", + portid); + return -1; + } + } + return 0; +} + +static uint8_t +get_port_n_rx_queues(const uint8_t port) +{ + int queue = -1; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + if (lcore_params[i].port_id == port && + lcore_params[i].queue_id > 
queue) + queue = lcore_params[i].queue_id; + } + return (uint8_t)(++queue); +} + +static int +init_lcore_rx_queues(void) +{ + uint16_t i, nb_rx_queue; + uint8_t lcore; + + for (i = 0; i < nb_lcore_params; ++i) { + lcore = lcore_params[i].lcore_id; + nb_rx_queue = lcore_conf[lcore].n_rx_queue; + if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { + printf("error: too many queues (%u) for lcore: %u\n", + (unsigned)nb_rx_queue + 1, (unsigned)lcore); + return -1; + } else { + lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = + lcore_params[i].port_id; + lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = + lcore_params[i].queue_id; + lcore_conf[lcore].n_rx_queue++; + } + } + return 0; +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf ("%s [EAL options] -- -p PORTMASK -P" + " [--config (port,queue,lcore)[,(port,queue,lcore]]" + " [--enable-jumbo [--max-pkt-len PKTLEN]]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -P : enable promiscuous mode\n" + " --config (port,queue,lcore): rx queues configuration\n" + " --no-numa: optional, disable numa awareness\n" + " --enable-jumbo: enable jumbo frame" + " which max packet len is PKTLEN in decimal (64-9600)\n", + prgname); +} + +static int parse_max_pkt_len(const char *pktlen) +{ + char *end = NULL; + unsigned long len; + + /* parse decimal string */ + len = strtoul(pktlen, &end, 10); + if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (len == 0) + return -1; + + return len; +} + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +static int +parse_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + enum fieldnames { + FLD_PORT = 0, + FLD_QUEUE, + FLD_LCORE, + _NUM_FLD 
+ }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int i; + unsigned size; + + nb_lcore_params = 0; + + while ((p = strchr(p0,'(')) != NULL) { + ++p; + if((p0 = strchr(p,')')) == NULL) + return -1; + + size = p0 - p; + if(size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != + _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++){ + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > + 255) + return -1; + } + if (nb_lcore_params >= MAX_LCORE_PARAMS) { + printf("exceeded max number of lcore params: %hu\n", + nb_lcore_params); + return -1; + } + lcore_params_array[nb_lcore_params].port_id = + (uint8_t)int_fld[FLD_PORT]; + lcore_params_array[nb_lcore_params].queue_id = + (uint8_t)int_fld[FLD_QUEUE]; + lcore_params_array[nb_lcore_params].lcore_id = + (uint8_t)int_fld[FLD_LCORE]; + ++nb_lcore_params; + } + lcore_params = lcore_params_array; + + return 0; +} + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {"config", 1, 0, 0}, + {"no-numa", 0, 0, 0}, + {"enable-jumbo", 0, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:P", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + case 'P': + printf("Promiscuous mode selected\n"); + promiscuous_on = 1; + break; + + /* long options */ + case 0: + if (!strncmp(lgopts[option_index].name, "config", 6)) { + ret = parse_config(optarg); + if (ret) { + printf("invalid config\n"); + print_usage(prgname); + return -1; + } + } + + if 
(!strncmp(lgopts[option_index].name, + "no-numa", 7)) { + printf("numa is disabled \n"); + numa_on = 0; + } + + if (!strncmp(lgopts[option_index].name, + "enable-jumbo", 12)) { + struct option lenopts = + {"max-pkt-len", required_argument, \ + 0, 0}; + + printf("jumbo frame is enabled \n"); + port_conf.rxmode.jumbo_frame = 1; + + /** + * if no max-pkt-len set, use the default value + * ETHER_MAX_LEN + */ + if (0 == getopt_long(argc, argvopt, "", + &lenopts, &option_index)) { + ret = parse_max_pkt_len(optarg); + if ((ret < 64) || + (ret > MAX_JUMBO_PKT_LEN)){ + printf("invalid packet " + "length\n"); + print_usage(prgname); + return -1; + } + port_conf.rxmode.max_rx_pkt_len = ret; + } + printf("set jumbo frame " + "max packet length to %u\n", + (unsigned int)port_conf.rxmode.max_rx_pkt_len); + } + + break; + + default: + print_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, const struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +static void +setup_hash(int socketid) +{ + struct rte_hash_parameters ipv4_l3fwd_hash_params = { + .name = NULL, + .entries = L3FWD_HASH_ENTRIES, + .key_len = sizeof(struct ipv4_5tuple), + .hash_func = DEFAULT_HASH_FUNC, + .hash_func_init_val = 0, + }; + + struct rte_hash_parameters ipv6_l3fwd_hash_params = { + .name = NULL, + .entries = L3FWD_HASH_ENTRIES, + .key_len = sizeof(struct ipv6_5tuple), + .hash_func = DEFAULT_HASH_FUNC, + .hash_func_init_val = 0, + }; + + unsigned i; + int ret; + char s[64]; + + /* create ipv4 hash */ + snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); + ipv4_l3fwd_hash_params.name = s; + ipv4_l3fwd_hash_params.socket_id = socketid; + ipv4_l3fwd_lookup_struct[socketid] = + 
rte_hash_create(&ipv4_l3fwd_hash_params); + if (ipv4_l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " + "socket %d\n", socketid); + + /* create ipv6 hash */ + snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); + ipv6_l3fwd_hash_params.name = s; + ipv6_l3fwd_hash_params.socket_id = socketid; + ipv6_l3fwd_lookup_struct[socketid] = + rte_hash_create(&ipv6_l3fwd_hash_params); + if (ipv6_l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " + "socket %d\n", socketid); + + + /* populate the ipv4 hash */ + for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { + ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], + (void *) &ipv4_l3fwd_route_array[i].key); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" + "l3fwd hash on socket %d\n", i, socketid); + } + ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; + printf("Hash: Adding key\n"); + print_ipv4_key(ipv4_l3fwd_route_array[i].key); + } + + /* populate the ipv6 hash */ + for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { + ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], + (void *) &ipv6_l3fwd_route_array[i].key); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" + "l3fwd hash on socket %d\n", i, socketid); + } + ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; + printf("Hash: Adding key\n"); + print_ipv6_key(ipv6_l3fwd_route_array[i].key); + } +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +static void +setup_lpm(int socketid) +{ + unsigned i; + int ret; + char s[64]; + + /* create the LPM table */ + struct rte_lpm_config lpm_ipv4_config; + + lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; + lpm_ipv4_config.number_tbl8s = 256; + lpm_ipv4_config.flags = 0; + + snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); + ipv4_l3fwd_lookup_struct[socketid] = + rte_lpm_create(s, socketid, &lpm_ipv4_config); + if 
(ipv4_l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" + " on socket %d\n", socketid); + + /* populate the LPM table */ + for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { + ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], + ipv4_l3fwd_route_array[i].ip, + ipv4_l3fwd_route_array[i].depth, + ipv4_l3fwd_route_array[i].if_out); + + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " + "l3fwd LPM table on socket %d\n", + i, socketid); + } + + printf("LPM: Adding route 0x%08x / %d (%d)\n", + (unsigned)ipv4_l3fwd_route_array[i].ip, + ipv4_l3fwd_route_array[i].depth, + ipv4_l3fwd_route_array[i].if_out); + } +} +#endif + +static int +init_mem(unsigned nb_mbuf) +{ + struct lcore_conf *qconf; + int socketid; + unsigned lcore_id; + char s[64]; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socketid = rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + if (socketid >= NB_SOCKETS) { + rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " + "out of range %d\n", socketid, + lcore_id, NB_SOCKETS); + } + if (pktmbuf_pool[socketid] == NULL) { + snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); + pktmbuf_pool[socketid] = + rte_pktmbuf_pool_create(s, nb_mbuf, + MEMPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, + socketid); + if (pktmbuf_pool[socketid] == NULL) + rte_exit(EXIT_FAILURE, + "Cannot init mbuf pool on socket %d\n", + socketid); + else + printf("Allocated mbuf pool on socket %d\n", + socketid); + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) + setup_lpm(socketid); +#else + setup_hash(socketid); +#endif + } + qconf = &lcore_conf[lcore_id]; + qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; +#endif + } + return 0; +} + +/* Check the link status of all ports in up to 9s, and print 
them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +int +main(int argc, char **argv) +{ + struct lcore_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + int ret; + unsigned nb_ports; + uint16_t queueid; + unsigned lcore_id; + uint64_t hz; + uint32_t n_tx_queue, nb_lcores; + uint32_t dev_rxq_num, dev_txq_num; + uint8_t portid, nb_rx_queue, queue, socketid; + + /* catch SIGINT and restore cpufreq governor to ondemand */ + signal(SIGINT, signal_exit_now); + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + argc -= ret; + 
argv += ret; + + /* init RTE timer library to be used late */ + rte_timer_subsystem_init(); + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); + + if (check_lcore_params() < 0) + rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); + + ret = init_lcore_rx_queues(); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); + + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + if (check_port_config(nb_ports) < 0) + rte_exit(EXIT_FAILURE, "check_port_config failed\n"); + + nb_lcores = rte_lcore_count(); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + + /* init port */ + printf("Initializing port %d ... ", portid ); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + dev_rxq_num = dev_info.max_rx_queues; + dev_txq_num = dev_info.max_tx_queues; + + nb_rx_queue = get_port_n_rx_queues(portid); + if (nb_rx_queue > dev_rxq_num) + rte_exit(EXIT_FAILURE, + "Cannot configure not existed rxq: " + "port=%d\n", portid); + + n_tx_queue = nb_lcores; + if (n_tx_queue > dev_txq_num) + n_tx_queue = dev_txq_num; + printf("Creating queues: nb_rxq=%d nb_txq=%u... 
", + nb_rx_queue, (unsigned)n_tx_queue ); + ret = rte_eth_dev_configure(portid, nb_rx_queue, + (uint16_t)n_tx_queue, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: " + "err=%d, port=%d\n", ret, portid); + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + printf(", "); + + /* init memory */ + ret = init_mem(NB_MBUF); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_mem failed\n"); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + /* Initialize TX buffers */ + qconf = &lcore_conf[lcore_id]; + qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", + RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, + rte_eth_dev_socket_id(portid)); + if (qconf->tx_buffer[portid] == NULL) + rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n", + (unsigned) portid); + + rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST); + } + + /* init one TX queue per couple (lcore,port) */ + queueid = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (queueid >= dev_txq_num) + continue; + + if (numa_on) + socketid = \ + (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + if (port_conf.rxmode.jumbo_frame) + txconf->txq_flags = 0; + ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, + socketid, txconf); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_tx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + + qconf = &lcore_conf[lcore_id]; + qconf->tx_queue_id[portid] = queueid; + queueid++; + + qconf->tx_port_id[qconf->n_tx_port] = portid; + qconf->n_tx_port++; + } + printf("\n"); + } + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if 
(rte_lcore_is_enabled(lcore_id) == 0) + continue; + + /* init power management library */ + ret = rte_power_init(lcore_id); + if (ret) + RTE_LOG(ERR, POWER, + "Library initialization failed on core %u\n", lcore_id); + + /* init timer structures for each enabled lcore */ + rte_timer_init(&power_timers[lcore_id]); + hz = rte_get_timer_hz(); + rte_timer_reset(&power_timers[lcore_id], + hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id, + power_timer_cb, NULL); + + qconf = &lcore_conf[lcore_id]; + printf("\nInitializing rx queues on lcore %u ... ", lcore_id ); + fflush(stdout); + /* init RX queues */ + for(queue = 0; queue < qconf->n_rx_queue; ++queue) { + portid = qconf->rx_queue_list[queue].port_id; + queueid = qconf->rx_queue_list[queue].queue_id; + + if (numa_on) + socketid = \ + (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("rxq=%d,%d,%d ", portid, queueid, socketid); + fflush(stdout); + + ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, + socketid, NULL, + pktmbuf_pool[socketid]); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_rx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + } + } + + printf("\n"); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) { + continue; + } + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " + "port=%d\n", ret, portid); + /* + * If enabled, put device in promiscuous mode. + * This allows IO forwarding mode to forward packets + * to itself through 2 cross-connected ports of the + * target machine. 
+ */ + if (promiscuous_on) + rte_eth_promiscuous_enable(portid); + /* initialize spinlock for each port */ + rte_spinlock_init(&(locks[portid])); + } + + check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/l3fwd-vf/Makefile b/examples/l3fwd-vf/Makefile new file mode 100644 index 00000000..d97611cf --- /dev/null +++ b/examples/l3fwd-vf/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l3fwd-vf + +# all sources are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 $(USER_FLAGS) +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l3fwd-vf/main.c b/examples/l3fwd-vf/main.c new file mode 100644 index 00000000..034c22a7 --- /dev/null +++ b/examples/l3fwd-vf/main.c @@ -0,0 +1,1097 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <signal.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_spinlock.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_string_fns.h> + +#define APP_LOOKUP_EXACT_MATCH 0 +#define APP_LOOKUP_LPM 1 +#define DO_RFC_1812_CHECKS + +//#define APP_LOOKUP_METHOD APP_LOOKUP_EXACT_MATCH +#ifndef APP_LOOKUP_METHOD +#define APP_LOOKUP_METHOD APP_LOOKUP_LPM +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +#include <rte_hash.h> +#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +#include <rte_lpm.h> +#else +#error "APP_LOOKUP_METHOD set to incorrect value" +#endif + +#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 + +#define MEMPOOL_CACHE_SIZE 256 + +/* + * This expression is used to calculate the number of mbufs needed depending on user input, taking + * into account memory for rx and tx hardware rings, cache per lcore and mtable per port per lcore. 
+ * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum value of 8192 + */ + +#define NB_MBUF RTE_MAX ( \ + (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ + nb_ports*nb_lcores*MAX_PKT_BURST + \ + nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ + nb_lcores*MEMPOOL_CACHE_SIZE), \ + (unsigned)8192) + +/* + * RX and TX Prefetch, Host, and Write-back threshold values should be + * carefully set for optimal performance. Consult the network + * controller's datasheet and supporting DPDK documentation for guidance + * on how these parameters should be set. + */ +#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ +#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ +#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ + +/* + * These default values are optimized for use with the Intel(R) 82599 10 GbE + * Controller and the DPDK ixgbe PMD. Consider using other values for other + * network controllers and/or network drivers. + */ +#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ +#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ +#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +#define NB_SOCKETS 8 + +#define SOCKET0 0 + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define PREFETCH_OFFSET 3 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint32_t enabled_port_mask = 0; +static int numa_on = 1; /**< NUMA is enabled by default. 
*/ + +struct mbuf_table { + uint16_t len; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +struct lcore_rx_queue { + uint8_t port_id; + uint8_t queue_id; +} __rte_cache_aligned; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 1 +#define MAX_RX_QUEUE_PER_PORT 1 + +#define MAX_LCORE_PARAMS 1024 +struct lcore_params { + uint8_t port_id; + uint8_t queue_id; + uint8_t lcore_id; +} __rte_cache_aligned; + +static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; +static struct lcore_params lcore_params_array_default[] = { + {0, 0, 2}, + {0, 1, 2}, + {0, 2, 2}, + {1, 0, 2}, + {1, 1, 2}, + {1, 2, 2}, + {2, 0, 2}, + {3, 0, 3}, + {3, 1, 3}, +}; + +static struct lcore_params * lcore_params = lcore_params_array_default; +static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / + sizeof(lcore_params_array_default[0]); + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; + + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#include <rte_hash_crc.h> +#define DEFAULT_HASH_FUNC rte_hash_crc +#else +#include <rte_jhash.h> +#define DEFAULT_HASH_FUNC rte_jhash +#endif + +struct ipv4_5tuple { + uint32_t ip_dst; + uint32_t ip_src; + uint16_t port_dst; + uint16_t port_src; + uint8_t proto; +} __attribute__((__packed__)); + +struct l3fwd_route { + struct ipv4_5tuple key; + uint8_t if_out; +}; + +static struct l3fwd_route l3fwd_route_array[] = { + 
{{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, + {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, + {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, + {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, +}; + +typedef struct rte_hash lookup_struct_t; +static lookup_struct_t *l3fwd_lookup_struct[NB_SOCKETS]; + +#define L3FWD_HASH_ENTRIES 1024 +struct rte_hash_parameters l3fwd_hash_params = { + .name = "l3fwd_hash_0", + .entries = L3FWD_HASH_ENTRIES, + .key_len = sizeof(struct ipv4_5tuple), + .hash_func = DEFAULT_HASH_FUNC, + .hash_func_init_val = 0, + .socket_id = SOCKET0, +}; + +#define L3FWD_NUM_ROUTES \ + (sizeof(l3fwd_route_array) / sizeof(l3fwd_route_array[0])) + +static uint8_t l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +struct l3fwd_route { + uint32_t ip; + uint8_t depth; + uint8_t if_out; +}; + +static struct l3fwd_route l3fwd_route_array[] = { + {IPv4(1,1,1,0), 24, 0}, + {IPv4(2,1,1,0), 24, 1}, + {IPv4(3,1,1,0), 24, 2}, + {IPv4(4,1,1,0), 24, 3}, + {IPv4(5,1,1,0), 24, 4}, + {IPv4(6,1,1,0), 24, 5}, + {IPv4(7,1,1,0), 24, 6}, + {IPv4(8,1,1,0), 24, 7}, +}; + +#define L3FWD_NUM_ROUTES \ + (sizeof(l3fwd_route_array) / sizeof(l3fwd_route_array[0])) + +#define L3FWD_LPM_MAX_RULES 1024 + +typedef struct rte_lpm lookup_struct_t; +static lookup_struct_t *l3fwd_lookup_struct[NB_SOCKETS]; +#endif + +struct lcore_conf { + uint16_t n_rx_queue; + struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t tx_queue_id; + struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; + lookup_struct_t * lookup_struct; +} __rte_cache_aligned; + +static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; +static rte_spinlock_t spinlock_conf[RTE_MAX_ETHPORTS] = {RTE_SPINLOCK_INITIALIZER}; +/* Send burst of packets on an output interface */ +static inline int +send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port) +{ + struct rte_mbuf **m_table; + int ret; + 
uint16_t queueid; + + queueid = qconf->tx_queue_id; + m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; + + rte_spinlock_lock(&spinlock_conf[port]); + ret = rte_eth_tx_burst(port, queueid, m_table, n); + rte_spinlock_unlock(&spinlock_conf[port]); + + if (unlikely(ret < n)) { + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < n); + } + + return 0; +} + +/* Enqueue a single packet, and send burst if queue is filled */ +static inline int +send_single_packet(struct rte_mbuf *m, uint8_t port) +{ + uint32_t lcore_id; + uint16_t len; + struct lcore_conf *qconf; + + lcore_id = rte_lcore_id(); + + qconf = &lcore_conf[lcore_id]; + len = qconf->tx_mbufs[port].len; + qconf->tx_mbufs[port].m_table[len] = m; + len++; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + send_burst(qconf, MAX_PKT_BURST, port); + len = 0; + } + + qconf->tx_mbufs[port].len = len; + return 0; +} + +#ifdef DO_RFC_1812_CHECKS +static inline int +is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len) +{ + /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ + /* + * 1. The packet length reported by the Link Layer must be large + * enough to hold the minimum length legal IP datagram (20 bytes). + */ + if (link_len < sizeof(struct ipv4_hdr)) + return -1; + + /* 2. The IP checksum must be correct. */ + /* this is checked in H/W */ + + /* + * 3. The IP version number must be 4. If the version number is not 4 + * then the packet may be another version of IP, such as IPng or + * ST-II. + */ + if (((pkt->version_ihl) >> 4) != 4) + return -3; + /* + * 4. The IP header length field must be large enough to hold the + * minimum length legal IP datagram (20 bytes = 5 words). + */ + if ((pkt->version_ihl & 0xf) < 5) + return -4; + + /* + * 5. The IP total length field must be large enough to hold the IP + * datagram header, whose length is specified in the IP header length + * field. 
+ */ + if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) + return -5; + + return 0; +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +static void +print_key(struct ipv4_5tuple key) +{ + printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, proto = %d\n", + (unsigned)key.ip_dst, (unsigned)key.ip_src, key.port_dst, key.port_src, key.proto); +} + +static inline uint8_t +get_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, lookup_struct_t * l3fwd_lookup_struct) +{ + struct ipv4_5tuple key; + struct tcp_hdr *tcp; + struct udp_hdr *udp; + int ret = 0; + + key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); + key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); + key.proto = ipv4_hdr->next_proto_id; + + switch (ipv4_hdr->next_proto_id) { + case IPPROTO_TCP: + tcp = (struct tcp_hdr *)((unsigned char *) ipv4_hdr + + sizeof(struct ipv4_hdr)); + key.port_dst = rte_be_to_cpu_16(tcp->dst_port); + key.port_src = rte_be_to_cpu_16(tcp->src_port); + break; + + case IPPROTO_UDP: + udp = (struct udp_hdr *)((unsigned char *) ipv4_hdr + + sizeof(struct ipv4_hdr)); + key.port_dst = rte_be_to_cpu_16(udp->dst_port); + key.port_src = rte_be_to_cpu_16(udp->src_port); + break; + + default: + key.port_dst = 0; + key.port_src = 0; + } + + /* Find destination port */ + ret = rte_hash_lookup(l3fwd_lookup_struct, (const void *)&key); + return (uint8_t)((ret < 0)? portid : l3fwd_out_if[ret]); +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +static inline uint8_t +get_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, lookup_struct_t * l3fwd_lookup_struct) +{ + uint32_t next_hop; + + return (uint8_t) ((rte_lpm_lookup(l3fwd_lookup_struct, + rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 
+ next_hop : portid); +} +#endif + +static inline void +l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, lookup_struct_t * l3fwd_lookup_struct) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ipv4_hdr; + void *tmp; + uint8_t dst_port; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + +#ifdef DO_RFC_1812_CHECKS + /* Check to make sure the packet is valid (RFC1812) */ + if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { + rte_pktmbuf_free(m); + return; + } +#endif + + dst_port = get_dst_port(ipv4_hdr, portid, l3fwd_lookup_struct); + if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + + /* 02:00:00:00:00:xx */ + tmp = ð_hdr->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + +#ifdef DO_RFC_1812_CHECKS + /* Update time to live and header checksum */ + --(ipv4_hdr->time_to_live); + ++(ipv4_hdr->hdr_checksum); +#endif + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr); + + send_single_packet(m, dst_port); + +} + +/* main processing loop */ +static int +main_loop(__attribute__((unused)) void *dummy) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + int i, j, nb_rx; + uint8_t portid, queueid; + struct lcore_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_conf[lcore_id]; + + if (qconf->n_rx_queue == 0) { + RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", 
lcore_id, + portid, queueid); + } + + while (1) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + /* + * This could be optimized (use queueid instead of + * portid), but it is not called so often + */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + if (qconf->tx_mbufs[portid].len == 0) + continue; + send_burst(&lcore_conf[lcore_id], + qconf->tx_mbufs[portid].len, + portid); + qconf->tx_mbufs[portid].len = 0; + } + + prev_tsc = cur_tsc; + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_queue; ++i) { + + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, MAX_PKT_BURST); + + /* Prefetch first packets */ + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { + rte_prefetch0(rte_pktmbuf_mtod( + pkts_burst[j], void *)); + } + + /* Prefetch and forward already prefetched packets */ + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ + j + PREFETCH_OFFSET], void *)); + l3fwd_simple_forward(pkts_burst[j], portid, qconf->lookup_struct); + } + + /* Forward remaining prefetched packets */ + for (; j < nb_rx; j++) { + l3fwd_simple_forward(pkts_burst[j], portid, qconf->lookup_struct); + } + } + } +} + +static int +check_lcore_params(void) +{ + uint8_t queue, lcore; + uint16_t i; + int socketid; + + for (i = 0; i < nb_lcore_params; ++i) { + queue = lcore_params[i].queue_id; + if (queue >= MAX_RX_QUEUE_PER_PORT) { + printf("invalid queue number: %hhu\n", queue); + return -1; + } + lcore = lcore_params[i].lcore_id; + if (!rte_lcore_is_enabled(lcore)) { + printf("error: lcore %hhu is not enabled in lcore mask\n", lcore); + return -1; + } + if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && + (numa_on == 0)) { + printf("warning: lcore %hhu is on socket %d with numa off \n", + lcore, socketid); + } + } + return 0; 
+} + +static int +check_port_config(const unsigned nb_ports) +{ + unsigned portid; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + portid = lcore_params[i].port_id; + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("port %u is not enabled in port mask\n", portid); + return -1; + } + if (portid >= nb_ports) { + printf("port %u is not present on the board\n", portid); + return -1; + } + } + return 0; +} + +static uint8_t +get_port_n_rx_queues(const uint8_t port) +{ + int queue = -1; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue) + queue = lcore_params[i].queue_id; + } + return (uint8_t)(++queue); +} + +static int +init_lcore_rx_queues(void) +{ + uint16_t i, nb_rx_queue; + uint8_t lcore; + + for (i = 0; i < nb_lcore_params; ++i) { + lcore = lcore_params[i].lcore_id; + nb_rx_queue = lcore_conf[lcore].n_rx_queue; + if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { + printf("error: too many queues (%u) for lcore: %u\n", + (unsigned)nb_rx_queue + 1, (unsigned)lcore); + return -1; + } else { + lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = + lcore_params[i].port_id; + lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = + lcore_params[i].queue_id; + lcore_conf[lcore].n_rx_queue++; + } + } + return 0; +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf ("%s [EAL options] -- -p PORTMASK" + " [--config (port,queue,lcore)[,(port,queue,lcore]]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " --config (port,queue,lcore): rx queues configuration\n" + " --no-numa: optional, disable numa awareness\n", + prgname); +} + +/* Custom handling of signals to handle process terminal */ +static void +signal_handler(int signum) +{ + uint8_t portid; + uint8_t nb_ports = rte_eth_dev_count(); + + /* When we receive a SIGINT signal */ + if (signum == SIGINT) { + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that 
are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + rte_eth_dev_close(portid); + } + } + rte_exit(EXIT_SUCCESS, "\n User forced exit\n"); +} +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +static int +parse_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + enum fieldnames { + FLD_PORT = 0, + FLD_QUEUE, + FLD_LCORE, + _NUM_FLD + }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int i; + unsigned size; + + nb_lcore_params = 0; + + while ((p = strchr(p0,'(')) != NULL) { + ++p; + if((p0 = strchr(p,')')) == NULL) + return -1; + + size = p0 - p; + if(size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++){ + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + if (nb_lcore_params >= MAX_LCORE_PARAMS) { + printf("exceeded max number of lcore params: %hu\n", + nb_lcore_params); + return -1; + } + lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT]; + lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE]; + lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE]; + ++nb_lcore_params; + } + lcore_params = lcore_params_array; + return 0; +} + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {"config", 1, 0, 0}, + {"no-numa", 0, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt 
= getopt_long(argc, argvopt, "p:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + + /* long options */ + case 0: + if (!strcmp(lgopts[option_index].name, "config")) { + ret = parse_config(optarg); + if (ret) { + printf("invalid config\n"); + print_usage(prgname); + return -1; + } + } + + if (!strcmp(lgopts[option_index].name, "no-numa")) { + printf("numa is disabled \n"); + numa_on = 0; + } + break; + + default: + print_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, const struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +static void +setup_hash(int socketid) +{ + unsigned i; + int ret; + char s[64]; + + /* create hashes */ + snprintf(s, sizeof(s), "l3fwd_hash_%d", socketid); + l3fwd_hash_params.name = s; + l3fwd_hash_params.socket_id = socketid; + l3fwd_lookup_struct[socketid] = rte_hash_create(&l3fwd_hash_params); + if (l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " + "socket %d\n", socketid); + + /* populate the hash */ + for (i = 0; i < L3FWD_NUM_ROUTES; i++) { + ret = rte_hash_add_key (l3fwd_lookup_struct[socketid], + (void *) &l3fwd_route_array[i].key); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" + "l3fwd hash on socket %d\n", i, socketid); + } + l3fwd_out_if[ret] = l3fwd_route_array[i].if_out; + printf("Hash: Adding key\n"); + print_key(l3fwd_route_array[i].key); + } +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +static void +setup_lpm(int socketid) +{ + 
unsigned i; + int ret; + char s[64]; + + struct rte_lpm_config lpm_ipv4_config; + + lpm_ipv4_config.max_rules = L3FWD_LPM_MAX_RULES; + lpm_ipv4_config.number_tbl8s = 256; + lpm_ipv4_config.flags = 0; + + /* create the LPM table */ + snprintf(s, sizeof(s), "L3FWD_LPM_%d", socketid); + l3fwd_lookup_struct[socketid] = + rte_lpm_create(s, socketid, &lpm_ipv4_config); + if (l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" + " on socket %d\n", socketid); + + /* populate the LPM table */ + for (i = 0; i < L3FWD_NUM_ROUTES; i++) { + ret = rte_lpm_add(l3fwd_lookup_struct[socketid], + l3fwd_route_array[i].ip, + l3fwd_route_array[i].depth, + l3fwd_route_array[i].if_out); + + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " + "l3fwd LPM table on socket %d\n", + i, socketid); + } + + printf("LPM: Adding route 0x%08x / %d (%d)\n", + (unsigned)l3fwd_route_array[i].ip, + l3fwd_route_array[i].depth, + l3fwd_route_array[i].if_out); + } +} +#endif + +static int +init_mem(unsigned nb_mbuf) +{ + struct lcore_conf *qconf; + int socketid; + unsigned lcore_id; + char s[64]; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socketid = rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + if (socketid >= NB_SOCKETS) { + rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n", + socketid, lcore_id, NB_SOCKETS); + } + if (pktmbuf_pool[socketid] == NULL) { + snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); + pktmbuf_pool[socketid] = rte_pktmbuf_pool_create(s, + nb_mbuf, MEMPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, socketid); + if (pktmbuf_pool[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n", socketid); + else + printf("Allocated mbuf pool on socket %d\n", socketid); + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) + setup_lpm(socketid); +#else + setup_hash(socketid); +#endif 
+ } + qconf = &lcore_conf[lcore_id]; + qconf->lookup_struct = l3fwd_lookup_struct[socketid]; + } + return 0; +} + +int +main(int argc, char **argv) +{ + struct lcore_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + int ret; + unsigned nb_ports; + uint16_t queueid; + unsigned lcore_id; + uint32_t nb_lcores; + uint16_t n_tx_queue; + uint8_t portid, nb_rx_queue, queue, socketid; + + signal(SIGINT, signal_handler); + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L3FWD-VF parameters\n"); + + if (check_lcore_params() < 0) + rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); + + ret = init_lcore_rx_queues(); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + if (check_port_config(nb_ports) < 0) + rte_exit(EXIT_FAILURE, "check_port_config failed\n"); + + nb_lcores = rte_lcore_count(); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + + /* init port */ + printf("Initializing port %d ... ", portid ); + fflush(stdout); + + /* must always equal(=1) */ + nb_rx_queue = get_port_n_rx_queues(portid); + n_tx_queue = MAX_TX_QUEUE_PER_PORT; + + printf("Creating queues: nb_rxq=%d nb_txq=%u... 
", + nb_rx_queue, (unsigned)1 ); + ret = rte_eth_dev_configure(portid, nb_rx_queue, n_tx_queue, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", + ret, portid); + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + printf(", "); + + ret = init_mem(NB_MBUF); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_mem failed\n"); + + /* init one TX queue */ + socketid = (uint8_t)rte_lcore_to_socket_id(rte_get_master_lcore()); + + printf("txq=%d,%d,%d ", portid, 0, socketid); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + if (port_conf.rxmode.jumbo_frame) + txconf->txq_flags = 0; + ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, + socketid, txconf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + + printf("\n"); + } + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + qconf = &lcore_conf[lcore_id]; + qconf->tx_queue_id = 0; + + printf("\nInitializing rx queues on lcore %u ... 
", lcore_id ); + fflush(stdout); + /* init RX queues */ + for(queue = 0; queue < qconf->n_rx_queue; ++queue) { + portid = qconf->rx_queue_list[queue].port_id; + queueid = qconf->rx_queue_list[queue].queue_id; + + if (numa_on) + socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("rxq=%d,%d,%d ", portid, queueid, socketid); + fflush(stdout); + + ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, + socketid, NULL, + pktmbuf_pool[socketid]); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d," + "port=%d\n", ret, portid); + } + } + printf("\n"); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) { + continue; + } + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + + printf("done: Port %d\n", portid); + + } + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile new file mode 100644 index 00000000..5ce0ce05 --- /dev/null +++ b/examples/l3fwd/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l3fwd + +# all source are stored in SRCS-y +SRCS-y := main.c l3fwd_lpm.c l3fwd_em.c + +CFLAGS += -I$(SRCDIR) +CFLAGS += -O3 $(USER_FLAGS) +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h new file mode 100644 index 00000000..d8798b7d --- /dev/null +++ b/examples/l3fwd/l3fwd.h @@ -0,0 +1,241 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __L3_FWD_H__ +#define __L3_FWD_H__ + +#include <rte_vect.h> + +#define DO_RFC_1812_CHECKS + +#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 + +#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON) +#define NO_HASH_MULTI_LOOKUP 1 +#endif + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +#define MAX_RX_QUEUE_PER_LCORE 16 + +/* + * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send. 
+ */ +#define MAX_TX_BURST (MAX_PKT_BURST / 2) + +#define NB_SOCKETS 8 + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define PREFETCH_OFFSET 3 + +/* Used to mark destination port as 'invalid'. */ +#define BAD_PORT ((uint16_t)-1) + +#define FWDSTEP 4 + +/* replace first 12B of the ethernet header. */ +#define MASK_ETH 0x3f + +/* Hash parameters. */ +#ifdef RTE_ARCH_X86_64 +/* default to 4 million hash entries (approx) */ +#define L3FWD_HASH_ENTRIES (1024*1024*4) +#else +/* 32-bit has less address-space for hugepage memory, limit to 1M entries */ +#define L3FWD_HASH_ENTRIES (1024*1024*1) +#endif +#define HASH_ENTRY_NUMBER_DEFAULT 4 + +struct mbuf_table { + uint16_t len; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +struct lcore_rx_queue { + uint8_t port_id; + uint8_t queue_id; +} __rte_cache_aligned; + +struct lcore_conf { + uint16_t n_rx_queue; + struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t n_tx_port; + uint16_t tx_port_id[RTE_MAX_ETHPORTS]; + uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; + struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; + void *ipv4_lookup_struct; + void *ipv6_lookup_struct; +} __rte_cache_aligned; + +extern volatile bool force_quit; + +/* ethernet addresses of ports */ +extern uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; +extern struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +extern uint32_t enabled_port_mask; + +/* Used only in exact match mode. */ +extern int ipv6; /**< ipv6 is false by default. 
*/ +extern uint32_t hash_entry_number; + +extern xmm_t val_eth[RTE_MAX_ETHPORTS]; + +extern struct lcore_conf lcore_conf[RTE_MAX_LCORE]; + +/* Send burst of packets on an output interface */ +static inline int +send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port) +{ + struct rte_mbuf **m_table; + int ret; + uint16_t queueid; + + queueid = qconf->tx_queue_id[port]; + m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; + + ret = rte_eth_tx_burst(port, queueid, m_table, n); + if (unlikely(ret < n)) { + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < n); + } + + return 0; +} + +/* Enqueue a single packet, and send burst if queue is filled */ +static inline int +send_single_packet(struct lcore_conf *qconf, + struct rte_mbuf *m, uint8_t port) +{ + uint16_t len; + + len = qconf->tx_mbufs[port].len; + qconf->tx_mbufs[port].m_table[len] = m; + len++; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + send_burst(qconf, MAX_PKT_BURST, port); + len = 0; + } + + qconf->tx_mbufs[port].len = len; + return 0; +} + +#ifdef DO_RFC_1812_CHECKS +static inline int +is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len) +{ + /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ + /* + * 1. The packet length reported by the Link Layer must be large + * enough to hold the minimum length legal IP datagram (20 bytes). + */ + if (link_len < sizeof(struct ipv4_hdr)) + return -1; + + /* 2. The IP checksum must be correct. */ + /* this is checked in H/W */ + + /* + * 3. The IP version number must be 4. If the version number is not 4 + * then the packet may be another version of IP, such as IPng or + * ST-II. + */ + if (((pkt->version_ihl) >> 4) != 4) + return -3; + /* + * 4. The IP header length field must be large enough to hold the + * minimum length legal IP datagram (20 bytes = 5 words). + */ + if ((pkt->version_ihl & 0xf) < 5) + return -4; + + /* + * 5. 
The IP total length field must be large enough to hold the IP + * datagram header, whose length is specified in the IP header length + * field. + */ + if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) + return -5; + + return 0; +} +#endif /* DO_RFC_1812_CHECKS */ + +/* Function pointers for LPM or EM functionality. */ +void +setup_lpm(const int socketid); + +void +setup_hash(const int socketid); + +int +em_check_ptype(int portid); + +int +lpm_check_ptype(int portid); + +uint16_t +em_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[], + uint16_t nb_pkts, uint16_t max_pkts, void *user_param); + +uint16_t +lpm_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[], + uint16_t nb_pkts, uint16_t max_pkts, void *user_param); + +int +em_main_loop(__attribute__((unused)) void *dummy); + +int +lpm_main_loop(__attribute__((unused)) void *dummy); + +/* Return ipv4/ipv6 fwd lookup struct for LPM or EM. */ +void * +em_get_ipv4_l3fwd_lookup_struct(const int socketid); + +void * +em_get_ipv6_l3fwd_lookup_struct(const int socketid); + +void * +lpm_get_ipv4_l3fwd_lookup_struct(const int socketid); + +void * +lpm_get_ipv6_l3fwd_lookup_struct(const int socketid); + +#endif /* __L3_FWD_H__ */ diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c new file mode 100644 index 00000000..fc59243d --- /dev/null +++ b/examples/l3fwd/l3fwd_em.c @@ -0,0 +1,801 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <stdbool.h> +#include <netinet/in.h> + +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_cycles.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_hash.h> + +#include "l3fwd.h" + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#include <rte_hash_crc.h> +#define DEFAULT_HASH_FUNC rte_hash_crc +#else +#include <rte_jhash.h> +#define DEFAULT_HASH_FUNC rte_jhash +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + +#define IPV6_ADDR_LEN 16 + +struct ipv4_5tuple { + uint32_t ip_dst; + uint32_t ip_src; + uint16_t port_dst; + uint16_t port_src; + uint8_t proto; +} __attribute__((__packed__)); + +union ipv4_5tuple_host { + struct { + uint8_t pad0; + uint8_t proto; + uint16_t pad1; + uint32_t ip_src; + uint32_t ip_dst; + uint16_t port_src; + uint16_t port_dst; + }; + xmm_t xmm; +}; + +#define XMM_NUM_IN_IPV6_5TUPLE 3 + +struct ipv6_5tuple { + uint8_t ip_dst[IPV6_ADDR_LEN]; + uint8_t ip_src[IPV6_ADDR_LEN]; + uint16_t port_dst; + uint16_t port_src; + uint8_t proto; +} __attribute__((__packed__)); + +union ipv6_5tuple_host { + struct { + uint16_t pad0; + uint8_t proto; + uint8_t pad1; + uint8_t ip_src[IPV6_ADDR_LEN]; + uint8_t ip_dst[IPV6_ADDR_LEN]; + uint16_t port_src; + uint16_t port_dst; + uint64_t reserve; + }; + xmm_t xmm[XMM_NUM_IN_IPV6_5TUPLE]; +}; + + + +struct ipv4_l3fwd_em_route { + struct ipv4_5tuple key; + uint8_t if_out; +}; + +struct ipv6_l3fwd_em_route { + struct ipv6_5tuple key; + uint8_t if_out; +}; + +static struct ipv4_l3fwd_em_route ipv4_l3fwd_em_route_array[] = { + {{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1), 101, 11, IPPROTO_TCP}, 0}, + {{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1), 102, 12, IPPROTO_TCP}, 1}, + 
{{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1), 101, 11, IPPROTO_TCP}, 2}, + {{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1), 102, 12, IPPROTO_TCP}, 3}, +}; + +static struct ipv6_l3fwd_em_route ipv6_l3fwd_em_route_array[] = { + {{ + {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, + {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, + 101, 11, IPPROTO_TCP}, 0}, + + {{ + {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, + {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, + 102, 12, IPPROTO_TCP}, 1}, + + {{ + {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, + {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, + 101, 11, IPPROTO_TCP}, 2}, + + {{ + {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, + {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, + 102, 12, IPPROTO_TCP}, 3}, +}; + +struct rte_hash *ipv4_l3fwd_em_lookup_struct[NB_SOCKETS]; +struct rte_hash *ipv6_l3fwd_em_lookup_struct[NB_SOCKETS]; + +static inline uint32_t +ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len, + uint32_t init_val) +{ + const union ipv4_5tuple_host *k; + uint32_t t; + const uint32_t *p; + + k = data; + t = k->proto; + p = (const uint32_t *)&k->port_src; + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + init_val = rte_hash_crc_4byte(t, init_val); + init_val = rte_hash_crc_4byte(k->ip_src, init_val); + init_val = rte_hash_crc_4byte(k->ip_dst, init_val); + init_val = rte_hash_crc_4byte(*p, init_val); +#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + init_val = rte_jhash_1word(t, init_val); + init_val = rte_jhash_1word(k->ip_src, init_val); + init_val = rte_jhash_1word(k->ip_dst, init_val); + init_val = rte_jhash_1word(*p, init_val); +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + + return init_val; +} + +static inline uint32_t +ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, + uint32_t init_val) 
+{ + const union ipv6_5tuple_host *k; + uint32_t t; + const uint32_t *p; +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3; + const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3; +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + + k = data; + t = k->proto; + p = (const uint32_t *)&k->port_src; + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + ip_src0 = (const uint32_t *) k->ip_src; + ip_src1 = (const uint32_t *)(k->ip_src+4); + ip_src2 = (const uint32_t *)(k->ip_src+8); + ip_src3 = (const uint32_t *)(k->ip_src+12); + ip_dst0 = (const uint32_t *) k->ip_dst; + ip_dst1 = (const uint32_t *)(k->ip_dst+4); + ip_dst2 = (const uint32_t *)(k->ip_dst+8); + ip_dst3 = (const uint32_t *)(k->ip_dst+12); + init_val = rte_hash_crc_4byte(t, init_val); + init_val = rte_hash_crc_4byte(*ip_src0, init_val); + init_val = rte_hash_crc_4byte(*ip_src1, init_val); + init_val = rte_hash_crc_4byte(*ip_src2, init_val); + init_val = rte_hash_crc_4byte(*ip_src3, init_val); + init_val = rte_hash_crc_4byte(*ip_dst0, init_val); + init_val = rte_hash_crc_4byte(*ip_dst1, init_val); + init_val = rte_hash_crc_4byte(*ip_dst2, init_val); + init_val = rte_hash_crc_4byte(*ip_dst3, init_val); + init_val = rte_hash_crc_4byte(*p, init_val); +#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + init_val = rte_jhash_1word(t, init_val); + init_val = rte_jhash(k->ip_src, + sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); + init_val = rte_jhash(k->ip_dst, + sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); + init_val = rte_jhash_1word(*p, init_val); +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + return init_val; +} + +#define IPV4_L3FWD_EM_NUM_ROUTES \ + (sizeof(ipv4_l3fwd_em_route_array) / sizeof(ipv4_l3fwd_em_route_array[0])) + +#define IPV6_L3FWD_EM_NUM_ROUTES \ + (sizeof(ipv6_l3fwd_em_route_array) / sizeof(ipv6_l3fwd_em_route_array[0])) + +static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; +static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; + +static rte_xmm_t mask0; 
+static rte_xmm_t mask1; +static rte_xmm_t mask2; + +#if defined(__SSE2__) +static inline xmm_t +em_mask_key(void *key, xmm_t mask) +{ + __m128i data = _mm_loadu_si128((__m128i *)(key)); + + return _mm_and_si128(data, mask); +} +#elif defined(RTE_MACHINE_CPUFLAG_NEON) +static inline xmm_t +em_mask_key(void *key, xmm_t mask) +{ + int32x4_t data = vld1q_s32((int32_t *)key); + + return vandq_s32(data, mask); +} +#endif + +static inline uint8_t +em_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct) +{ + int ret = 0; + union ipv4_5tuple_host key; + struct rte_hash *ipv4_l3fwd_lookup_struct = + (struct rte_hash *)lookup_struct; + + ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live); + + /* + * Get 5 tuple: dst port, src port, dst IP address, + * src IP address and protocol. + */ + key.xmm = em_mask_key(ipv4_hdr, mask0.x); + + /* Find destination port */ + ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); + return (uint8_t)((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]); +} + +static inline uint8_t +em_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct) +{ + int ret = 0; + union ipv6_5tuple_host key; + struct rte_hash *ipv6_l3fwd_lookup_struct = + (struct rte_hash *)lookup_struct; + + ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len); + void *data0 = ipv6_hdr; + void *data1 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t); + void *data2 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t) + sizeof(xmm_t); + + /* Get part of 5 tuple: src IP address lower 96 bits and protocol */ + key.xmm[0] = em_mask_key(data0, mask1.x); + + /* + * Get part of 5 tuple: dst IP address lower 96 bits + * and src IP address higher 32 bits. + */ + key.xmm[1] = *(xmm_t *)data1; + + /* + * Get part of 5 tuple: dst port and src port + * and dst IP address higher 32 bits. 
+ */ + key.xmm[2] = em_mask_key(data2, mask2.x); + + /* Find destination port */ + ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); + return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); +} + +#if defined(__SSE4_1__) +#if defined(NO_HASH_MULTI_LOOKUP) +#include "l3fwd_em_sse.h" +#else +#include "l3fwd_em_hlm_sse.h" +#endif +#else +#include "l3fwd_em.h" +#endif + +static void +convert_ipv4_5tuple(struct ipv4_5tuple *key1, + union ipv4_5tuple_host *key2) +{ + key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst); + key2->ip_src = rte_cpu_to_be_32(key1->ip_src); + key2->port_dst = rte_cpu_to_be_16(key1->port_dst); + key2->port_src = rte_cpu_to_be_16(key1->port_src); + key2->proto = key1->proto; + key2->pad0 = 0; + key2->pad1 = 0; +} + +static void +convert_ipv6_5tuple(struct ipv6_5tuple *key1, + union ipv6_5tuple_host *key2) +{ + uint32_t i; + + for (i = 0; i < 16; i++) { + key2->ip_dst[i] = key1->ip_dst[i]; + key2->ip_src[i] = key1->ip_src[i]; + } + key2->port_dst = rte_cpu_to_be_16(key1->port_dst); + key2->port_src = rte_cpu_to_be_16(key1->port_src); + key2->proto = key1->proto; + key2->pad0 = 0; + key2->pad1 = 0; + key2->reserve = 0; +} + +#define BYTE_VALUE_MAX 256 +#define ALL_32_BITS 0xffffffff +#define BIT_8_TO_15 0x0000ff00 + +static inline void +populate_ipv4_few_flow_into_table(const struct rte_hash *h) +{ + uint32_t i; + int32_t ret; + + mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS, + ALL_32_BITS, ALL_32_BITS} }; + + for (i = 0; i < IPV4_L3FWD_EM_NUM_ROUTES; i++) { + struct ipv4_l3fwd_em_route entry; + union ipv4_5tuple_host newkey; + + entry = ipv4_l3fwd_em_route_array[i]; + convert_ipv4_5tuple(&entry.key, &newkey); + ret = rte_hash_add_key(h, (void *) &newkey); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32 + " to the l3fwd hash.\n", i); + } + ipv4_l3fwd_out_if[ret] = entry.if_out; + } + printf("Hash: Adding 0x%" PRIx64 " keys\n", + (uint64_t)IPV4_L3FWD_EM_NUM_ROUTES); +} + +#define BIT_16_TO_23 
0x00ff0000 +static inline void +populate_ipv6_few_flow_into_table(const struct rte_hash *h) +{ + uint32_t i; + int32_t ret; + + mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS, + ALL_32_BITS, ALL_32_BITS} }; + + mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} }; + + for (i = 0; i < IPV6_L3FWD_EM_NUM_ROUTES; i++) { + struct ipv6_l3fwd_em_route entry; + union ipv6_5tuple_host newkey; + + entry = ipv6_l3fwd_em_route_array[i]; + convert_ipv6_5tuple(&entry.key, &newkey); + ret = rte_hash_add_key(h, (void *) &newkey); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32 + " to the l3fwd hash.\n", i); + } + ipv6_l3fwd_out_if[ret] = entry.if_out; + } + printf("Hash: Adding 0x%" PRIx64 "keys\n", + (uint64_t)IPV6_L3FWD_EM_NUM_ROUTES); +} + +#define NUMBER_PORT_USED 4 +static inline void +populate_ipv4_many_flow_into_table(const struct rte_hash *h, + unsigned int nr_flow) +{ + unsigned i; + + mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS, + ALL_32_BITS, ALL_32_BITS} }; + + for (i = 0; i < nr_flow; i++) { + struct ipv4_l3fwd_em_route entry; + union ipv4_5tuple_host newkey; + + uint8_t a = (uint8_t) + ((i/NUMBER_PORT_USED)%BYTE_VALUE_MAX); + uint8_t b = (uint8_t) + (((i/NUMBER_PORT_USED)/BYTE_VALUE_MAX)%BYTE_VALUE_MAX); + uint8_t c = (uint8_t) + ((i/NUMBER_PORT_USED)/(BYTE_VALUE_MAX*BYTE_VALUE_MAX)); + + /* Create the ipv4 exact match flow */ + memset(&entry, 0, sizeof(entry)); + switch (i & (NUMBER_PORT_USED - 1)) { + case 0: + entry = ipv4_l3fwd_em_route_array[0]; + entry.key.ip_dst = IPv4(101, c, b, a); + break; + case 1: + entry = ipv4_l3fwd_em_route_array[1]; + entry.key.ip_dst = IPv4(201, c, b, a); + break; + case 2: + entry = ipv4_l3fwd_em_route_array[2]; + entry.key.ip_dst = IPv4(111, c, b, a); + break; + case 3: + entry = ipv4_l3fwd_em_route_array[3]; + entry.key.ip_dst = IPv4(211, c, b, a); + break; + }; + convert_ipv4_5tuple(&entry.key, &newkey); + int32_t ret = rte_hash_add_key(h, (void *) &newkey); + + if (ret < 0) + 
rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i); + + ipv4_l3fwd_out_if[ret] = (uint8_t) entry.if_out; + + } + printf("Hash: Adding 0x%x keys\n", nr_flow); +} + +static inline void +populate_ipv6_many_flow_into_table(const struct rte_hash *h, + unsigned int nr_flow) +{ + unsigned i; + + mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS, + ALL_32_BITS, ALL_32_BITS} }; + mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} }; + + for (i = 0; i < nr_flow; i++) { + struct ipv6_l3fwd_em_route entry; + union ipv6_5tuple_host newkey; + + uint8_t a = (uint8_t) + ((i/NUMBER_PORT_USED)%BYTE_VALUE_MAX); + uint8_t b = (uint8_t) + (((i/NUMBER_PORT_USED)/BYTE_VALUE_MAX)%BYTE_VALUE_MAX); + uint8_t c = (uint8_t) + ((i/NUMBER_PORT_USED)/(BYTE_VALUE_MAX*BYTE_VALUE_MAX)); + + /* Create the ipv6 exact match flow */ + memset(&entry, 0, sizeof(entry)); + switch (i & (NUMBER_PORT_USED - 1)) { + case 0: + entry = ipv6_l3fwd_em_route_array[0]; + break; + case 1: + entry = ipv6_l3fwd_em_route_array[1]; + break; + case 2: + entry = ipv6_l3fwd_em_route_array[2]; + break; + case 3: + entry = ipv6_l3fwd_em_route_array[3]; + break; + }; + entry.key.ip_dst[13] = c; + entry.key.ip_dst[14] = b; + entry.key.ip_dst[15] = a; + convert_ipv6_5tuple(&entry.key, &newkey); + int32_t ret = rte_hash_add_key(h, (void *) &newkey); + + if (ret < 0) + rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i); + + ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out; + + } + printf("Hash: Adding 0x%x keys\n", nr_flow); +} + +/* Requirements: + * 1. IP packets without extension; + * 2. L4 payload should be either TCP or UDP. 
+ */ +int +em_check_ptype(int portid) +{ + int i, ret; + int ptype_l3_ipv4_ext = 0; + int ptype_l3_ipv6_ext = 0; + int ptype_l4_tcp = 0; + int ptype_l4_udp = 0; + uint32_t ptype_mask = RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK; + + ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0); + if (ret <= 0) + return 0; + + uint32_t ptypes[ret]; + + ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret); + for (i = 0; i < ret; ++i) { + switch (ptypes[i]) { + case RTE_PTYPE_L3_IPV4_EXT: + ptype_l3_ipv4_ext = 1; + break; + case RTE_PTYPE_L3_IPV6_EXT: + ptype_l3_ipv6_ext = 1; + break; + case RTE_PTYPE_L4_TCP: + ptype_l4_tcp = 1; + break; + case RTE_PTYPE_L4_UDP: + ptype_l4_udp = 1; + break; + } + } + + if (ptype_l3_ipv4_ext == 0) + printf("port %d cannot parse RTE_PTYPE_L3_IPV4_EXT\n", portid); + if (ptype_l3_ipv6_ext == 0) + printf("port %d cannot parse RTE_PTYPE_L3_IPV6_EXT\n", portid); + if (!ptype_l3_ipv4_ext || !ptype_l3_ipv6_ext) + return 0; + + if (ptype_l4_tcp == 0) + printf("port %d cannot parse RTE_PTYPE_L4_TCP\n", portid); + if (ptype_l4_udp == 0) + printf("port %d cannot parse RTE_PTYPE_L4_UDP\n", portid); + if (ptype_l4_tcp && ptype_l4_udp) + return 1; + + return 0; +} + +static inline void +em_parse_ptype(struct rte_mbuf *m) +{ + struct ether_hdr *eth_hdr; + uint32_t packet_type = RTE_PTYPE_UNKNOWN; + uint16_t ether_type; + void *l3; + int hdr_len; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + ether_type = eth_hdr->ether_type; + l3 = (uint8_t *)eth_hdr + sizeof(struct ether_hdr); + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + ipv4_hdr = (struct ipv4_hdr *)l3; + hdr_len = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * + IPV4_IHL_MULTIPLIER; + if (hdr_len == sizeof(struct ipv4_hdr)) { + packet_type |= RTE_PTYPE_L3_IPV4; + if (ipv4_hdr->next_proto_id == IPPROTO_TCP) + packet_type |= RTE_PTYPE_L4_TCP; + else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) + 
packet_type |= RTE_PTYPE_L4_UDP; + } else + packet_type |= RTE_PTYPE_L3_IPV4_EXT; + } else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { + ipv6_hdr = (struct ipv6_hdr *)l3; + if (ipv6_hdr->proto == IPPROTO_TCP) + packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP; + else if (ipv6_hdr->proto == IPPROTO_UDP) + packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP; + else + packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; + } + + m->packet_type = packet_type; +} + +uint16_t +em_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused, + struct rte_mbuf *pkts[], uint16_t nb_pkts, + uint16_t max_pkts __rte_unused, + void *user_param __rte_unused) +{ + unsigned i; + + for (i = 0; i < nb_pkts; ++i) + em_parse_ptype(pkts[i]); + + return nb_pkts; +} + +/* main processing loop */ +int +em_main_loop(__attribute__((unused)) void *dummy) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + int i, nb_rx; + uint8_t portid, queueid; + struct lcore_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / + US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_conf[lcore_id]; + + if (qconf->n_rx_queue == 0) { + RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + RTE_LOG(INFO, L3FWD, + " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", + lcore_id, portid, queueid); + } + + while (!force_quit) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + for (i = 0; i < qconf->n_tx_port; ++i) { + portid = qconf->tx_port_id[i]; + if (qconf->tx_mbufs[portid].len == 0) + continue; + send_burst(qconf, + qconf->tx_mbufs[portid].len, + 
portid); + qconf->tx_mbufs[portid].len = 0; + } + + prev_tsc = cur_tsc; + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_queue; ++i) { + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, + MAX_PKT_BURST); + if (nb_rx == 0) + continue; + +#if defined(__SSE4_1__) + l3fwd_em_send_packets(nb_rx, pkts_burst, + portid, qconf); +#else + l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst, + portid, qconf); +#endif /* __SSE_4_1__ */ + } + } + + return 0; +} + +/* + * Initialize exact match (hash) parameters. + */ +void +setup_hash(const int socketid) +{ + struct rte_hash_parameters ipv4_l3fwd_hash_params = { + .name = NULL, + .entries = L3FWD_HASH_ENTRIES, + .key_len = sizeof(union ipv4_5tuple_host), + .hash_func = ipv4_hash_crc, + .hash_func_init_val = 0, + }; + + struct rte_hash_parameters ipv6_l3fwd_hash_params = { + .name = NULL, + .entries = L3FWD_HASH_ENTRIES, + .key_len = sizeof(union ipv6_5tuple_host), + .hash_func = ipv6_hash_crc, + .hash_func_init_val = 0, + }; + + char s[64]; + + /* create ipv4 hash */ + snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); + ipv4_l3fwd_hash_params.name = s; + ipv4_l3fwd_hash_params.socket_id = socketid; + ipv4_l3fwd_em_lookup_struct[socketid] = + rte_hash_create(&ipv4_l3fwd_hash_params); + if (ipv4_l3fwd_em_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, + "Unable to create the l3fwd hash on socket %d\n", + socketid); + + /* create ipv6 hash */ + snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); + ipv6_l3fwd_hash_params.name = s; + ipv6_l3fwd_hash_params.socket_id = socketid; + ipv6_l3fwd_em_lookup_struct[socketid] = + rte_hash_create(&ipv6_l3fwd_hash_params); + if (ipv6_l3fwd_em_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, + "Unable to create the l3fwd hash on socket %d\n", + socketid); + + if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) { + /* For testing hash matching with a 
large number of flows we + * generate millions of IP 5-tuples with an incremented dst + * address to initialize the hash table. */ + if (ipv6 == 0) { + /* populate the ipv4 hash */ + populate_ipv4_many_flow_into_table( + ipv4_l3fwd_em_lookup_struct[socketid], + hash_entry_number); + } else { + /* populate the ipv6 hash */ + populate_ipv6_many_flow_into_table( + ipv6_l3fwd_em_lookup_struct[socketid], + hash_entry_number); + } + } else { + /* + * Use data in ipv4/ipv6 l3fwd lookup table + * directly to initialize the hash table. + */ + if (ipv6 == 0) { + /* populate the ipv4 hash */ + populate_ipv4_few_flow_into_table( + ipv4_l3fwd_em_lookup_struct[socketid]); + } else { + /* populate the ipv6 hash */ + populate_ipv6_few_flow_into_table( + ipv6_l3fwd_em_lookup_struct[socketid]); + } + } +} + +/* Return ipv4/ipv6 em fwd lookup struct. */ +void * +em_get_ipv4_l3fwd_lookup_struct(const int socketid) +{ + return ipv4_l3fwd_em_lookup_struct[socketid]; +} + +void * +em_get_ipv6_l3fwd_lookup_struct(const int socketid) +{ + return ipv6_l3fwd_em_lookup_struct[socketid]; +} diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h new file mode 100644 index 00000000..2284bbd5 --- /dev/null +++ b/examples/l3fwd/l3fwd_em.h @@ -0,0 +1,138 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __L3FWD_EM_H__ +#define __L3FWD_EM_H__ + +static inline __attribute__((always_inline)) void +l3fwd_em_simple_forward(struct rte_mbuf *m, uint8_t portid, + struct lcore_conf *qconf) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ipv4_hdr; + uint8_t dst_port; + uint32_t tcp_or_udp; + uint32_t l3_ptypes; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + tcp_or_udp = m->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); + l3_ptypes = m->packet_type & RTE_PTYPE_L3_MASK; + + if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) { + /* Handle IPv4 headers.*/ + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + +#ifdef DO_RFC_1812_CHECKS + /* Check to make sure the packet is valid (RFC1812) */ + if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { + rte_pktmbuf_free(m); + return; + } +#endif + dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid, + qconf->ipv4_lookup_struct); + + if (dst_port >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + +#ifdef DO_RFC_1812_CHECKS + /* Update time to live and header checksum */ + --(ipv4_hdr->time_to_live); + ++(ipv4_hdr->hdr_checksum); +#endif + /* dst addr */ + *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port]; + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(qconf, m, dst_port); + } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) { + /* Handle IPv6 headers.*/ + struct ipv6_hdr *ipv6_hdr; + + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + dst_port = em_get_ipv6_dst_port(ipv6_hdr, portid, + qconf->ipv6_lookup_struct); + + if (dst_port >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + + /* dst addr */ + *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port]; + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(qconf, m, dst_port); + } else { + /* 
Free the mbuf that contains non-IPV4/IPV6 packet */ + rte_pktmbuf_free(m); + } +} + +/* + * Buffer non-optimized handling of packets, invoked + * from main_loop. + */ +static inline void +l3fwd_em_no_opt_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, struct lcore_conf *qconf) +{ + int32_t j; + + /* Prefetch first packets */ + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *)); + + /* + * Prefetch and forward already prefetched + * packets. + */ + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ + j + PREFETCH_OFFSET], void *)); + l3fwd_em_simple_forward(pkts_burst[j], portid, qconf); + } + + /* Forward remaining prefetched packets */ + for (; j < nb_rx; j++) + l3fwd_em_simple_forward(pkts_burst[j], portid, qconf); +} + +#endif /* __L3FWD_EM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h b/examples/l3fwd/l3fwd_em_hlm_sse.h new file mode 100644 index 00000000..5001c724 --- /dev/null +++ b/examples/l3fwd/l3fwd_em_hlm_sse.h @@ -0,0 +1,342 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __L3FWD_EM_HLM_SSE_H__ +#define __L3FWD_EM_HLM_SSE_H__ + +#include "l3fwd_sse.h" + +static inline __attribute__((always_inline)) void +em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8], + uint8_t portid, uint16_t dst_port[8]) +{ + int32_t ret[8]; + union ipv4_5tuple_host key[8]; + __m128i data[8]; + + data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); 
+ data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + + key[0].xmm = _mm_and_si128(data[0], mask0.x); + key[1].xmm = _mm_and_si128(data[1], mask0.x); + key[2].xmm = _mm_and_si128(data[2], mask0.x); + key[3].xmm = _mm_and_si128(data[3], mask0.x); + key[4].xmm = _mm_and_si128(data[4], mask0.x); + key[5].xmm = _mm_and_si128(data[5], mask0.x); + key[6].xmm = _mm_and_si128(data[6], mask0.x); + key[7].xmm = _mm_and_si128(data[7], mask0.x); + + const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], + &key[4], &key[5], &key[6], &key[7]}; + + rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[0], 8, ret); + + dst_port[0] = (uint8_t) ((ret[0] < 0) ? + portid : ipv4_l3fwd_out_if[ret[0]]); + dst_port[1] = (uint8_t) ((ret[1] < 0) ? + portid : ipv4_l3fwd_out_if[ret[1]]); + dst_port[2] = (uint8_t) ((ret[2] < 0) ? + portid : ipv4_l3fwd_out_if[ret[2]]); + dst_port[3] = (uint8_t) ((ret[3] < 0) ? + portid : ipv4_l3fwd_out_if[ret[3]]); + dst_port[4] = (uint8_t) ((ret[4] < 0) ? + portid : ipv4_l3fwd_out_if[ret[4]]); + dst_port[5] = (uint8_t) ((ret[5] < 0) ? + portid : ipv4_l3fwd_out_if[ret[5]]); + dst_port[6] = (uint8_t) ((ret[6] < 0) ? + portid : ipv4_l3fwd_out_if[ret[6]]); + dst_port[7] = (uint8_t) ((ret[7] < 0) ? 
+ portid : ipv4_l3fwd_out_if[ret[7]]); + + if (dst_port[0] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[0]) == 0) + dst_port[0] = portid; + + if (dst_port[1] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[1]) == 0) + dst_port[1] = portid; + + if (dst_port[2] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[2]) == 0) + dst_port[2] = portid; + + if (dst_port[3] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[3]) == 0) + dst_port[3] = portid; + + if (dst_port[4] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[4]) == 0) + dst_port[4] = portid; + + if (dst_port[5] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[5]) == 0) + dst_port[5] = portid; + + if (dst_port[6] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[6]) == 0) + dst_port[6] = portid; + + if (dst_port[7] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[7]) == 0) + dst_port[7] = portid; + +} + +static inline void +get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0, + __m128i mask1, union ipv6_5tuple_host *key) +{ + __m128i tmpdata0 = _mm_loadu_si128( + rte_pktmbuf_mtod_offset(m0, __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len))); + + __m128i tmpdata1 = _mm_loadu_si128( + rte_pktmbuf_mtod_offset(m0, __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len) + + sizeof(__m128i))); + + __m128i tmpdata2 = _mm_loadu_si128( + rte_pktmbuf_mtod_offset(m0, __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len) + + sizeof(__m128i) + sizeof(__m128i))); + + key->xmm[0] = _mm_and_si128(tmpdata0, mask0); + key->xmm[1] = tmpdata1; + key->xmm[2] = _mm_and_si128(tmpdata2, mask1); +} + +static inline __attribute__((always_inline)) void +em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8], + uint8_t portid, uint16_t dst_port[8]) +{ + int32_t ret[8]; + union ipv6_5tuple_host key[8]; + + get_ipv6_5tuple(m[0], mask1.x, mask2.x, &key[0]); 
+ get_ipv6_5tuple(m[1], mask1.x, mask2.x, &key[1]); + get_ipv6_5tuple(m[2], mask1.x, mask2.x, &key[2]); + get_ipv6_5tuple(m[3], mask1.x, mask2.x, &key[3]); + get_ipv6_5tuple(m[4], mask1.x, mask2.x, &key[4]); + get_ipv6_5tuple(m[5], mask1.x, mask2.x, &key[5]); + get_ipv6_5tuple(m[6], mask1.x, mask2.x, &key[6]); + get_ipv6_5tuple(m[7], mask1.x, mask2.x, &key[7]); + + const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], + &key[4], &key[5], &key[6], &key[7]}; + + rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 8, ret); + + dst_port[0] = (uint8_t) ((ret[0] < 0) ? + portid : ipv6_l3fwd_out_if[ret[0]]); + dst_port[1] = (uint8_t) ((ret[1] < 0) ? + portid : ipv6_l3fwd_out_if[ret[1]]); + dst_port[2] = (uint8_t) ((ret[2] < 0) ? + portid : ipv6_l3fwd_out_if[ret[2]]); + dst_port[3] = (uint8_t) ((ret[3] < 0) ? + portid : ipv6_l3fwd_out_if[ret[3]]); + dst_port[4] = (uint8_t) ((ret[4] < 0) ? + portid : ipv6_l3fwd_out_if[ret[4]]); + dst_port[5] = (uint8_t) ((ret[5] < 0) ? + portid : ipv6_l3fwd_out_if[ret[5]]); + dst_port[6] = (uint8_t) ((ret[6] < 0) ? + portid : ipv6_l3fwd_out_if[ret[6]]); + dst_port[7] = (uint8_t) ((ret[7] < 0) ? 
+ portid : ipv6_l3fwd_out_if[ret[7]]); + + if (dst_port[0] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[0]) == 0) + dst_port[0] = portid; + + if (dst_port[1] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[1]) == 0) + dst_port[1] = portid; + + if (dst_port[2] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[2]) == 0) + dst_port[2] = portid; + + if (dst_port[3] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[3]) == 0) + dst_port[3] = portid; + + if (dst_port[4] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[4]) == 0) + dst_port[4] = portid; + + if (dst_port[5] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[5]) == 0) + dst_port[5] = portid; + + if (dst_port[6] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[6]) == 0) + dst_port[6] = portid; + + if (dst_port[7] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[7]) == 0) + dst_port[7] = portid; + +} + +static inline __attribute__((always_inline)) uint16_t +em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, + uint8_t portid) +{ + uint8_t next_hop; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + uint32_t tcp_or_udp; + uint32_t l3_ptypes; + + tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); + l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK; + + if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) { + + /* Handle IPv4 headers.*/ + ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + + next_hop = em_get_ipv4_dst_port(ipv4_hdr, portid, + qconf->ipv4_lookup_struct); + + if (next_hop >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << next_hop) == 0) + next_hop = portid; + + return next_hop; + + } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) { + + /* Handle IPv6 headers.*/ + ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + next_hop = em_get_ipv6_dst_port(ipv6_hdr, portid, + 
qconf->ipv6_lookup_struct); + + if (next_hop >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << next_hop) == 0) + next_hop = portid; + + return next_hop; + + } + + return portid; +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. + */ +static inline void +l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, struct lcore_conf *qconf) +{ + int32_t j; + uint16_t dst_port[MAX_PKT_BURST]; + + /* + * Send nb_rx - nb_rx%8 packets + * in groups of 8. + */ + int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8); + + for (j = 0; j < n; j += 8) { + + uint32_t pkt_type = + pkts_burst[j]->packet_type & + pkts_burst[j+1]->packet_type & + pkts_burst[j+2]->packet_type & + pkts_burst[j+3]->packet_type & + pkts_burst[j+4]->packet_type & + pkts_burst[j+5]->packet_type & + pkts_burst[j+6]->packet_type & + pkts_burst[j+7]->packet_type; + + uint32_t l3_type = pkt_type & RTE_PTYPE_L3_MASK; + uint32_t tcp_or_udp = pkt_type & + (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); + + if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { + + em_get_dst_port_ipv4x8(qconf, &pkts_burst[j], portid, &dst_port[j]); + + } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { + + em_get_dst_port_ipv6x8(qconf, &pkts_burst[j], portid, &dst_port[j]); + + } else { + dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j+1], portid); + dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j+2], portid); + dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j+3], portid); + dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j+4], portid); + dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j+5], portid); + dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j+6], portid); + dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j+7], portid); + } + } + + for (; j < nb_rx; j++) + dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); + +} +#endif /* 
__L3FWD_EM_SSE_HLM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_sse.h b/examples/l3fwd/l3fwd_em_sse.h new file mode 100644 index 00000000..c0a9725a --- /dev/null +++ b/examples/l3fwd/l3fwd_em_sse.h @@ -0,0 +1,112 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __L3FWD_EM_SSE_H__ +#define __L3FWD_EM_SSE_H__ + +/** + * @file + * This is an optional implementation of packet classification in the Exact-Match + * path using a sequential packet classification method. + * Since hash lookup multi seems to provide better performance, this sequential + * implementation is disabled by default and can be enabled with the + * NO_HASH_LOOKUP_MULTI global define at compilation time. + */ + +#include "l3fwd_sse.h" + +static inline __attribute__((always_inline)) uint16_t +em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, + uint8_t portid) +{ + uint8_t next_hop; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + uint32_t tcp_or_udp; + uint32_t l3_ptypes; + + tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); + l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK; + + if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) { + + /* Handle IPv4 headers.*/ + ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + + next_hop = em_get_ipv4_dst_port(ipv4_hdr, portid, + qconf->ipv4_lookup_struct); + + if (next_hop >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << next_hop) == 0) + next_hop = portid; + + return next_hop; + + } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) { + + /* Handle IPv6 headers.*/ + ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + next_hop = em_get_ipv6_dst_port(ipv6_hdr, portid, + qconf->ipv6_lookup_struct); + + if (next_hop >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << next_hop) == 0) + next_hop = portid; + + return next_hop; + + } + + return portid; +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. 
+ */ +static inline void +l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, struct lcore_conf *qconf) +{ + int32_t j; + uint16_t dst_port[MAX_PKT_BURST]; + + for (j = 0; j < nb_rx; j++) + dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); +} +#endif /* __L3FWD_EM_SSE_H__ */ diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c new file mode 100644 index 00000000..d941bdfd --- /dev/null +++ b/examples/l3fwd/l3fwd_lpm.c @@ -0,0 +1,356 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <stdbool.h> + +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_cycles.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_lpm.h> +#include <rte_lpm6.h> + +#include "l3fwd.h" + +struct ipv4_l3fwd_lpm_route { + uint32_t ip; + uint8_t depth; + uint8_t if_out; +}; + +struct ipv6_l3fwd_lpm_route { + uint8_t ip[16]; + uint8_t depth; + uint8_t if_out; +}; + +static struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = { + {IPv4(1, 1, 1, 0), 24, 0}, + {IPv4(2, 1, 1, 0), 24, 1}, + {IPv4(3, 1, 1, 0), 24, 2}, + {IPv4(4, 1, 1, 0), 24, 3}, + {IPv4(5, 1, 1, 0), 24, 4}, + {IPv4(6, 1, 1, 0), 24, 5}, + {IPv4(7, 1, 1, 0), 24, 6}, + {IPv4(8, 1, 1, 0), 24, 7}, +}; + +static struct ipv6_l3fwd_lpm_route ipv6_l3fwd_lpm_route_array[] = { + {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0}, + {{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1}, + {{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2}, + {{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3}, + {{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4}, + {{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1}, 48, 5}, + {{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6}, + {{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7}, +}; + +#define IPV4_L3FWD_LPM_NUM_ROUTES \ + (sizeof(ipv4_l3fwd_lpm_route_array) / sizeof(ipv4_l3fwd_lpm_route_array[0])) +#define IPV6_L3FWD_LPM_NUM_ROUTES \ + (sizeof(ipv6_l3fwd_lpm_route_array) / sizeof(ipv6_l3fwd_lpm_route_array[0])) + +#define IPV4_L3FWD_LPM_MAX_RULES 1024 +#define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8) +#define IPV6_L3FWD_LPM_MAX_RULES 1024 +#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16) + +struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS]; +struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS]; + +#if defined(__SSE4_1__) +#include "l3fwd_lpm_sse.h" +#else +#include "l3fwd_lpm.h" +#endif + +/* main processing loop */ +int +lpm_main_loop(__attribute__((unused)) void *dummy) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + int i, nb_rx; + uint8_t portid, queueid; + struct lcore_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / + US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_conf[lcore_id]; + + if (qconf->n_rx_queue == 0) { + RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_queue; i++) { + + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + RTE_LOG(INFO, L3FWD, + " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", + lcore_id, portid, queueid); + } + + while (!force_quit) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + for (i = 0; i < qconf->n_tx_port; ++i) { + portid = qconf->tx_port_id[i]; + if (qconf->tx_mbufs[portid].len == 0) + continue; + send_burst(qconf, + qconf->tx_mbufs[portid].len, + portid); + 
qconf->tx_mbufs[portid].len = 0; + } + + prev_tsc = cur_tsc; + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_queue; ++i) { + portid = qconf->rx_queue_list[i].port_id; + queueid = qconf->rx_queue_list[i].queue_id; + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, + MAX_PKT_BURST); + if (nb_rx == 0) + continue; + +#if defined(__SSE4_1__) + l3fwd_lpm_send_packets(nb_rx, pkts_burst, + portid, qconf); +#else + l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst, + portid, qconf); +#endif /* __SSE4_1__ */ + } + } + + return 0; +} + +void +setup_lpm(const int socketid) +{ + struct rte_lpm6_config config; + struct rte_lpm_config config_ipv4; + unsigned i; + int ret; + char s[64]; + + /* create the LPM table */ + config_ipv4.max_rules = IPV4_L3FWD_LPM_MAX_RULES; + config_ipv4.number_tbl8s = IPV4_L3FWD_LPM_NUMBER_TBL8S; + config_ipv4.flags = 0; + snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); + ipv4_l3fwd_lpm_lookup_struct[socketid] = + rte_lpm_create(s, socketid, &config_ipv4); + if (ipv4_l3fwd_lpm_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, + "Unable to create the l3fwd LPM table on socket %d\n", + socketid); + + /* populate the LPM table */ + for (i = 0; i < IPV4_L3FWD_LPM_NUM_ROUTES; i++) { + + /* skip unused ports */ + if ((1 << ipv4_l3fwd_lpm_route_array[i].if_out & + enabled_port_mask) == 0) + continue; + + ret = rte_lpm_add(ipv4_l3fwd_lpm_lookup_struct[socketid], + ipv4_l3fwd_lpm_route_array[i].ip, + ipv4_l3fwd_lpm_route_array[i].depth, + ipv4_l3fwd_lpm_route_array[i].if_out); + + if (ret < 0) { + rte_exit(EXIT_FAILURE, + "Unable to add entry %u to the l3fwd LPM table on socket %d\n", + i, socketid); + } + + printf("LPM: Adding route 0x%08x / %d (%d)\n", + (unsigned)ipv4_l3fwd_lpm_route_array[i].ip, + ipv4_l3fwd_lpm_route_array[i].depth, + ipv4_l3fwd_lpm_route_array[i].if_out); + } + + /* create the LPM6 table */ + snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); + + config.max_rules = 
IPV6_L3FWD_LPM_MAX_RULES; + config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; + config.flags = 0; + ipv6_l3fwd_lpm_lookup_struct[socketid] = rte_lpm6_create(s, socketid, + &config); + if (ipv6_l3fwd_lpm_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, + "Unable to create the l3fwd LPM table on socket %d\n", + socketid); + + /* populate the LPM table */ + for (i = 0; i < IPV6_L3FWD_LPM_NUM_ROUTES; i++) { + + /* skip unused ports */ + if ((1 << ipv6_l3fwd_lpm_route_array[i].if_out & + enabled_port_mask) == 0) + continue; + + ret = rte_lpm6_add(ipv6_l3fwd_lpm_lookup_struct[socketid], + ipv6_l3fwd_lpm_route_array[i].ip, + ipv6_l3fwd_lpm_route_array[i].depth, + ipv6_l3fwd_lpm_route_array[i].if_out); + + if (ret < 0) { + rte_exit(EXIT_FAILURE, + "Unable to add entry %u to the l3fwd LPM table on socket %d\n", + i, socketid); + } + + printf("LPM: Adding route %s / %d (%d)\n", + "IPV6", + ipv6_l3fwd_lpm_route_array[i].depth, + ipv6_l3fwd_lpm_route_array[i].if_out); + } +} + +int +lpm_check_ptype(int portid) +{ + int i, ret; + int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0; + uint32_t ptype_mask = RTE_PTYPE_L3_MASK; + + ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0); + if (ret <= 0) + return 0; + + uint32_t ptypes[ret]; + + ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret); + for (i = 0; i < ret; ++i) { + if (ptypes[i] & RTE_PTYPE_L3_IPV4) + ptype_l3_ipv4 = 1; + if (ptypes[i] & RTE_PTYPE_L3_IPV6) + ptype_l3_ipv6 = 1; + } + + if (ptype_l3_ipv4 == 0) + printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid); + + if (ptype_l3_ipv6 == 0) + printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid); + + if (ptype_l3_ipv4 && ptype_l3_ipv6) + return 1; + + return 0; + +} + +static inline void +lpm_parse_ptype(struct rte_mbuf *m) +{ + struct ether_hdr *eth_hdr; + uint32_t packet_type = RTE_PTYPE_UNKNOWN; + uint16_t ether_type; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + ether_type = eth_hdr->ether_type; + if (ether_type 
== rte_cpu_to_be_16(ETHER_TYPE_IPv4)) + packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; + else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) + packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; + + m->packet_type = packet_type; +} + +uint16_t +lpm_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused, + struct rte_mbuf *pkts[], uint16_t nb_pkts, + uint16_t max_pkts __rte_unused, + void *user_param __rte_unused) +{ + unsigned i; + + for (i = 0; i < nb_pkts; ++i) + lpm_parse_ptype(pkts[i]); + + return nb_pkts; +} + +/* Return ipv4/ipv6 lpm fwd lookup struct. */ +void * +lpm_get_ipv4_l3fwd_lookup_struct(const int socketid) +{ + return ipv4_l3fwd_lpm_lookup_struct[socketid]; +} + +void * +lpm_get_ipv6_l3fwd_lookup_struct(const int socketid) +{ + return ipv6_l3fwd_lpm_lookup_struct[socketid]; +} diff --git a/examples/l3fwd/l3fwd_lpm.h b/examples/l3fwd/l3fwd_lpm.h new file mode 100644 index 00000000..a43c5070 --- /dev/null +++ b/examples/l3fwd/l3fwd_lpm.h @@ -0,0 +1,151 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __L3FWD_LPM_H__ +#define __L3FWD_LPM_H__ + +static inline uint8_t +lpm_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct) +{ + uint32_t next_hop; + struct rte_lpm *ipv4_l3fwd_lookup_struct = + (struct rte_lpm *)lookup_struct; + + return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, + rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr), + &next_hop) == 0) ? next_hop : portid); +} + +static inline uint8_t +lpm_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct) +{ + uint8_t next_hop; + struct rte_lpm6 *ipv6_l3fwd_lookup_struct = + (struct rte_lpm6 *)lookup_struct; + + return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct, + ((struct ipv6_hdr *)ipv6_hdr)->dst_addr, + &next_hop) == 0) ? 
next_hop : portid); +} + +static inline __attribute__((always_inline)) void +l3fwd_lpm_simple_forward(struct rte_mbuf *m, uint8_t portid, + struct lcore_conf *qconf) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ipv4_hdr; + uint8_t dst_port; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + /* Handle IPv4 headers.*/ + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + +#ifdef DO_RFC_1812_CHECKS + /* Check to make sure the packet is valid (RFC1812) */ + if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { + rte_pktmbuf_free(m); + return; + } +#endif + dst_port = lpm_get_ipv4_dst_port(ipv4_hdr, portid, + qconf->ipv4_lookup_struct); + + if (dst_port >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + +#ifdef DO_RFC_1812_CHECKS + /* Update time to live and header checksum */ + --(ipv4_hdr->time_to_live); + ++(ipv4_hdr->hdr_checksum); +#endif + /* dst addr */ + *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port]; + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(qconf, m, dst_port); + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + /* Handle IPv6 headers.*/ + struct ipv6_hdr *ipv6_hdr; + + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + dst_port = lpm_get_ipv6_dst_port(ipv6_hdr, portid, + qconf->ipv6_lookup_struct); + + if (dst_port >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + + /* dst addr */ + *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port]; + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(qconf, m, dst_port); + } else { + /* Free the mbuf that contains non-IPV4/IPV6 packet */ + rte_pktmbuf_free(m); + } +} + +static inline void +l3fwd_lpm_no_opt_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, struct lcore_conf 
*qconf) +{ + int32_t j; + + /* Prefetch first packets */ + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *)); + + /* Prefetch and forward already prefetched packets. */ + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ + j + PREFETCH_OFFSET], void *)); + l3fwd_lpm_simple_forward(pkts_burst[j], portid, qconf); + } + + /* Forward remaining prefetched packets */ + for (; j < nb_rx; j++) + l3fwd_lpm_simple_forward(pkts_burst[j], portid, qconf); +} + +#endif /* __L3FWD_LPM_H__ */ diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h new file mode 100644 index 00000000..538fe3d7 --- /dev/null +++ b/examples/l3fwd/l3fwd_lpm_sse.h @@ -0,0 +1,213 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __L3FWD_LPM_SSE_H__ +#define __L3FWD_LPM_SSE_H__ + +#include "l3fwd_sse.h" + +static inline __attribute__((always_inline)) uint16_t +lpm_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, + uint8_t portid) +{ + uint32_t next_hop_ipv4; + uint8_t next_hop_ipv6; + struct ipv6_hdr *ipv6_hdr; + struct ipv4_hdr *ipv4_hdr; + struct ether_hdr *eth_hdr; + + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + + return (uint16_t) ((rte_lpm_lookup(qconf->ipv4_lookup_struct, + rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop_ipv4) == 0) ? + next_hop_ipv4 : portid); + + } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + + return (uint16_t) ((rte_lpm6_lookup(qconf->ipv6_lookup_struct, + ipv6_hdr->dst_addr, &next_hop_ipv6) == 0) + ? next_hop_ipv6 : portid); + + } + + return portid; +} + +/* + * lpm_get_dst_port optimized routine for packets where dst_ipv4 is already + * precalculated. If packet is ipv6 dst_addr is taken directly from packet + * header and dst_ipv4 value is not used. 
+ */ +static inline __attribute__((always_inline)) uint16_t +lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt, + uint32_t dst_ipv4, uint8_t portid) +{ + uint32_t next_hop_ipv4; + uint8_t next_hop_ipv6; + struct ipv6_hdr *ipv6_hdr; + struct ether_hdr *eth_hdr; + + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { + return (uint16_t) ((rte_lpm_lookup(qconf->ipv4_lookup_struct, dst_ipv4, + &next_hop_ipv4) == 0) ? next_hop_ipv4 : portid); + + } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + + return (uint16_t) ((rte_lpm6_lookup(qconf->ipv6_lookup_struct, + ipv6_hdr->dst_addr, &next_hop_ipv6) == 0) + ? next_hop_ipv6 : portid); + + } + + return portid; + +} + +/* + * Read packet_type and destination IPV4 addresses from 4 mbufs. + */ +static inline void +processx4_step1(struct rte_mbuf *pkt[FWDSTEP], + __m128i *dip, + uint32_t *ipv4_flag) +{ + struct ipv4_hdr *ipv4_hdr; + struct ether_hdr *eth_hdr; + uint32_t x0, x1, x2, x3; + + eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + x0 = ipv4_hdr->dst_addr; + ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4; + + eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + x1 = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[1]->packet_type; + + eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + x2 = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[2]->packet_type; + + eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + x3 = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[3]->packet_type; + + dip[0] = _mm_set_epi32(x3, x2, x1, x0); +} + +/* + * Lookup into LPM for destination port. + * If lookup fails, use incoming port (portid) as destination port. 
+ */ +static inline void +processx4_step2(const struct lcore_conf *qconf, + __m128i dip, + uint32_t ipv4_flag, + uint8_t portid, + struct rte_mbuf *pkt[FWDSTEP], + uint16_t dprt[FWDSTEP]) +{ + rte_xmm_t dst; + const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, + 4, 5, 6, 7, 0, 1, 2, 3); + + /* Byte swap 4 IPV4 addresses. */ + dip = _mm_shuffle_epi8(dip, bswap_mask); + + /* if all 4 packets are IPV4. */ + if (likely(ipv4_flag)) { + rte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dst.u32, + portid); + /* get rid of unused upper 16 bit for each dport. */ + dst.x = _mm_packs_epi32(dst.x, dst.x); + *(uint64_t *)dprt = dst.u64[0]; + } else { + dst.x = dip; + dprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0], dst.u32[0], portid); + dprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1], dst.u32[1], portid); + dprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2], dst.u32[2], portid); + dprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3], dst.u32[3], portid); + } +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. 
+ */ +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, struct lcore_conf *qconf) +{ + int32_t j; + uint16_t dst_port[MAX_PKT_BURST]; + __m128i dip[MAX_PKT_BURST / FWDSTEP]; + uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; + const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); + + for (j = 0; j != k; j += FWDSTEP) + processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP], + &ipv4_flag[j / FWDSTEP]); + + for (j = 0; j != k; j += FWDSTEP) + processx4_step2(qconf, dip[j / FWDSTEP], + ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + + /* Classify last up to 3 packets one by one */ + switch (nb_rx % FWDSTEP) { + case 3: + dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + j++; + case 2: + dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + j++; + case 1: + dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + j++; + } + + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); +} + +#endif /* __L3FWD_LPM_SSE_H__ */ diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h new file mode 100644 index 00000000..1afa1f00 --- /dev/null +++ b/examples/l3fwd/l3fwd_sse.h @@ -0,0 +1,501 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _L3FWD_COMMON_H_ +#define _L3FWD_COMMON_H_ + +#include "l3fwd.h" + +#ifdef DO_RFC_1812_CHECKS + +#define IPV4_MIN_VER_IHL 0x45 +#define IPV4_MAX_VER_IHL 0x4f +#define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL) + +/* Minimum value of IPV4 total length (20B) in network byte order. */ +#define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8) + +/* + * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: + * - The IP version number must be 4. + * - The IP header length field must be large enough to hold the + * minimum length legal IP datagram (20 bytes = 5 words). + * - The IP total length field must be large enough to hold the IP + * datagram header, whose length is specified in the IP header length + * field. + * If we encounter invalid IPV4 packet, then set destination port for it + * to BAD_PORT value. 
+ */ +static inline __attribute__((always_inline)) void +rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) +{ + uint8_t ihl; + + if (RTE_ETH_IS_IPV4_HDR(ptype)) { + ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; + + ipv4_hdr->time_to_live--; + ipv4_hdr->hdr_checksum++; + + if (ihl > IPV4_MAX_VER_IHL_DIFF || + ((uint8_t)ipv4_hdr->total_length == 0 && + ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) + dp[0] = BAD_PORT; + + } +} + +#else +#define rfc1812_process(mb, dp, ptype) do { } while (0) +#endif /* DO_RFC_1812_CHECKS */ + +/* + * Update source and destination MAC addresses in the ethernet header. + * Perform RFC1812 checks and updates for IPV4 packets. + */ +static inline void +processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) +{ + __m128i te[FWDSTEP]; + __m128i ve[FWDSTEP]; + __m128i *p[FWDSTEP]; + + p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *); + p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *); + p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *); + p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *); + + ve[0] = val_eth[dst_port[0]]; + te[0] = _mm_loadu_si128(p[0]); + + ve[1] = val_eth[dst_port[1]]; + te[1] = _mm_loadu_si128(p[1]); + + ve[2] = val_eth[dst_port[2]]; + te[2] = _mm_loadu_si128(p[2]); + + ve[3] = val_eth[dst_port[3]]; + te[3] = _mm_loadu_si128(p[3]); + + /* Update first 12 bytes, keep rest bytes intact. 
*/ + te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH); + te[1] = _mm_blend_epi16(te[1], ve[1], MASK_ETH); + te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH); + te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH); + + _mm_storeu_si128(p[0], te[0]); + _mm_storeu_si128(p[1], te[1]); + _mm_storeu_si128(p[2], te[2]); + _mm_storeu_si128(p[3], te[3]); + + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1), + &dst_port[0], pkt[0]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1), + &dst_port[1], pkt[1]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1), + &dst_port[2], pkt[2]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1), + &dst_port[3], pkt[3]->packet_type); +} + +/* + * We group consecutive packets with the same destination port into one burst. + * To avoid extra latency this is done together with some other packet + * processing, but after we made a final decision about packet's destination. + * To do this we maintain: + * pnum - array of number of consecutive packets with the same dest port for + * each packet in the input burst. + * lp - pointer to the last updated element in the pnum. + * dlp - dest port value lp corresponds to. + */ + +#define GRPSZ (1 << FWDSTEP) +#define GRPMSK (GRPSZ - 1) + +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ + if (likely((dlp) == (dcp)[(idx)])) { \ + (lp)[0]++; \ + } else { \ + (dlp) = (dcp)[idx]; \ + (lp) = (pn) + (idx); \ + (lp)[0] = 1; \ + } \ +} while (0) + +/* + * Group consecutive packets with the same destination port in bursts of 4. + * Suppose we have array of destination ports: + * dst_port[] = {a, b, c, d, e, ... } + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. + * We do 4 comparisons at once and the result is a 4-bit mask. + * This mask is used as an index into prebuilt array of pnum values. 
+ */ +static inline uint16_t * +port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) +{ + static const struct { + uint64_t pnum; /* prebuild 4 values for pnum[]. */ + int32_t idx; /* index for new last updated elemnet. */ + uint16_t lpv; /* add value to the last updated element. */ + } gptbl[GRPSZ] = { + { + /* 0: a != b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010001), + .idx = 4, + .lpv = 0, + }, + { + /* 1: a == b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010002), + .idx = 4, + .lpv = 1, + }, + { + /* 2: a != b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020001), + .idx = 4, + .lpv = 0, + }, + { + /* 3: a == b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020003), + .idx = 4, + .lpv = 2, + }, + { + /* 4: a != b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010001), + .idx = 4, + .lpv = 0, + }, + { + /* 5: a == b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010002), + .idx = 4, + .lpv = 1, + }, + { + /* 6: a != b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030001), + .idx = 4, + .lpv = 0, + }, + { + /* 7: a == b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030004), + .idx = 4, + .lpv = 3, + }, + { + /* 8: a != b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010001), + .idx = 3, + .lpv = 0, + }, + { + /* 9: a == b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010002), + .idx = 3, + .lpv = 1, + }, + { + /* 0xa: a != b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020001), + .idx = 3, + .lpv = 0, + }, + { + /* 0xb: a == b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020003), + .idx = 3, + .lpv = 2, + }, + { + /* 0xc: a != b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010001), + .idx = 2, + .lpv = 0, + }, + { + /* 0xd: a == b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010002), + .idx = 2, + .lpv = 1, + }, + { + /* 0xe: a != b, b == c, c == d, d == e */ + 
.pnum = UINT64_C(0x0002000300040001), + .idx = 1, + .lpv = 0, + }, + { + /* 0xf: a == b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040005), + .idx = 0, + .lpv = 4, + }, + }; + + union { + uint16_t u16[FWDSTEP + 1]; + uint64_t u64; + } *pnum = (void *)pn; + + int32_t v; + + dp1 = _mm_cmpeq_epi16(dp1, dp2); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + v = _mm_movemask_ps((__m128)dp1); + + /* update last port counter. */ + lp[0] += gptbl[v].lpv; + + /* if dest port value has changed. */ + if (v != GRPMSK) { + pnum->u64 = gptbl[v].pnum; + pnum->u16[FWDSTEP] = 1; + lp = pnum->u16 + gptbl[v].idx; + } + + return lp; +} + +/** + * Process one packet: + * Update source and destination MAC addresses in the ethernet header. + * Perform RFC1812 checks and updates for IPV4 packets. + */ +static inline void +process_packet(struct rte_mbuf *pkt, uint16_t *dst_port) +{ + struct ether_hdr *eth_hdr; + __m128i te, ve; + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + + te = _mm_loadu_si128((__m128i *)eth_hdr); + ve = val_eth[dst_port[0]]; + + rfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port, + pkt->packet_type); + + te = _mm_blend_epi16(te, ve, MASK_ETH); + _mm_storeu_si128((__m128i *)eth_hdr, te); +} + +static inline __attribute__((always_inline)) void +send_packetsx4(struct lcore_conf *qconf, uint8_t port, struct rte_mbuf *m[], + uint32_t num) +{ + uint32_t len, j, n; + + len = qconf->tx_mbufs[port].len; + + /* + * If TX buffer for that queue is empty, and we have enough packets, + * then send them straightway. + */ + if (num >= MAX_TX_BURST && len == 0) { + n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); + if (unlikely(n < num)) { + do { + rte_pktmbuf_free(m[n]); + } while (++n < num); + } + return; + } + + /* + * Put packets into TX buffer for that queue. + */ + + n = len + num; + n = (n > MAX_PKT_BURST) ? 
MAX_PKT_BURST - len : num; + + j = 0; + switch (n % FWDSTEP) { + while (j < n) { + case 0: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + case 3: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + case 2: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + case 1: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + } + } + + len += n; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + + send_burst(qconf, MAX_PKT_BURST, port); + + /* copy rest of the packets into the TX buffer. */ + len = num - n; + j = 0; + switch (len % FWDSTEP) { + while (j < len) { + case 0: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + case 3: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + case 2: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + case 1: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + } + } + } + + qconf->tx_mbufs[port].len = len; +} + +/** + * Send packets burst from pkts_burst to the ports in dst_port array + */ +static inline __attribute__((always_inline)) void +send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, + uint16_t dst_port[MAX_PKT_BURST], int nb_rx) +{ + int32_t k; + int j = 0; + uint16_t dlp; + uint16_t *lp; + uint16_t pnum[MAX_PKT_BURST + 1]; + + /* + * Finish packet processing and group consecutive + * packets with the same destination port. + */ + k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); + if (k != 0) { + __m128i dp1, dp2; + + lp = pnum; + lp[0] = 1; + + processx4_step3(pkts_burst, dst_port); + + /* dp1: <d[0], d[1], d[2], d[3], ... > */ + dp1 = _mm_loadu_si128((__m128i *)dst_port); + + for (j = FWDSTEP; j != k; j += FWDSTEP) { + processx4_step3(&pkts_burst[j], &dst_port[j]); + + /* + * dp2: + * <d[j-3], d[j-2], d[j-1], d[j], ... > + */ + dp2 = _mm_loadu_si128((__m128i *) + &dst_port[j - FWDSTEP + 1]); + lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); + + /* + * dp1: + * <d[j], d[j+1], d[j+2], d[j+3], ... 
> + */ + dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) * + sizeof(dst_port[0])); + } + + /* + * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... > + */ + dp2 = _mm_shufflelo_epi16(dp1, 0xf9); + lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); + + /* + * remove values added by the last repeated + * dst port. + */ + lp[0]--; + dlp = dst_port[j - 1]; + } else { + /* set dlp and lp to the never used values. */ + dlp = BAD_PORT - 1; + lp = pnum + MAX_PKT_BURST; + } + + /* Process up to last 3 packets one by one. */ + switch (nb_rx % FWDSTEP) { + case 3: + process_packet(pkts_burst[j], dst_port + j); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + case 2: + process_packet(pkts_burst[j], dst_port + j); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + case 1: + process_packet(pkts_burst[j], dst_port + j); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + } + + /* + * Send packets out, through destination port. + * Consecutive packets with the same destination port + * are already grouped together. + * If destination port for the packet equals BAD_PORT, + * then free the packet without sending it out. + */ + for (j = 0; j < nb_rx; j += k) { + + int32_t m; + uint16_t pn; + + pn = dst_port[j]; + k = pnum[j]; + + if (likely(pn != BAD_PORT)) + send_packetsx4(qconf, pn, pkts_burst + j, k); + else + for (m = j; m != j + k; m++) + rte_pktmbuf_free(pkts_burst[m]); + + } +} + +#endif /* _L3FWD_COMMON_H_ */ diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c new file mode 100644 index 00000000..bf6d8856 --- /dev/null +++ b/examples/l3fwd/main.c @@ -0,0 +1,1055 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <signal.h> +#include <stdbool.h> + +#include <rte_common.h> +#include <rte_vect.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_string_fns.h> +#include <rte_cpuflags.h> + +#include <cmdline_parse.h> +#include <cmdline_parse_etheraddr.h> + +#include "l3fwd.h" + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS +#define MAX_RX_QUEUE_PER_PORT 128 + +#define MAX_LCORE_PARAMS 1024 + +/* Static global variables used within this file. */ +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/**< Ports set in promiscuous mode off by default. */ +static int promiscuous_on; + +/* Select Longest-Prefix or Exact match. */ +static int l3fwd_lpm_on; +static int l3fwd_em_on; + +static int numa_on = 1; /**< NUMA is enabled by default. */ +static int parse_ptype; /**< Parse packet type using rx callback, and */ + /**< disabled by default */ + +/* Global variables. 
*/ + +volatile bool force_quit; + +/* ethernet addresses of ports */ +uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; +struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +xmm_t val_eth[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +uint32_t enabled_port_mask; + +/* Used only in exact match mode. */ +int ipv6; /**< ipv6 is false by default. */ +uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT; + +struct lcore_conf lcore_conf[RTE_MAX_LCORE]; + +struct lcore_params { + uint8_t port_id; + uint8_t queue_id; + uint8_t lcore_id; +} __rte_cache_aligned; + +static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; +static struct lcore_params lcore_params_array_default[] = { + {0, 0, 2}, + {0, 1, 2}, + {0, 2, 2}, + {1, 0, 2}, + {1, 1, 2}, + {1, 2, 2}, + {2, 0, 2}, + {3, 0, 3}, + {3, 1, 3}, +}; + +static struct lcore_params * lcore_params = lcore_params_array_default; +static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / + sizeof(lcore_params_array_default[0]); + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; + +struct l3fwd_lkp_mode { + void (*setup)(int); + int (*check_ptype)(int); + rte_rx_callback_fn cb_parse_ptype; + int (*main_loop)(void *); + void* (*get_ipv4_lookup_struct)(int); + void* (*get_ipv6_lookup_struct)(int); +}; + +static struct l3fwd_lkp_mode l3fwd_lkp; + +static struct l3fwd_lkp_mode l3fwd_em_lkp = { + .setup = setup_hash, + .check_ptype = em_check_ptype, + 
.cb_parse_ptype = em_cb_parse_ptype, + .main_loop = em_main_loop, + .get_ipv4_lookup_struct = em_get_ipv4_l3fwd_lookup_struct, + .get_ipv6_lookup_struct = em_get_ipv6_l3fwd_lookup_struct, +}; + +static struct l3fwd_lkp_mode l3fwd_lpm_lkp = { + .setup = setup_lpm, + .check_ptype = lpm_check_ptype, + .cb_parse_ptype = lpm_cb_parse_ptype, + .main_loop = lpm_main_loop, + .get_ipv4_lookup_struct = lpm_get_ipv4_l3fwd_lookup_struct, + .get_ipv6_lookup_struct = lpm_get_ipv6_l3fwd_lookup_struct, +}; + +/* + * Setup lookup methods for forwarding. + * Currently exact-match and longest-prefix-match + * are supported ones. + */ +static void +setup_l3fwd_lookup_tables(void) +{ + /* Setup HASH lookup functions. */ + if (l3fwd_em_on) + l3fwd_lkp = l3fwd_em_lkp; + /* Setup LPM lookup functions. */ + else + l3fwd_lkp = l3fwd_lpm_lkp; +} + +static int +check_lcore_params(void) +{ + uint8_t queue, lcore; + uint16_t i; + int socketid; + + for (i = 0; i < nb_lcore_params; ++i) { + queue = lcore_params[i].queue_id; + if (queue >= MAX_RX_QUEUE_PER_PORT) { + printf("invalid queue number: %hhu\n", queue); + return -1; + } + lcore = lcore_params[i].lcore_id; + if (!rte_lcore_is_enabled(lcore)) { + printf("error: lcore %hhu is not enabled in lcore mask\n", lcore); + return -1; + } + if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && + (numa_on == 0)) { + printf("warning: lcore %hhu is on socket %d with numa off \n", + lcore, socketid); + } + } + return 0; +} + +static int +check_port_config(const unsigned nb_ports) +{ + unsigned portid; + uint16_t i; + + for (i = 0; i < nb_lcore_params; ++i) { + portid = lcore_params[i].port_id; + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("port %u is not enabled in port mask\n", portid); + return -1; + } + if (portid >= nb_ports) { + printf("port %u is not present on the board\n", portid); + return -1; + } + } + return 0; +} + +static uint8_t +get_port_n_rx_queues(const uint8_t port) +{ + int queue = -1; + uint16_t i; + + for (i = 0; i < 
nb_lcore_params; ++i) { + if (lcore_params[i].port_id == port) { + if (lcore_params[i].queue_id == queue+1) + queue = lcore_params[i].queue_id; + else + rte_exit(EXIT_FAILURE, "queue ids of the port %d must be" + " in sequence and must start with 0\n", + lcore_params[i].port_id); + } + } + return (uint8_t)(++queue); +} + +static int +init_lcore_rx_queues(void) +{ + uint16_t i, nb_rx_queue; + uint8_t lcore; + + for (i = 0; i < nb_lcore_params; ++i) { + lcore = lcore_params[i].lcore_id; + nb_rx_queue = lcore_conf[lcore].n_rx_queue; + if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { + printf("error: too many queues (%u) for lcore: %u\n", + (unsigned)nb_rx_queue + 1, (unsigned)lcore); + return -1; + } else { + lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = + lcore_params[i].port_id; + lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = + lcore_params[i].queue_id; + lcore_conf[lcore].n_rx_queue++; + } + } + return 0; +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf ("%s [EAL options] -- -p PORTMASK -P" + " [--config (port,queue,lcore)[,(port,queue,lcore]]" + " [--enable-jumbo [--max-pkt-len PKTLEN]]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -P : enable promiscuous mode\n" + " -E : enable exact match\n" + " -L : enable longest prefix match\n" + " --config (port,queue,lcore): rx queues configuration\n" + " --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n" + " --no-numa: optional, disable numa awareness\n" + " --ipv6: optional, specify it if running ipv6 packets\n" + " --enable-jumbo: enable jumbo frame" + " which max packet len is PKTLEN in decimal (64-9600)\n" + " --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n", + prgname); +} + +static int +parse_max_pkt_len(const char *pktlen) +{ + char *end = NULL; + unsigned long len; + + /* parse decimal string */ + len = strtoul(pktlen, &end, 10); + if ((pktlen[0] == '\0') || (end == NULL) || (*end != 
'\0')) + return -1; + + if (len == 0) + return -1; + + return len; +} + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +static int +parse_hash_entry_number(const char *hash_entry_num) +{ + char *end = NULL; + unsigned long hash_en; + /* parse hexadecimal string */ + hash_en = strtoul(hash_entry_num, &end, 16); + if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (hash_en == 0) + return -1; + + return hash_en; +} + +static int +parse_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + enum fieldnames { + FLD_PORT = 0, + FLD_QUEUE, + FLD_LCORE, + _NUM_FLD + }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int i; + unsigned size; + + nb_lcore_params = 0; + + while ((p = strchr(p0,'(')) != NULL) { + ++p; + if((p0 = strchr(p,')')) == NULL) + return -1; + + size = p0 - p; + if(size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++){ + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + if (nb_lcore_params >= MAX_LCORE_PARAMS) { + printf("exceeded max number of lcore params: %hu\n", + nb_lcore_params); + return -1; + } + lcore_params_array[nb_lcore_params].port_id = + (uint8_t)int_fld[FLD_PORT]; + lcore_params_array[nb_lcore_params].queue_id = + (uint8_t)int_fld[FLD_QUEUE]; + lcore_params_array[nb_lcore_params].lcore_id = + (uint8_t)int_fld[FLD_LCORE]; + ++nb_lcore_params; + } + lcore_params = lcore_params_array; + return 0; +} + +static void +parse_eth_dest(const char *optarg) +{ + uint8_t portid; + char *port_end; + uint8_t 
c, *dest, peer_addr[6]; + + errno = 0; + portid = strtoul(optarg, &port_end, 10); + if (errno != 0 || port_end == optarg || *port_end++ != ',') + rte_exit(EXIT_FAILURE, + "Invalid eth-dest: %s", optarg); + if (portid >= RTE_MAX_ETHPORTS) + rte_exit(EXIT_FAILURE, + "eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n", + portid, RTE_MAX_ETHPORTS); + + if (cmdline_parse_etheraddr(NULL, port_end, + &peer_addr, sizeof(peer_addr)) < 0) + rte_exit(EXIT_FAILURE, + "Invalid ethernet address: %s\n", + port_end); + dest = (uint8_t *)&dest_eth_addr[portid]; + for (c = 0; c < 6; c++) + dest[c] = peer_addr[c]; + *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; +} + +#define MAX_JUMBO_PKT_LEN 9600 +#define MEMPOOL_CACHE_SIZE 256 + +#define CMD_LINE_OPT_CONFIG "config" +#define CMD_LINE_OPT_ETH_DEST "eth-dest" +#define CMD_LINE_OPT_NO_NUMA "no-numa" +#define CMD_LINE_OPT_IPV6 "ipv6" +#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo" +#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num" +#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" + +/* + * This expression is used to calculate the number of mbufs needed + * depending on user input, taking into account memory for rx and + * tx hardware rings, cache per lcore and mtable per port per lcore. 
+ * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum + * value of 8192 + */ +#define NB_MBUF RTE_MAX( \ + (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ + nb_ports*nb_lcores*MAX_PKT_BURST + \ + nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ + nb_lcores*MEMPOOL_CACHE_SIZE), \ + (unsigned)8192) + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {CMD_LINE_OPT_CONFIG, 1, 0, 0}, + {CMD_LINE_OPT_ETH_DEST, 1, 0, 0}, + {CMD_LINE_OPT_NO_NUMA, 0, 0, 0}, + {CMD_LINE_OPT_IPV6, 0, 0, 0}, + {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0}, + {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0}, + {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + /* Error or normal output strings. */ + const char *str1 = "L3FWD: Invalid portmask"; + const char *str2 = "L3FWD: Promiscuous mode selected"; + const char *str3 = "L3FWD: Exact match selected"; + const char *str4 = "L3FWD: Longest-prefix match selected"; + const char *str5 = "L3FWD: Invalid config"; + const char *str6 = "L3FWD: NUMA is disabled"; + const char *str7 = "L3FWD: IPV6 is specified"; + const char *str8 = + "L3FWD: Jumbo frame is enabled - disabling simple TX path"; + const char *str9 = "L3FWD: Invalid packet length"; + const char *str10 = "L3FWD: Set jumbo frame max packet len to "; + const char *str11 = "L3FWD: Invalid hash entry number"; + const char *str12 = + "L3FWD: LPM and EM are mutually exclusive, select only one"; + const char *str13 = "L3FWD: LPM or EM none selected, default LPM on"; + + while ((opt = getopt_long(argc, argvopt, "p:PLE", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("%s\n", str1); + print_usage(prgname); + return -1; + } + break; + case 'P': + printf("%s\n", 
str2); + promiscuous_on = 1; + break; + + case 'E': + printf("%s\n", str3); + l3fwd_em_on = 1; + break; + + case 'L': + printf("%s\n", str4); + l3fwd_lpm_on = 1; + break; + + /* long options */ + case 0: + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_CONFIG, + sizeof(CMD_LINE_OPT_CONFIG))) { + + ret = parse_config(optarg); + if (ret) { + printf("%s\n", str5); + print_usage(prgname); + return -1; + } + } + + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_ETH_DEST, + sizeof(CMD_LINE_OPT_ETH_DEST))) { + parse_eth_dest(optarg); + } + + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_NO_NUMA, + sizeof(CMD_LINE_OPT_NO_NUMA))) { + printf("%s\n", str6); + numa_on = 0; + } + + /* + * ipv6 is reset to 0 after option parsing when + * longest-prefix match is selected. + */ + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_IPV6, + sizeof(CMD_LINE_OPT_IPV6))) { + printf("%s\n", str7); + ipv6 = 1; + } + + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_ENABLE_JUMBO, + sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) { + struct option lenopts = { + "max-pkt-len", required_argument, 0, 0 + }; + + printf("%s\n", str8); + port_conf.rxmode.jumbo_frame = 1; + + /* + * if no max-pkt-len set, use the default + * value ETHER_MAX_LEN. 
+ */ + if (0 == getopt_long(argc, argvopt, "", + &lenopts, &option_index)) { + ret = parse_max_pkt_len(optarg); + if ((ret < 64) || + (ret > MAX_JUMBO_PKT_LEN)) { + printf("%s\n", str9); + print_usage(prgname); + return -1; + } + port_conf.rxmode.max_rx_pkt_len = ret; + } + printf("%s %u\n", str10, + (unsigned int)port_conf.rxmode.max_rx_pkt_len); + } + + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_HASH_ENTRY_NUM, + sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) { + + ret = parse_hash_entry_number(optarg); + if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) { + hash_entry_number = ret; + } else { + printf("%s\n", str11); + print_usage(prgname); + return -1; + } + } + + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_PARSE_PTYPE, + sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { + printf("soft parse-ptype is enabled\n"); + parse_ptype = 1; + } + + break; + + default: + print_usage(prgname); + return -1; + } + } + + /* If both LPM and EM are selected, return error. */ + if (l3fwd_lpm_on && l3fwd_em_on) { + printf("%s\n", str12); + return -1; + } + + /* + * Nothing is selected, pick longest-prefix match + * as default match. + */ + if (!l3fwd_lpm_on && !l3fwd_em_on) { + l3fwd_lpm_on = 1; + printf("%s\n", str13); + } + + /* + * ipv6 and hash flags are valid only for + * exact macth, reset them to default for + * longest-prefix match. 
+ */ + if (l3fwd_lpm_on) { + ipv6 = 0; + hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT; + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, const struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +static int +init_mem(unsigned nb_mbuf) +{ + struct lcore_conf *qconf; + int socketid; + unsigned lcore_id; + char s[64]; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socketid = rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + if (socketid >= NB_SOCKETS) { + rte_exit(EXIT_FAILURE, + "Socket %d of lcore %u is out of range %d\n", + socketid, lcore_id, NB_SOCKETS); + } + + if (pktmbuf_pool[socketid] == NULL) { + snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); + pktmbuf_pool[socketid] = + rte_pktmbuf_pool_create(s, nb_mbuf, + MEMPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, socketid); + if (pktmbuf_pool[socketid] == NULL) + rte_exit(EXIT_FAILURE, + "Cannot init mbuf pool on socket %d\n", + socketid); + else + printf("Allocated mbuf pool on socket %d\n", + socketid); + + /* Setup either LPM or EM(f.e Hash). 
*/ + l3fwd_lkp.setup(socketid); + } + qconf = &lcore_conf[lcore_id]; + qconf->ipv4_lookup_struct = + l3fwd_lkp.get_ipv4_lookup_struct(socketid); + qconf->ipv6_lookup_struct = + l3fwd_lkp.get_ipv6_lookup_struct(socketid); + } + return 0; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + if (force_quit) + return; + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if (force_quit) + return; + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +static void +signal_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) { + printf("\n\nSignal %d received, preparing to exit...\n", + signum); + force_quit = true; + } +} + +static int +prepare_ptype_parser(uint8_t portid, uint16_t queueid) +{ + if (parse_ptype) { + printf("Port %d: softly parse packet type info\n", portid); + if (rte_eth_add_rx_callback(portid, queueid, + l3fwd_lkp.cb_parse_ptype, + NULL)) + return 1; + + printf("Failed to add rx callback: port=%d\n", portid); + return 0; + } + + if (l3fwd_lkp.check_ptype(portid)) + return 1; + + printf("port %d cannot parse packet type, please add --%s\n", + portid, CMD_LINE_OPT_PARSE_PTYPE); + return 0; +} + +int +main(int argc, char **argv) +{ + struct lcore_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + int ret; + unsigned nb_ports; + uint16_t queueid; + unsigned lcore_id; + uint32_t n_tx_queue, nb_lcores; + uint8_t portid, nb_rx_queue, queue, socketid; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + argc -= ret; + argv += ret; + + force_quit = false; + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + dest_eth_addr[portid] = + ETHER_LOCAL_ADMIN_ADDR + 
((uint64_t)portid << 40); + *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; + } + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); + + if (check_lcore_params() < 0) + rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); + + ret = init_lcore_rx_queues(); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + if (check_port_config(nb_ports) < 0) + rte_exit(EXIT_FAILURE, "check_port_config failed\n"); + + nb_lcores = rte_lcore_count(); + + /* Setup function pointers for lookup method. */ + setup_l3fwd_lookup_tables(); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + + /* init port */ + printf("Initializing port %d ... ", portid ); + fflush(stdout); + + nb_rx_queue = get_port_n_rx_queues(portid); + n_tx_queue = nb_lcores; + if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) + n_tx_queue = MAX_TX_QUEUE_PER_PORT; + printf("Creating queues: nb_rxq=%d nb_txq=%u... ", + nb_rx_queue, (unsigned)n_tx_queue ); + ret = rte_eth_dev_configure(portid, nb_rx_queue, + (uint16_t)n_tx_queue, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot configure device: err=%d, port=%d\n", + ret, portid); + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + printf(", "); + print_ethaddr("Destination:", + (const struct ether_addr *)&dest_eth_addr[portid]); + printf(", "); + + /* + * prepare src MACs for each port. 
+ */ + ether_addr_copy(&ports_eth_addr[portid], + (struct ether_addr *)(val_eth + portid) + 1); + + /* init memory */ + ret = init_mem(NB_MBUF); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_mem failed\n"); + + /* init one TX queue per couple (lcore,port) */ + queueid = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socketid = + (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + if (port_conf.rxmode.jumbo_frame) + txconf->txq_flags = 0; + ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, + socketid, txconf); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_tx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + + qconf = &lcore_conf[lcore_id]; + qconf->tx_queue_id[portid] = queueid; + queueid++; + + qconf->tx_port_id[qconf->n_tx_port] = portid; + qconf->n_tx_port++; + } + printf("\n"); + } + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + qconf = &lcore_conf[lcore_id]; + printf("\nInitializing rx queues on lcore %u ... 
", lcore_id ); + fflush(stdout); + /* init RX queues */ + for(queue = 0; queue < qconf->n_rx_queue; ++queue) { + portid = qconf->rx_queue_list[queue].port_id; + queueid = qconf->rx_queue_list[queue].queue_id; + + if (numa_on) + socketid = + (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("rxq=%d,%d,%d ", portid, queueid, socketid); + fflush(stdout); + + ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, + socketid, + NULL, + pktmbuf_pool[socketid]); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_rx_queue_setup: err=%d, port=%d\n", + ret, portid); + } + } + + printf("\n"); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) { + continue; + } + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + + /* + * If enabled, put device in promiscuous mode. + * This allows IO forwarding mode to forward packets + * to itself through 2 cross-connected ports of the + * target machine. 
+ */ + if (promiscuous_on) + rte_eth_promiscuous_enable(portid); + } + + printf("\n"); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + qconf = &lcore_conf[lcore_id]; + for (queue = 0; queue < qconf->n_rx_queue; ++queue) { + portid = qconf->rx_queue_list[queue].port_id; + queueid = qconf->rx_queue_list[queue].queue_id; + if (prepare_ptype_parser(portid, queueid) == 0) + rte_exit(EXIT_FAILURE, "ptype check fails\n"); + } + } + + + check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); + + ret = 0; + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(l3fwd_lkp.main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) { + ret = -1; + break; + } + } + + /* stop ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + printf("Closing port %d...", portid); + rte_eth_dev_stop(portid); + rte_eth_dev_close(portid); + printf(" Done\n"); + } + printf("Bye...\n"); + + return ret; +} diff --git a/examples/link_status_interrupt/Makefile b/examples/link_status_interrupt/Makefile new file mode 100644 index 00000000..9ecc7fc4 --- /dev/null +++ b/examples/link_status_interrupt/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = link_status_interrupt + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/link_status_interrupt/main.c b/examples/link_status_interrupt/main.c new file mode 100644 index 00000000..99815989 --- /dev/null +++ b/examples/link_status_interrupt/main.c @@ -0,0 +1,732 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <netinet/in.h> +#include <setjmp.h> +#include <stdarg.h> +#include <ctype.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> + +#define RTE_LOGTYPE_LSI RTE_LOGTYPE_USER1 + +#define NB_MBUF 8192 + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr lsi_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint32_t lsi_enabled_port_mask = 0; + +static unsigned int lsi_rx_queue_per_lcore = 1; + +/* destination port for L2 forwarding */ +static unsigned lsi_dst_ports[RTE_MAX_ETHPORTS] = {0}; + +#define MAX_PKT_BURST 32 + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + unsigned n_rx_port; + unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; + unsigned tx_queue_id; +} __rte_cache_aligned; +struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +struct rte_eth_dev_tx_buffer 
*tx_buffer[RTE_MAX_ETHPORTS]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, + .intr_conf = { + .lsc = 1, /**< lsc interrupt feature enabled */ + }, +}; + +struct rte_mempool * lsi_pktmbuf_pool = NULL; + +/* Per-port statistics struct */ +struct lsi_port_statistics { + uint64_t tx; + uint64_t rx; + uint64_t dropped; +} __rte_cache_aligned; +struct lsi_port_statistics port_statistics[RTE_MAX_ETHPORTS]; + +/* A tsc-based timer responsible for triggering statistics printout */ +#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 Ghz */ +#define MAX_TIMER_PERIOD 86400 /* 1 day max */ +static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */ + +/* Print out statistics on packets dropped */ +static void +print_stats(void) +{ + struct rte_eth_link link; + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; + unsigned portid; + + total_packets_dropped = 0; + total_packets_tx = 0; + total_packets_rx = 0; + + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' }; + + /* Clear screen and move to top left */ + printf("%s%s", clr, topLeft); + + printf("\nPort statistics ===================================="); + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + /* skip ports that are not enabled */ + if ((lsi_enabled_port_mask & (1 << portid)) == 0) + continue; + + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait((uint8_t)portid, &link); + printf("\nStatistics for port %u ------------------------------" + "\nLink status: %25s" + "\nLink speed: %26u" + "\nLink duplex: %25s" + 
"\nPackets sent: %24"PRIu64 + "\nPackets received: %20"PRIu64 + "\nPackets dropped: %21"PRIu64, + portid, + (link.link_status ? "Link up" : "Link down"), + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX ? \ + "full-duplex" : "half-duplex"), + port_statistics[portid].tx, + port_statistics[portid].rx, + port_statistics[portid].dropped); + + total_packets_dropped += port_statistics[portid].dropped; + total_packets_tx += port_statistics[portid].tx; + total_packets_rx += port_statistics[portid].rx; + } + printf("\nAggregate statistics ===============================" + "\nTotal packets sent: %18"PRIu64 + "\nTotal packets received: %14"PRIu64 + "\nTotal packets dropped: %15"PRIu64, + total_packets_tx, + total_packets_rx, + total_packets_dropped); + printf("\n====================================================\n"); +} + +static void +lsi_simple_forward(struct rte_mbuf *m, unsigned portid) +{ + struct ether_hdr *eth; + void *tmp; + unsigned dst_port = lsi_dst_ports[portid]; + int sent; + struct rte_eth_dev_tx_buffer *buffer; + + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = ð->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&lsi_ports_eth_addr[dst_port], ð->s_addr); + + buffer = tx_buffer[dst_port]; + sent = rte_eth_tx_buffer(dst_port, 0, buffer, m); + if (sent) + port_statistics[dst_port].tx += sent; +} + +/* main processing loop */ +static void +lsi_main_loop(void) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct rte_mbuf *m; + unsigned lcore_id; + unsigned sent; + uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc; + unsigned i, j, portid, nb_rx; + struct lcore_queue_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * + BURST_TX_DRAIN_US; + struct rte_eth_dev_tx_buffer *buffer; + + prev_tsc = 0; + timer_tsc = 0; + + lcore_id = rte_lcore_id(); + qconf = &lcore_queue_conf[lcore_id]; + + if 
(qconf->n_rx_port == 0) { + RTE_LOG(INFO, LSI, "lcore %u has nothing to do\n", lcore_id); + return; + } + + RTE_LOG(INFO, LSI, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = qconf->rx_port_list[i]; + RTE_LOG(INFO, LSI, " -- lcoreid=%u portid=%u\n", lcore_id, + portid); + } + + while (1) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = lsi_dst_ports[qconf->rx_port_list[i]]; + buffer = tx_buffer[portid]; + + sent = rte_eth_tx_buffer_flush(portid, 0, buffer); + if (sent) + port_statistics[portid].tx += sent; + + } + + /* if timer is enabled */ + if (timer_period > 0) { + + /* advance the timer */ + timer_tsc += diff_tsc; + + /* if timer has reached its timeout */ + if (unlikely(timer_tsc >= (uint64_t) timer_period)) { + + /* do this only on master core */ + if (lcore_id == rte_get_master_lcore()) { + print_stats(); + /* reset the timer */ + timer_tsc = 0; + } + } + } + + prev_tsc = cur_tsc; + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = qconf->rx_port_list[i]; + nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, + pkts_burst, MAX_PKT_BURST); + + port_statistics[portid].rx += nb_rx; + + for (j = 0; j < nb_rx; j++) { + m = pkts_burst[j]; + rte_prefetch0(rte_pktmbuf_mtod(m, void *)); + lsi_simple_forward(m, portid); + } + } + } +} + +static int +lsi_launch_one_lcore(__attribute__((unused)) void *dummy) +{ + lsi_main_loop(); + return 0; +} + +/* display usage */ +static void +lsi_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -q NQ: number of queue (=ports) per lcore (default is 1)\n" + " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n", + prgname); +} + +static 
int +lsi_parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + /* + * Report failure as 0, not -1: the caller stores the result in the + * unsigned lsi_enabled_port_mask and only tests it against 0, so a + * -1 return would be accepted as an all-ports mask. + */ + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + + /* an empty mask (pm == 0) is reported the same way */ + return pm; +} + +static unsigned int +lsi_parse_nqueue(const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse decimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + if (n == 0) + return 0; + if (n >= MAX_RX_QUEUE_PER_LCORE) + return 0; + + return n; +} + +static int +lsi_parse_timer_period(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n >= MAX_TIMER_PERIOD) + return -1; + + return n; +} + +/* Parse the argument given in the command line of the application */ +static int +lsi_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:q:T:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + lsi_enabled_port_mask = lsi_parse_portmask(optarg); + if (lsi_enabled_port_mask == 0) { + printf("invalid portmask\n"); + lsi_usage(prgname); + return -1; + } + break; + + /* nqueue */ + case 'q': + lsi_rx_queue_per_lcore = lsi_parse_nqueue(optarg); + if (lsi_rx_queue_per_lcore == 0) { + printf("invalid queue number\n"); + lsi_usage(prgname); + return -1; + } + break; + + /* timer period */ + case 'T': + timer_period = lsi_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND; + if (timer_period < 0) { + printf("invalid timer period\n"); + lsi_usage(prgname); + return -1; + } + break; + + /* long options */ + case 0: + lsi_usage(prgname); + return -1; + + 
default: + lsi_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +/** + * It will be called as the callback for specified port after a LSI interrupt + * has been fully handled. This callback needs to be implemented carefully as + * it will be called in the interrupt host thread which is different from the + * application main thread. + * + * @param port_id + * Port id. + * @param type + * event type. + * @param param + * Pointer to(address of) the parameters. + * + * @return + * void. + */ +static void +lsi_event_callback(uint8_t port_id, enum rte_eth_event_type type, void *param) +{ + struct rte_eth_link link; + + RTE_SET_USED(param); + + printf("\n\nIn registered callback...\n"); + printf("Event type: %s\n", type == RTE_ETH_EVENT_INTR_LSC ? "LSC interrupt" : "unknown event"); + rte_eth_link_get_nowait(port_id, &link); + if (link.link_status) { + printf("Port %d Link Up - speed %u Mbps - %s\n\n", + port_id, (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex")); + } else + printf("Port %d Link Down\n\n", port_id); +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + struct rte_eth_dev_info dev_info; + int ret; + uint8_t nb_ports; + uint8_t portid, portid_last = 0; + unsigned lcore_id, rx_lcore_id; + unsigned nb_ports_in_mask = 0; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eal_init failed"); + argc -= ret; + argv += ret; + + /* parse application 
arguments (after the EAL ones) */ + ret = lsi_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid arguments"); + + /* create the mbuf pool */ + lsi_pktmbuf_pool = + rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (lsi_pktmbuf_pool == NULL) + rte_panic("Cannot init mbuf pool\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_panic("No Ethernet port - bye\n"); + + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* + * Each logical core is assigned a dedicated TX queue on each port. + */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((lsi_enabled_port_mask & (1 << portid)) == 0) + continue; + + /* save the destination port id */ + if (nb_ports_in_mask % 2) { + lsi_dst_ports[portid] = portid_last; + lsi_dst_ports[portid_last] = portid; + } + else + portid_last = portid; + + nb_ports_in_mask++; + + rte_eth_dev_info_get(portid, &dev_info); + } + if (nb_ports_in_mask < 2 || nb_ports_in_mask % 2) + rte_exit(EXIT_FAILURE, "Current enabled port number is %u, " + "but it should be even and at least 2\n", + nb_ports_in_mask); + + rx_lcore_id = 0; + qconf = &lcore_queue_conf[rx_lcore_id]; + + /* Initialize the port/queue configuration of each logical core */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((lsi_enabled_port_mask & (1 << portid)) == 0) + continue; + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + lcore_queue_conf[rx_lcore_id].n_rx_port == + lsi_rx_queue_per_lcore) { + + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + } + if (qconf != &lcore_queue_conf[rx_lcore_id]) + /* Assigned a new logical core in the loop above. 
*/ + qconf = &lcore_queue_conf[rx_lcore_id]; + + qconf->rx_port_list[qconf->n_rx_port] = portid; + qconf->n_rx_port++; + printf("Lcore %u: RX port %u\n",rx_lcore_id, (unsigned) portid); + } + + /* Initialise each port */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((lsi_enabled_port_mask & (1 << portid)) == 0) { + printf("Skipping disabled port %u\n", (unsigned) portid); + continue; + } + /* init port */ + printf("Initializing port %u... ", (unsigned) portid); + fflush(stdout); + ret = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", + ret, (unsigned) portid); + + /* register lsi interrupt callback, need to be after + * rte_eth_dev_configure(). if (intr_conf.lsc == 0), no + * lsc interrupt will be present, and below callback to + * be registered will never be called. + */ + rte_eth_dev_callback_register(portid, + RTE_ETH_EVENT_INTR_LSC, lsi_event_callback, NULL); + + rte_eth_macaddr_get(portid, + &lsi_ports_eth_addr[portid]); + + /* init one RX queue */ + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + rte_eth_dev_socket_id(portid), + NULL, + lsi_pktmbuf_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, port=%u\n", + ret, (unsigned) portid); + + /* init one TX queue logical core on each port */ + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, + rte_eth_dev_socket_id(portid), + NULL); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d,port=%u\n", + ret, (unsigned) portid); + + /* Initialize TX buffers */ + tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", + RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, + rte_eth_dev_socket_id(portid)); + if (tx_buffer[portid] == NULL) + rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n", + (unsigned) portid); + + rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST); + + ret = 
rte_eth_tx_buffer_set_err_callback(tx_buffer[portid], + rte_eth_tx_buffer_count_callback, + &port_statistics[portid].dropped); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot set error callback for " + "tx buffer on port %u\n", (unsigned) portid); + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%u\n", + ret, (unsigned) portid); + printf("done:\n"); + + rte_eth_promiscuous_enable(portid); + + printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", + (unsigned) portid, + lsi_ports_eth_addr[portid].addr_bytes[0], + lsi_ports_eth_addr[portid].addr_bytes[1], + lsi_ports_eth_addr[portid].addr_bytes[2], + lsi_ports_eth_addr[portid].addr_bytes[3], + lsi_ports_eth_addr[portid].addr_bytes[4], + lsi_ports_eth_addr[portid].addr_bytes[5]); + + /* initialize port stats */ + memset(&port_statistics, 0, sizeof(port_statistics)); + } + + check_all_ports_link_status(nb_ports, lsi_enabled_port_mask); + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(lsi_launch_one_lcore, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/load_balancer/Makefile b/examples/load_balancer/Makefile new file mode 100644 index 00000000..2c5fd9b0 --- /dev/null +++ b/examples/load_balancer/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = load_balancer + +# all source are stored in SRCS-y +SRCS-y := main.c config.c init.c runtime.c + +CFLAGS += -O3 -g +CFLAGS += $(WERROR_FLAGS) +CFLAGS_config.o := -D_GNU_SOURCE + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/load_balancer/config.c b/examples/load_balancer/config.c new file mode 100644 index 00000000..3f6ddee5 --- /dev/null +++ b/examples/load_balancer/config.c @@ -0,0 +1,1063 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_lpm.h> +#include <rte_string_fns.h> + +#include "main.h" + +struct app_params app; + +static const char usage[] = +" \n" +" load_balancer <EAL PARAMS> -- <APP PARAMS> \n" +" \n" +"Application manadatory parameters: \n" +" --rx \"(PORT, QUEUE, LCORE), ...\" : List of NIC RX ports and queues \n" +" handled by the I/O RX lcores \n" +" --tx \"(PORT, LCORE), ...\" : List of NIC TX ports handled by the I/O TX \n" +" lcores \n" +" --w \"LCORE, ...\" : List of the worker lcores \n" +" --lpm \"IP / PREFIX => PORT; ...\" : List of LPM rules used by the worker \n" +" 
lcores for packet forwarding \n" +" \n" +"Application optional parameters: \n" +" --rsz \"A, B, C, D\" : Ring sizes \n" +" A = Size (in number of buffer descriptors) of each of the NIC RX \n" +" rings read by the I/O RX lcores (default value is %u) \n" +" B = Size (in number of elements) of each of the SW rings used by the\n" +" I/O RX lcores to send packets to worker lcores (default value is\n" +" %u) \n" +" C = Size (in number of elements) of each of the SW rings used by the\n" +" worker lcores to send packets to I/O TX lcores (default value is\n" +" %u) \n" +" D = Size (in number of buffer descriptors) of each of the NIC TX \n" +" rings written by I/O TX lcores (default value is %u) \n" +" --bsz \"(A, B), (C, D), (E, F)\" : Burst sizes \n" +" A = I/O RX lcore read burst size from NIC RX (default value is %u) \n" +" B = I/O RX lcore write burst size to output SW rings (default value \n" +" is %u) \n" +" C = Worker lcore read burst size from input SW rings (default value \n" +" is %u) \n" +" D = Worker lcore write burst size to output SW rings (default value \n" +" is %u) \n" +" E = I/O TX lcore read burst size from input SW rings (default value \n" +" is %u) \n" +" F = I/O TX lcore write burst size to NIC TX (default value is %u) \n" +" --pos-lb POS : Position of the 1-byte field within the input packet used by\n" +" the I/O RX lcores to identify the worker lcore for the current \n" +" packet (default value is %u) \n"; + +void +app_print_usage(void) +{ + printf(usage, + APP_DEFAULT_NIC_RX_RING_SIZE, + APP_DEFAULT_RING_RX_SIZE, + APP_DEFAULT_RING_TX_SIZE, + APP_DEFAULT_NIC_TX_RING_SIZE, + APP_DEFAULT_BURST_SIZE_IO_RX_READ, + APP_DEFAULT_BURST_SIZE_IO_RX_WRITE, + APP_DEFAULT_BURST_SIZE_WORKER_READ, + APP_DEFAULT_BURST_SIZE_WORKER_WRITE, + APP_DEFAULT_BURST_SIZE_IO_TX_READ, + APP_DEFAULT_BURST_SIZE_IO_TX_WRITE, + APP_DEFAULT_IO_RX_LB_POS + ); +} + +#ifndef APP_ARG_RX_MAX_CHARS +#define APP_ARG_RX_MAX_CHARS 4096 +#endif + +#ifndef APP_ARG_RX_MAX_TUPLES +#define 
APP_ARG_RX_MAX_TUPLES 128 +#endif + +static int +str_to_unsigned_array( + const char *s, size_t sbuflen, + char separator, + unsigned num_vals, + unsigned *vals) +{ + char str[sbuflen+1]; + char *splits[num_vals]; + char *endptr = NULL; + int i, num_splits = 0; + + /* copy s so we don't modify original string */ + snprintf(str, sizeof(str), "%s", s); + num_splits = rte_strsplit(str, sizeof(str), splits, num_vals, separator); + + errno = 0; + for (i = 0; i < num_splits; i++) { + vals[i] = strtoul(splits[i], &endptr, 0); + if (errno != 0 || *endptr != '\0') + return -1; + } + + return num_splits; +} + +static int +str_to_unsigned_vals( + const char *s, + size_t sbuflen, + char separator, + unsigned num_vals, ...) +{ + unsigned i, vals[num_vals]; + va_list ap; + + num_vals = str_to_unsigned_array(s, sbuflen, separator, num_vals, vals); + + va_start(ap, num_vals); + for (i = 0; i < num_vals; i++) { + unsigned *u = va_arg(ap, unsigned *); + *u = vals[i]; + } + va_end(ap); + return num_vals; +} + +static int +parse_arg_rx(const char *arg) +{ + const char *p0 = arg, *p = arg; + uint32_t n_tuples; + + if (strnlen(arg, APP_ARG_RX_MAX_CHARS + 1) == APP_ARG_RX_MAX_CHARS + 1) { + return -1; + } + + n_tuples = 0; + while ((p = strchr(p0,'(')) != NULL) { + struct app_lcore_params *lp; + uint32_t port, queue, lcore, i; + + p0 = strchr(p++, ')'); + if ((p0 == NULL) || + (str_to_unsigned_vals(p, p0 - p, ',', 3, &port, &queue, &lcore) != 3)) { + return -2; + } + + /* Enable port and queue for later initialization */ + if ((port >= APP_MAX_NIC_PORTS) || (queue >= APP_MAX_RX_QUEUES_PER_NIC_PORT)) { + return -3; + } + if (app.nic_rx_queue_mask[port][queue] != 0) { + return -4; + } + app.nic_rx_queue_mask[port][queue] = 1; + + /* Check and assign (port, queue) to I/O lcore */ + if (rte_lcore_is_enabled(lcore) == 0) { + return -5; + } + + if (lcore >= APP_MAX_LCORES) { + return -6; + } + lp = &app.lcore_params[lcore]; + if (lp->type == e_APP_LCORE_WORKER) { + return -7; + } + lp->type = 
e_APP_LCORE_IO; + const size_t n_queues = RTE_MIN(lp->io.rx.n_nic_queues, + RTE_DIM(lp->io.rx.nic_queues)); + for (i = 0; i < n_queues; i ++) { + if ((lp->io.rx.nic_queues[i].port == port) && + (lp->io.rx.nic_queues[i].queue == queue)) { + return -8; + } + } + if (lp->io.rx.n_nic_queues >= APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE) { + return -9; + } + lp->io.rx.nic_queues[lp->io.rx.n_nic_queues].port = (uint8_t) port; + lp->io.rx.nic_queues[lp->io.rx.n_nic_queues].queue = (uint8_t) queue; + lp->io.rx.n_nic_queues ++; + + n_tuples ++; + if (n_tuples > APP_ARG_RX_MAX_TUPLES) { + return -10; + } + } + + if (n_tuples == 0) { + return -11; + } + + return 0; +} + +#ifndef APP_ARG_TX_MAX_CHARS +#define APP_ARG_TX_MAX_CHARS 4096 +#endif + +#ifndef APP_ARG_TX_MAX_TUPLES +#define APP_ARG_TX_MAX_TUPLES 128 +#endif + +static int +parse_arg_tx(const char *arg) +{ + const char *p0 = arg, *p = arg; + uint32_t n_tuples; + + if (strnlen(arg, APP_ARG_TX_MAX_CHARS + 1) == APP_ARG_TX_MAX_CHARS + 1) { + return -1; + } + + n_tuples = 0; + while ((p = strchr(p0,'(')) != NULL) { + struct app_lcore_params *lp; + uint32_t port, lcore, i; + + p0 = strchr(p++, ')'); + if ((p0 == NULL) || + (str_to_unsigned_vals(p, p0 - p, ',', 2, &port, &lcore) != 2)) { + return -2; + } + + /* Enable port and queue for later initialization */ + if (port >= APP_MAX_NIC_PORTS) { + return -3; + } + if (app.nic_tx_port_mask[port] != 0) { + return -4; + } + app.nic_tx_port_mask[port] = 1; + + /* Check and assign (port, queue) to I/O lcore */ + if (rte_lcore_is_enabled(lcore) == 0) { + return -5; + } + + if (lcore >= APP_MAX_LCORES) { + return -6; + } + lp = &app.lcore_params[lcore]; + if (lp->type == e_APP_LCORE_WORKER) { + return -7; + } + lp->type = e_APP_LCORE_IO; + const size_t n_ports = RTE_MIN(lp->io.tx.n_nic_ports, + RTE_DIM(lp->io.tx.nic_ports)); + for (i = 0; i < n_ports; i ++) { + if (lp->io.tx.nic_ports[i] == port) { + return -8; + } + } + if (lp->io.tx.n_nic_ports >= APP_MAX_NIC_TX_PORTS_PER_IO_LCORE) { + 
return -9; + } + lp->io.tx.nic_ports[lp->io.tx.n_nic_ports] = (uint8_t) port; + lp->io.tx.n_nic_ports ++; + + n_tuples ++; + if (n_tuples > APP_ARG_TX_MAX_TUPLES) { + return -10; + } + } + + if (n_tuples == 0) { + return -11; + } + + return 0; +} + +#ifndef APP_ARG_W_MAX_CHARS +#define APP_ARG_W_MAX_CHARS 4096 +#endif + +#ifndef APP_ARG_W_MAX_TUPLES +#define APP_ARG_W_MAX_TUPLES APP_MAX_WORKER_LCORES +#endif + +static int +parse_arg_w(const char *arg) +{ + const char *p = arg; + uint32_t n_tuples; + + if (strnlen(arg, APP_ARG_W_MAX_CHARS + 1) == APP_ARG_W_MAX_CHARS + 1) { + return -1; + } + + n_tuples = 0; + while (*p != 0) { + struct app_lcore_params *lp; + uint32_t lcore; + + errno = 0; + lcore = strtoul(p, NULL, 0); + if ((errno != 0)) { + return -2; + } + + /* Check and enable worker lcore */ + if (rte_lcore_is_enabled(lcore) == 0) { + return -3; + } + + if (lcore >= APP_MAX_LCORES) { + return -4; + } + lp = &app.lcore_params[lcore]; + if (lp->type == e_APP_LCORE_IO) { + return -5; + } + lp->type = e_APP_LCORE_WORKER; + + n_tuples ++; + if (n_tuples > APP_ARG_W_MAX_TUPLES) { + return -6; + } + + p = strchr(p, ','); + if (p == NULL) { + break; + } + p ++; + } + + if (n_tuples == 0) { + return -7; + } + + if ((n_tuples & (n_tuples - 1)) != 0) { + return -8; + } + + return 0; +} + +#ifndef APP_ARG_LPM_MAX_CHARS +#define APP_ARG_LPM_MAX_CHARS 4096 +#endif + +static int +parse_arg_lpm(const char *arg) +{ + const char *p = arg, *p0; + + if (strnlen(arg, APP_ARG_LPM_MAX_CHARS + 1) == APP_ARG_TX_MAX_CHARS + 1) { + return -1; + } + + while (*p != 0) { + uint32_t ip_a, ip_b, ip_c, ip_d, ip, depth, if_out; + char *endptr; + + p0 = strchr(p, '/'); + if ((p0 == NULL) || + (str_to_unsigned_vals(p, p0 - p, '.', 4, &ip_a, &ip_b, &ip_c, &ip_d) != 4)) { + return -2; + } + + p = p0 + 1; + errno = 0; + depth = strtoul(p, &endptr, 0); + if (errno != 0 || *endptr != '=') { + return -3; + } + p = strchr(p, '>'); + if (p == NULL) { + return -4; + } + if_out = strtoul(++p, &endptr, 0); 
+ if (errno != 0 || (*endptr != '\0' && *endptr != ';')) { + return -5; + } + + if ((ip_a >= 256) || (ip_b >= 256) || (ip_c >= 256) || (ip_d >= 256) || + (depth == 0) || (depth >= 32) || + (if_out >= APP_MAX_NIC_PORTS)) { + return -6; + } + ip = (ip_a << 24) | (ip_b << 16) | (ip_c << 8) | ip_d; + + if (app.n_lpm_rules >= APP_MAX_LPM_RULES) { + return -7; + } + app.lpm_rules[app.n_lpm_rules].ip = ip; + app.lpm_rules[app.n_lpm_rules].depth = (uint8_t) depth; + app.lpm_rules[app.n_lpm_rules].if_out = (uint8_t) if_out; + app.n_lpm_rules ++; + + p = strchr(p, ';'); + if (p == NULL) { + return -8; + } + p ++; + } + + if (app.n_lpm_rules == 0) { + return -9; + } + + return 0; +} + +static int +app_check_lpm_table(void) +{ + uint32_t rule; + + /* For each rule, check that the output I/F is enabled */ + for (rule = 0; rule < app.n_lpm_rules; rule ++) + { + uint32_t port = app.lpm_rules[rule].if_out; + + if (app.nic_tx_port_mask[port] == 0) { + return -1; + } + } + + return 0; +} + +static int +app_check_every_rx_port_is_tx_enabled(void) +{ + uint8_t port; + + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + if ((app_get_nic_rx_queues_per_port(port) > 0) && (app.nic_tx_port_mask[port] == 0)) { + return -1; + } + } + + return 0; +} + +#ifndef APP_ARG_RSZ_CHARS +#define APP_ARG_RSZ_CHARS 63 +#endif + +static int +parse_arg_rsz(const char *arg) +{ + if (strnlen(arg, APP_ARG_RSZ_CHARS + 1) == APP_ARG_RSZ_CHARS + 1) { + return -1; + } + + if (str_to_unsigned_vals(arg, APP_ARG_RSZ_CHARS, ',', 4, + &app.nic_rx_ring_size, + &app.ring_rx_size, + &app.ring_tx_size, + &app.nic_tx_ring_size) != 4) + return -2; + + + if ((app.nic_rx_ring_size == 0) || + (app.nic_tx_ring_size == 0) || + (app.ring_rx_size == 0) || + (app.ring_tx_size == 0)) { + return -3; + } + + return 0; +} + +#ifndef APP_ARG_BSZ_CHARS +#define APP_ARG_BSZ_CHARS 63 +#endif + +static int +parse_arg_bsz(const char *arg) +{ + const char *p = arg, *p0; + if (strnlen(arg, APP_ARG_BSZ_CHARS + 1) == APP_ARG_BSZ_CHARS + 1) 
{ + return -1; + } + + p0 = strchr(p++, ')'); + if ((p0 == NULL) || + (str_to_unsigned_vals(p, p0 - p, ',', 2, &app.burst_size_io_rx_read, &app.burst_size_io_rx_write) != 2)) { + return -2; + } + + p = strchr(p0, '('); + if (p == NULL) { + return -3; + } + + p0 = strchr(p++, ')'); + if ((p0 == NULL) || + (str_to_unsigned_vals(p, p0 - p, ',', 2, &app.burst_size_worker_read, &app.burst_size_worker_write) != 2)) { + return -4; + } + + p = strchr(p0, '('); + if (p == NULL) { + return -5; + } + + p0 = strchr(p++, ')'); + if ((p0 == NULL) || + (str_to_unsigned_vals(p, p0 - p, ',', 2, &app.burst_size_io_tx_read, &app.burst_size_io_tx_write) != 2)) { + return -6; + } + + if ((app.burst_size_io_rx_read == 0) || + (app.burst_size_io_rx_write == 0) || + (app.burst_size_worker_read == 0) || + (app.burst_size_worker_write == 0) || + (app.burst_size_io_tx_read == 0) || + (app.burst_size_io_tx_write == 0)) { + return -7; + } + + if ((app.burst_size_io_rx_read > APP_MBUF_ARRAY_SIZE) || + (app.burst_size_io_rx_write > APP_MBUF_ARRAY_SIZE) || + (app.burst_size_worker_read > APP_MBUF_ARRAY_SIZE) || + (app.burst_size_worker_write > APP_MBUF_ARRAY_SIZE) || + ((2 * app.burst_size_io_tx_read) > APP_MBUF_ARRAY_SIZE) || + (app.burst_size_io_tx_write > APP_MBUF_ARRAY_SIZE)) { + return -8; + } + + return 0; +} + +#ifndef APP_ARG_NUMERICAL_SIZE_CHARS +#define APP_ARG_NUMERICAL_SIZE_CHARS 15 +#endif + +static int +parse_arg_pos_lb(const char *arg) +{ + uint32_t x; + char *endpt; + + if (strnlen(arg, APP_ARG_NUMERICAL_SIZE_CHARS + 1) == APP_ARG_NUMERICAL_SIZE_CHARS + 1) { + return -1; + } + + errno = 0; + x = strtoul(arg, &endpt, 10); + if (errno != 0 || endpt == arg || *endpt != '\0'){ + return -2; + } + + if (x >= 64) { + return -3; + } + + app.pos_lb = (uint8_t) x; + + return 0; +} + +/* Parse the argument given in the command line of the application */ +int +app_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static 
struct option lgopts[] = { + {"rx", 1, 0, 0}, + {"tx", 1, 0, 0}, + {"w", 1, 0, 0}, + {"lpm", 1, 0, 0}, + {"rsz", 1, 0, 0}, + {"bsz", 1, 0, 0}, + {"pos-lb", 1, 0, 0}, + {NULL, 0, 0, 0} + }; + uint32_t arg_w = 0; + uint32_t arg_rx = 0; + uint32_t arg_tx = 0; + uint32_t arg_lpm = 0; + uint32_t arg_rsz = 0; + uint32_t arg_bsz = 0; + uint32_t arg_pos_lb = 0; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* long options */ + case 0: + if (!strcmp(lgopts[option_index].name, "rx")) { + arg_rx = 1; + ret = parse_arg_rx(optarg); + if (ret) { + printf("Incorrect value for --rx argument (%d)\n", ret); + return -1; + } + } + if (!strcmp(lgopts[option_index].name, "tx")) { + arg_tx = 1; + ret = parse_arg_tx(optarg); + if (ret) { + printf("Incorrect value for --tx argument (%d)\n", ret); + return -1; + } + } + if (!strcmp(lgopts[option_index].name, "w")) { + arg_w = 1; + ret = parse_arg_w(optarg); + if (ret) { + printf("Incorrect value for --w argument (%d)\n", ret); + return -1; + } + } + if (!strcmp(lgopts[option_index].name, "lpm")) { + arg_lpm = 1; + ret = parse_arg_lpm(optarg); + if (ret) { + printf("Incorrect value for --lpm argument (%d)\n", ret); + return -1; + } + } + if (!strcmp(lgopts[option_index].name, "rsz")) { + arg_rsz = 1; + ret = parse_arg_rsz(optarg); + if (ret) { + printf("Incorrect value for --rsz argument (%d)\n", ret); + return -1; + } + } + if (!strcmp(lgopts[option_index].name, "bsz")) { + arg_bsz = 1; + ret = parse_arg_bsz(optarg); + if (ret) { + printf("Incorrect value for --bsz argument (%d)\n", ret); + return -1; + } + } + if (!strcmp(lgopts[option_index].name, "pos-lb")) { + arg_pos_lb = 1; + ret = parse_arg_pos_lb(optarg); + if (ret) { + printf("Incorrect value for --pos-lb argument (%d)\n", ret); + return -1; + } + } + break; + + default: + return -1; + } + } + + /* Check that all mandatory arguments are provided */ + if ((arg_rx == 0) || (arg_tx == 0) || (arg_w == 0) 
|| (arg_lpm == 0)){ + printf("Not all mandatory arguments are present\n"); + return -1; + } + + /* Assign default values for the optional arguments not provided */ + if (arg_rsz == 0) { + app.nic_rx_ring_size = APP_DEFAULT_NIC_RX_RING_SIZE; + app.nic_tx_ring_size = APP_DEFAULT_NIC_TX_RING_SIZE; + app.ring_rx_size = APP_DEFAULT_RING_RX_SIZE; + app.ring_tx_size = APP_DEFAULT_RING_TX_SIZE; + } + + if (arg_bsz == 0) { + app.burst_size_io_rx_read = APP_DEFAULT_BURST_SIZE_IO_RX_READ; + app.burst_size_io_rx_write = APP_DEFAULT_BURST_SIZE_IO_RX_WRITE; + app.burst_size_io_tx_read = APP_DEFAULT_BURST_SIZE_IO_TX_READ; + app.burst_size_io_tx_write = APP_DEFAULT_BURST_SIZE_IO_TX_WRITE; + app.burst_size_worker_read = APP_DEFAULT_BURST_SIZE_WORKER_READ; + app.burst_size_worker_write = APP_DEFAULT_BURST_SIZE_WORKER_WRITE; + } + + if (arg_pos_lb == 0) { + app.pos_lb = APP_DEFAULT_IO_RX_LB_POS; + } + + /* Check cross-consistency of arguments */ + if ((ret = app_check_lpm_table()) < 0) { + printf("At least one LPM rule is inconsistent (%d)\n", ret); + return -1; + } + if (app_check_every_rx_port_is_tx_enabled() < 0) { + printf("On LPM lookup miss, packet is sent back on the input port.\n"); + printf("At least one RX port is not enabled for TX.\n"); + return -2; + } + + if (optind >= 0) + argv[optind - 1] = prgname; + + ret = optind - 1; + optind = 0; /* reset getopt lib */ + return ret; +} + +int +app_get_nic_rx_queues_per_port(uint8_t port) +{ + uint32_t i, count; + + if (port >= APP_MAX_NIC_PORTS) { + return -1; + } + + count = 0; + for (i = 0; i < APP_MAX_RX_QUEUES_PER_NIC_PORT; i ++) { + if (app.nic_rx_queue_mask[port][i] == 1) { + count ++; + } + } + + return count; +} + +int +app_get_lcore_for_nic_rx(uint8_t port, uint8_t queue, uint32_t *lcore_out) +{ + uint32_t lcore; + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + uint32_t i; + + if (app.lcore_params[lcore].type != e_APP_LCORE_IO) { + continue; + } + + 
const size_t n_queues = RTE_MIN(lp->rx.n_nic_queues, + RTE_DIM(lp->rx.nic_queues)); + for (i = 0; i < n_queues; i ++) { + if ((lp->rx.nic_queues[i].port == port) && + (lp->rx.nic_queues[i].queue == queue)) { + *lcore_out = lcore; + return 0; + } + } + } + + return -1; +} + +int +app_get_lcore_for_nic_tx(uint8_t port, uint32_t *lcore_out) +{ + uint32_t lcore; + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + uint32_t i; + + if (app.lcore_params[lcore].type != e_APP_LCORE_IO) { + continue; + } + + const size_t n_ports = RTE_MIN(lp->tx.n_nic_ports, + RTE_DIM(lp->tx.nic_ports)); + for (i = 0; i < n_ports; i ++) { + if (lp->tx.nic_ports[i] == port) { + *lcore_out = lcore; + return 0; + } + } + } + + return -1; +} + +int +app_is_socket_used(uint32_t socket) +{ + uint32_t lcore; + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) { + continue; + } + + if (socket == rte_lcore_to_socket_id(lcore)) { + return 1; + } + } + + return 0; +} + +uint32_t +app_get_lcores_io_rx(void) +{ + uint32_t lcore, count; + + count = 0; + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp_io->rx.n_nic_queues == 0)) { + continue; + } + + count ++; + } + + return count; +} + +uint32_t +app_get_lcores_worker(void) +{ + uint32_t lcore, count; + + count = 0; + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + count ++; + } + + if (count > APP_MAX_WORKER_LCORES) { + rte_panic("Algorithmic error (too many worker lcores)\n"); + return 0; + } + + return count; +} + +void +app_print_params(void) +{ + unsigned port, queue, lcore, rule, i, j; + + /* Print NIC RX configuration */ + printf("NIC RX ports: "); + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { 
+ uint32_t n_rx_queues = app_get_nic_rx_queues_per_port((uint8_t) port); + + if (n_rx_queues == 0) { + continue; + } + + printf("%u (", port); + for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) { + if (app.nic_rx_queue_mask[port][queue] == 1) { + printf("%u ", queue); + } + } + printf(") "); + } + printf(";\n"); + + /* Print I/O lcore RX params */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp->rx.n_nic_queues == 0)) { + continue; + } + + printf("I/O lcore %u (socket %u): ", lcore, rte_lcore_to_socket_id(lcore)); + + printf("RX ports "); + for (i = 0; i < lp->rx.n_nic_queues; i ++) { + printf("(%u, %u) ", + (unsigned) lp->rx.nic_queues[i].port, + (unsigned) lp->rx.nic_queues[i].queue); + } + printf("; "); + + printf("Output rings "); + for (i = 0; i < lp->rx.n_rings; i ++) { + printf("%p ", lp->rx.rings[i]); + } + printf(";\n"); + } + + /* Print worker lcore RX params */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + printf("Worker lcore %u (socket %u) ID %u: ", + lcore, + rte_lcore_to_socket_id(lcore), + (unsigned)lp->worker_id); + + printf("Input rings "); + for (i = 0; i < lp->n_rings_in; i ++) { + printf("%p ", lp->rings_in[i]); + } + + printf(";\n"); + } + + printf("\n"); + + /* Print NIC TX configuration */ + printf("NIC TX ports: "); + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + if (app.nic_tx_port_mask[port] == 1) { + printf("%u ", port); + } + } + printf(";\n"); + + /* Print I/O TX lcore params */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + uint32_t n_workers = app_get_lcores_worker(); + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + 
(lp->tx.n_nic_ports == 0)) { + continue; + } + + printf("I/O lcore %u (socket %u): ", lcore, rte_lcore_to_socket_id(lcore)); + + printf("Input rings per TX port "); + for (i = 0; i < lp->tx.n_nic_ports; i ++) { + port = lp->tx.nic_ports[i]; + + printf("%u (", port); + for (j = 0; j < n_workers; j ++) { + printf("%p ", lp->tx.rings[port][j]); + } + printf(") "); + + } + + printf(";\n"); + } + + /* Print worker lcore TX params */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + printf("Worker lcore %u (socket %u) ID %u: \n", + lcore, + rte_lcore_to_socket_id(lcore), + (unsigned)lp->worker_id); + + printf("Output rings per TX port "); + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + if (lp->rings_out[port] != NULL) { + printf("%u (%p) ", port, lp->rings_out[port]); + } + } + + printf(";\n"); + } + + /* Print LPM rules */ + printf("LPM rules: \n"); + for (rule = 0; rule < app.n_lpm_rules; rule ++) { + uint32_t ip = app.lpm_rules[rule].ip; + uint8_t depth = app.lpm_rules[rule].depth; + uint8_t if_out = app.lpm_rules[rule].if_out; + + printf("\t%u: %u.%u.%u.%u/%u => %u;\n", + rule, + (unsigned) (ip & 0xFF000000) >> 24, + (unsigned) (ip & 0x00FF0000) >> 16, + (unsigned) (ip & 0x0000FF00) >> 8, + (unsigned) ip & 0x000000FF, + (unsigned) depth, + (unsigned) if_out + ); + } + + /* Rings */ + printf("Ring sizes: NIC RX = %u; Worker in = %u; Worker out = %u; NIC TX = %u;\n", + (unsigned) app.nic_rx_ring_size, + (unsigned) app.ring_rx_size, + (unsigned) app.ring_tx_size, + (unsigned) app.nic_tx_ring_size); + + /* Bursts */ + printf("Burst sizes: I/O RX (rd = %u, wr = %u); Worker (rd = %u, wr = %u); I/O TX (rd = %u, wr = %u)\n", + (unsigned) app.burst_size_io_rx_read, + (unsigned) app.burst_size_io_rx_write, + (unsigned) app.burst_size_worker_read, + (unsigned) app.burst_size_worker_write, + (unsigned) 
app.burst_size_io_tx_read, + (unsigned) app.burst_size_io_tx_write); +} diff --git a/examples/load_balancer/init.c b/examples/load_balancer/init.c new file mode 100644 index 00000000..e07850be --- /dev/null +++ b/examples/load_balancer/init.c @@ -0,0 +1,521 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_lpm.h> + +#include "main.h" + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static void +app_assign_worker_ids(void) +{ + uint32_t lcore, worker_id; + + /* Assign ID for each worker */ + worker_id = 0; + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + lp_worker->worker_id = worker_id; + worker_id ++; + } +} + +static void +app_init_mbuf_pools(void) +{ + unsigned socket, lcore; + + /* Init the buffer pools */ + for (socket 
= 0; socket < APP_MAX_SOCKETS; socket ++) { + char name[32]; + if (app_is_socket_used(socket) == 0) { + continue; + } + + snprintf(name, sizeof(name), "mbuf_pool_%u", socket); + printf("Creating the mbuf pool for socket %u ...\n", socket); + app.pools[socket] = rte_pktmbuf_pool_create( + name, APP_DEFAULT_MEMPOOL_BUFFERS, + APP_DEFAULT_MEMPOOL_CACHE_SIZE, + 0, APP_DEFAULT_MBUF_DATA_SIZE, socket); + if (app.pools[socket] == NULL) { + rte_panic("Cannot create mbuf pool on socket %u\n", socket); + } + } + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) { + continue; + } + + socket = rte_lcore_to_socket_id(lcore); + app.lcore_params[lcore].pool = app.pools[socket]; + } +} + +static void +app_init_lpm_tables(void) +{ + unsigned socket, lcore; + + /* Init the LPM tables */ + for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) { + char name[32]; + uint32_t rule; + + if (app_is_socket_used(socket) == 0) { + continue; + } + + struct rte_lpm_config lpm_config; + + lpm_config.max_rules = APP_MAX_LPM_RULES; + lpm_config.number_tbl8s = 256; + lpm_config.flags = 0; + snprintf(name, sizeof(name), "lpm_table_%u", socket); + printf("Creating the LPM table for socket %u ...\n", socket); + app.lpm_tables[socket] = rte_lpm_create( + name, + socket, + &lpm_config); + if (app.lpm_tables[socket] == NULL) { + rte_panic("Unable to create LPM table on socket %u\n", socket); + } + + for (rule = 0; rule < app.n_lpm_rules; rule ++) { + int ret; + + ret = rte_lpm_add(app.lpm_tables[socket], + app.lpm_rules[rule].ip, + app.lpm_rules[rule].depth, + app.lpm_rules[rule].if_out); + + if (ret < 0) { + rte_panic("Unable to add entry %u (%x/%u => %u) to the LPM table on socket %u (%d)\n", + (unsigned) rule, + (unsigned) app.lpm_rules[rule].ip, + (unsigned) app.lpm_rules[rule].depth, + (unsigned) app.lpm_rules[rule].if_out, + socket, + ret); + } + } + + } + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + if 
(app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + socket = rte_lcore_to_socket_id(lcore); + app.lcore_params[lcore].worker.lpm_table = app.lpm_tables[socket]; + } +} + +static void +app_init_rings_rx(void) +{ + unsigned lcore; + + /* Initialize the rings for the RX side */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io; + unsigned socket_io, lcore_worker; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp_io->rx.n_nic_queues == 0)) { + continue; + } + + socket_io = rte_lcore_to_socket_id(lcore); + + for (lcore_worker = 0; lcore_worker < APP_MAX_LCORES; lcore_worker ++) { + char name[32]; + struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore_worker].worker; + struct rte_ring *ring = NULL; + + if (app.lcore_params[lcore_worker].type != e_APP_LCORE_WORKER) { + continue; + } + + printf("Creating ring to connect I/O lcore %u (socket %u) with worker lcore %u ...\n", + lcore, + socket_io, + lcore_worker); + snprintf(name, sizeof(name), "app_ring_rx_s%u_io%u_w%u", + socket_io, + lcore, + lcore_worker); + ring = rte_ring_create( + name, + app.ring_rx_size, + socket_io, + RING_F_SP_ENQ | RING_F_SC_DEQ); + if (ring == NULL) { + rte_panic("Cannot create ring to connect I/O core %u with worker core %u\n", + lcore, + lcore_worker); + } + + lp_io->rx.rings[lp_io->rx.n_rings] = ring; + lp_io->rx.n_rings ++; + + lp_worker->rings_in[lp_worker->n_rings_in] = ring; + lp_worker->n_rings_in ++; + } + } + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp_io->rx.n_nic_queues == 0)) { + continue; + } + + if (lp_io->rx.n_rings != app_get_lcores_worker()) { + rte_panic("Algorithmic error (I/O RX rings)\n"); + } + } + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp_worker = 
&app.lcore_params[lcore].worker; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + if (lp_worker->n_rings_in != app_get_lcores_io_rx()) { + rte_panic("Algorithmic error (worker input rings)\n"); + } + } +} + +static void +app_init_rings_tx(void) +{ + unsigned lcore; + + /* Initialize the rings for the TX side */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker; + unsigned port; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + char name[32]; + struct app_lcore_params_io *lp_io = NULL; + struct rte_ring *ring; + uint32_t socket_io, lcore_io; + + if (app.nic_tx_port_mask[port] == 0) { + continue; + } + + if (app_get_lcore_for_nic_tx((uint8_t) port, &lcore_io) < 0) { + rte_panic("Algorithmic error (no I/O core to handle TX of port %u)\n", + port); + } + + lp_io = &app.lcore_params[lcore_io].io; + socket_io = rte_lcore_to_socket_id(lcore_io); + + printf("Creating ring to connect worker lcore %u with TX port %u (through I/O lcore %u) (socket %u) ...\n", + lcore, port, (unsigned)lcore_io, (unsigned)socket_io); + snprintf(name, sizeof(name), "app_ring_tx_s%u_w%u_p%u", socket_io, lcore, port); + ring = rte_ring_create( + name, + app.ring_tx_size, + socket_io, + RING_F_SP_ENQ | RING_F_SC_DEQ); + if (ring == NULL) { + rte_panic("Cannot create ring to connect worker core %u with TX port %u\n", + lcore, + port); + } + + lp_worker->rings_out[port] = ring; + lp_io->tx.rings[port][lp_worker->worker_id] = ring; + } + } + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io; + unsigned i; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp_io->tx.n_nic_ports == 0)) { + continue; + } + + for (i = 0; i < lp_io->tx.n_nic_ports; i ++){ + unsigned port, j; + + port = lp_io->tx.nic_ports[i]; + for (j = 0; j < 
app_get_lcores_worker(); j ++) { + if (lp_io->tx.rings[port][j] == NULL) { + rte_panic("Algorithmic error (I/O TX rings)\n"); + } + } + } + } +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + uint32_t n_rx_queues, n_tx_queues; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + n_rx_queues = app_get_nic_rx_queues_per_port(portid); + n_tx_queues = app.nic_tx_port_mask[portid]; + if ((n_rx_queues == 0) && (n_tx_queues == 0)) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +static void +app_init_nics(void) +{ + unsigned socket; + uint32_t lcore; + uint8_t port, queue; + int ret; + uint32_t n_rx_queues, n_tx_queues; + + /* Init NIC ports and queues, then start the ports */ + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + struct rte_mempool *pool; + + n_rx_queues = app_get_nic_rx_queues_per_port(port); + n_tx_queues = app.nic_tx_port_mask[port]; + + if ((n_rx_queues == 0) && (n_tx_queues == 0)) { + continue; + } + + /* Init port */ + printf("Initializing NIC port %u ...\n", (unsigned) port); + ret = rte_eth_dev_configure( + port, + (uint8_t) n_rx_queues, + (uint8_t) n_tx_queues, + &port_conf); + if (ret < 0) { + rte_panic("Cannot init NIC port %u (%d)\n", (unsigned) port, ret); + } + rte_eth_promiscuous_enable(port); + + /* Init RX queues */ + for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) { + if (app.nic_rx_queue_mask[port][queue] == 0) { + continue; + } + + app_get_lcore_for_nic_rx(port, queue, &lcore); + socket = rte_lcore_to_socket_id(lcore); + pool = app.lcore_params[lcore].pool; + + printf("Initializing NIC port %u RX queue %u ...\n", + (unsigned) port, + (unsigned) queue); + ret = rte_eth_rx_queue_setup( + port, + queue, + (uint16_t) app.nic_rx_ring_size, + socket, + NULL, + pool); + if (ret < 0) { + rte_panic("Cannot init RX queue %u for port %u (%d)\n", + (unsigned) queue, + (unsigned) port, + ret); + } + } + + /* 
Init TX queues */ + if (app.nic_tx_port_mask[port] == 1) { + app_get_lcore_for_nic_tx(port, &lcore); + socket = rte_lcore_to_socket_id(lcore); + printf("Initializing NIC port %u TX queue 0 ...\n", + (unsigned) port); + ret = rte_eth_tx_queue_setup( + port, + 0, + (uint16_t) app.nic_tx_ring_size, + socket, + NULL); + if (ret < 0) { + rte_panic("Cannot init TX queue 0 for port %d (%d)\n", + port, + ret); + } + } + + /* Start port */ + ret = rte_eth_dev_start(port); + if (ret < 0) { + rte_panic("Cannot start port %d (%d)\n", port, ret); + } + } + + check_all_ports_link_status(APP_MAX_NIC_PORTS, (~0x0)); +} + +void +app_init(void) +{ + app_assign_worker_ids(); + app_init_mbuf_pools(); + app_init_lpm_tables(); + app_init_rings_rx(); + app_init_rings_tx(); + app_init_nics(); + + printf("Initialization completed.\n"); +} diff --git a/examples/load_balancer/main.c b/examples/load_balancer/main.c new file mode 100644 index 00000000..7ede3585 --- /dev/null +++ b/examples/load_balancer/main.c @@ -0,0 +1,109 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_lpm.h> + +#include "main.h" + +int +main(int argc, char **argv) +{ + uint32_t lcore; + int ret; + + /* Init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + return -1; + argc -= ret; + argv += ret; + + /* Parse application arguments (after the EAL ones) */ + ret = 
app_parse_args(argc, argv); + if (ret < 0) { + app_print_usage(); + return -1; + } + + /* Init */ + app_init(); + app_print_params(); + + /* Launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(app_lcore_main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore) { + if (rte_eal_wait_lcore(lcore) < 0) { + return -1; + } + } + + return 0; +} diff --git a/examples/load_balancer/main.h b/examples/load_balancer/main.h new file mode 100644 index 00000000..d98468a7 --- /dev/null +++ b/examples/load_balancer/main.h @@ -0,0 +1,371 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +/* Logical cores */ +#ifndef APP_MAX_SOCKETS +#define APP_MAX_SOCKETS 2 +#endif + +#ifndef APP_MAX_LCORES +#define APP_MAX_LCORES RTE_MAX_LCORE +#endif + +#ifndef APP_MAX_NIC_PORTS +#define APP_MAX_NIC_PORTS RTE_MAX_ETHPORTS +#endif + +#ifndef APP_MAX_RX_QUEUES_PER_NIC_PORT +#define APP_MAX_RX_QUEUES_PER_NIC_PORT 128 +#endif + +#ifndef APP_MAX_TX_QUEUES_PER_NIC_PORT +#define APP_MAX_TX_QUEUES_PER_NIC_PORT 128 +#endif + +#ifndef APP_MAX_IO_LCORES +#define APP_MAX_IO_LCORES 16 +#endif +#if (APP_MAX_IO_LCORES > APP_MAX_LCORES) +#error "APP_MAX_IO_LCORES is too big" +#endif + +#ifndef APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE +#define APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE 16 +#endif + +#ifndef APP_MAX_NIC_TX_PORTS_PER_IO_LCORE +#define APP_MAX_NIC_TX_PORTS_PER_IO_LCORE 16 +#endif +#if (APP_MAX_NIC_TX_PORTS_PER_IO_LCORE > APP_MAX_NIC_PORTS) +#error "APP_MAX_NIC_TX_PORTS_PER_IO_LCORE too big" +#endif + +#ifndef APP_MAX_WORKER_LCORES +#define APP_MAX_WORKER_LCORES 16 +#endif +#if (APP_MAX_WORKER_LCORES > APP_MAX_LCORES) +#error "APP_MAX_WORKER_LCORES is too big" +#endif + + +/* Mempools */ +#ifndef APP_DEFAULT_MBUF_DATA_SIZE +#define APP_DEFAULT_MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE +#endif + +#ifndef APP_DEFAULT_MEMPOOL_BUFFERS +#define APP_DEFAULT_MEMPOOL_BUFFERS 8192 * 4 +#endif + +#ifndef APP_DEFAULT_MEMPOOL_CACHE_SIZE +#define APP_DEFAULT_MEMPOOL_CACHE_SIZE 256 +#endif + +/* LPM Tables */ +#ifndef 
APP_MAX_LPM_RULES +#define APP_MAX_LPM_RULES 1024 +#endif + +/* NIC RX */ +#ifndef APP_DEFAULT_NIC_RX_RING_SIZE +#define APP_DEFAULT_NIC_RX_RING_SIZE 1024 +#endif + +/* + * RX and TX Prefetch, Host, and Write-back threshold values should be + * carefully set for optimal performance. Consult the network + * controller's datasheet and supporting DPDK documentation for guidance + * on how these parameters should be set. + */ +#ifndef APP_DEFAULT_NIC_RX_PTHRESH +#define APP_DEFAULT_NIC_RX_PTHRESH 8 +#endif + +#ifndef APP_DEFAULT_NIC_RX_HTHRESH +#define APP_DEFAULT_NIC_RX_HTHRESH 8 +#endif + +#ifndef APP_DEFAULT_NIC_RX_WTHRESH +#define APP_DEFAULT_NIC_RX_WTHRESH 4 +#endif + +#ifndef APP_DEFAULT_NIC_RX_FREE_THRESH +#define APP_DEFAULT_NIC_RX_FREE_THRESH 64 +#endif + +#ifndef APP_DEFAULT_NIC_RX_DROP_EN +#define APP_DEFAULT_NIC_RX_DROP_EN 0 +#endif + +/* NIC TX */ +#ifndef APP_DEFAULT_NIC_TX_RING_SIZE +#define APP_DEFAULT_NIC_TX_RING_SIZE 1024 +#endif + +/* + * These default values are optimized for use with the Intel(R) 82599 10 GbE + * Controller and the DPDK ixgbe PMD. Consider using other values for other + * network controllers and/or network drivers. 
+ */ +#ifndef APP_DEFAULT_NIC_TX_PTHRESH +#define APP_DEFAULT_NIC_TX_PTHRESH 36 +#endif + +#ifndef APP_DEFAULT_NIC_TX_HTHRESH +#define APP_DEFAULT_NIC_TX_HTHRESH 0 +#endif + +#ifndef APP_DEFAULT_NIC_TX_WTHRESH +#define APP_DEFAULT_NIC_TX_WTHRESH 0 +#endif + +#ifndef APP_DEFAULT_NIC_TX_FREE_THRESH +#define APP_DEFAULT_NIC_TX_FREE_THRESH 0 +#endif + +#ifndef APP_DEFAULT_NIC_TX_RS_THRESH +#define APP_DEFAULT_NIC_TX_RS_THRESH 0 +#endif + +/* Software Rings */ +#ifndef APP_DEFAULT_RING_RX_SIZE +#define APP_DEFAULT_RING_RX_SIZE 1024 +#endif + +#ifndef APP_DEFAULT_RING_TX_SIZE +#define APP_DEFAULT_RING_TX_SIZE 1024 +#endif + +/* Bursts */ +#ifndef APP_MBUF_ARRAY_SIZE +#define APP_MBUF_ARRAY_SIZE 512 +#endif + +#ifndef APP_DEFAULT_BURST_SIZE_IO_RX_READ +#define APP_DEFAULT_BURST_SIZE_IO_RX_READ 144 +#endif +#if (APP_DEFAULT_BURST_SIZE_IO_RX_READ > APP_MBUF_ARRAY_SIZE) +#error "APP_DEFAULT_BURST_SIZE_IO_RX_READ is too big" +#endif + +#ifndef APP_DEFAULT_BURST_SIZE_IO_RX_WRITE +#define APP_DEFAULT_BURST_SIZE_IO_RX_WRITE 144 +#endif +#if (APP_DEFAULT_BURST_SIZE_IO_RX_WRITE > APP_MBUF_ARRAY_SIZE) +#error "APP_DEFAULT_BURST_SIZE_IO_RX_WRITE is too big" +#endif + +#ifndef APP_DEFAULT_BURST_SIZE_IO_TX_READ +#define APP_DEFAULT_BURST_SIZE_IO_TX_READ 144 +#endif +#if (APP_DEFAULT_BURST_SIZE_IO_TX_READ > APP_MBUF_ARRAY_SIZE) +#error "APP_DEFAULT_BURST_SIZE_IO_TX_READ is too big" +#endif + +#ifndef APP_DEFAULT_BURST_SIZE_IO_TX_WRITE +#define APP_DEFAULT_BURST_SIZE_IO_TX_WRITE 144 +#endif +#if (APP_DEFAULT_BURST_SIZE_IO_TX_WRITE > APP_MBUF_ARRAY_SIZE) +#error "APP_DEFAULT_BURST_SIZE_IO_TX_WRITE is too big" +#endif + +#ifndef APP_DEFAULT_BURST_SIZE_WORKER_READ +#define APP_DEFAULT_BURST_SIZE_WORKER_READ 144 +#endif +#if ((2 * APP_DEFAULT_BURST_SIZE_WORKER_READ) > APP_MBUF_ARRAY_SIZE) +#error "APP_DEFAULT_BURST_SIZE_WORKER_READ is too big" +#endif + +#ifndef APP_DEFAULT_BURST_SIZE_WORKER_WRITE +#define APP_DEFAULT_BURST_SIZE_WORKER_WRITE 144 +#endif +#if 
(APP_DEFAULT_BURST_SIZE_WORKER_WRITE > APP_MBUF_ARRAY_SIZE) +#error "APP_DEFAULT_BURST_SIZE_WORKER_WRITE is too big" +#endif + +/* Load balancing logic */ +#ifndef APP_DEFAULT_IO_RX_LB_POS +#define APP_DEFAULT_IO_RX_LB_POS 29 +#endif +#if (APP_DEFAULT_IO_RX_LB_POS >= 64) +#error "APP_DEFAULT_IO_RX_LB_POS is too big" +#endif + +struct app_mbuf_array { + struct rte_mbuf *array[APP_MBUF_ARRAY_SIZE]; + uint32_t n_mbufs; +}; + +enum app_lcore_type { + e_APP_LCORE_DISABLED = 0, + e_APP_LCORE_IO, + e_APP_LCORE_WORKER +}; + +struct app_lcore_params_io { + /* I/O RX */ + struct { + /* NIC */ + struct { + uint8_t port; + uint8_t queue; + } nic_queues[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE]; + uint32_t n_nic_queues; + + /* Rings */ + struct rte_ring *rings[APP_MAX_WORKER_LCORES]; + uint32_t n_rings; + + /* Internal buffers */ + struct app_mbuf_array mbuf_in; + struct app_mbuf_array mbuf_out[APP_MAX_WORKER_LCORES]; + uint8_t mbuf_out_flush[APP_MAX_WORKER_LCORES]; + + /* Stats */ + uint32_t nic_queues_count[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE]; + uint32_t nic_queues_iters[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE]; + uint32_t rings_count[APP_MAX_WORKER_LCORES]; + uint32_t rings_iters[APP_MAX_WORKER_LCORES]; + } rx; + + /* I/O TX */ + struct { + /* Rings */ + struct rte_ring *rings[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES]; + + /* NIC */ + uint8_t nic_ports[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE]; + uint32_t n_nic_ports; + + /* Internal buffers */ + struct app_mbuf_array mbuf_out[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE]; + uint8_t mbuf_out_flush[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE]; + + /* Stats */ + uint32_t rings_count[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES]; + uint32_t rings_iters[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES]; + uint32_t nic_ports_count[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE]; + uint32_t nic_ports_iters[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE]; + } tx; +}; + +struct app_lcore_params_worker { + /* Rings */ + struct rte_ring *rings_in[APP_MAX_IO_LCORES]; + uint32_t n_rings_in; + struct rte_ring 
*rings_out[APP_MAX_NIC_PORTS]; + + /* LPM table */ + struct rte_lpm *lpm_table; + uint32_t worker_id; + + /* Internal buffers */ + struct app_mbuf_array mbuf_in; + struct app_mbuf_array mbuf_out[APP_MAX_NIC_PORTS]; + uint8_t mbuf_out_flush[APP_MAX_NIC_PORTS]; + + /* Stats */ + uint32_t rings_in_count[APP_MAX_IO_LCORES]; + uint32_t rings_in_iters[APP_MAX_IO_LCORES]; + uint32_t rings_out_count[APP_MAX_NIC_PORTS]; + uint32_t rings_out_iters[APP_MAX_NIC_PORTS]; +}; + +struct app_lcore_params { + union { + struct app_lcore_params_io io; + struct app_lcore_params_worker worker; + }; + enum app_lcore_type type; + struct rte_mempool *pool; +} __rte_cache_aligned; + +struct app_lpm_rule { + uint32_t ip; + uint8_t depth; + uint8_t if_out; +}; + +struct app_params { + /* lcore */ + struct app_lcore_params lcore_params[APP_MAX_LCORES]; + + /* NIC */ + uint8_t nic_rx_queue_mask[APP_MAX_NIC_PORTS][APP_MAX_RX_QUEUES_PER_NIC_PORT]; + uint8_t nic_tx_port_mask[APP_MAX_NIC_PORTS]; + + /* mbuf pools */ + struct rte_mempool *pools[APP_MAX_SOCKETS]; + + /* LPM tables */ + struct rte_lpm *lpm_tables[APP_MAX_SOCKETS]; + struct app_lpm_rule lpm_rules[APP_MAX_LPM_RULES]; + uint32_t n_lpm_rules; + + /* rings */ + uint32_t nic_rx_ring_size; + uint32_t nic_tx_ring_size; + uint32_t ring_rx_size; + uint32_t ring_tx_size; + + /* burst size */ + uint32_t burst_size_io_rx_read; + uint32_t burst_size_io_rx_write; + uint32_t burst_size_io_tx_read; + uint32_t burst_size_io_tx_write; + uint32_t burst_size_worker_read; + uint32_t burst_size_worker_write; + + /* load balancing */ + uint8_t pos_lb; +} __rte_cache_aligned; + +extern struct app_params app; + +int app_parse_args(int argc, char **argv); +void app_print_usage(void); +void app_init(void); +int app_lcore_main_loop(void *arg); + +int app_get_nic_rx_queues_per_port(uint8_t port); +int app_get_lcore_for_nic_rx(uint8_t port, uint8_t queue, uint32_t *lcore_out); +int app_get_lcore_for_nic_tx(uint8_t port, uint32_t *lcore_out); +int 
app_is_socket_used(uint32_t socket); +uint32_t app_get_lcores_io_rx(void); +uint32_t app_get_lcores_worker(void); +void app_print_params(void); + +#endif /* _MAIN_H_ */ diff --git a/examples/load_balancer/runtime.c b/examples/load_balancer/runtime.c new file mode 100644 index 00000000..6944325d --- /dev/null +++ b/examples/load_balancer/runtime.c @@ -0,0 +1,668 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_lpm.h> + +#include "main.h" + +#ifndef APP_LCORE_IO_FLUSH +#define APP_LCORE_IO_FLUSH 1000000 +#endif + +#ifndef APP_LCORE_WORKER_FLUSH +#define APP_LCORE_WORKER_FLUSH 1000000 +#endif + +#ifndef APP_STATS +#define APP_STATS 1000000 +#endif + +#define APP_IO_RX_DROP_ALL_PACKETS 0 +#define APP_WORKER_DROP_ALL_PACKETS 0 +#define APP_IO_TX_DROP_ALL_PACKETS 0 + +#ifndef APP_IO_RX_PREFETCH_ENABLE +#define APP_IO_RX_PREFETCH_ENABLE 1 +#endif + +#ifndef APP_WORKER_PREFETCH_ENABLE +#define APP_WORKER_PREFETCH_ENABLE 1 +#endif + +#ifndef APP_IO_TX_PREFETCH_ENABLE 
+#define APP_IO_TX_PREFETCH_ENABLE 1 +#endif + +#if APP_IO_RX_PREFETCH_ENABLE +#define APP_IO_RX_PREFETCH0(p) rte_prefetch0(p) +#define APP_IO_RX_PREFETCH1(p) rte_prefetch1(p) +#else +#define APP_IO_RX_PREFETCH0(p) +#define APP_IO_RX_PREFETCH1(p) +#endif + +#if APP_WORKER_PREFETCH_ENABLE +#define APP_WORKER_PREFETCH0(p) rte_prefetch0(p) +#define APP_WORKER_PREFETCH1(p) rte_prefetch1(p) +#else +#define APP_WORKER_PREFETCH0(p) +#define APP_WORKER_PREFETCH1(p) +#endif + +#if APP_IO_TX_PREFETCH_ENABLE +#define APP_IO_TX_PREFETCH0(p) rte_prefetch0(p) +#define APP_IO_TX_PREFETCH1(p) rte_prefetch1(p) +#else +#define APP_IO_TX_PREFETCH0(p) +#define APP_IO_TX_PREFETCH1(p) +#endif + +static inline void +app_lcore_io_rx_buffer_to_send ( + struct app_lcore_params_io *lp, + uint32_t worker, + struct rte_mbuf *mbuf, + uint32_t bsz) +{ + uint32_t pos; + int ret; + + pos = lp->rx.mbuf_out[worker].n_mbufs; + lp->rx.mbuf_out[worker].array[pos ++] = mbuf; + if (likely(pos < bsz)) { + lp->rx.mbuf_out[worker].n_mbufs = pos; + return; + } + + ret = rte_ring_sp_enqueue_bulk( + lp->rx.rings[worker], + (void **) lp->rx.mbuf_out[worker].array, + bsz); + + if (unlikely(ret == -ENOBUFS)) { + uint32_t k; + for (k = 0; k < bsz; k ++) { + struct rte_mbuf *m = lp->rx.mbuf_out[worker].array[k]; + rte_pktmbuf_free(m); + } + } + + lp->rx.mbuf_out[worker].n_mbufs = 0; + lp->rx.mbuf_out_flush[worker] = 0; + +#if APP_STATS + lp->rx.rings_iters[worker] ++; + if (likely(ret == 0)) { + lp->rx.rings_count[worker] ++; + } + if (unlikely(lp->rx.rings_iters[worker] == APP_STATS)) { + unsigned lcore = rte_lcore_id(); + + printf("\tI/O RX %u out (worker %u): enq success rate = %.2f\n", + lcore, + (unsigned)worker, + ((double) lp->rx.rings_count[worker]) / ((double) lp->rx.rings_iters[worker])); + lp->rx.rings_iters[worker] = 0; + lp->rx.rings_count[worker] = 0; + } +#endif +} + +static inline void +app_lcore_io_rx( + struct app_lcore_params_io *lp, + uint32_t n_workers, + uint32_t bsz_rd, + uint32_t bsz_wr, + 
uint8_t pos_lb) +{ + struct rte_mbuf *mbuf_1_0, *mbuf_1_1, *mbuf_2_0, *mbuf_2_1; + uint8_t *data_1_0, *data_1_1 = NULL; + uint32_t i; + + for (i = 0; i < lp->rx.n_nic_queues; i ++) { + uint8_t port = lp->rx.nic_queues[i].port; + uint8_t queue = lp->rx.nic_queues[i].queue; + uint32_t n_mbufs, j; + + n_mbufs = rte_eth_rx_burst( + port, + queue, + lp->rx.mbuf_in.array, + (uint16_t) bsz_rd); + + if (unlikely(n_mbufs == 0)) { + continue; + } + +#if APP_STATS + lp->rx.nic_queues_iters[i] ++; + lp->rx.nic_queues_count[i] += n_mbufs; + if (unlikely(lp->rx.nic_queues_iters[i] == APP_STATS)) { + struct rte_eth_stats stats; + unsigned lcore = rte_lcore_id(); + + rte_eth_stats_get(port, &stats); + + printf("I/O RX %u in (NIC port %u): NIC drop ratio = %.2f avg burst size = %.2f\n", + lcore, + (unsigned) port, + (double) stats.imissed / (double) (stats.imissed + stats.ipackets), + ((double) lp->rx.nic_queues_count[i]) / ((double) lp->rx.nic_queues_iters[i])); + lp->rx.nic_queues_iters[i] = 0; + lp->rx.nic_queues_count[i] = 0; + } +#endif + +#if APP_IO_RX_DROP_ALL_PACKETS + for (j = 0; j < n_mbufs; j ++) { + struct rte_mbuf *pkt = lp->rx.mbuf_in.array[j]; + rte_pktmbuf_free(pkt); + } + + continue; +#endif + + mbuf_1_0 = lp->rx.mbuf_in.array[0]; + mbuf_1_1 = lp->rx.mbuf_in.array[1]; + data_1_0 = rte_pktmbuf_mtod(mbuf_1_0, uint8_t *); + if (likely(n_mbufs > 1)) { + data_1_1 = rte_pktmbuf_mtod(mbuf_1_1, uint8_t *); + } + + mbuf_2_0 = lp->rx.mbuf_in.array[2]; + mbuf_2_1 = lp->rx.mbuf_in.array[3]; + APP_IO_RX_PREFETCH0(mbuf_2_0); + APP_IO_RX_PREFETCH0(mbuf_2_1); + + for (j = 0; j + 3 < n_mbufs; j += 2) { + struct rte_mbuf *mbuf_0_0, *mbuf_0_1; + uint8_t *data_0_0, *data_0_1; + uint32_t worker_0, worker_1; + + mbuf_0_0 = mbuf_1_0; + mbuf_0_1 = mbuf_1_1; + data_0_0 = data_1_0; + data_0_1 = data_1_1; + + mbuf_1_0 = mbuf_2_0; + mbuf_1_1 = mbuf_2_1; + data_1_0 = rte_pktmbuf_mtod(mbuf_2_0, uint8_t *); + data_1_1 = rte_pktmbuf_mtod(mbuf_2_1, uint8_t *); + APP_IO_RX_PREFETCH0(data_1_0); + 
APP_IO_RX_PREFETCH0(data_1_1); + + mbuf_2_0 = lp->rx.mbuf_in.array[j+4]; + mbuf_2_1 = lp->rx.mbuf_in.array[j+5]; + APP_IO_RX_PREFETCH0(mbuf_2_0); + APP_IO_RX_PREFETCH0(mbuf_2_1); + + worker_0 = data_0_0[pos_lb] & (n_workers - 1); + worker_1 = data_0_1[pos_lb] & (n_workers - 1); + + app_lcore_io_rx_buffer_to_send(lp, worker_0, mbuf_0_0, bsz_wr); + app_lcore_io_rx_buffer_to_send(lp, worker_1, mbuf_0_1, bsz_wr); + } + + /* Handle the last 1, 2 (when n_mbufs is even) or 3 (when n_mbufs is odd) packets */ + for ( ; j < n_mbufs; j += 1) { + struct rte_mbuf *mbuf; + uint8_t *data; + uint32_t worker; + + mbuf = mbuf_1_0; + mbuf_1_0 = mbuf_1_1; + mbuf_1_1 = mbuf_2_0; + mbuf_2_0 = mbuf_2_1; + + data = rte_pktmbuf_mtod(mbuf, uint8_t *); + + APP_IO_RX_PREFETCH0(mbuf_1_0); + + worker = data[pos_lb] & (n_workers - 1); + + app_lcore_io_rx_buffer_to_send(lp, worker, mbuf, bsz_wr); + } + } +} + +static inline void +app_lcore_io_rx_flush(struct app_lcore_params_io *lp, uint32_t n_workers) +{ + uint32_t worker; + + for (worker = 0; worker < n_workers; worker ++) { + int ret; + + if (likely((lp->rx.mbuf_out_flush[worker] == 0) || + (lp->rx.mbuf_out[worker].n_mbufs == 0))) { + lp->rx.mbuf_out_flush[worker] = 1; + continue; + } + + ret = rte_ring_sp_enqueue_bulk( + lp->rx.rings[worker], + (void **) lp->rx.mbuf_out[worker].array, + lp->rx.mbuf_out[worker].n_mbufs); + + if (unlikely(ret < 0)) { + uint32_t k; + for (k = 0; k < lp->rx.mbuf_out[worker].n_mbufs; k ++) { + struct rte_mbuf *pkt_to_free = lp->rx.mbuf_out[worker].array[k]; + rte_pktmbuf_free(pkt_to_free); + } + } + + lp->rx.mbuf_out[worker].n_mbufs = 0; + lp->rx.mbuf_out_flush[worker] = 1; + } +} + +static inline void +app_lcore_io_tx( + struct app_lcore_params_io *lp, + uint32_t n_workers, + uint32_t bsz_rd, + uint32_t bsz_wr) +{ + uint32_t worker; + + for (worker = 0; worker < n_workers; worker ++) { + uint32_t i; + + for (i = 0; i < lp->tx.n_nic_ports; i ++) { + uint8_t port = lp->tx.nic_ports[i]; + struct rte_ring *ring = 
lp->tx.rings[port][worker]; + uint32_t n_mbufs, n_pkts; + int ret; + + n_mbufs = lp->tx.mbuf_out[port].n_mbufs; + ret = rte_ring_sc_dequeue_bulk( + ring, + (void **) &lp->tx.mbuf_out[port].array[n_mbufs], + bsz_rd); + + if (unlikely(ret == -ENOENT)) { + continue; + } + + n_mbufs += bsz_rd; + +#if APP_IO_TX_DROP_ALL_PACKETS + { + uint32_t j; + APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[0]); + APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[1]); + + for (j = 0; j < n_mbufs; j ++) { + if (likely(j < n_mbufs - 2)) { + APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[j + 2]); + } + + rte_pktmbuf_free(lp->tx.mbuf_out[port].array[j]); + } + + lp->tx.mbuf_out[port].n_mbufs = 0; + + continue; + } +#endif + + if (unlikely(n_mbufs < bsz_wr)) { + lp->tx.mbuf_out[port].n_mbufs = n_mbufs; + continue; + } + + n_pkts = rte_eth_tx_burst( + port, + 0, + lp->tx.mbuf_out[port].array, + (uint16_t) n_mbufs); + +#if APP_STATS + lp->tx.nic_ports_iters[port] ++; + lp->tx.nic_ports_count[port] += n_pkts; + if (unlikely(lp->tx.nic_ports_iters[port] == APP_STATS)) { + unsigned lcore = rte_lcore_id(); + + printf("\t\t\tI/O TX %u out (port %u): avg burst size = %.2f\n", + lcore, + (unsigned) port, + ((double) lp->tx.nic_ports_count[port]) / ((double) lp->tx.nic_ports_iters[port])); + lp->tx.nic_ports_iters[port] = 0; + lp->tx.nic_ports_count[port] = 0; + } +#endif + + if (unlikely(n_pkts < n_mbufs)) { + uint32_t k; + for (k = n_pkts; k < n_mbufs; k ++) { + struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k]; + rte_pktmbuf_free(pkt_to_free); + } + } + lp->tx.mbuf_out[port].n_mbufs = 0; + lp->tx.mbuf_out_flush[port] = 0; + } + } +} + +/* + * Transmit the packets still buffered for each TX port owned by this I/O + * lcore. A port whose flush flag was cleared by a burst since the last pass, + * or whose buffer is empty, only has its flag set to 1 again and is skipped. + */ +static inline void +app_lcore_io_tx_flush(struct app_lcore_params_io *lp) +{ + uint8_t port; + uint32_t i; + + for (i = 0; i < lp->tx.n_nic_ports; i ++) { + uint32_t n_pkts; + + /* + * tx.mbuf_out[] and tx.mbuf_out_flush[] are indexed by NIC port id + * (see app_lcore_io_tx), so map the loop index through nic_ports[]. + */ + port = lp->tx.nic_ports[i]; + + if (likely((lp->tx.mbuf_out_flush[port] == 0) || + (lp->tx.mbuf_out[port].n_mbufs == 0))) { + lp->tx.mbuf_out_flush[port] = 1; + continue; + } + + n_pkts = rte_eth_tx_burst( + port, 
+ 0, + lp->tx.mbuf_out[port].array, + (uint16_t) lp->tx.mbuf_out[port].n_mbufs); + + if (unlikely(n_pkts < lp->tx.mbuf_out[port].n_mbufs)) { + uint32_t k; + for (k = n_pkts; k < lp->tx.mbuf_out[port].n_mbufs; k ++) { + struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k]; + rte_pktmbuf_free(pkt_to_free); + } + } + + lp->tx.mbuf_out[port].n_mbufs = 0; + lp->tx.mbuf_out_flush[port] = 1; + } +} + +static void +app_lcore_main_loop_io(void) +{ + uint32_t lcore = rte_lcore_id(); + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + uint32_t n_workers = app_get_lcores_worker(); + uint64_t i = 0; + + uint32_t bsz_rx_rd = app.burst_size_io_rx_read; + uint32_t bsz_rx_wr = app.burst_size_io_rx_write; + uint32_t bsz_tx_rd = app.burst_size_io_tx_read; + uint32_t bsz_tx_wr = app.burst_size_io_tx_write; + + uint8_t pos_lb = app.pos_lb; + + for ( ; ; ) { + if (APP_LCORE_IO_FLUSH && (unlikely(i == APP_LCORE_IO_FLUSH))) { + if (likely(lp->rx.n_nic_queues > 0)) { + app_lcore_io_rx_flush(lp, n_workers); + } + + if (likely(lp->tx.n_nic_ports > 0)) { + app_lcore_io_tx_flush(lp); + } + + i = 0; + } + + if (likely(lp->rx.n_nic_queues > 0)) { + app_lcore_io_rx(lp, n_workers, bsz_rx_rd, bsz_rx_wr, pos_lb); + } + + if (likely(lp->tx.n_nic_ports > 0)) { + app_lcore_io_tx(lp, n_workers, bsz_tx_rd, bsz_tx_wr); + } + + i ++; + } +} + +static inline void +app_lcore_worker( + struct app_lcore_params_worker *lp, + uint32_t bsz_rd, + uint32_t bsz_wr) +{ + uint32_t i; + + for (i = 0; i < lp->n_rings_in; i ++) { + struct rte_ring *ring_in = lp->rings_in[i]; + uint32_t j; + int ret; + + ret = rte_ring_sc_dequeue_bulk( + ring_in, + (void **) lp->mbuf_in.array, + bsz_rd); + + if (unlikely(ret == -ENOENT)) { + continue; + } + +#if APP_WORKER_DROP_ALL_PACKETS + for (j = 0; j < bsz_rd; j ++) { + struct rte_mbuf *pkt = lp->mbuf_in.array[j]; + rte_pktmbuf_free(pkt); + } + + continue; +#endif + + APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[0], unsigned char *)); + 
APP_WORKER_PREFETCH0(lp->mbuf_in.array[1]); + + for (j = 0; j < bsz_rd; j ++) { + struct rte_mbuf *pkt; + struct ipv4_hdr *ipv4_hdr; + uint32_t ipv4_dst, pos; + uint32_t port; + + if (likely(j < bsz_rd - 1)) { + APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[j+1], unsigned char *)); + } + if (likely(j < bsz_rd - 2)) { + APP_WORKER_PREFETCH0(lp->mbuf_in.array[j+2]); + } + + pkt = lp->mbuf_in.array[j]; + ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, + struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); + + if (unlikely(rte_lpm_lookup(lp->lpm_table, ipv4_dst, &port) != 0)) { + port = pkt->port; + } + + pos = lp->mbuf_out[port].n_mbufs; + + lp->mbuf_out[port].array[pos ++] = pkt; + if (likely(pos < bsz_wr)) { + lp->mbuf_out[port].n_mbufs = pos; + continue; + } + + ret = rte_ring_sp_enqueue_bulk( + lp->rings_out[port], + (void **) lp->mbuf_out[port].array, + bsz_wr); + +#if APP_STATS + lp->rings_out_iters[port] ++; + if (ret == 0) { + lp->rings_out_count[port] += 1; + } + if (lp->rings_out_iters[port] == APP_STATS){ + printf("\t\tWorker %u out (NIC port %u): enq success rate = %.2f\n", + (unsigned) lp->worker_id, + (unsigned) port, + ((double) lp->rings_out_count[port]) / ((double) lp->rings_out_iters[port])); + lp->rings_out_iters[port] = 0; + lp->rings_out_count[port] = 0; + } +#endif + + if (unlikely(ret == -ENOBUFS)) { + uint32_t k; + for (k = 0; k < bsz_wr; k ++) { + struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k]; + rte_pktmbuf_free(pkt_to_free); + } + } + + lp->mbuf_out[port].n_mbufs = 0; + lp->mbuf_out_flush[port] = 0; + } + } +} + +static inline void +app_lcore_worker_flush(struct app_lcore_params_worker *lp) +{ + uint32_t port; + + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + int ret; + + if (unlikely(lp->rings_out[port] == NULL)) { + continue; + } + + if (likely((lp->mbuf_out_flush[port] == 0) || + (lp->mbuf_out[port].n_mbufs == 0))) { + lp->mbuf_out_flush[port] = 1; + continue; + } + + ret 
= rte_ring_sp_enqueue_bulk( + lp->rings_out[port], + (void **) lp->mbuf_out[port].array, + lp->mbuf_out[port].n_mbufs); + + if (unlikely(ret < 0)) { + uint32_t k; + for (k = 0; k < lp->mbuf_out[port].n_mbufs; k ++) { + struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k]; + rte_pktmbuf_free(pkt_to_free); + } + } + + lp->mbuf_out[port].n_mbufs = 0; + lp->mbuf_out_flush[port] = 1; + } +} + +static void +app_lcore_main_loop_worker(void) { + uint32_t lcore = rte_lcore_id(); + struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker; + uint64_t i = 0; + + uint32_t bsz_rd = app.burst_size_worker_read; + uint32_t bsz_wr = app.burst_size_worker_write; + + for ( ; ; ) { + if (APP_LCORE_WORKER_FLUSH && (unlikely(i == APP_LCORE_WORKER_FLUSH))) { + app_lcore_worker_flush(lp); + i = 0; + } + + app_lcore_worker(lp, bsz_rd, bsz_wr); + + i ++; + } +} + +int +app_lcore_main_loop(__attribute__((unused)) void *arg) +{ + struct app_lcore_params *lp; + unsigned lcore; + + lcore = rte_lcore_id(); + lp = &app.lcore_params[lcore]; + + if (lp->type == e_APP_LCORE_IO) { + printf("Logical core %u (I/O) main loop.\n", lcore); + app_lcore_main_loop_io(); + } + + if (lp->type == e_APP_LCORE_WORKER) { + printf("Logical core %u (worker %u) main loop.\n", + lcore, + (unsigned) lp->worker.worker_id); + app_lcore_main_loop_worker(); + } + + return 0; +} diff --git a/examples/multi_process/Makefile b/examples/multi_process/Makefile new file mode 100644 index 00000000..6b315cc0 --- /dev/null +++ b/examples/multi_process/Makefile @@ -0,0 +1,45 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += client_server_mp +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += simple_mp +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += symmetric_mp + +include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/multi_process/client_server_mp/Makefile b/examples/multi_process/client_server_mp/Makefile new file mode 100644 index 00000000..89cc6bf8 --- /dev/null +++ b/examples/multi_process/client_server_mp/Makefile @@ -0,0 +1,44 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += mp_client +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += mp_server + +include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/multi_process/client_server_mp/mp_client/Makefile b/examples/multi_process/client_server_mp/mp_client/Makefile new file mode 100644 index 00000000..2688fed0 --- /dev/null +++ b/examples/multi_process/client_server_mp/mp_client/Makefile @@ -0,0 +1,48 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = mp_client + +# all source are stored in SRCS-y +SRCS-y := client.c + +CFLAGS += $(WERROR_FLAGS) -O3 +CFLAGS += -I$(SRCDIR)/../shared + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/multi_process/client_server_mp/mp_client/client.c b/examples/multi_process/client_server_mp/mp_client/client.c new file mode 100644 index 00000000..d4f9ca37 --- /dev/null +++ b/examples/multi_process/client_server_mp/mp_client/client.c @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <inttypes.h> +#include <stdarg.h> +#include <errno.h> +#include <sys/queue.h> +#include <stdlib.h> +#include <getopt.h> +#include <string.h> + +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_log.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_ring.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_debug.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_string_fns.h> + +#include "common.h" + +/* Number of packets to attempt to read from queue */ +#define PKT_READ_SIZE ((uint16_t)32) + +/* our client id number - tells us which rx queue to read, and NIC TX + * queue to write to. 
*/ +static uint8_t client_id = 0; + +#define MBQ_CAPACITY 32 + +/* maps input ports to output ports for packets */ +static uint8_t output_ports[RTE_MAX_ETHPORTS]; + +/* buffers up a set of packet that are ready to send */ +struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + +/* shared data from server. We update statistics here */ +static volatile struct tx_stats *tx_stats; + + +/* + * print a usage message + */ +static void +usage(const char *progname) +{ + printf("Usage: %s [EAL args] -- -n <client_id>\n\n", progname); +} + +/* + * Convert the client id number from a string to an int. + */ +static int +parse_client_num(const char *client) +{ + char *end = NULL; + unsigned long temp; + + if (client == NULL || *client == '\0') + return -1; + + temp = strtoul(client, &end, 10); + if (end == NULL || *end != '\0') + return -1; + + client_id = (uint8_t)temp; + return 0; +} + +/* + * Parse the application arguments to the client app. + */ +static int +parse_app_args(int argc, char *argv[]) +{ + int option_index, opt; + char **argvopt = argv; + const char *progname = NULL; + static struct option lgopts[] = { /* no long options */ + {NULL, 0, 0, 0 } + }; + progname = argv[0]; + + while ((opt = getopt_long(argc, argvopt, "n:", lgopts, + &option_index)) != EOF){ + switch (opt){ + case 'n': + if (parse_client_num(optarg) != 0){ + usage(progname); + return -1; + } + break; + default: + usage(progname); + return -1; + } + } + return 0; +} + +/* + * Tx buffer error callback + */ +static void +flush_tx_error_callback(struct rte_mbuf **unsent, uint16_t count, + void *userdata) { + int i; + uint8_t port_id = (uintptr_t)userdata; + + tx_stats->tx_drop[port_id] += count; + + /* free the mbufs which failed from transmit */ + for (i = 0; i < count; i++) + rte_pktmbuf_free(unsent[i]); + +} + +static void +configure_tx_buffer(uint8_t port_id, uint16_t size) +{ + int ret; + + /* Initialize TX buffers */ + tx_buffer[port_id] = rte_zmalloc_socket("tx_buffer", + 
RTE_ETH_TX_BUFFER_SIZE(size), 0, + rte_eth_dev_socket_id(port_id)); + if (tx_buffer[port_id] == NULL) + rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n", + (unsigned) port_id); + + rte_eth_tx_buffer_init(tx_buffer[port_id], size); + + ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[port_id], + flush_tx_error_callback, (void *)(intptr_t)port_id); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot set error callback for " + "tx buffer on port %u\n", (unsigned) port_id); +} + +/* + * set up output ports so that all traffic on port gets sent out + * its paired port. Index using actual port numbers since that is + * what comes in the mbuf structure. + */ +static void +configure_output_ports(const struct port_info *ports) +{ + int i; + if (ports->num_ports > RTE_MAX_ETHPORTS) + rte_exit(EXIT_FAILURE, "Too many ethernet ports. RTE_MAX_ETHPORTS = %u\n", + (unsigned)RTE_MAX_ETHPORTS); + for (i = 0; i < ports->num_ports - 1; i+=2){ + uint8_t p1 = ports->id[i]; + uint8_t p2 = ports->id[i+1]; + output_ports[p1] = p2; + output_ports[p2] = p1; + + configure_tx_buffer(p1, MBQ_CAPACITY); + configure_tx_buffer(p2, MBQ_CAPACITY); + + } +} + +/* + * This function performs routing of packets + * Just sends each input packet out an output port based solely on the input + * port it arrived on. + */ +static void +handle_packet(struct rte_mbuf *buf) +{ + int sent; + const uint8_t in_port = buf->port; + const uint8_t out_port = output_ports[in_port]; + struct rte_eth_dev_tx_buffer *buffer = tx_buffer[out_port]; + + sent = rte_eth_tx_buffer(out_port, client_id, buffer, buf); + if (sent) + tx_stats->tx[out_port] += sent; + +} + +/* + * Application main function - loops through + * receiving and processing packets. 
Never returns + */ +int +main(int argc, char *argv[]) +{ + const struct rte_memzone *mz; + struct rte_ring *rx_ring; + struct rte_mempool *mp; + struct port_info *ports; + int need_flush = 0; /* indicates whether we have unsent packets */ + int retval; + void *pkts[PKT_READ_SIZE]; + uint16_t sent; + + if ((retval = rte_eal_init(argc, argv)) < 0) + return -1; + argc -= retval; + argv += retval; + + if (parse_app_args(argc, argv) < 0) + rte_exit(EXIT_FAILURE, "Invalid command-line arguments\n"); + + if (rte_eth_dev_count() == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + rx_ring = rte_ring_lookup(get_rx_queue_name(client_id)); + if (rx_ring == NULL) + rte_exit(EXIT_FAILURE, "Cannot get RX ring - is server process running?\n"); + + mp = rte_mempool_lookup(PKTMBUF_POOL_NAME); + if (mp == NULL) + rte_exit(EXIT_FAILURE, "Cannot get mempool for mbufs\n"); + + mz = rte_memzone_lookup(MZ_PORT_INFO); + if (mz == NULL) + rte_exit(EXIT_FAILURE, "Cannot get port info structure\n"); + ports = mz->addr; + tx_stats = &(ports->tx_stats[client_id]); + + configure_output_ports(ports); + + RTE_LOG(INFO, APP, "Finished Process Init.\n"); + + printf("\nClient process %d handling packets\n", client_id); + printf("[Press Ctrl-C to quit ...]\n"); + + for (;;) { + uint16_t i, rx_pkts = PKT_READ_SIZE; + uint8_t port; + + /* try dequeuing max possible packets first, if that fails, get the + * most we can. 
Loop body should only execute once, maximum */ + while (rx_pkts > 0 && + unlikely(rte_ring_dequeue_bulk(rx_ring, pkts, rx_pkts) != 0)) + rx_pkts = (uint16_t)RTE_MIN(rte_ring_count(rx_ring), PKT_READ_SIZE); + + if (unlikely(rx_pkts == 0)){ + if (need_flush) + for (port = 0; port < ports->num_ports; port++) { + sent = rte_eth_tx_buffer_flush(ports->id[port], client_id, + tx_buffer[port]); + if (unlikely(sent)) + tx_stats->tx[port] += sent; + } + need_flush = 0; + continue; + } + + for (i = 0; i < rx_pkts; i++) + handle_packet(pkts[i]); + + need_flush = 1; + } +} diff --git a/examples/multi_process/client_server_mp/mp_server/Makefile b/examples/multi_process/client_server_mp/mp_server/Makefile new file mode 100644 index 00000000..c29e4783 --- /dev/null +++ b/examples/multi_process/client_server_mp/mp_server/Makefile @@ -0,0 +1,61 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(error This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +endif + +# binary name +APP = mp_server + +# all source are stored in SRCS-y +SRCS-y := main.c init.c args.c + +INC := $(wildcard *.h) + +CFLAGS += $(WERROR_FLAGS) -O3 +CFLAGS += -I$(SRCDIR)/../shared + +# for newer gcc, e.g. 4.4, no-strict-aliasing may not be necessary +# and so the next line can be removed in those cases. +EXTRA_CFLAGS += -fno-strict-aliasing + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/multi_process/client_server_mp/mp_server/args.c b/examples/multi_process/client_server_mp/mp_server/args.c new file mode 100644 index 00000000..bf8c666c --- /dev/null +++ b/examples/multi_process/client_server_mp/mp_server/args.c @@ -0,0 +1,172 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <getopt.h> +#include <stdarg.h> +#include <errno.h> + +#include <rte_memory.h> +#include <rte_string_fns.h> + +#include "common.h" +#include "args.h" +#include "init.h" + +/* global var for number of clients - extern in header */ +uint8_t num_clients; + +static const char *progname; + +/** + * Prints out usage information to stdout + */ +static void +usage(void) +{ + printf( + "%s [EAL options] -- -p PORTMASK -n NUM_CLIENTS [-s NUM_SOCKETS]\n" + " -p PORTMASK: hexadecimal bitmask of ports to use\n" + " -n NUM_CLIENTS: number of client processes to use\n" + , progname); +} + +/** + * The ports to be used by the application are passed in + * the form of a bitmask. This function parses the bitmask + * and places the port numbers to be used into the port[] + * array variable + */ +static int +parse_portmask(uint8_t max_ports, const char *portmask) +{ + char *end = NULL; + unsigned long pm; + uint8_t count = 0; + + if (portmask == NULL || *portmask == '\0') + return -1; + + /* convert parameter to a number and verify */ + pm = strtoul(portmask, &end, 16); + if (end == NULL || *end != '\0' || pm == 0) + return -1; + + /* loop through bits of the mask and mark ports */ + while (pm != 0){ + if (pm & 0x01){ /* bit is set in mask, use port */ + if (count >= max_ports) + printf("WARNING: requested port %u not present" + " - ignoring\n", (unsigned)count); + else + ports->id[ports->num_ports++] = count; + } + pm = (pm >> 1); + count++; + } + + return 0; +} + +/** + * Take the number of clients parameter passed to the app + * and convert to a number to store in the num_clients variable + */ +static int +parse_num_clients(const char *clients) +{ + char *end = NULL; + unsigned long temp; + + if (clients == NULL || *clients == '\0') + return -1; + + temp = strtoul(clients, &end, 10); + if (end == NULL || *end != '\0' || temp == 0) + return -1; + + num_clients = (uint8_t)temp; + return 0; +} + +/** + * 
The application specific arguments follow the DPDK-specific + * arguments which are stripped by the DPDK init. This function + * processes these application arguments, printing usage info + * on error. + */ +int +parse_app_args(uint8_t max_ports, int argc, char *argv[]) +{ + int option_index, opt; + char **argvopt = argv; + static struct option lgopts[] = { /* no long options */ + {NULL, 0, 0, 0 } + }; + progname = argv[0]; + + while ((opt = getopt_long(argc, argvopt, "n:p:", lgopts, + &option_index)) != EOF){ + switch (opt){ + case 'p': + if (parse_portmask(max_ports, optarg) != 0){ + usage(); + return -1; + } + break; + case 'n': + if (parse_num_clients(optarg) != 0){ + usage(); + return -1; + } + break; + default: + printf("ERROR: Unknown option '%c'\n", opt); + usage(); + return -1; + } + } + + if (ports->num_ports == 0 || num_clients == 0){ + usage(); + return -1; + } + + if (ports->num_ports % 2 != 0){ + printf("ERROR: application requires an even number of ports to use\n"); + return -1; + } + return 0; +} diff --git a/examples/multi_process/client_server_mp/mp_server/args.h b/examples/multi_process/client_server_mp/mp_server/args.h new file mode 100644 index 00000000..23af1bd3 --- /dev/null +++ b/examples/multi_process/client_server_mp/mp_server/args.h @@ -0,0 +1,39 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ARGS_H_ +#define _ARGS_H_ + +int parse_app_args(uint8_t max_ports, int argc, char *argv[]); + +#endif /* ifndef _ARGS_H_ */ diff --git a/examples/multi_process/client_server_mp/mp_server/init.c b/examples/multi_process/client_server_mp/mp_server/init.c new file mode 100644 index 00000000..ecb61c68 --- /dev/null +++ b/examples/multi_process/client_server_mp/mp_server/init.c @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <sys/queue.h> +#include <errno.h> +#include <stdarg.h> +#include <inttypes.h> + +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_byteorder.h> +#include <rte_atomic.h> +#include <rte_launch.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_debug.h> +#include <rte_ring.h> +#include <rte_log.h> +#include <rte_mempool.h> +#include <rte_memcpy.h> +#include <rte_mbuf.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_malloc.h> +#include <rte_fbk_hash.h> +#include <rte_string_fns.h> +#include <rte_cycles.h> + +#include "common.h" +#include "args.h" +#include "init.h" + +#define MBUFS_PER_CLIENT 1536 +#define MBUFS_PER_PORT 1536 +#define MBUF_CACHE_SIZE 512 + +#define RTE_MP_RX_DESC_DEFAULT 512 +#define RTE_MP_TX_DESC_DEFAULT 512 +#define CLIENT_QUEUE_RINGSIZE 128 + +#define NO_FLAGS 0 + +/* The mbuf pool for packet rx */ +struct rte_mempool *pktmbuf_pool; + +/* array of info/queues for clients */ +struct client *clients = NULL; + +/* the port details */ +struct port_info *ports; + +/** + * Initialise the mbuf pool for packet reception for the NIC, and any other + * buffer pools needed by the app - currently none. 
+ */ +static int +init_mbuf_pools(void) +{ + const unsigned num_mbufs = (num_clients * MBUFS_PER_CLIENT) \ + + (ports->num_ports * MBUFS_PER_PORT); + + /* don't pass single-producer/single-consumer flags to mbuf create as it + * seems faster to use a cache instead */ + printf("Creating mbuf pool '%s' [%u mbufs] ...\n", + PKTMBUF_POOL_NAME, num_mbufs); + pktmbuf_pool = rte_pktmbuf_pool_create(PKTMBUF_POOL_NAME, num_mbufs, + MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + + return pktmbuf_pool == NULL; /* 0 on success */ +} + +/** + * Initialise an individual port: + * - configure number of rx and tx rings + * - set up each rx ring, to pull from the main mbuf pool + * - set up each tx ring + * - start the port and report its status to stdout + */ +static int +init_port(uint8_t port_num) +{ + /* for port configuration all features are off by default */ + const struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS + } + }; + const uint16_t rx_rings = 1, tx_rings = num_clients; + const uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT; + const uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT; + + uint16_t q; + int retval; + + printf("Port %u init ... 
", (unsigned)port_num); + fflush(stdout); + + /* Standard DPDK port initialisation - config port, then set up + * rx and tx rings */ + if ((retval = rte_eth_dev_configure(port_num, rx_rings, tx_rings, + &port_conf)) != 0) + return retval; + + for (q = 0; q < rx_rings; q++) { + retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size, + rte_eth_dev_socket_id(port_num), + NULL, pktmbuf_pool); + if (retval < 0) return retval; + } + + for ( q = 0; q < tx_rings; q ++ ) { + retval = rte_eth_tx_queue_setup(port_num, q, tx_ring_size, + rte_eth_dev_socket_id(port_num), + NULL); + if (retval < 0) return retval; + } + + rte_eth_promiscuous_enable(port_num); + + retval = rte_eth_dev_start(port_num); + if (retval < 0) return retval; + + printf( "done: \n"); + + return 0; +} + +/** + * Set up the DPDK rings which will be used to pass packets, via + * pointers, between the multi-process server and client processes. + * Each client needs one RX queue. + */ +static int +init_shm_rings(void) +{ + unsigned i; + unsigned socket_id; + const char * q_name; + const unsigned ringsize = CLIENT_QUEUE_RINGSIZE; + + clients = rte_malloc("client details", + sizeof(*clients) * num_clients, 0); + if (clients == NULL) + rte_exit(EXIT_FAILURE, "Cannot allocate memory for client program details\n"); + + for (i = 0; i < num_clients; i++) { + /* Create an RX queue for each client */ + socket_id = rte_socket_id(); + q_name = get_rx_queue_name(i); + clients[i].rx_q = rte_ring_create(q_name, + ringsize, socket_id, + RING_F_SP_ENQ | RING_F_SC_DEQ ); /* single prod, single cons */ + if (clients[i].rx_q == NULL) + rte_exit(EXIT_FAILURE, "Cannot create rx ring queue for client %u\n", i); + } + return 0; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, 
print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << ports->id[portid])) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(ports->id[portid], &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", ports->id[portid], + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)ports->id[portid]); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +/** + * Main init function for the multi-process server app, + * calls subfunctions to do each stage of the initialisation. 
+ */ +int +init(int argc, char *argv[]) +{ + int retval; + const struct rte_memzone *mz; + uint8_t i, total_ports; + + /* init EAL, parsing EAL args */ + retval = rte_eal_init(argc, argv); + if (retval < 0) + return -1; + argc -= retval; + argv += retval; + + /* get total number of ports */ + total_ports = rte_eth_dev_count(); + + /* set up array for port data */ + mz = rte_memzone_reserve(MZ_PORT_INFO, sizeof(*ports), + rte_socket_id(), NO_FLAGS); + if (mz == NULL) + rte_exit(EXIT_FAILURE, "Cannot reserve memory zone for port information\n"); + memset(mz->addr, 0, sizeof(*ports)); + ports = mz->addr; + + /* parse additional, application arguments */ + retval = parse_app_args(total_ports, argc, argv); + if (retval != 0) + return -1; + + /* initialise mbuf pools */ + retval = init_mbuf_pools(); + if (retval != 0) + rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n"); + + /* now initialise the ports we will use */ + for (i = 0; i < ports->num_ports; i++) { + retval = init_port(ports->id[i]); + if (retval != 0) + rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n", + (unsigned)i); + } + + check_all_ports_link_status(ports->num_ports, (~0x0)); + + /* initialise the client queues/rings for inter-eu comms */ + init_shm_rings(); + + return 0; +} diff --git a/examples/multi_process/client_server_mp/mp_server/init.h b/examples/multi_process/client_server_mp/mp_server/init.h new file mode 100644 index 00000000..7333614d --- /dev/null +++ b/examples/multi_process/client_server_mp/mp_server/init.h @@ -0,0 +1,72 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _INIT_H_ +#define _INIT_H_ + +/* + * #include <rte_ring.h> + * #include "args.h" + */ + +/* + * Define a client structure with all needed info, including + * stats from the clients. + */ +struct client { + struct rte_ring *rx_q; + unsigned client_id; + /* these stats hold how many packets the client will actually receive, + * and how many packets were dropped because the client's queue was full. + * The port-info stats, in contrast, record how many packets were received + * or transmitted on an actual NIC port. + */ + struct { + volatile uint64_t rx; + volatile uint64_t rx_drop; + } stats; +}; + +extern struct client *clients; + +/* the shared port information: port numbers, rx and tx stats etc. 
*/ +extern struct port_info *ports; + +extern struct rte_mempool *pktmbuf_pool; +extern uint8_t num_clients; +extern unsigned num_sockets; +extern struct port_info *ports; + +int init(int argc, char *argv[]); + +#endif /* ifndef _INIT_H_ */ diff --git a/examples/multi_process/client_server_mp/mp_server/main.c b/examples/multi_process/client_server_mp/mp_server/main.c new file mode 100644 index 00000000..de54c674 --- /dev/null +++ b/examples/multi_process/client_server_mp/mp_server/main.c @@ -0,0 +1,319 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdint.h> +#include <stdarg.h> +#include <inttypes.h> +#include <inttypes.h> +#include <sys/queue.h> +#include <errno.h> +#include <netinet/ip.h> + +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_byteorder.h> +#include <rte_launch.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_atomic.h> +#include <rte_ring.h> +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_mempool.h> +#include <rte_memcpy.h> +#include <rte_mbuf.h> +#include <rte_ether.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_ethdev.h> +#include <rte_byteorder.h> +#include <rte_malloc.h> +#include <rte_fbk_hash.h> +#include <rte_string_fns.h> + +#include "common.h" +#include "args.h" +#include "init.h" + +/* + * When doing reads from the NIC or the client queues, + * use this batch size + */ +#define PACKET_READ_SIZE 32 + +/* + * Local buffers to put packets in, used to send packets in bursts to the + * clients + */ +struct client_rx_buf { + struct rte_mbuf *buffer[PACKET_READ_SIZE]; + uint16_t count; +}; + +/* One buffer per client rx queue - dynamically allocate array */ +static struct client_rx_buf *cl_rx_buf; + +static const char * +get_printable_mac_addr(uint8_t port) +{ + static const char err_address[] = 
"00:00:00:00:00:00"; + static char addresses[RTE_MAX_ETHPORTS][sizeof(err_address)]; + + if (unlikely(port >= RTE_MAX_ETHPORTS)) + return err_address; + if (unlikely(addresses[port][0]=='\0')){ + struct ether_addr mac; + rte_eth_macaddr_get(port, &mac); + snprintf(addresses[port], sizeof(addresses[port]), + "%02x:%02x:%02x:%02x:%02x:%02x\n", + mac.addr_bytes[0], mac.addr_bytes[1], mac.addr_bytes[2], + mac.addr_bytes[3], mac.addr_bytes[4], mac.addr_bytes[5]); + } + return addresses[port]; +} + +/* + * This function displays the recorded statistics for each port + * and for each client. It uses ANSI terminal codes to clear + * screen when called. It is called from a single non-master + * thread in the server process, when the process is run with more + * than one lcore enabled. + */ +static void +do_stats_display(void) +{ + unsigned i, j; + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' }; + uint64_t port_tx[RTE_MAX_ETHPORTS], port_tx_drop[RTE_MAX_ETHPORTS]; + uint64_t client_tx[MAX_CLIENTS], client_tx_drop[MAX_CLIENTS]; + + /* to get TX stats, we need to do some summing calculations */ + memset(port_tx, 0, sizeof(port_tx)); + memset(port_tx_drop, 0, sizeof(port_tx_drop)); + memset(client_tx, 0, sizeof(client_tx)); + memset(client_tx_drop, 0, sizeof(client_tx_drop)); + + for (i = 0; i < num_clients; i++){ + const volatile struct tx_stats *tx = &ports->tx_stats[i]; + for (j = 0; j < ports->num_ports; j++){ + /* assign to local variables here, save re-reading volatile vars */ + const uint64_t tx_val = tx->tx[ports->id[j]]; + const uint64_t drop_val = tx->tx_drop[ports->id[j]]; + port_tx[j] += tx_val; + port_tx_drop[j] += drop_val; + client_tx[i] += tx_val; + client_tx_drop[i] += drop_val; + } + } + + /* Clear screen and move to top left */ + printf("%s%s", clr, topLeft); + + printf("PORTS\n"); + printf("-----\n"); + for (i = 0; i < ports->num_ports; i++) + printf("Port %u: '%s'\t", (unsigned)ports->id[i], + 
get_printable_mac_addr(ports->id[i])); + printf("\n\n"); + for (i = 0; i < ports->num_ports; i++){ + printf("Port %u - rx: %9"PRIu64"\t" + "tx: %9"PRIu64"\n", + (unsigned)ports->id[i], ports->rx_stats.rx[i], + port_tx[i]); + } + + printf("\nCLIENTS\n"); + printf("-------\n"); + for (i = 0; i < num_clients; i++){ + const unsigned long long rx = clients[i].stats.rx; + const unsigned long long rx_drop = clients[i].stats.rx_drop; + printf("Client %2u - rx: %9llu, rx_drop: %9llu\n" + " tx: %9"PRIu64", tx_drop: %9"PRIu64"\n", + i, rx, rx_drop, client_tx[i], client_tx_drop[i]); + } + + printf("\n"); +} + +/* + * The function called from each non-master lcore used by the process. + * The test_and_set function is used to randomly pick a single lcore on which + * the code to display the statistics will run. Otherwise, the code just + * repeatedly sleeps. + */ +static int +sleep_lcore(__attribute__((unused)) void *dummy) +{ + /* Used to pick a display thread - static, so zero-initialised */ + static rte_atomic32_t display_stats; + + /* Only one core should display stats */ + if (rte_atomic32_test_and_set(&display_stats)) { + const unsigned sleeptime = 1; + printf("Core %u displaying statistics\n", rte_lcore_id()); + + /* Longer initial pause so above printf is seen */ + sleep(sleeptime * 3); + + /* Loop forever: sleep always returns 0 or <= param */ + while (sleep(sleeptime) <= sleeptime) + do_stats_display(); + } + return 0; +} + +/* + * Function to set all the client statistic values to zero. + * Called at program startup. 
+ */ +static void +clear_stats(void) +{ + unsigned i; + + for (i = 0; i < num_clients; i++) + clients[i].stats.rx = clients[i].stats.rx_drop = 0; +} + +/* + * send a burst of traffic to a client, assuming there are packets + * available to be sent to this client + */ +static void +flush_rx_queue(uint16_t client) +{ + uint16_t j; + struct client *cl; + + if (cl_rx_buf[client].count == 0) + return; + + cl = &clients[client]; + if (rte_ring_enqueue_bulk(cl->rx_q, (void **)cl_rx_buf[client].buffer, + cl_rx_buf[client].count) != 0){ + for (j = 0; j < cl_rx_buf[client].count; j++) + rte_pktmbuf_free(cl_rx_buf[client].buffer[j]); + cl->stats.rx_drop += cl_rx_buf[client].count; + } + else + cl->stats.rx += cl_rx_buf[client].count; + + cl_rx_buf[client].count = 0; +} + +/* + * marks a packet down to be sent to a particular client process + */ +static inline void +enqueue_rx_packet(uint8_t client, struct rte_mbuf *buf) +{ + cl_rx_buf[client].buffer[cl_rx_buf[client].count++] = buf; +} + +/* + * This function takes a group of packets and routes them + * individually to the client process. Very simply round-robins the packets + * without checking any of the packet contents. + */ +static void +process_packets(uint32_t port_num __rte_unused, + struct rte_mbuf *pkts[], uint16_t rx_count) +{ + uint16_t i; + uint8_t client = 0; + + for (i = 0; i < rx_count; i++) { + enqueue_rx_packet(client, pkts[i]); + + if (++client == num_clients) + client = 0; + } + + for (i = 0; i < num_clients; i++) + flush_rx_queue(i); +} + +/* + * Function called by the master lcore of the DPDK process. 
+ */ +static void +do_packet_forwarding(void) +{ + unsigned port_num = 0; /* indexes the port[] array */ + + for (;;) { + struct rte_mbuf *buf[PACKET_READ_SIZE]; + uint16_t rx_count; + + /* read a port */ + rx_count = rte_eth_rx_burst(ports->id[port_num], 0, \ + buf, PACKET_READ_SIZE); + ports->rx_stats.rx[port_num] += rx_count; + + /* Now process the NIC packets read */ + if (likely(rx_count > 0)) + process_packets(port_num, buf, rx_count); + + /* move to next port */ + if (++port_num == ports->num_ports) + port_num = 0; + } +} + +int +main(int argc, char *argv[]) +{ + /* initialise the system */ + if (init(argc, argv) < 0 ) + return -1; + RTE_LOG(INFO, APP, "Finished Process Init.\n"); + + cl_rx_buf = calloc(num_clients, sizeof(cl_rx_buf[0])); + + /* clear statistics */ + clear_stats(); + + /* put all other cores to sleep bar master */ + rte_eal_mp_remote_launch(sleep_lcore, NULL, SKIP_MASTER); + + do_packet_forwarding(); + return 0; +} diff --git a/examples/multi_process/client_server_mp/shared/common.h b/examples/multi_process/client_server_mp/shared/common.h new file mode 100644 index 00000000..631c4632 --- /dev/null +++ b/examples/multi_process/client_server_mp/shared/common.h @@ -0,0 +1,87 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _COMMON_H_ +#define _COMMON_H_ + +#define MAX_CLIENTS 16 + +/* + * Shared port info, including statistics information for display by server. + * Structure will be put in a memzone. + * - All port id values share one cache line as this data will be read-only + * during operation. + * - All rx statistic values share cache lines, as this data is written only + * by the server process. (rare reads by stats display) + * - The tx statistics have values for all ports per cache line, but the stats + * themselves are written by the clients, so we have a distinct set, on different + * cache lines for each client to use. 
+ */ +struct rx_stats{ + uint64_t rx[RTE_MAX_ETHPORTS]; +} __rte_cache_aligned; + +struct tx_stats{ + uint64_t tx[RTE_MAX_ETHPORTS]; + uint64_t tx_drop[RTE_MAX_ETHPORTS]; +} __rte_cache_aligned; + +struct port_info { + uint8_t num_ports; + uint8_t id[RTE_MAX_ETHPORTS]; + volatile struct rx_stats rx_stats; + volatile struct tx_stats tx_stats[MAX_CLIENTS]; +}; + +/* define common names for structures shared between server and client */ +#define MP_CLIENT_RXQ_NAME "MProc_Client_%u_RX" +#define PKTMBUF_POOL_NAME "MProc_pktmbuf_pool" +#define MZ_PORT_INFO "MProc_port_info" + +/* + * Given the rx queue name template above, get the queue name + */ +static inline const char * +get_rx_queue_name(unsigned id) +{ + /* buffer for return value. Size calculated by %u being replaced + * by maximum 3 digits (plus an extra byte for safety) */ + static char buffer[sizeof(MP_CLIENT_RXQ_NAME) + 2]; + + snprintf(buffer, sizeof(buffer) - 1, MP_CLIENT_RXQ_NAME, id); + return buffer; +} + +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 + +#endif diff --git a/examples/multi_process/l2fwd_fork/Makefile b/examples/multi_process/l2fwd_fork/Makefile new file mode 100644 index 00000000..ff257a35 --- /dev/null +++ b/examples/multi_process/l2fwd_fork/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd_fork + +# all sources are stored in SRCS-y +SRCS-y := main.c flib.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/multi_process/l2fwd_fork/flib.c b/examples/multi_process/l2fwd_fork/flib.c new file mode 100644 index 00000000..343f09f1 --- /dev/null +++ b/examples/multi_process/l2fwd_fork/flib.c @@ -0,0 +1,313 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/wait.h> +#include <sys/prctl.h> +#include <netinet/in.h> +#include <setjmp.h> +#include <stdarg.h> +#include <ctype.h> +#include <errno.h> +#include <getopt.h> +#include <dirent.h> +#include <signal.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> + +#include "flib.h" + +#define SIG_PARENT_EXIT SIGUSR1 + +struct lcore_stat { + pid_t pid; /**< pthread identifier */ + lcore_function_t *f; /**< function to call */ + void *arg; /**< argument of function */ + slave_exit_notify *cb_fn; +} __rte_cache_aligned; + + +static struct lcore_stat *core_cfg; +static uint16_t *lcore_cfg = NULL; + +/* signal handler to be notified after parent leaves */ +static void +sighand_parent_exit(int sig) +{ + printf("lcore = %u : Find parent leaves, sig=%d\n", rte_lcore_id(), + sig); + printf("Child leaving\n"); + exit(0); + + return; +} + +/** + * Real function entrance ran in slave process + **/ +static int +slave_proc_func(void) +{ + struct rte_config *config; + unsigned slave_id = rte_lcore_id(); + struct lcore_stat *cfg = &core_cfg[slave_id]; + + if (prctl(PR_SET_PDEATHSIG, SIG_PARENT_EXIT, 0, 0, 0, 0) != 0) + printf("Warning: Slave can't register for being notified in" + "case master process exited\n"); + else 
{ + struct sigaction act; + memset(&act, 0 , sizeof(act)); + act.sa_handler = sighand_parent_exit; + if (sigaction(SIG_PARENT_EXIT, &act, NULL) != 0) + printf("Fail to register signal handler:%d\n", SIG_PARENT_EXIT); + } + + /* Set slave process to SECONDARY to avoid operation like dev_start/stop etc */ + config = rte_eal_get_configuration(); + if (NULL == config) + printf("Warning:Can't get rte_config\n"); + else + config->process_type = RTE_PROC_SECONDARY; + + printf("Core %u is ready (pid=%d)\n", slave_id, (int)cfg->pid); + + exit(cfg->f(cfg->arg)); +} + +/** + * function entrance ran in master thread, which will spawn slave process and wait until + * specific slave exited. + **/ +static int +lcore_func(void *arg __attribute__((unused))) +{ + unsigned slave_id = rte_lcore_id(); + struct lcore_stat *cfg = &core_cfg[slave_id]; + int pid, stat; + + if (rte_get_master_lcore() == slave_id) + return cfg->f(cfg->arg); + + /* fork a slave process */ + pid = fork(); + + if (pid == -1) { + printf("Failed to fork\n"); + return -1; + } else if (pid == 0) /* child */ + return slave_proc_func(); + else { /* parent */ + cfg->pid = pid; + + waitpid(pid, &stat, 0); + + cfg->pid = 0; + cfg->f = NULL; + cfg->arg = NULL; + /* Notify slave's exit if applicable */ + if(cfg->cb_fn) + cfg->cb_fn(slave_id, stat); + return stat; + } +} + +static int +lcore_id_init(void) +{ + int i; + /* Setup lcore ID allocation map */ + lcore_cfg = rte_zmalloc("LCORE_ID_MAP", + sizeof(uint16_t) * RTE_MAX_LCORE, + RTE_CACHE_LINE_SIZE); + + if(lcore_cfg == NULL) + rte_panic("Failed to malloc\n"); + + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (rte_lcore_is_enabled(i)) + lcore_cfg[i] = 1; + } + return 0; +} + +int +flib_assign_lcore_id(void) +{ + unsigned i; + int ret; + + /** + * thread assigned a lcore id previously, or a slave thread. But still have + * a bug here: If the core mask includes core 0, and that core call this + * function, it still can get a new lcore id. 
+ **/ + if (rte_lcore_id() != 0) + return -1; + + do { + /* Find a lcore id not used yet, avoid to use lcore ID 0 */ + for (i = 1; i < RTE_MAX_LCORE; i++) { + if (lcore_cfg[i] == 0) + break; + } + if (i == RTE_MAX_LCORE) + return -1; + + /* Assign new lcore id to this thread */ + + ret = rte_atomic16_cmpset(&lcore_cfg[i], 0, 1); + } while (unlikely(ret == 0)); + + RTE_PER_LCORE(_lcore_id) = i; + return i; +} + +void +flib_free_lcore_id(unsigned lcore_id) +{ + /* id is not valid or belongs to pinned core id */ + if (lcore_id >= RTE_MAX_LCORE || lcore_id == 0 || + rte_lcore_is_enabled(lcore_id)) + return; + + lcore_cfg[lcore_id] = 0; +} + +int +flib_register_slave_exit_notify(unsigned slave_id, + slave_exit_notify *cb) +{ + if (cb == NULL) + return -EFAULT; + + if (!rte_lcore_is_enabled(slave_id)) + return -ENOENT; + + core_cfg[slave_id].cb_fn = cb; + + return 0; +} + +enum slave_stat +flib_query_slave_status(unsigned slave_id) +{ + if (!rte_lcore_is_enabled(slave_id)) + return ST_FREEZE; + /* pid only be set when slave process spawned */ + if (core_cfg[slave_id].pid != 0) + return ST_RUN; + else + return ST_IDLE; +} + +int +flib_remote_launch(lcore_function_t *f, + void *arg, unsigned slave_id) +{ + if (f == NULL) + return -1; + + if (!rte_lcore_is_enabled(slave_id)) + return -1; + + /* Wait until specific lcore state change to WAIT */ + rte_eal_wait_lcore(slave_id); + + core_cfg[slave_id].f = f; + core_cfg[slave_id].arg = arg; + + return rte_eal_remote_launch(lcore_func, NULL, slave_id); +} + +int +flib_mp_remote_launch(lcore_function_t *f, void *arg, + enum rte_rmt_call_master_t call_master) +{ + int i; + + RTE_LCORE_FOREACH_SLAVE(i) { + core_cfg[i].arg = arg; + core_cfg[i].f = f; + } + + return rte_eal_mp_remote_launch(lcore_func, NULL, call_master); +} + +int +flib_init(void) +{ + if ((core_cfg = rte_zmalloc("core_cfg", + sizeof(struct lcore_stat) * RTE_MAX_LCORE, + RTE_CACHE_LINE_SIZE)) == NULL ) { + printf("rte_zmalloc failed\n"); + return -1; + } + + if 
(lcore_id_init() != 0) { + printf("lcore_id_init failed\n"); + return -1; + } + + return 0; +} diff --git a/examples/multi_process/l2fwd_fork/flib.h b/examples/multi_process/l2fwd_fork/flib.h new file mode 100644 index 00000000..711e3b6d --- /dev/null +++ b/examples/multi_process/l2fwd_fork/flib.h @@ -0,0 +1,149 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __FLIB_H +#define __FLIB_H + +/* callback function pointer when specific slave leaves */ +typedef void (slave_exit_notify)(unsigned slaveid, int stat); + +enum slave_stat{ + ST_FREEZE = 1, + ST_IDLE, + ST_RUN, + ST_ZOMBIE, /* Not implemented yet */ +}; + +/** + * Initialize the fork lib. + * + * @return + * - 0 : fork lib initialized successfully + * - -1 : fork lib initialized failed + */ +int flib_init(void); + +/** + * Check that every SLAVE lcores are in WAIT state, then call + * flib_remote_launch() for all of them. If call_master is true + * (set to CALL_MASTER), also call the function on the master lcore. + * + * @param f: + * function pointer need to run + * @param arg: + * argument for f to carry + * @param call_master + * - SKIP_MASTER : only launch function on slave lcores + * - CALL_MASTER : launch function on master and slave lcores + * @return + * - 0 : function execute successfully + * - -1 : function execute failed + */ +int flib_mp_remote_launch(lcore_function_t *f, + void *arg, enum rte_rmt_call_master_t call_master); + +/** + * Send a message to a slave lcore identified by slave_id to call a + * function f with argument arg. + * + * @param f: + * function pointer need to run + * @param arg: + * argument for f to carry + * @param slave_id + * slave lcore id to run on + * @return + * - 0 : function execute successfully + * - -1 : function execute failed + */ +int flib_remote_launch(lcore_function_t *f, + void *arg, unsigned slave_id); + +/** + * Query the running stat for specific slave, wont' work in with master id + * + * @param slave_id: + * lcore id which should not be master id + * @return + * - ST_FREEZE : lcore is not in enabled core mask + * - ST_IDLE : lcore is idle + * - ST_RUN : lcore is running something + */ +enum slave_stat +flib_query_slave_status(unsigned slave_id); + +/** + * Register a callback function to be notified in case specific slave exit. 
+ * + * @param slave_id: + * lcore id which should not be master id + * @param cb: + * callback pointer to register + * @return + * - 0 : function executed successfully + * - -EFAULT : argument error + * - -ENOENT : slave_id not correct + */ +int flib_register_slave_exit_notify(unsigned slave_id, + slave_exit_notify *cb); + +/** + * Assign a lcore ID to a non-slave thread. A non-slave thread is a thread that + * was not created by function rte_eal_remote_launch or rte_eal_mp_remote_launch. + * These threads can either bind to a lcore or float among different lcores. + * This lcore ID will be unique in multi-thread or multi-process DPDK running + * environment, then it can benefit from using the cache mechanism provided in + * mempool library. + * After calling successfully, use rte_lcore_id() to get the assigned lcore ID, but + * other lcore functions are not guaranteed to work correctly. + * + * @return + * - -1 : can't assign a lcore id with 3 possibilities. + * - it's not non-slave thread. + * - it has already been assigned a lcore id + * - the lcore id is running out. + * - > 0 : the assigned lcore id. + */ +int flib_assign_lcore_id(void); + +/** + * Free the lcore_id that was assigned in flib_assign_lcore_id(). + * Call it when a non-slave thread is leaving or has left. + * + * @param lcore_id + * The identifier of the lcore, which MUST be between 1 and + * RTE_MAX_LCORE-1. + */ +void flib_free_lcore_id(unsigned lcore_id); + +#endif /* __FLIB_H */ diff --git a/examples/multi_process/l2fwd_fork/main.c b/examples/multi_process/l2fwd_fork/main.c new file mode 100644 index 00000000..2dc8b829 --- /dev/null +++ b/examples/multi_process/l2fwd_fork/main.c @@ -0,0 +1,1288 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <stdint.h> +#include <sched.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/queue.h> +#include <netinet/in.h> +#include <setjmp.h> +#include <stdarg.h> +#include <ctype.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_spinlock.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> + +#include "flib.h" + +#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1 +#define MBUF_NAME "mbuf_pool_%d" +#define MBUF_SIZE \ +(RTE_MBUF_DEFAULT_DATAROOM + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) +#define NB_MBUF 8192 +#define RING_MASTER_NAME "l2fwd_ring_m2s_" +#define RING_SLAVE_NAME "l2fwd_ring_s2m_" +#define MAX_NAME_LEN 32 +/* RECREATE flag indicate needs initialize resource and launch slave_core again */ +#define SLAVE_RECREATE_FLAG 0x1 +/* RESTART flag indicate needs restart port and send START command again */ +#define SLAVE_RESTART_FLAG 0x2 +#define INVALID_MAPPING_ID ((unsigned)LCORE_ID_ANY) +/* Maximum message buffer per slave */ +#define NB_CORE_MSGBUF 32 +enum l2fwd_cmd{ + CMD_START, + CMD_STOP, +}; + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static 
uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* mask of enabled ports */ +static uint32_t l2fwd_enabled_port_mask = 0; + +/* list of enabled ports */ +static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS]; + +static unsigned int l2fwd_rx_queue_per_lcore = 1; + +struct mbuf_table { + unsigned len; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT 16 +struct lcore_queue_conf { + unsigned n_rx_port; + unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; +} __rte_cache_aligned; +struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; + +struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + +struct lcore_resource_struct { + int enabled; /* Only set in case this lcore involved into packet forwarding */ + int flags; /* Set only slave need to restart or recreate */ + unsigned lcore_id; /* lcore ID */ + unsigned pair_id; /* dependency lcore ID on port */ + char ring_name[2][MAX_NAME_LEN]; + /* ring[0] for master send cmd, slave read */ + /* ring[1] for slave send ack, master read */ + struct rte_ring *ring[2]; + int port_num; /* Total port numbers */ + uint8_t port[RTE_MAX_ETHPORTS]; /* Port id for that lcore to receive packets */ +}__attribute__((packed)) __rte_cache_aligned; + +static struct lcore_resource_struct lcore_resource[RTE_MAX_LCORE]; +static struct rte_mempool *message_pool; +static rte_spinlock_t res_lock = RTE_SPINLOCK_INITIALIZER; +/* use floating processes */ +static int float_proc = 0; +/* Save original cpu affinity */ +struct cpu_aff_arg{ + cpu_set_t set; + size_t size; +}cpu_aff; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + 
.hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static struct rte_mempool * l2fwd_pktmbuf_pool[RTE_MAX_ETHPORTS]; + +/* Per-port statistics struct */ +struct l2fwd_port_statistics { + uint64_t tx; + uint64_t rx; + uint64_t dropped; +} __rte_cache_aligned; +struct l2fwd_port_statistics *port_statistics; +/** + * pointer to lcore ID mapping array, used to return lcore id in case slave + * process exited unexpectedly, use only floating process option applied + **/ +unsigned *mapping_id; + +/* A tsc-based timer responsible for triggering statistics printout */ +#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 Ghz */ +#define MAX_TIMER_PERIOD 86400 /* 1 day max */ +static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */ + +static int l2fwd_launch_one_lcore(void *dummy); + +/* Print out statistics on packets dropped */ +static void +print_stats(void) +{ + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; + unsigned portid; + + total_packets_dropped = 0; + total_packets_tx = 0; + total_packets_rx = 0; + + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' }; + + /* Clear screen and move to top left */ + printf("%s%s", clr, topLeft); + + printf("\nPort statistics ===================================="); + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + /* skip disabled ports */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + printf("\nStatistics for port %u ------------------------------" + "\nPackets sent: %24"PRIu64 + "\nPackets received: %20"PRIu64 + "\nPackets dropped: %21"PRIu64, + portid, + port_statistics[portid].tx, + port_statistics[portid].rx, + port_statistics[portid].dropped); + + total_packets_dropped += port_statistics[portid].dropped; + total_packets_tx += port_statistics[portid].tx; + total_packets_rx += port_statistics[portid].rx; + } + 
printf("\nAggregate statistics ===============================" + "\nTotal packets sent: %18"PRIu64 + "\nTotal packets received: %14"PRIu64 + "\nTotal packets dropped: %15"PRIu64, + total_packets_tx, + total_packets_rx, + total_packets_dropped); + printf("\n====================================================\n"); +} + +static int +clear_cpu_affinity(void) +{ + int s; + + s = sched_setaffinity(0, cpu_aff.size, &cpu_aff.set); + if (s != 0) { + printf("sched_setaffinity failed:%s\n", strerror(errno)); + return -1; + } + + return 0; +} + +static int +get_cpu_affinity(void) +{ + int s; + + cpu_aff.size = sizeof(cpu_set_t); + CPU_ZERO(&cpu_aff.set); + + s = sched_getaffinity(0, cpu_aff.size, &cpu_aff.set); + if (s != 0) { + printf("sched_getaffinity failed:%s\n", strerror(errno)); + return -1; + } + + return 0; +} + +/** + * This function demonstrates the approach to create a ring in the first instance + * or re-attach an existing ring in a later instance. + **/ +static struct rte_ring * +create_ring(const char *name, unsigned count, + int socket_id,unsigned flags) +{ + struct rte_ring *ring; + + if (name == NULL) + return NULL; + + /* If already created, just attach to it */ + if (likely((ring = rte_ring_lookup(name)) != NULL)) + return ring; + + /* First call: create one */ + return rte_ring_create(name, count, socket_id, flags); +} + +/* Allocate with rte_malloc the structures that are shared by master and slave */ +static int +l2fwd_malloc_shared_struct(void) +{ + port_statistics = rte_zmalloc("port_stat", + sizeof(struct l2fwd_port_statistics) * RTE_MAX_ETHPORTS, + 0); + if (port_statistics == NULL) + return -1; + + /* allocate mapping_id array */ + if (float_proc) { + int i; + mapping_id = rte_malloc("mapping_id", sizeof(unsigned) * RTE_MAX_LCORE, + 0); + + if (mapping_id == NULL) + return -1; + + for (i = 0 ;i < RTE_MAX_LCORE; i++) + mapping_id[i] = INVALID_MAPPING_ID; + } + return 0; +} + +/* Create rings used for communication between master and slave */ +static int 
+create_ms_ring(unsigned slaveid) +{ + unsigned flag = RING_F_SP_ENQ | RING_F_SC_DEQ; + struct lcore_resource_struct *res = &lcore_resource[slaveid]; + unsigned socketid = rte_socket_id(); + + /* Always assume create ring on master socket_id */ + /* Default only create a ring size 32 */ + snprintf(res->ring_name[0], MAX_NAME_LEN, "%s%u", + RING_MASTER_NAME, slaveid); + if ((res->ring[0] = create_ring(res->ring_name[0], NB_CORE_MSGBUF, + socketid, flag)) == NULL) { + printf("Create m2s ring %s failed\n", res->ring_name[0]); + return -1; + } + + snprintf(res->ring_name[1], MAX_NAME_LEN, "%s%u", + RING_SLAVE_NAME, slaveid); + if ((res->ring[1] = create_ring(res->ring_name[1], NB_CORE_MSGBUF, + socketid, flag)) == NULL) { + printf("Create s2m ring %s failed\n", res->ring_name[1]); + return -1; + } + + return 0; +} + +/* send command to pair in paired master and slave ring */ +static inline int +sendcmd(unsigned slaveid, enum l2fwd_cmd cmd, int is_master) +{ + struct lcore_resource_struct *res = &lcore_resource[slaveid]; + void *msg; + int fd = !is_master; + + /* Only check master, it must be enabled and running if it is slave */ + if (is_master && !res->enabled) + return -1; + + if (res->ring[fd] == NULL) + return -1; + + if (rte_mempool_get(message_pool, &msg) < 0) { + printf("Error to get message buffer\n"); + return -1; + } + + *(enum l2fwd_cmd *)msg = cmd; + + if (rte_ring_enqueue(res->ring[fd], msg) != 0) { + printf("Enqueue error\n"); + rte_mempool_put(message_pool, msg); + return -1; + } + + return 0; +} + +/* Get command from pair in paired master and slave ring */ +static inline int +getcmd(unsigned slaveid, enum l2fwd_cmd *cmd, int is_master) +{ + struct lcore_resource_struct *res = &lcore_resource[slaveid]; + void *msg; + int fd = !!is_master; + int ret; + /* Only check master, it must be enabled and running if it is slave */ + if (is_master && (!res->enabled)) + return -1; + + if (res->ring[fd] == NULL) + return -1; + + ret = rte_ring_dequeue(res->ring[fd], 
&msg); + + if (ret == 0) { + *cmd = *(enum l2fwd_cmd *)msg; + rte_mempool_put(message_pool, msg); + } + return ret; +} + +/* Master send command to slave and wait until ack received or error met */ +static int +master_sendcmd_with_ack(unsigned slaveid, enum l2fwd_cmd cmd) +{ + enum l2fwd_cmd ack_cmd; + int ret = -1; + + if (sendcmd(slaveid, cmd, 1) != 0) + rte_exit(EXIT_FAILURE, "Failed to send message\n"); + + /* Get ack */ + while (1) { + ret = getcmd(slaveid, &ack_cmd, 1); + if (ret == 0 && cmd == ack_cmd) + break; + + /* If slave not running yet, return an error */ + if (flib_query_slave_status(slaveid) != ST_RUN) { + ret = -ENOENT; + break; + } + } + + return ret; +} + +/* restart all port that assigned to that slave lcore */ +static int +reset_slave_all_ports(unsigned slaveid) +{ + struct lcore_resource_struct *slave = &lcore_resource[slaveid]; + int i, ret = 0; + + /* stop/start port */ + for (i = 0; i < slave->port_num; i++) { + char buf_name[RTE_MEMPOOL_NAMESIZE]; + struct rte_mempool *pool; + printf("Stop port :%d\n", slave->port[i]); + rte_eth_dev_stop(slave->port[i]); + snprintf(buf_name, RTE_MEMPOOL_NAMESIZE, MBUF_NAME, slave->port[i]); + pool = rte_mempool_lookup(buf_name); + if (pool) + printf("Port %d mempool free object is %u(%u)\n", slave->port[i], + rte_mempool_count(pool), (unsigned)NB_MBUF); + else + printf("Can't find mempool %s\n", buf_name); + + printf("Start port :%d\n", slave->port[i]); + ret = rte_eth_dev_start(slave->port[i]); + if (ret != 0) + break; + } + return ret; +} + +static int +reset_shared_structures(unsigned slaveid) +{ + int ret; + /* Only port are shared resource here */ + ret = reset_slave_all_ports(slaveid); + + return ret; +} + +/** + * Call this function to re-create resource that needed for slave process that + * exited in last instance + **/ +static int +init_slave_res(unsigned slaveid) +{ + struct lcore_resource_struct *slave = &lcore_resource[slaveid]; + enum l2fwd_cmd cmd; + + if (!slave->enabled) { + 
printf("Something wrong with lcore=%u enabled=%d\n",slaveid, + slave->enabled); + return -1; + } + + /* Initialize ring */ + if (create_ms_ring(slaveid) != 0) + rte_exit(EXIT_FAILURE, "failed to create ring for slave %u\n", + slaveid); + + /* drain un-read buffer if have */ + while (getcmd(slaveid, &cmd, 1) == 0); + while (getcmd(slaveid, &cmd, 0) == 0); + + return 0; +} + +static int +recreate_one_slave(unsigned slaveid) +{ + int ret = 0; + /* Re-initialize resource for stalled slave */ + if ((ret = init_slave_res(slaveid)) != 0) { + printf("Init slave=%u failed\n", slaveid); + return ret; + } + + if ((ret = flib_remote_launch(l2fwd_launch_one_lcore, NULL, slaveid)) + != 0) + printf("Launch slave %u failed\n", slaveid); + + return ret; +} + +/** + * remapping resource belong to slave_id to new lcore that gets from flib_assign_lcore_id(), + * used only floating process option applied. + * + * @param slaveid + * original lcore_id that apply for remapping + */ +static void +remapping_slave_resource(unsigned slaveid, unsigned map_id) +{ + + /* remapping lcore_resource */ + memcpy(&lcore_resource[map_id], &lcore_resource[slaveid], + sizeof(struct lcore_resource_struct)); + + /* remapping lcore_queue_conf */ + memcpy(&lcore_queue_conf[map_id], &lcore_queue_conf[slaveid], + sizeof(struct lcore_queue_conf)); +} + +static int +reset_pair(unsigned slaveid, unsigned pairid) +{ + int ret; + if ((ret = reset_shared_structures(slaveid)) != 0) + goto back; + + if((ret = reset_shared_structures(pairid)) != 0) + goto back; + + if (float_proc) { + unsigned map_id = mapping_id[slaveid]; + + if (map_id != INVALID_MAPPING_ID) { + printf("%u return mapping id %u\n", slaveid, map_id); + flib_free_lcore_id(map_id); + mapping_id[slaveid] = INVALID_MAPPING_ID; + } + + map_id = mapping_id[pairid]; + if (map_id != INVALID_MAPPING_ID) { + printf("%u return mapping id %u\n", pairid, map_id); + flib_free_lcore_id(map_id); + mapping_id[pairid] = INVALID_MAPPING_ID; + } + } + + if((ret = 
recreate_one_slave(slaveid)) != 0) + goto back; + + ret = recreate_one_slave(pairid); + +back: + return ret; +} + +static void +slave_exit_cb(unsigned slaveid, __attribute__((unused))int stat) +{ + struct lcore_resource_struct *slave = &lcore_resource[slaveid]; + + printf("Get slave %u leave info\n", slaveid); + if (!slave->enabled) { + printf("Lcore=%u not registered for it's exit\n", slaveid); + return; + } + rte_spinlock_lock(&res_lock); + + /* Change the state and wait master to start them */ + slave->flags = SLAVE_RECREATE_FLAG; + + rte_spinlock_unlock(&res_lock); +} + +static void +l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid) +{ + struct ether_hdr *eth; + void *tmp; + unsigned dst_port; + int sent; + struct rte_eth_dev_tx_buffer *buffer; + + dst_port = l2fwd_dst_ports[portid]; + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = &eth->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr); + + buffer = tx_buffer[dst_port]; + sent = rte_eth_tx_buffer(dst_port, 0, buffer, m); + if (sent) + port_statistics[dst_port].tx += sent; +} + +/* main processing loop */ +static void +l2fwd_main_loop(void) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct rte_mbuf *m; + int sent; + unsigned lcore_id; + uint64_t prev_tsc, diff_tsc, cur_tsc; + unsigned i, j, portid, nb_rx; + struct lcore_queue_conf *qconf; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * + BURST_TX_DRAIN_US; + struct rte_eth_dev_tx_buffer *buffer; + + prev_tsc = 0; + + lcore_id = rte_lcore_id(); + + qconf = &lcore_queue_conf[lcore_id]; + + if (qconf->n_rx_port == 0) { + RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id); + return; + } + + RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id); + + for (i = 0; i < qconf->n_rx_port; i++) { + portid = qconf->rx_port_list[i]; + RTE_LOG(INFO, L2FWD, 
" -- lcoreid=%u portid=%u\n", lcore_id, + portid); + } + + while (1) { + enum l2fwd_cmd cmd; + cur_tsc = rte_rdtsc(); + + if (unlikely(getcmd(lcore_id, &cmd, 0) == 0)) { + sendcmd(lcore_id, cmd, 0); + + /* If get stop command, stop forwarding and exit */ + if (cmd == CMD_STOP) { + return; + } + } + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = l2fwd_dst_ports[qconf->rx_port_list[i]]; + buffer = tx_buffer[portid]; + + sent = rte_eth_tx_buffer_flush(portid, 0, buffer); + if (sent) + port_statistics[portid].tx += sent; + + } + } + + /* + * Read packet from RX queues + */ + for (i = 0; i < qconf->n_rx_port; i++) { + + portid = qconf->rx_port_list[i]; + nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, + pkts_burst, MAX_PKT_BURST); + + port_statistics[portid].rx += nb_rx; + + for (j = 0; j < nb_rx; j++) { + m = pkts_burst[j]; + rte_prefetch0(rte_pktmbuf_mtod(m, void *)); + l2fwd_simple_forward(m, portid); + } + } + } +} + +static int +l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy) +{ + unsigned lcore_id = rte_lcore_id(); + + if (float_proc) { + unsigned flcore_id; + + /* Change it to floating process, also change it's lcore_id */ + clear_cpu_affinity(); + RTE_PER_LCORE(_lcore_id) = 0; + /* Get a lcore_id */ + if (flib_assign_lcore_id() < 0 ) { + printf("flib_assign_lcore_id failed\n"); + return -1; + } + flcore_id = rte_lcore_id(); + /* Set mapping id, so master can return it after slave exited */ + mapping_id[lcore_id] = flcore_id; + printf("Org lcore_id = %u, cur lcore_id = %u\n", + lcore_id, flcore_id); + remapping_slave_resource(lcore_id, flcore_id); + } + + l2fwd_main_loop(); + + /* return lcore_id before return */ + if (float_proc) { + flib_free_lcore_id(rte_lcore_id()); + mapping_id[lcore_id] = INVALID_MAPPING_ID; + } + return 0; +} + +/* display usage */ +static void +l2fwd_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p 
PORTMASK -s COREMASK [-q NQ] -f\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -q NQ: number of queue (=ports) per lcore (default is 1)\n" + " -f use floating process which won't bind to any core to run\n" + " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n", + prgname); +} + +static int +l2fwd_parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +static unsigned int +l2fwd_parse_nqueue(const char *q_arg) +{ + char *end = NULL; + unsigned long n; + + /* parse decimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + if (n == 0) + return 0; + if (n >= MAX_RX_QUEUE_PER_LCORE) + return 0; + + return n; +} + +static int +l2fwd_parse_timer_period(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n >= MAX_TIMER_PERIOD) + return -1; + + return n; +} + +/* Parse the arguments given on the command line of the application */ +static int +l2fwd_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {NULL, 0, 0, 0} + }; + int has_pmask = 0; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:q:T:f", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg); + if (l2fwd_enabled_port_mask == 0) { + printf("invalid portmask\n"); + l2fwd_usage(prgname); + return -1; + } + has_pmask = 1; + break; + + /* nqueue */ + case 'q': + l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg); 
+ if (l2fwd_rx_queue_per_lcore == 0) { + printf("invalid queue number\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* timer period */ + case 'T': + timer_period = l2fwd_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND; + if (timer_period < 0) { + printf("invalid timer period\n"); + l2fwd_usage(prgname); + return -1; + } + break; + + /* use floating process */ + case 'f': + float_proc = 1; + break; + + /* long options */ + case 0: + l2fwd_usage(prgname); + return -1; + + default: + l2fwd_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + if (!has_pmask) { + l2fwd_usage(prgname); + return -1; + } + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +int +main(int argc, char **argv) +{ + struct lcore_queue_conf *qconf; + struct rte_eth_dev_info dev_info; + int ret; + uint8_t nb_ports; + uint8_t nb_ports_available; + uint8_t portid, last_port; + unsigned rx_lcore_id; + unsigned nb_ports_in_mask = 0; + unsigned i; + int flags = 0; + uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc; + + /* Save cpu_affinity first, restore it in case it's floating process option */ + if (get_cpu_affinity() != 0) + rte_exit(EXIT_FAILURE, "get_cpu_affinity error\n"); + + /* Also tries to set cpu affinity to detect whether it will fail in child process */ + if(clear_cpu_affinity() != 0) + rte_exit(EXIT_FAILURE, "clear_cpu_affinity error\n"); + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n"); + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = l2fwd_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n"); + + /*flib init */ + if (flib_init() != 0) + rte_exit(EXIT_FAILURE, "flib init error"); + + /** + * Allocated structures that slave lcore would change. For those that slaves are + * read only, needn't use malloc to share and global or static variables is ok since + * slave inherit all the knowledge that master initialized. 
+ **/ + if (l2fwd_malloc_shared_struct() != 0) + rte_exit(EXIT_FAILURE, "malloc mem failed\n"); + + /* Initialize lcore_resource structures */ + memset(lcore_resource, 0, sizeof(lcore_resource)); + for (i = 0; i < RTE_MAX_LCORE; i++) + lcore_resource[i].lcore_id = i; + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* create the mbuf pool */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + char buf_name[RTE_MEMPOOL_NAMESIZE]; + flags = MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET; + snprintf(buf_name, RTE_MEMPOOL_NAMESIZE, MBUF_NAME, portid); + l2fwd_pktmbuf_pool[portid] = + rte_mempool_create(buf_name, NB_MBUF, + MBUF_SIZE, 32, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + rte_socket_id(), flags); + if (l2fwd_pktmbuf_pool[portid] == NULL) + rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); + + printf("Create mbuf %s\n", buf_name); + } + + /* reset l2fwd_dst_ports */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) + l2fwd_dst_ports[portid] = 0; + last_port = 0; + + /* + * Each logical core is assigned a dedicated TX queue on each port. 
+ */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + + if (nb_ports_in_mask % 2) { + l2fwd_dst_ports[portid] = last_port; + l2fwd_dst_ports[last_port] = portid; + } + else + last_port = portid; + + nb_ports_in_mask++; + + rte_eth_dev_info_get(portid, &dev_info); + } + if (nb_ports_in_mask % 2) { + printf("Notice: odd number of ports in portmask.\n"); + l2fwd_dst_ports[last_port] = last_port; + } + + rx_lcore_id = 0; + qconf = NULL; + + /* Initialize the port/queue configuration of each logical core */ + for (portid = 0; portid < nb_ports; portid++) { + struct lcore_resource_struct *res; + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + + /* get the lcore_id for this port */ + /* skip master lcore */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + rte_get_master_lcore() == rx_lcore_id || + lcore_queue_conf[rx_lcore_id].n_rx_port == + l2fwd_rx_queue_per_lcore) { + + rx_lcore_id++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + } + + if (qconf != &lcore_queue_conf[rx_lcore_id]) + /* Assigned a new logical core in the loop above. */ + qconf = &lcore_queue_conf[rx_lcore_id]; + + qconf->rx_port_list[qconf->n_rx_port] = portid; + qconf->n_rx_port++; + + /* Save the port resource info into lcore_resource strucutres */ + res = &lcore_resource[rx_lcore_id]; + res->enabled = 1; + res->port[res->port_num++] = portid; + + printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid); + } + + nb_ports_available = nb_ports; + + /* Initialise each port */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) { + printf("Skipping disabled port %u\n", (unsigned) portid); + nb_ports_available--; + continue; + } + /* init port */ + printf("Initializing port %u... 
", (unsigned) portid); + fflush(stdout); + ret = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", + ret, (unsigned) portid); + + rte_eth_macaddr_get(portid,&l2fwd_ports_eth_addr[portid]); + + /* init one RX queue */ + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + rte_eth_dev_socket_id(portid), + NULL, + l2fwd_pktmbuf_pool[portid]); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n", + ret, (unsigned) portid); + + /* init one TX queue on each port */ + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, + rte_eth_dev_socket_id(portid), + NULL); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n", + ret, (unsigned) portid); + + /* Initialize TX buffers */ + tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", + RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, + rte_eth_dev_socket_id(portid)); + if (tx_buffer[portid] == NULL) + rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n", + (unsigned) portid); + + rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST); + + ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid], + rte_eth_tx_buffer_count_callback, + &port_statistics[portid].dropped); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot set error callback for " + "tx buffer on port %u\n", (unsigned) portid); + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", + ret, (unsigned) portid); + + printf("done: \n"); + + rte_eth_promiscuous_enable(portid); + + printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", + (unsigned) portid, + l2fwd_ports_eth_addr[portid].addr_bytes[0], + l2fwd_ports_eth_addr[portid].addr_bytes[1], + l2fwd_ports_eth_addr[portid].addr_bytes[2], + l2fwd_ports_eth_addr[portid].addr_bytes[3], + l2fwd_ports_eth_addr[portid].addr_bytes[4], + 
l2fwd_ports_eth_addr[portid].addr_bytes[5]); + + /* initialize port stats */ + //memset(&port_statistics, 0, sizeof(port_statistics)); + } + + if (!nb_ports_available) { + rte_exit(EXIT_FAILURE, + "All available ports are disabled. Please set portmask.\n"); + } + + check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask); + + /* Record pair lcore */ + /** + * Since l2fwd example would create pair between different neighbour port, that's + * port 0 receive and forward to port 1, the same to port 1, these 2 ports will have + * dependency. If one port stopped working (killed, for example), the port need to + * be stopped/started again. During the time, another port need to wait until stop/start + * procedure completed. So, record the pair relationship for those lcores working + * on ports. + **/ + for (portid = 0; portid < nb_ports; portid++) { + uint32_t pair_port; + unsigned lcore = 0, pair_lcore = 0; + unsigned j, find_lcore, find_pair_lcore; + /* skip ports that are not enabled */ + if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) + continue; + + /* Find pair ports' lcores */ + find_lcore = find_pair_lcore = 0; + pair_port = l2fwd_dst_ports[portid]; + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (!rte_lcore_is_enabled(i)) + continue; + for (j = 0; j < lcore_queue_conf[i].n_rx_port;j++) { + if (lcore_queue_conf[i].rx_port_list[j] == portid) { + lcore = i; + find_lcore = 1; + break; + } + if (lcore_queue_conf[i].rx_port_list[j] == pair_port) { + pair_lcore = i; + find_pair_lcore = 1; + break; + } + } + if (find_lcore && find_pair_lcore) + break; + } + if (!find_lcore || !find_pair_lcore) + rte_exit(EXIT_FAILURE, "Not find port=%d pair\n", portid); + + printf("lcore %u and %u paired\n", lcore, pair_lcore); + lcore_resource[lcore].pair_id = pair_lcore; + lcore_resource[pair_lcore].pair_id = lcore; + } + + /* Create message buffer for all master and slave */ + message_pool = rte_mempool_create("ms_msg_pool", + NB_CORE_MSGBUF * RTE_MAX_LCORE, + sizeof(enum 
l2fwd_cmd), NB_CORE_MSGBUF / 2, + 0, + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + rte_socket_id(), 0); + + if (message_pool == NULL) + rte_exit(EXIT_FAILURE, "Create msg mempool failed\n"); + + /* Create ring for each master and slave pair, also register cb when slave leaves */ + for (i = 0; i < RTE_MAX_LCORE; i++) { + /** + * Only create ring and register slave_exit cb in case that core involved into + * packet forwarding + **/ + if (lcore_resource[i].enabled) { + /* Create ring for master and slave communication */ + ret = create_ms_ring(i); + if (ret != 0) + rte_exit(EXIT_FAILURE, "Create ring for lcore=%u failed", + i); + + if (flib_register_slave_exit_notify(i, + slave_exit_cb) != 0) + rte_exit(EXIT_FAILURE, + "Register master_trace_slave_exit failed"); + } + } + + /* launch per-lcore init on every lcore except master */ + flib_mp_remote_launch(l2fwd_launch_one_lcore, NULL, SKIP_MASTER); + + /* print statistics 10 second */ + prev_tsc = cur_tsc = rte_rdtsc(); + timer_tsc = 0; + while (1) { + sleep(1); + cur_tsc = rte_rdtsc(); + diff_tsc = cur_tsc - prev_tsc; + /* if timer is enabled */ + if (timer_period > 0) { + + /* advance the timer */ + timer_tsc += diff_tsc; + + /* if timer has reached its timeout */ + if (unlikely(timer_tsc >= (uint64_t) timer_period)) { + + print_stats(); + /* reset the timer */ + timer_tsc = 0; + } + } + + prev_tsc = cur_tsc; + + /* Check any slave need restart or recreate */ + rte_spinlock_lock(&res_lock); + for (i = 0; i < RTE_MAX_LCORE; i++) { + struct lcore_resource_struct *res = &lcore_resource[i]; + struct lcore_resource_struct *pair = &lcore_resource[res->pair_id]; + + /* If find slave exited, try to reset pair */ + if (res->enabled && res->flags && pair->enabled) { + if (!pair->flags) { + master_sendcmd_with_ack(pair->lcore_id, CMD_STOP); + rte_spinlock_unlock(&res_lock); + sleep(1); + rte_spinlock_lock(&res_lock); + if (pair->flags) + continue; + } + if (reset_pair(res->lcore_id, pair->lcore_id) != 0) + 
rte_exit(EXIT_FAILURE, "failed to reset slave"); + res->flags = 0; + pair->flags = 0; + } + } + rte_spinlock_unlock(&res_lock); + } + +} diff --git a/examples/multi_process/simple_mp/Makefile b/examples/multi_process/simple_mp/Makefile new file mode 100644 index 00000000..31ec0c80 --- /dev/null +++ b/examples/multi_process/simple_mp/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = simple_mp + +# all source are stored in SRCS-y +SRCS-y := main.c mp_commands.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/multi_process/simple_mp/main.c b/examples/multi_process/simple_mp/main.c new file mode 100644 index 00000000..2843d94e --- /dev/null +++ b/examples/multi_process/simple_mp/main.c @@ -0,0 +1,155 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This sample application is a simple multi-process application which + * demostrates sharing of queues and memory pools between processes, and + * using those queues/pools for communication between the processes. + * + * Application is designed to run with two processes, a primary and a + * secondary, and each accepts commands on the commandline, the most + * important of which is "send", which just sends a string to the other + * process. + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdarg.h> +#include <errno.h> +#include <unistd.h> +#include <termios.h> +#include <sys/queue.h> + +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_debug.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_ring.h> +#include <rte_log.h> +#include <rte_mempool.h> +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_socket.h> +#include <cmdline.h> +#include "mp_commands.h" + +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 + +static const char *_MSG_POOL = "MSG_POOL"; +static const char *_SEC_2_PRI = "SEC_2_PRI"; +static const char *_PRI_2_SEC = "PRI_2_SEC"; +const unsigned string_size = 64; + +struct rte_ring *send_ring, *recv_ring; +struct rte_mempool *message_pool; +volatile int quit = 0; + +static int 
+lcore_recv(__attribute__((unused)) void *arg) +{ + unsigned lcore_id = rte_lcore_id(); + + printf("Starting core %u\n", lcore_id); + while (!quit){ + void *msg; + if (rte_ring_dequeue(recv_ring, &msg) < 0){ + usleep(5); + continue; + } + printf("core %u: Received '%s'\n", lcore_id, (char *)msg); + rte_mempool_put(message_pool, msg); + } + + return 0; +} + +int +main(int argc, char **argv) +{ + const unsigned flags = 0; + const unsigned ring_size = 64; + const unsigned pool_size = 1024; + const unsigned pool_cache = 32; + const unsigned priv_data_sz = 0; + + int ret; + unsigned lcore_id; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot init EAL\n"); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY){ + send_ring = rte_ring_create(_PRI_2_SEC, ring_size, rte_socket_id(), flags); + recv_ring = rte_ring_create(_SEC_2_PRI, ring_size, rte_socket_id(), flags); + message_pool = rte_mempool_create(_MSG_POOL, pool_size, + string_size, pool_cache, priv_data_sz, + NULL, NULL, NULL, NULL, + rte_socket_id(), flags); + } else { + recv_ring = rte_ring_lookup(_PRI_2_SEC); + send_ring = rte_ring_lookup(_SEC_2_PRI); + message_pool = rte_mempool_lookup(_MSG_POOL); + } + if (send_ring == NULL) + rte_exit(EXIT_FAILURE, "Problem getting sending ring\n"); + if (recv_ring == NULL) + rte_exit(EXIT_FAILURE, "Problem getting receiving ring\n"); + if (message_pool == NULL) + rte_exit(EXIT_FAILURE, "Problem getting message pool\n"); + + RTE_LOG(INFO, APP, "Finished Process Init.\n"); + + /* call lcore_recv() on every slave lcore */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(lcore_recv, NULL, lcore_id); + } + + /* call cmd prompt on master lcore */ + struct cmdline *cl = cmdline_stdin_new(simple_mp_ctx, "\nsimple_mp > "); + if (cl == NULL) + rte_exit(EXIT_FAILURE, "Cannot create cmdline instance\n"); + cmdline_interact(cl); + cmdline_stdin_exit(cl); + + rte_eal_mp_wait_lcore(); + return 0; +} diff --git 
a/examples/multi_process/simple_mp/mp_commands.c b/examples/multi_process/simple_mp/mp_commands.c new file mode 100644 index 00000000..8da244bb --- /dev/null +++ b/examples/multi_process/simple_mp/mp_commands.c @@ -0,0 +1,166 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <stdarg.h> +#include <inttypes.h> +#include <stdio.h> +#include <termios.h> +#include <errno.h> +#include <sys/queue.h> + +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_launch.h> +#include <rte_log.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_ring.h> +#include <rte_debug.h> +#include <rte_mempool.h> +#include <rte_string_fns.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_string.h> +#include <cmdline_socket.h> +#include <cmdline.h> +#include "mp_commands.h" + +/**********************************************************/ + +struct cmd_send_result { + cmdline_fixed_string_t action; + cmdline_fixed_string_t message; +}; + +static void cmd_send_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + void *msg = NULL; + struct cmd_send_result *res = parsed_result; + + if (rte_mempool_get(message_pool, &msg) < 0) + rte_panic("Failed to get message buffer\n"); + snprintf((char *)msg, string_size, "%s", res->message); + if (rte_ring_enqueue(send_ring, msg) < 0) { + printf("Failed to send message - message discarded\n"); + rte_mempool_put(message_pool, msg); + } +} + +cmdline_parse_token_string_t cmd_send_action = + TOKEN_STRING_INITIALIZER(struct cmd_send_result, action, "send"); +cmdline_parse_token_string_t cmd_send_message = + TOKEN_STRING_INITIALIZER(struct cmd_send_result, message, NULL); + +cmdline_parse_inst_t cmd_send = { + .f = cmd_send_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "send a string to another process", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_send_action, + (void *)&cmd_send_message, + NULL, + }, +}; + +/**********************************************************/ + 
+struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + quit = 1; + cmdline_quit(cl); +} + +cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + +cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "close the application", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_quit_quit, + NULL, + }, +}; + +/**********************************************************/ + +struct cmd_help_result { + cmdline_fixed_string_t help; +}; + +static void cmd_help_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + cmdline_printf(cl, "Simple demo example of multi-process in RTE\n\n" + "This is a readline-like interface that can be used to\n" + "send commands to the simple app. Commands supported are:\n\n" + "- send [string]\n" "- help\n" "- quit\n\n"); +} + +cmdline_parse_token_string_t cmd_help_help = + TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help"); + +cmdline_parse_inst_t cmd_help = { + .f = cmd_help_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "show help", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_help_help, + NULL, + }, +}; + +/****** CONTEXT (list of instruction) */ +cmdline_parse_ctx_t simple_mp_ctx[] = { + (cmdline_parse_inst_t *)&cmd_send, + (cmdline_parse_inst_t *)&cmd_quit, + (cmdline_parse_inst_t *)&cmd_help, + NULL, +}; diff --git a/examples/multi_process/simple_mp/mp_commands.h b/examples/multi_process/simple_mp/mp_commands.h new file mode 100644 index 00000000..7e9a4ab2 --- /dev/null +++ b/examples/multi_process/simple_mp/mp_commands.h @@ -0,0 +1,44 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _SIMPLE_MP_COMMANDS_H_ +#define _SIMPLE_MP_COMMANDS_H_ + +extern const unsigned string_size; +extern struct rte_ring *send_ring; +extern struct rte_mempool *message_pool; +extern volatile int quit; + +extern cmdline_parse_ctx_t simple_mp_ctx[]; + +#endif /* _SIMPLE_MP_COMMANDS_H_ */ diff --git a/examples/multi_process/symmetric_mp/Makefile b/examples/multi_process/symmetric_mp/Makefile new file mode 100644 index 00000000..c789f3c9 --- /dev/null +++ b/examples/multi_process/symmetric_mp/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = symmetric_mp + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/multi_process/symmetric_mp/main.c b/examples/multi_process/symmetric_mp/main.c new file mode 100644 index 00000000..6bbff076 --- /dev/null +++ b/examples/multi_process/symmetric_mp/main.c @@ -0,0 +1,472 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
 + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Sample application demonstrating how to do packet I/O in a multi-process + * environment. The same code can be run as a primary process and as a + * secondary process, just with a different proc-id parameter in each case + * (apart from the EAL flag to indicate a secondary process). + * + * Each process will read from the same ports, given by the port-mask + * parameter, which should be the same in each case, just using a different + * queue per port as determined by the proc-id parameter. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdarg.h> +#include <errno.h> +#include <sys/queue.h> +#include <getopt.h> +#include <signal.h> +#include <inttypes.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_debug.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_ring.h> +#include <rte_debug.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_mempool.h> +#include <rte_memcpy.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> +#include <rte_cycles.h> + +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 + +#define NB_MBUFS 64*1024 /* use 64k mbufs */ +#define MBUF_CACHE_SIZE 256 +#define PKT_BURST 32 +#define RX_RING_SIZE 128 +#define TX_RING_SIZE 512 + +#define PARAM_PROC_ID "proc-id" +#define PARAM_NUM_PROCS "num-procs" + +/* for each lcore, record the elements of the ports array to use */ +struct lcore_ports{ + unsigned start_port; + unsigned num_ports; +}; + +/* structure to record the rx and tx packets. 
Put two per cache line as ports + * used in pairs */ +struct port_stats{ + unsigned rx; + unsigned tx; + unsigned drop; +} __attribute__((aligned(RTE_CACHE_LINE_SIZE / 2))); + +static int proc_id = -1; +static unsigned num_procs = 0; + +static uint8_t ports[RTE_MAX_ETHPORTS]; +static unsigned num_ports = 0; + +static struct lcore_ports lcore_ports[RTE_MAX_LCORE]; +static struct port_stats pstats[RTE_MAX_ETHPORTS]; + +/* prints the usage statement and quits with an error message */ +static void +smp_usage(const char *prgname, const char *errmsg) +{ + printf("\nError: %s\n",errmsg); + printf("\n%s [EAL options] -- -p <port mask> " + "--"PARAM_NUM_PROCS" <n>" + " --"PARAM_PROC_ID" <id>\n" + "-p : a hex bitmask indicating what ports are to be used\n" + "--num-procs: the number of processes which will be used\n" + "--proc-id : the id of the current process (id < num-procs)\n" + "\n", + prgname); + exit(1); +} + + +/* signal handler configured for SIGTERM and SIGINT to print stats on exit */ +static void +print_stats(int signum) +{ + unsigned i; + printf("\nExiting on signal %d\n\n", signum); + for (i = 0; i < num_ports; i++){ + const uint8_t p_num = ports[i]; + printf("Port %u: RX - %u, TX - %u, Drop - %u\n", (unsigned)p_num, + pstats[p_num].rx, pstats[p_num].tx, pstats[p_num].drop); + } + exit(0); +} + +/* Parse the argument given in the command line of the application */ +static int +smp_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + unsigned i, port_mask = 0; + char *prgname = argv[0]; + static struct option lgopts[] = { + {PARAM_NUM_PROCS, 1, 0, 0}, + {PARAM_PROC_ID, 1, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:", \ + lgopts, &option_index)) != EOF) { + + switch (opt) { + case 'p': + port_mask = strtoull(optarg, NULL, 16); + break; + /* long options */ + case 0: + if (strncmp(lgopts[option_index].name, PARAM_NUM_PROCS, 8) == 0) + num_procs = atoi(optarg); + else if 
(strncmp(lgopts[option_index].name, PARAM_PROC_ID, 7) == 0) + proc_id = atoi(optarg); + break; + + default: + smp_usage(prgname, "Cannot parse all command-line arguments\n"); + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + if (proc_id < 0) + smp_usage(prgname, "Invalid or missing proc-id parameter\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY && num_procs == 0) + smp_usage(prgname, "Invalid or missing num-procs parameter\n"); + if (port_mask == 0) + smp_usage(prgname, "Invalid or missing port mask\n"); + + /* get the port numbers from the port mask */ + for(i = 0; i < rte_eth_dev_count(); i++) + if(port_mask & (1 << i)) + ports[num_ports++] = (uint8_t)i; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + + return ret; +} + +/* + * Initialises a given port using global settings and with the rx buffers + * coming from the mbuf_pool passed as parameter + */ +static inline int +smp_port_init(uint8_t port, struct rte_mempool *mbuf_pool, uint16_t num_queues) +{ + struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + } + }; + const uint16_t rx_rings = num_queues, tx_rings = num_queues; + struct rte_eth_dev_info info; + int retval; + uint16_t q; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + + if (port >= rte_eth_dev_count()) + return -1; + + printf("# Initialising port %u... 
", (unsigned)port); + fflush(stdout); + + rte_eth_dev_info_get(port, &info); + info.default_rxconf.rx_drop_en = 1; + + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval < 0) + return retval; + + for (q = 0; q < rx_rings; q ++) { + retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + rte_eth_dev_socket_id(port), + &info.default_rxconf, + mbuf_pool); + if (retval < 0) + return retval; + } + + for (q = 0; q < tx_rings; q ++) { + retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + rte_eth_dev_socket_id(port), + NULL); + if (retval < 0) + return retval; + } + + rte_eth_promiscuous_enable(port); + + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + return 0; +} + +/* Goes through each of the lcores and calculates what ports should + * be used by that core. Fills in the global lcore_ports[] array. + */ +static void +assign_ports_to_cores(void) +{ + + const unsigned lcores = rte_eal_get_configuration()->lcore_count; + const unsigned port_pairs = num_ports / 2; + const unsigned pairs_per_lcore = port_pairs / lcores; + unsigned extra_pairs = port_pairs % lcores; + unsigned ports_assigned = 0; + unsigned i; + + RTE_LCORE_FOREACH(i) { + lcore_ports[i].start_port = ports_assigned; + lcore_ports[i].num_ports = pairs_per_lcore * 2; + if (extra_pairs > 0) { + lcore_ports[i].num_ports += 2; + extra_pairs--; + } + ports_assigned += lcore_ports[i].num_ports; + } +} + +/* Main function used by the processing threads. + * Prints out some configuration details for the thread and then begins + * performing packet RX and TX. 
+ */ +static int +lcore_main(void *arg __rte_unused) +{ + const unsigned id = rte_lcore_id(); + const unsigned start_port = lcore_ports[id].start_port; + const unsigned end_port = start_port + lcore_ports[id].num_ports; + const uint16_t q_id = (uint16_t)proc_id; + unsigned p, i; + char msgbuf[256]; + int msgbufpos = 0; + + if (start_port == end_port){ + printf("Lcore %u has nothing to do\n", id); + return 0; + } + + /* build up message in msgbuf before printing to decrease likelihood + * of multi-core message interleaving. + */ + msgbufpos += snprintf(msgbuf, sizeof(msgbuf) - msgbufpos, + "Lcore %u using ports ", id); + for (p = start_port; p < end_port; p++){ + msgbufpos += snprintf(msgbuf + msgbufpos, sizeof(msgbuf) - msgbufpos, + "%u ", (unsigned)ports[p]); + } + printf("%s\n", msgbuf); + printf("lcore %u using queue %u of each port\n", id, (unsigned)q_id); + + /* handle packet I/O from the ports, reading and writing to the + * queue number corresponding to our process number (not lcore id) + */ + + for (;;) { + struct rte_mbuf *buf[PKT_BURST]; + + for (p = start_port; p < end_port; p++) { + const uint8_t src = ports[p]; + const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */ + const uint16_t rx_c = rte_eth_rx_burst(src, q_id, buf, PKT_BURST); + if (rx_c == 0) + continue; + pstats[src].rx += rx_c; + + const uint16_t tx_c = rte_eth_tx_burst(dst, q_id, buf, rx_c); + pstats[dst].tx += tx_c; + if (tx_c != rx_c) { + pstats[dst].drop += (rx_c - tx_c); + for (i = tx_c; i < rx_c; i++) + rte_pktmbuf_free(buf[i]); + } + } + } +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; 
count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + /* unsigned shift: port_mask is a uint32_t and 1 << 31 would overflow a signed int */ + if ((port_mask & (1u << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + /* the format string already ends in a newline, so the duplex text carries none */ + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +/* Main function. + * Performs initialisation and then calls the lcore_main on each core + * to do the packet-processing work. + */ +int +main(int argc, char **argv) +{ + static const char *_SMP_MBUF_POOL = "SMP_MBUF_POOL"; + int ret; + unsigned i; + enum rte_proc_type_t proc_type; + struct rte_mempool *mp; + + /* set up signal handlers to print stats on exit */ + signal(SIGINT, print_stats); + signal(SIGTERM, print_stats); + + /* initialise the EAL for all */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot init EAL\n"); + argc -= ret; + argv += ret; + + /* determine the NIC devices available */ + if (rte_eth_dev_count() == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + /* parse application arguments (those after the EAL ones) */ + smp_parse_args(argc, argv); + + proc_type = rte_eal_process_type(); + mp = (proc_type == RTE_PROC_SECONDARY) ? 
+ rte_mempool_lookup(_SMP_MBUF_POOL) : + rte_pktmbuf_pool_create(_SMP_MBUF_POOL, NB_MBUFS, + MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, + rte_socket_id()); + if (mp == NULL) + rte_exit(EXIT_FAILURE, "Cannot get memory pool for buffers\n"); + + if (num_ports & 1) + rte_exit(EXIT_FAILURE, "Application must use an even number of ports\n"); + for(i = 0; i < num_ports; i++){ + if(proc_type == RTE_PROC_PRIMARY) + if (smp_port_init(ports[i], mp, (uint16_t)num_procs) < 0) + rte_exit(EXIT_FAILURE, "Error initialising ports\n"); + } + + if (proc_type == RTE_PROC_PRIMARY) + check_all_ports_link_status((uint8_t)num_ports, (~0x0)); + + assign_ports_to_cores(); + + RTE_LOG(INFO, APP, "Finished Process Init.\n"); + + rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER); + + return 0; +} diff --git a/examples/netmap_compat/Makefile b/examples/netmap_compat/Makefile new file mode 100644 index 00000000..52d80869 --- /dev/null +++ b/examples/netmap_compat/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk +unexport RTE_SRCDIR RTE_OUTPUT RTE_EXTMK + +DIRS-y += bridge + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +$(DIRS-y): + $(MAKE) -C $@ $(MAKECMDGOALS) O=$(RTE_OUTPUT) diff --git a/examples/netmap_compat/bridge/Makefile b/examples/netmap_compat/bridge/Makefile new file mode 100644 index 00000000..50d96e81 --- /dev/null +++ b/examples/netmap_compat/bridge/Makefile @@ -0,0 +1,62 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define the RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(info This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +all: +else + +# binary name +APP = bridge + +# for compat_netmap.c +VPATH := $(SRCDIR)/../lib + +# all sources are stored in SRCS-y +SRCS-y := bridge.c +SRCS-y += compat_netmap.c + +CFLAGS += -O3 -I$(SRCDIR)/../lib -I$(SRCDIR)/../netmap +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif diff --git a/examples/netmap_compat/bridge/bridge.c b/examples/netmap_compat/bridge/bridge.c new file mode 100644 index 00000000..53f5fdb6 --- /dev/null +++ b/examples/netmap_compat/bridge/bridge.c @@ -0,0 +1,377 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <fcntl.h> +#include <getopt.h> +#include <inttypes.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_mbuf.h> +#include <rte_mempool.h> +#include <rte_string_fns.h> +#include "compat_netmap.h" + + +#define BUF_SIZE RTE_MBUF_DEFAULT_DATAROOM +#define MBUF_DATA_SIZE (BUF_SIZE + RTE_PKTMBUF_HEADROOM) + +#define MBUF_PER_POOL 8192 + +struct rte_eth_conf eth_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, + .hw_ip_checksum = 0, + .hw_vlan_filter = 0, + .jumbo_frame = 0, + .hw_strip_crc = 0, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +#define MAX_QUEUE_NUM 1 +#define RX_QUEUE_NUM 1 +#define TX_QUEUE_NUM 1 + +#define MAX_DESC_NUM 0x400 +#define RX_DESC_NUM 0x100 +#define TX_DESC_NUM 0x200 + +#define RX_SYNC_NUM 0x20 +#define TX_SYNC_NUM 0x20 + +/* Per-port netmap settings; .eth_conf points at the eth_conf object defined above. */ +struct rte_netmap_port_conf port_conf = { + .eth_conf = &eth_conf, + .socket_id = SOCKET_ID_ANY, + .nr_tx_rings = TX_QUEUE_NUM, + .nr_rx_rings = RX_QUEUE_NUM, + .nr_tx_slots = TX_DESC_NUM, + .nr_rx_slots = RX_DESC_NUM, + .tx_burst = TX_SYNC_NUM, + .rx_burst = RX_SYNC_NUM, +}; + +struct 
rte_netmap_conf netmap_conf = { + .socket_id = SOCKET_ID_ANY, + .max_bufsz = BUF_SIZE, + .max_rings = MAX_QUEUE_NUM, + .max_slots = MAX_DESC_NUM, +}; + +/* written by sigint_handler(), so it must be a volatile sig_atomic_t */ +static volatile sig_atomic_t stop = 0; + +#define MAX_PORT_NUM 2 + +struct netmap_port { + int fd; + struct netmap_if *nmif; + struct netmap_ring *rx_ring; + struct netmap_ring *tx_ring; + const char *str; + uint8_t id; +}; + +static struct { + uint32_t num; + struct netmap_port p[MAX_PORT_NUM]; + void *mem; +} ports; + +static void +usage(const char *prgname) +{ + fprintf(stderr, "Usage: %s [EAL args] -- [OPTION]...\n" + "-h, --help \t Show this help message and exit\n" + "-i INTERFACE_A \t Interface (DPDK port number) to use\n" + "[ -i INTERFACE_B \t Interface (DPDK port number) to use ]\n", + prgname); +} + +/* Parse a decimal DPDK port number; exits the application on malformed or out-of-range input. */ +static uint8_t +parse_portid(const char *portid_str) +{ + char *end; + unsigned long id; + + id = strtoul(portid_str, &end, 10); + + /* valid ids are 0 .. RTE_MAX_ETHPORTS - 1; the value keeps the full strtoul() width so oversized input fails this check instead of being truncated first */ + if (end == portid_str || *end != '\0' || id >= RTE_MAX_ETHPORTS) + rte_exit(EXIT_FAILURE, "Invalid port number\n"); + + return (uint8_t) id; +} + +static int +parse_args(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "hi:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + rte_exit(EXIT_SUCCESS, "exiting..."); + break; + case 'i': + if (ports.num >= RTE_DIM(ports.p)) { + usage(argv[0]); + rte_exit(EXIT_FAILURE, "configs with %u " + "ports are not supported\n", + ports.num + 1); + + } + + ports.p[ports.num].str = optarg; + ports.p[ports.num].id = parse_portid(optarg); + ports.num++; + break; + default: + usage(argv[0]); + rte_exit(EXIT_FAILURE, "invalid option: %c\n", opt); + } + } + + return 0; +} + +static void sigint_handler(__rte_unused int sig) +{ + stop = 1; + signal(SIGINT, SIG_DFL); +} + +static void move(int n, struct netmap_ring *rx, struct netmap_ring *tx) +{ + uint32_t tmp; + + while (n-- > 0) { + tmp = tx->slot[tx->cur].buf_idx; + + tx->slot[tx->cur].buf_idx = rx->slot[rx->cur].buf_idx; + tx->slot[tx->cur].len = rx->slot[rx->cur].len; + 
tx->slot[tx->cur].flags |= NS_BUF_CHANGED; + tx->cur = NETMAP_RING_NEXT(tx, tx->cur); + tx->avail--; + + rx->slot[rx->cur].buf_idx = tmp; + rx->slot[rx->cur].flags |= NS_BUF_CHANGED; + rx->cur = NETMAP_RING_NEXT(rx, rx->cur); + rx->avail--; + } +} + +static int +netmap_port_open(uint32_t idx) +{ + int err; + struct netmap_port *port; + struct nmreq req; + + port = ports.p + idx; + + port->fd = rte_netmap_open("/dev/netmap", O_RDWR); + + snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str); + req.nr_version = NETMAP_API; + req.nr_ringid = 0; + + err = rte_netmap_ioctl(port->fd, NIOCGINFO, &req); + if (err) { + printf("[E] NIOCGINFO ioctl failed (error %d)\n", err); + return err; + } + + snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str); + req.nr_version = NETMAP_API; + req.nr_ringid = 0; + + err = rte_netmap_ioctl(port->fd, NIOCREGIF, &req); + if (err) { + printf("[E] NIOCREGIF ioctl failed (error %d)\n", err); + return err; + } + + /* mmap only once. */ + if (ports.mem == NULL) + ports.mem = rte_netmap_mmap(NULL, req.nr_memsize, + PROT_WRITE | PROT_READ, MAP_PRIVATE, port->fd, 0); + + if (ports.mem == MAP_FAILED) { + printf("[E] NETMAP mmap failed for fd: %d)\n", port->fd); + return -ENOMEM; + } + + port->nmif = NETMAP_IF(ports.mem, req.nr_offset); + + port->tx_ring = NETMAP_TXRING(port->nmif, 0); + port->rx_ring = NETMAP_RXRING(port->nmif, 0); + + return 0; +} + + +int main(int argc, char *argv[]) +{ + int err, ret; + uint32_t i, pmsk; + struct nmreq req; + struct pollfd pollfd[MAX_PORT_NUM]; + struct rte_mempool *pool; + struct netmap_ring *rx_ring, *tx_ring; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n"); + + argc -= ret; + argv += ret; + + parse_args(argc, argv); + + if (ports.num == 0) + rte_exit(EXIT_FAILURE, "no ports specified\n"); + + if (rte_eth_dev_count() < 1) + rte_exit(EXIT_FAILURE, "Not enough ethernet ports available\n"); + + pool = rte_pktmbuf_pool_create("mbuf_pool", 
MBUF_PER_POOL, 32, 0, + MBUF_DATA_SIZE, rte_socket_id()); + if (pool == NULL) + rte_exit(EXIT_FAILURE, "Couldn't create mempool\n"); + + netmap_conf.socket_id = rte_socket_id(); + err = rte_netmap_init(&netmap_conf); + + if (err < 0) + rte_exit(EXIT_FAILURE, + "Couldn't initialize librte_compat_netmap\n"); + else + printf("librte_compat_netmap initialized\n"); + + port_conf.pool = pool; + port_conf.socket_id = rte_socket_id(); + + for (i = 0; i != ports.num; i++) { + + err = rte_netmap_init_port(ports.p[i].id, &port_conf); + if (err < 0) + rte_exit(EXIT_FAILURE, "Couldn't setup port %hhu\n", + ports.p[i].id); + + rte_eth_promiscuous_enable(ports.p[i].id); + } + + for (i = 0; i != ports.num; i++) { + + err = netmap_port_open(i); + if (err) { + rte_exit(EXIT_FAILURE, "Couldn't set port %hhu " + "under NETMAP control\n", + ports.p[i].id); + } + else + printf("Port %hhu now in Netmap mode\n", ports.p[i].id); + } + + memset(pollfd, 0, sizeof(pollfd)); + + for (i = 0; i != ports.num; i++) { + pollfd[i].fd = ports.p[i].fd; + pollfd[i].events = POLLIN | POLLOUT; + } + + signal(SIGINT, sigint_handler); + + pmsk = ports.num - 1; + + printf("Bridge up and running!\n"); + + while (!stop) { + uint32_t n_pkts; + + pollfd[0].revents = 0; + pollfd[1].revents = 0; + + ret = rte_netmap_poll(pollfd, ports.num, 0); + if (ret < 0) { + stop = 1; + printf("[E] poll returned with error %d\n", ret); + } + + if (((pollfd[0].revents | pollfd[1].revents) & POLLERR) != 0) { + printf("POLLERR!\n"); + } + + if ((pollfd[0].revents & POLLIN) != 0 && + (pollfd[pmsk].revents & POLLOUT) != 0) { + + rx_ring = ports.p[0].rx_ring; + tx_ring = ports.p[pmsk].tx_ring; + + n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail); + move(n_pkts, rx_ring, tx_ring); + } + + if (pmsk != 0 && (pollfd[pmsk].revents & POLLIN) != 0 && + (pollfd[0].revents & POLLOUT) != 0) { + + rx_ring = ports.p[pmsk].rx_ring; + tx_ring = ports.p[0].tx_ring; + + n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail); + move(n_pkts, rx_ring, 
tx_ring); + } + } + + printf("Bridge stopped!\n"); + + for (i = 0; i != ports.num; i++) { + err = rte_netmap_ioctl(ports.p[i].fd, NIOCUNREGIF, &req); + if (err) { + printf("[E] NIOCUNREGIF ioctl failed (error %d)\n", + err); + } + else + printf("Port %hhu unregistered from Netmap mode\n", ports.p[i].id); + + rte_netmap_close(ports.p[i].fd); + } + return 0; +} diff --git a/examples/netmap_compat/lib/compat_netmap.c b/examples/netmap_compat/lib/compat_netmap.c new file mode 100644 index 00000000..bf1b418a --- /dev/null +++ b/examples/netmap_compat/lib/compat_netmap.c @@ -0,0 +1,908 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <errno.h> +#include <inttypes.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <net/if.h> +#include <sys/types.h> +#include <sys/resource.h> +#include <sys/mman.h> + +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_memzone.h> +#include <rte_spinlock.h> +#include <rte_string_fns.h> + +#include "compat_netmap.h" + +struct netmap_port { + struct rte_mempool *pool; + struct netmap_if *nmif; + struct rte_eth_conf eth_conf; + struct rte_eth_txconf tx_conf; + struct rte_eth_rxconf rx_conf; + int32_t socket_id; + uint16_t nr_tx_rings; + uint16_t nr_rx_rings; + uint32_t nr_tx_slots; + uint32_t nr_rx_slots; + uint16_t tx_burst; + uint16_t rx_burst; + uint32_t fd; +}; + +struct fd_port { + uint32_t port; +}; + +#ifndef POLLRDNORM +#define POLLRDNORM 0x0040 +#endif + +#ifndef POLLWRNORM +#define POLLWRNORM 0x0100 +#endif + +#define FD_PORT_FREE UINT32_MAX +#define FD_PORT_RSRV (FD_PORT_FREE - 1) + +struct netmap_state { + struct rte_netmap_conf conf; + uintptr_t buf_start; + void *mem; + uint32_t mem_sz; + uint32_t netif_memsz; +}; + + +#define COMPAT_NETMAP_MAX_NOFILE (2 * RTE_MAX_ETHPORTS) +#define COMPAT_NETMAP_MAX_BURST 64 +#define COMPAT_NETMAP_MAX_PKT_PER_SYNC (2 * COMPAT_NETMAP_MAX_BURST) + +static struct netmap_port ports[RTE_MAX_ETHPORTS]; +static struct netmap_state netmap; + +static 
struct fd_port fd_port[COMPAT_NETMAP_MAX_NOFILE]; +static const int next_fd_start = RLIMIT_NOFILE + 1; +static rte_spinlock_t netmap_lock; + +#define IDX_TO_FD(x) ((x) + next_fd_start) +#define FD_TO_IDX(x) ((x) - next_fd_start) +#define FD_VALID(x) ((x) >= next_fd_start && \ + (x) < (typeof (x))(RTE_DIM(fd_port) + next_fd_start)) + +#define PORT_NUM_RINGS (2 * netmap.conf.max_rings) +#define PORT_NUM_SLOTS (PORT_NUM_RINGS * netmap.conf.max_slots) + +#define BUF_IDX(port, ring, slot) \ + (((port) * PORT_NUM_RINGS + (ring)) * netmap.conf.max_slots + \ + (slot)) + +#define NETMAP_IF_RING_OFS(rid, rings, slots) ({\ + struct netmap_if *_if; \ + struct netmap_ring *_rg; \ + sizeof(*_if) + \ + (rings) * sizeof(_if->ring_ofs[0]) + \ + (rid) * sizeof(*_rg) + \ + (slots) * sizeof(_rg->slot[0]); \ + }) + +static void netmap_unregif(uint32_t idx, uint32_t port); + + +static int32_t +ifname_to_portid(const char *ifname, uint8_t *port) +{ + char *endptr; + uint64_t portid; + + errno = 0; + portid = strtoul(ifname, &endptr, 10); + if (endptr == ifname || *endptr != '\0' || + portid >= RTE_DIM(ports) || errno != 0) + return -EINVAL; + + *port = (uint8_t)portid; + return 0; +} + +/** + * Given a dpdk mbuf, fill in the Netmap slot in ring r and its associated + * buffer with the data held by the mbuf. + * Note that mbuf chains are not supported. + */ +static void +mbuf_to_slot(struct rte_mbuf *mbuf, struct netmap_ring *r, uint32_t index) +{ + char *data; + uint16_t length; + + data = rte_pktmbuf_mtod(mbuf, char *); + length = rte_pktmbuf_data_len(mbuf); + + if (length > r->nr_buf_size) + length = 0; + + r->slot[index].len = length; + rte_memcpy(NETMAP_BUF(r, r->slot[index].buf_idx), data, length); +} + +/** + * Given a Netmap ring and a slot index for that ring, construct a dpdk mbuf + * from the data held in the buffer associated with the slot. + * Allocation/deallocation of the dpdk mbuf are the responsability of the + * caller. + * Note that mbuf chains are not supported. 
+ */ +static void +slot_to_mbuf(struct netmap_ring *r, uint32_t index, struct rte_mbuf *mbuf) +{ + char *data; + uint16_t length; + + rte_pktmbuf_reset(mbuf); + length = r->slot[index].len; + data = rte_pktmbuf_append(mbuf, length); + + if (data != NULL) + rte_memcpy(data, NETMAP_BUF(r, r->slot[index].buf_idx), length); +} + +static int32_t +fd_reserve(void) +{ + uint32_t i; + + for (i = 0; i != RTE_DIM(fd_port) && fd_port[i].port != FD_PORT_FREE; + i++) + ; + + if (i == RTE_DIM(fd_port)) + return -ENOMEM; + + fd_port[i].port = FD_PORT_RSRV; + return IDX_TO_FD(i); +} + +static int32_t +fd_release(int32_t fd) +{ + uint32_t idx, port; + + idx = FD_TO_IDX(fd); + + if (!FD_VALID(fd) || (port = fd_port[idx].port) == FD_PORT_FREE) + return -EINVAL; + + /* if we still have a valid port attached, release the port */ + if (port < RTE_DIM(ports) && ports[port].fd == idx) { + netmap_unregif(idx, port); + } + + fd_port[idx].port = FD_PORT_FREE; + return 0; +} + +static int +check_nmreq(struct nmreq *req, uint8_t *port) +{ + int32_t rc; + uint8_t portid; + + if (req == NULL) + return -EINVAL; + + if (req->nr_version != NETMAP_API) { + req->nr_version = NETMAP_API; + return -EINVAL; + } + + if ((rc = ifname_to_portid(req->nr_name, &portid)) != 0) { + RTE_LOG(ERR, USER1, "Invalid interface name:\"%s\" " + "in NIOCGINFO call\n", req->nr_name); + return rc; + } + + if (ports[portid].pool == NULL) { + RTE_LOG(ERR, USER1, "Misconfigured portid %hhu\n", portid); + return -EINVAL; + } + + *port = portid; + return 0; +} + +/** + * Simulate a Netmap NIOCGINFO ioctl: given a struct nmreq holding an interface + * name (a port number in our case), fill the struct nmreq in with advisory + * information about the interface: number of rings and their size, total memory + * required in the map, ... + * Those are preconfigured using rte_eth_{,tx,rx}conf and + * rte_netmap_port_conf structures + * and calls to rte_netmap_init_port() in the Netmap application. 
+ */ +static int +ioctl_niocginfo(__rte_unused int fd, void * param) +{ + uint8_t portid; + struct nmreq *req; + int32_t rc; + + req = (struct nmreq *)param; + if ((rc = check_nmreq(req, &portid)) != 0) + return rc; + + req->nr_tx_rings = (uint16_t)(ports[portid].nr_tx_rings - 1); + req->nr_rx_rings = (uint16_t)(ports[portid].nr_rx_rings - 1); + req->nr_tx_slots = ports[portid].nr_tx_slots; + req->nr_rx_slots = ports[portid].nr_rx_slots; + + /* in the current implementation all NETIFs share one region. */ + req->nr_memsize = netmap.mem_sz; + req->nr_offset = 0; + + return 0; +} + +/* Initialise the ring header and give every slot its buffer index, zero length and cleared flags. */ +static void +netmap_ring_setup(struct netmap_ring *ring, uint8_t port, uint32_t ringid, + uint32_t num_slots) +{ + uint32_t j; + + ring->buf_ofs = netmap.buf_start - (uintptr_t)ring; + ring->num_slots = num_slots; + ring->cur = 0; + ring->reserved = 0; + ring->nr_buf_size = netmap.conf.max_bufsz; + ring->flags = 0; + ring->ts.tv_sec = 0; + ring->ts.tv_usec = 0; + + for (j = 0; j < ring->num_slots; j++) { + ring->slot[j].buf_idx = BUF_IDX(port, ringid, j); + ring->slot[j].len = 0; + /* per-slot flags; the ring-level flags were already cleared above */ + ring->slot[j].flags = 0; + } +} + +static int +netmap_regif(struct nmreq *req, uint32_t idx, uint8_t port) +{ + struct netmap_if *nmif; + struct netmap_ring *ring; + uint32_t i, slots, start_ring; + int32_t rc; + + if (ports[port].fd < RTE_DIM(fd_port)) { + RTE_LOG(ERR, USER1, "port %hhu already in use by fd: %u\n", + port, IDX_TO_FD(ports[port].fd)); + return -EBUSY; + } + if (fd_port[idx].port != FD_PORT_RSRV) { + RTE_LOG(ERR, USER1, "fd: %u is misconfigured\n", + IDX_TO_FD(idx)); + return -EBUSY; + } + + nmif = ports[port].nmif; + + /* setup netmap_if fields. */ + memset(nmif, 0, netmap.netif_memsz); + + /* only ALL rings supported right now. 
*/ + if (req->nr_ringid != 0) + return -EINVAL; + + snprintf(nmif->ni_name, sizeof(nmif->ni_name), "%s", req->nr_name); + nmif->ni_version = req->nr_version; + + /* Netmap uses ni_(r|t)x_rings + 1 */ + nmif->ni_rx_rings = ports[port].nr_rx_rings - 1; + nmif->ni_tx_rings = ports[port].nr_tx_rings - 1; + + /* + * Setup TX rings and slots. + * Refer to the comments in netmap.h for details + */ + + slots = 0; + for (i = 0; i < nmif->ni_tx_rings + 1; i++) { + + nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i, + PORT_NUM_RINGS, slots); + + ring = NETMAP_TXRING(nmif, i); + netmap_ring_setup(ring, port, i, ports[port].nr_tx_slots); + ring->avail = ring->num_slots; + + slots += ports[port].nr_tx_slots; + } + + /* + * Setup RX rings and slots. + * Refer to the comments in netmap.h for details + */ + + start_ring = i; + + for (; i < nmif->ni_rx_rings + 1 + start_ring; i++) { + + nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i, + PORT_NUM_RINGS, slots); + + ring = NETMAP_RXRING(nmif, (i - start_ring)); + netmap_ring_setup(ring, port, i, ports[port].nr_rx_slots); + ring->avail = 0; + + slots += ports[port].nr_rx_slots; + } + + if ((rc = rte_eth_dev_start(port)) < 0) { + RTE_LOG(ERR, USER1, + "Couldn't start ethernet device %s (error %d)\n", + req->nr_name, rc); + return rc; + } + + /* setup fdi <--> port relationtip. 
*/ + ports[port].fd = idx; + fd_port[idx].port = port; + + req->nr_memsize = netmap.mem_sz; + req->nr_offset = (uintptr_t)nmif - (uintptr_t)netmap.mem; + + return 0; +} + +/** + * Simulate a Netmap NIOCREGIF ioctl: + */ +static int +ioctl_niocregif(int32_t fd, void * param) +{ + uint8_t portid; + int32_t rc; + uint32_t idx; + struct nmreq *req; + + req = (struct nmreq *)param; + if ((rc = check_nmreq(req, &portid)) != 0) + return rc; + + idx = FD_TO_IDX(fd); + + rte_spinlock_lock(&netmap_lock); + rc = netmap_regif(req, idx, portid); + rte_spinlock_unlock(&netmap_lock); + + return rc; +} + +static void +netmap_unregif(uint32_t idx, uint32_t port) +{ + fd_port[idx].port = FD_PORT_RSRV; + ports[port].fd = UINT32_MAX; + rte_eth_dev_stop((uint8_t)port); +} + +/** + * Simulate a Netmap NIOCUNREGIF ioctl: put an interface running in Netmap + * mode back in "normal" mode. In our case, we just stop the port associated + * with this file descriptor. + */ +static int +ioctl_niocunregif(int fd) +{ + uint32_t idx, port; + int32_t rc; + + idx = FD_TO_IDX(fd); + + rte_spinlock_lock(&netmap_lock); + + port = fd_port[idx].port; + if (port < RTE_DIM(ports) && ports[port].fd == idx) { + netmap_unregif(idx, port); + rc = 0; + } else { + RTE_LOG(ERR, USER1, + "%s: %d is not associated with valid port\n", + __func__, fd); + rc = -EINVAL; + } + + rte_spinlock_unlock(&netmap_lock); + return rc; +} + +/** + * A call to rx_sync_ring will try to fill a Netmap RX ring with as many + * packets as it can hold coming from its dpdk port. 
+ */ +static inline int +rx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number, + uint16_t max_burst) +{ + int32_t i, n_rx; + uint16_t burst_size; + uint32_t cur_slot, n_free_slots; + struct rte_mbuf *rx_mbufs[COMPAT_NETMAP_MAX_BURST]; + + n_free_slots = ring->num_slots - (ring->avail + ring->reserved); + n_free_slots = RTE_MIN(n_free_slots, max_burst); + cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1); + + while (n_free_slots) { + burst_size = (uint16_t)RTE_MIN(n_free_slots, RTE_DIM(rx_mbufs)); + + /* receive up to burst_size packets from the NIC's queue */ + n_rx = rte_eth_rx_burst(port, ring_number, rx_mbufs, + burst_size); + + if (n_rx == 0) + return 0; + if (unlikely(n_rx < 0)) + return -1; + + /* Put those n_rx packets in the Netmap structures */ + for (i = 0; i < n_rx ; i++) { + mbuf_to_slot(rx_mbufs[i], ring, cur_slot); + rte_pktmbuf_free(rx_mbufs[i]); + cur_slot = NETMAP_RING_NEXT(ring, cur_slot); + } + + /* Update the Netmap ring structure to reflect the change */ + ring->avail += n_rx; + n_free_slots -= n_rx; + } + + return 0; +} + +static inline int +rx_sync_if(uint32_t port) +{ + uint16_t burst; + uint32_t i, rc; + struct netmap_if *nifp; + struct netmap_ring *r; + + nifp = ports[port].nmif; + burst = ports[port].rx_burst; + rc = 0; + + for (i = 0; i < nifp->ni_rx_rings + 1; i++) { + r = NETMAP_RXRING(nifp, i); + rx_sync_ring(r, (uint8_t)port, (uint16_t)i, burst); + rc += r->avail; + } + + return rc; +} + +/** + * Simulate a Netmap NIOCRXSYNC ioctl: + */ +static int +ioctl_niocrxsync(int fd) +{ + uint32_t idx, port; + + idx = FD_TO_IDX(fd); + if ((port = fd_port[idx].port) < RTE_DIM(ports) && + ports[port].fd == idx) { + return rx_sync_if(fd_port[idx].port); + } else { + return -EINVAL; + } +} + +/** + * A call to tx_sync_ring will try to empty a Netmap TX ring by converting its + * buffers into rte_mbufs and sending them out on the rings's dpdk port. 
+ */ +static int +tx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number, + struct rte_mempool *pool, uint16_t max_burst) +{ + uint32_t i, n_tx; + uint16_t burst_size; + uint32_t cur_slot, n_used_slots; + struct rte_mbuf *tx_mbufs[COMPAT_NETMAP_MAX_BURST]; + + n_used_slots = ring->num_slots - ring->avail; + n_used_slots = RTE_MIN(n_used_slots, max_burst); + cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1); + + while (n_used_slots) { + burst_size = (uint16_t)RTE_MIN(n_used_slots, RTE_DIM(tx_mbufs)); + + for (i = 0; i < burst_size; i++) { + tx_mbufs[i] = rte_pktmbuf_alloc(pool); + if (tx_mbufs[i] == NULL) + goto err; + + slot_to_mbuf(ring, cur_slot, tx_mbufs[i]); + cur_slot = NETMAP_RING_NEXT(ring, cur_slot); + } + + n_tx = rte_eth_tx_burst(port, ring_number, tx_mbufs, + burst_size); + + /* Update the Netmap ring structure to reflect the change */ + ring->avail += n_tx; + n_used_slots -= n_tx; + + /* Return the mbufs that failed to transmit to their pool */ + if (unlikely(n_tx != burst_size)) { + for (i = n_tx; i < burst_size; i++) + rte_pktmbuf_free(tx_mbufs[i]); + break; + } + } + + return 0; + +err: + /* tx_mbufs[i] is the failed (NULL) allocation; release the mbufs already taken for this burst, i.e. indexes i-1 down to 0 */ + while (i-- != 0) + rte_pktmbuf_free(tx_mbufs[i]); + + RTE_LOG(ERR, USER1, + "Couldn't get mbuf from mempool is the mempool too small?\n"); + return -1; +} + +static int +tx_sync_if(uint32_t port) +{ + uint16_t burst; + uint32_t i, rc; + struct netmap_if *nifp; + struct netmap_ring *r; + struct rte_mempool *mp; + + nifp = ports[port].nmif; + mp = ports[port].pool; + burst = ports[port].tx_burst; + rc = 0; + + for (i = 0; i < nifp->ni_tx_rings + 1; i++) { + r = NETMAP_TXRING(nifp, i); + tx_sync_ring(r, (uint8_t)port, (uint16_t)i, mp, burst); + rc += r->avail; + } + + return rc; +} + +/** + * Simulate a Netmap NIOCTXSYNC ioctl: + */ +static inline int +ioctl_nioctxsync(int fd) +{ + uint32_t idx, port; + + idx = FD_TO_IDX(fd); + if ((port = fd_port[idx].port) < RTE_DIM(ports) && + ports[port].fd == idx) { + return 
tx_sync_if(fd_port[idx].port); + } else { + return -EINVAL; + } +} + +/** + * Give the library a mempool of rte_mbufs with which it can do the + * rte_mbuf <--> netmap slot conversions. + */ +int +rte_netmap_init(const struct rte_netmap_conf *conf) +{ + size_t buf_ofs, nmif_sz, sz; + size_t port_rings, port_slots, port_bufs; + uint32_t i, port_num; + + port_num = RTE_MAX_ETHPORTS; + port_rings = 2 * conf->max_rings; + port_slots = port_rings * conf->max_slots; + port_bufs = port_slots; + + nmif_sz = NETMAP_IF_RING_OFS(port_rings, port_rings, port_slots); + sz = nmif_sz * port_num; + + buf_ofs = RTE_ALIGN_CEIL(sz, RTE_CACHE_LINE_SIZE); + sz = buf_ofs + port_bufs * conf->max_bufsz * port_num; + + if (sz > UINT32_MAX || + (netmap.mem = rte_zmalloc_socket(__func__, sz, + RTE_CACHE_LINE_SIZE, conf->socket_id)) == NULL) { + RTE_LOG(ERR, USER1, "%s: failed to allocate %zu bytes\n", + __func__, sz); + return -ENOMEM; + } + + netmap.mem_sz = sz; + netmap.netif_memsz = nmif_sz; + netmap.buf_start = (uintptr_t)netmap.mem + buf_ofs; + netmap.conf = *conf; + + rte_spinlock_init(&netmap_lock); + + /* Mark all ports as unused and set NETIF pointer. */ + for (i = 0; i != RTE_DIM(ports); i++) { + ports[i].fd = UINT32_MAX; + ports[i].nmif = (struct netmap_if *) + ((uintptr_t)netmap.mem + nmif_sz * i); + } + + /* Mark all fd_ports as unused. 
*/ + for (i = 0; i != RTE_DIM(fd_port); i++) { + fd_port[i].port = FD_PORT_FREE; + } + + return 0; +} + + +int +rte_netmap_init_port(uint8_t portid, const struct rte_netmap_port_conf *conf) +{ + int32_t ret; + uint16_t i; + uint16_t rx_slots, tx_slots; + + if (conf == NULL || + portid >= RTE_DIM(ports) || + conf->nr_tx_rings > netmap.conf.max_rings || + conf->nr_rx_rings > netmap.conf.max_rings) { + RTE_LOG(ERR, USER1, "%s(%hhu): invalid parameters\n", + __func__, portid); + return -EINVAL; + } + + rx_slots = (uint16_t)rte_align32pow2(conf->nr_rx_slots); + tx_slots = (uint16_t)rte_align32pow2(conf->nr_tx_slots); + + if (tx_slots > netmap.conf.max_slots || + rx_slots > netmap.conf.max_slots) { + RTE_LOG(ERR, USER1, "%s(%hhu): invalid parameters\n", + __func__, portid); + return -EINVAL; + } + + ret = rte_eth_dev_configure(portid, conf->nr_rx_rings, + conf->nr_tx_rings, conf->eth_conf); + + if (ret < 0) { + RTE_LOG(ERR, USER1, "Couldn't configure port %hhu\n", portid); + return ret; + } + + for (i = 0; i < conf->nr_tx_rings; i++) { + ret = rte_eth_tx_queue_setup(portid, i, tx_slots, + conf->socket_id, NULL); + + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Couldn't configure TX queue %"PRIu16" of " + "port %"PRIu8"\n", + i, portid); + return ret; + } + + ret = rte_eth_rx_queue_setup(portid, i, rx_slots, + conf->socket_id, NULL, conf->pool); + + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Couldn't configure RX queue %"PRIu16" of " + "port %"PRIu8"\n", + i, portid); + return ret; + } + } + + /* copy config to the private storage. 
*/ + ports[portid].eth_conf = conf->eth_conf[0]; + ports[portid].pool = conf->pool; + ports[portid].socket_id = conf->socket_id; + ports[portid].nr_tx_rings = conf->nr_tx_rings; + ports[portid].nr_rx_rings = conf->nr_rx_rings; + ports[portid].nr_tx_slots = tx_slots; + ports[portid].nr_rx_slots = rx_slots; + ports[portid].tx_burst = conf->tx_burst; + ports[portid].rx_burst = conf->rx_burst; + + return 0; +} + +int +rte_netmap_close(__rte_unused int fd) +{ + int32_t rc; + + rte_spinlock_lock(&netmap_lock); + rc = fd_release(fd); + rte_spinlock_unlock(&netmap_lock); + + if (rc < 0) { + errno =-rc; + rc = -1; + } + return rc; +} + +int rte_netmap_ioctl(int fd, uint32_t op, void *param) +{ + int ret; + + if (!FD_VALID(fd)) { + errno = EBADF; + return -1; + } + + switch (op) { + + case NIOCGINFO: + ret = ioctl_niocginfo(fd, param); + break; + + case NIOCREGIF: + ret = ioctl_niocregif(fd, param); + break; + + case NIOCUNREGIF: + ret = ioctl_niocunregif(fd); + break; + + case NIOCRXSYNC: + ret = ioctl_niocrxsync(fd); + break; + + case NIOCTXSYNC: + ret = ioctl_nioctxsync(fd); + break; + + default: + ret = -ENOTTY; + } + + if (ret < 0) { + errno = -ret; + ret = -1; + } else { + ret = 0; + } + + return ret; +} + +void * +rte_netmap_mmap(void *addr, size_t length, + int prot, int flags, int fd, off_t offset) +{ + static const int cprot = PROT_WRITE | PROT_READ; + + if (!FD_VALID(fd) || length + offset > netmap.mem_sz || + (prot & cprot) != cprot || + ((flags & MAP_FIXED) != 0 && addr != NULL)) { + + errno = EINVAL; + return MAP_FAILED; + } + + return (void *)((uintptr_t)netmap.mem + (uintptr_t)offset); +} + +/** + * Return a "fake" file descriptor with a value above RLIMIT_NOFILE so that + * any attempt to use that file descriptor with the usual API will fail. 
+ */ +int +rte_netmap_open(__rte_unused const char *pathname, __rte_unused int flags) +{ + int fd; + + rte_spinlock_lock(&netmap_lock); + fd = fd_reserve(); + rte_spinlock_unlock(&netmap_lock); + + if (fd < 0) { + errno = -fd; + fd = -1; + } + return fd; +} + +/** + * Doesn't support timeout other than 0 or infinite (negative) timeout + */ +int +rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout) +{ + int32_t count_it, ret; + uint32_t i, idx, port; + uint32_t want_rx, want_tx; + + ret = 0; + do { + for (i = 0; i < nfds; i++) { + + count_it = 0; + + if (!FD_VALID(fds[i].fd) || fds[i].events == 0) { + fds[i].revents = 0; + continue; + } + + idx = FD_TO_IDX(fds[i].fd); + if ((port = fd_port[idx].port) >= RTE_DIM(ports) || + ports[port].fd != idx) { + + fds[i].revents |= POLLERR; + ret++; + continue; + } + + want_rx = fds[i].events & (POLLIN | POLLRDNORM); + want_tx = fds[i].events & (POLLOUT | POLLWRNORM); + + if (want_rx && rx_sync_if(port) > 0) { + fds[i].revents = (uint16_t) + (fds[i].revents | want_rx); + count_it = 1; + } + if (want_tx && tx_sync_if(port) > 0) { + fds[i].revents = (uint16_t) + (fds[i].revents | want_tx); + count_it = 1; + } + + ret += count_it; + } + } + while ((ret == 0 && timeout < 0) || timeout); + + return ret; +} diff --git a/examples/netmap_compat/lib/compat_netmap.h b/examples/netmap_compat/lib/compat_netmap.h new file mode 100644 index 00000000..3dc7a2f4 --- /dev/null +++ b/examples/netmap_compat/lib/compat_netmap.h @@ -0,0 +1,80 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_COMPAT_NETMAP_H_ + +#include <poll.h> +#include <linux/ioctl.h> +#include <net/if.h> + +#include <rte_ethdev.h> +#include <rte_mempool.h> + +#include "netmap.h" +#include "netmap_user.h" + +/** + * One can overwrite Netmap macros here as needed + */ + +struct rte_netmap_conf { + int32_t socket_id; + uint32_t max_rings; /* number of rings(queues) per netmap_if(port) */ + uint32_t max_slots; /* number of slots(descriptors) per netmap ring. */ + uint16_t max_bufsz; /* size of each netmap buffer. 
*/ +}; + +struct rte_netmap_port_conf { + struct rte_eth_conf *eth_conf; + struct rte_mempool *pool; + int32_t socket_id; + uint16_t nr_tx_rings; + uint16_t nr_rx_rings; + uint32_t nr_tx_slots; + uint32_t nr_rx_slots; + uint16_t tx_burst; + uint16_t rx_burst; +}; + +int rte_netmap_init(const struct rte_netmap_conf *conf); +int rte_netmap_init_port(uint8_t portid, + const struct rte_netmap_port_conf *conf); + +int rte_netmap_close(int fd); +int rte_netmap_ioctl(int fd, uint32_t op, void *param); +int rte_netmap_open(const char *pathname, int flags); +int rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout); +void *rte_netmap_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset); + +#endif /* _RTE_COMPAT_NETMAP_H_ */ diff --git a/examples/netmap_compat/netmap/netmap.h b/examples/netmap_compat/netmap/netmap.h new file mode 100644 index 00000000..677c8a9f --- /dev/null +++ b/examples/netmap_compat/netmap/netmap.h @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the + * distribution. + * + * 3. Neither the name of the authors nor the names of their contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: head/sys/net/netmap.h 231198 2012-02-08 11:43:29Z luigi $
+ * $Id: netmap.h 10879 2012-04-12 22:48:59Z luigi $
+ *
+ * Definitions of constants and the structures used by the netmap
+ * framework, for the part visible to both kernel and userspace.
+ * Detailed info on netmap is available with "man netmap" or at
+ *
+ *	http://info.iet.unipi.it/~luigi/netmap/
+ */
+
+#ifndef _NET_NETMAP_H_
+#define _NET_NETMAP_H_
+
+/*
+ * --- Netmap data structures ---
+ *
+ * The data structures used by netmap are shown below. Those in
+ * capital letters are in an mmap()ed area shared with userspace,
+ * while others are private to the kernel.
+ * Shared structures do not contain pointers but only memory
+ * offsets, so that addressing is portable between kernel and userspace.
+
+
+ softc
++----------------+
+| standard fields|
+| if_pspare[0] ----------+
++----------------+       |
+                         |
++----------------+<------+
+|(netmap_adapter)|
+|                |                              netmap_kring
+| tx_rings *--------------------------------->+---------------+
+|                |       netmap_kring         | ring    *---------.
+| rx_rings *--------->+---------------+       | nr_hwcur      |   |
++----------------+    | ring    *--------.    | nr_hwavail    |   V
+                      | nr_hwcur      |  |    | selinfo       |   |
+                      | nr_hwavail    |  |    +---------------+   .
+                      | selinfo       |  |    |     ...       |   .
+                      +---------------+  |    |(ntx+1 entries)|
+                      |    ....       |  |    |               |
+                      |(nrx+1 entries)|  |    +---------------+
+                      |               |  |
+   KERNEL             +---------------+  |
+                                         |
+  ====================================================================
+                                         |
+   USERSPACE                             |      NETMAP_RING
+                                         +---->+-------------+
+                                             / | cur         |
+   NETMAP_IF  (nifp, one per file desc.)    /  | avail       |
+    +---------------+                      /   | buf_ofs     |
+    | ni_tx_rings   |                     /    +=============+
+    | ni_rx_rings   |                    /     | buf_idx     | slot[0]
+    |               |                   /      | len, flags  |
+    |               |                  /       +-------------+
+    +===============+                 /        | buf_idx     | slot[1]
+    | txring_ofs[0] | (rel.to nifp)--'         | len, flags  |
+    | txring_ofs[1] |                          +-------------+
+  (num_rings+1 entries)                     (nr_num_slots entries)
+    | txring_ofs[n] |                          | buf_idx     | slot[n-1]
+    +---------------+                          | len, flags  |
+    | rxring_ofs[0] |                          +-------------+
+    | rxring_ofs[1] |
+  (num_rings+1 entries)
+    | rxring_ofs[n] |
+    +---------------+
+
+ * The private descriptor ('softc' or 'adapter') of each interface
+ * is extended with a "struct netmap_adapter" containing netmap-related
+ * info (see description in dev/netmap/netmap_kernel.h).
+ * Among other things, tx_rings and rx_rings point to the arrays of
+ * "struct netmap_kring" which in turn reach the various
+ * "struct netmap_ring", shared with userspace.
+
+ * The NETMAP_RING is the userspace-visible replica of the NIC ring.
+ * Each slot has the index of a buffer, its length and some flags.
+ * In user space, the buffer address is computed as
+ *	(char *)ring + buf_ofs + index*NETMAP_BUF_SIZE
+ * In the kernel, buffers do not necessarily need to be contiguous,
+ * and the virtual and physical addresses are derived through
+ * a lookup table.
+ * To associate a different buffer to a slot, applications must
+ * write the new index in buf_idx, and set NS_BUF_CHANGED flag to
+ * make sure that the kernel updates the hardware ring as needed.
+ *
+ * Normally the driver is not requested to report the result of
+ * transmissions (this can dramatically speed up operation).
+ * However the user may request to report completion by setting
+ * NS_REPORT.
+ */
+struct netmap_slot {
+	uint32_t buf_idx; /* buffer index (see NETMAP_BUF() for the address) */
+	uint16_t len;	/* packet length, to be copied to/from the hw ring */
+	uint16_t flags;	/* buf changed, etc. (NS_* values defined below) */
+#define	NS_BUF_CHANGED	0x0001	/* must resync the map, buffer changed */
+#define	NS_REPORT	0x0002	/* ask the hardware to report results
+				 * e.g. by generating an interrupt
+				 */
+};
+
+/*
+ * Netmap representation of a TX or RX ring (also known as "queue").
+ * This is a queue implemented as a fixed-size circular array.
+ * At the software level, two fields are important: avail and cur.
+ *
+ * In TX rings:
+ *	avail	indicates the number of slots available for transmission.
+ *		It is updated by the kernel after every netmap system call.
+ *		It MUST BE decremented by the application when it appends a
+ *		packet.
+ *	cur	indicates the slot to use for the next packet
+ *		to send (i.e. the "tail" of the queue).
+ *		It MUST BE incremented by the application before
+ *		netmap system calls to reflect the number of newly
+ *		sent packets.
+ *		It is checked by the kernel on netmap system calls
+ *		(normally unmodified by the kernel unless invalid).
+ *
+ *   The kernel side of netmap uses two additional fields in its own
+ *   private ring structure, netmap_kring:
+ *	nr_hwcur is a copy of nr_cur on an NIOCTXSYNC.
+ *	nr_hwavail is the number of slots known as available by the
+ *		hardware. It is updated on an INTR (inc by the
+ *		number of packets sent) and on a NIOCTXSYNC
+ *		(decrease by nr_cur - nr_hwcur)
+ *		A special case, nr_hwavail is -1 if the transmit
+ *		side is idle (no pending transmits).
+ *
+ * In RX rings:
+ *	avail	is the number of packets available (possibly 0).
+ *		It MUST BE decremented by the application when it consumes
+ *		a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
+ *	cur	indicates the first slot that contains a packet not
+ *		processed yet (the "head" of the queue).
+ *		It MUST BE incremented by the software when it consumes
+ *		a packet.
+ *	reserved	indicates the number of buffers before 'cur'
+ *		that the application has still in use. Normally 0,
+ *		it MUST BE incremented by the application when it
+ *		does not return the buffer immediately, and decremented
+ *		when the buffer is finally freed.
+ *
+ *   The kernel side of netmap uses two additional fields in the kring:
+ *	nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
+ *	nr_hwavail is the number of packets available. It is updated
+ *		on INTR (inc by the number of new packets arrived)
+ *		and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur).
+ *
+ * DATA OWNERSHIP/LOCKING:
+ *	The netmap_ring is owned by the user program and it is only
+ *	accessed or modified in the upper half of the kernel during
+ *	a system call.
+ *
+ *	The netmap_kring is only modified by the upper half of the kernel.
+ */
+struct netmap_ring {
+	/*
+	 * buf_ofs (called nr_buf_base_ofs in the original comment) is
+	 * meant to be used through macros such as NETMAP_BUF().
+	 * It contains the offset of the buffer region from this
+	 * descriptor.
+	 */
+	ssize_t	buf_ofs;
+	uint32_t	num_slots;	/* number of slots in the ring. */
+	uint32_t	avail;		/* number of usable slots */
+	uint32_t	cur;		/* 'current' r/w position */
+	uint32_t	reserved;	/* not refilled before current */
+
+	uint16_t	nr_buf_size;	/* per-buffer stride used by NETMAP_BUF() */
+	uint16_t	flags;
+#define	NR_TIMESTAMP	0x0002		/* set timestamp on *sync() */
+
+	struct timeval	ts;		/* time of last *sync() */
+
+	/* the slots follow. This struct has variable size */
+	struct netmap_slot slot[0];	/* array of slots. */
+};
+
+
+/*
+ * Netmap representation of an interface and its queue(s).
+ * There is one netmap_if for each file descriptor on which we want
+ * to select/poll.
We assume that on each interface has the same number
+ * of receive and transmit queues.
+ * select/poll operates on one or all pairs depending on the value of
+ * nmr_queueid passed on the ioctl.
+ */
+struct netmap_if {
+	char		ni_name[IFNAMSIZ]; /* name of the interface. */
+	u_int	ni_version;	/* API version, currently unused */
+	u_int	ni_rx_rings;	/* number of rx rings */
+	u_int	ni_tx_rings;	/* if zero, same as ni_rx_rings */
+	/*
+	 * The following array contains the offset of each netmap ring
+	 * from this structure. The first ni_tx_rings+1 entries refer
+	 * to the tx rings, the next ni_rx_rings+1 refer to the rx rings
+	 * (the last entry in each block refers to the host stack rings).
+	 * NETMAP_TXRING() and NETMAP_RXRING() index it accordingly.
+	 * The area is filled up by the kernel on NIOCREGIF,
+	 * and then only read by userspace code.
+	 */
+	ssize_t	ring_ofs[0];
+};
+
+#ifndef NIOCREGIF
+/*
+ * ioctl names and related fields
+ *
+ * NIOCGINFO takes a struct ifreq, the interface name is the input,
+ *	the outputs are number of queues and number of descriptor
+ *	for each queue (useful to set number of threads etc.).
+ *
+ * NIOCREGIF takes an interface name within a struct ifreq,
+ *	and activates netmap mode on the interface (if possible).
+ *
+ * NIOCUNREGIF unregisters the interface associated to the fd.
+ *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ *	whose identity is set in NIOCREGIF through nr_ringid
+ */
+
+/*
+ * struct nmreq overlays a struct ifreq.
+ * It is the argument carried by NIOCGINFO and NIOCREGIF (see the
+ * _IOWR definitions below).
+ */
+struct nmreq {
+	char		nr_name[IFNAMSIZ];
+	uint32_t	nr_version;	/* API version */
+#define	NETMAP_API	3		/* current version */
+	uint32_t	nr_offset;	/* nifp offset in the shared region */
+	uint32_t	nr_memsize;	/* size of the shared region */
+	uint32_t	nr_tx_slots;	/* slots in tx rings */
+	uint32_t	nr_rx_slots;	/* slots in rx rings */
+	uint16_t	nr_tx_rings;	/* number of tx rings */
+	uint16_t	nr_rx_rings;	/* number of rx rings */
+	uint16_t	nr_ringid;	/* ring(s) we care about */
+#define NETMAP_HW_RING	0x4000		/* low bits indicate one hw ring */
+#define NETMAP_SW_RING	0x2000		/* process the sw ring */
+#define NETMAP_NO_TX_POLL	0x1000	/* no automatic txsync on poll */
+#define NETMAP_RING_MASK 0xfff		/* the ring number */
+	uint16_t	spare1;
+	uint32_t	spare2[4];
+};
+
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions
+ */
+#define NIOCGINFO	_IOWR('i', 145, struct nmreq) /* return IF info */
+#define NIOCREGIF	_IOWR('i', 146, struct nmreq) /* interface register */
+#define NIOCUNREGIF	_IO('i', 147) /* interface unregister */
+#define NIOCTXSYNC	_IO('i', 148) /* sync tx queues */
+#define NIOCRXSYNC	_IO('i', 149) /* sync rx queues */
+#endif /* !NIOCREGIF */
+
+#endif /* _NET_NETMAP_H_ */
diff --git a/examples/netmap_compat/netmap/netmap_user.h b/examples/netmap_compat/netmap/netmap_user.h
new file mode 100644
index 00000000..f369592e
--- /dev/null
+++ b/examples/netmap_compat/netmap/netmap_user.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the + * distribution. + * + * 3. Neither the name of the authors nor the names of their contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * $FreeBSD: head/sys/net/netmap_user.h 231198 2012-02-08 11:43:29Z luigi $ + * $Id: netmap_user.h 10879 2012-04-12 22:48:59Z luigi $ + * + * This header contains the macros used to manipulate netmap structures + * and packets in userspace. See netmap(4) for more information. + * + * The address of the struct netmap_if, say nifp, is computed from the + * value returned from ioctl(.., NIOCREG, ...) and the mmap region: + * ioctl(fd, NIOCREG, &req); + * mem = mmap(0, ... 
); + * nifp = NETMAP_IF(mem, req.nr_nifp); + * (so simple, we could just do it manually) + * + * From there: + * struct netmap_ring *NETMAP_TXRING(nifp, index) + * struct netmap_ring *NETMAP_RXRING(nifp, index) + * we can access ring->nr_cur, ring->nr_avail, ring->nr_flags + * + * ring->slot[i] gives us the i-th slot (we can access + * directly plen, flags, bufindex) + * + * char *buf = NETMAP_BUF(ring, index) returns a pointer to + * the i-th buffer + * + * Since rings are circular, we have macros to compute the next index + * i = NETMAP_RING_NEXT(ring, i); + */ + +#ifndef _NET_NETMAP_USER_H_ +#define _NET_NETMAP_USER_H_ + +#define NETMAP_IF(b, o) (struct netmap_if *)((char *)(b) + (o)) + +#define NETMAP_TXRING(nifp, index) \ + ((struct netmap_ring *)((char *)(nifp) + \ + (nifp)->ring_ofs[index] ) ) + +#define NETMAP_RXRING(nifp, index) \ + ((struct netmap_ring *)((char *)(nifp) + \ + (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) ) + +#define NETMAP_BUF(ring, index) \ + ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) + +#define NETMAP_BUF_IDX(ring, buf) \ + ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ + (ring)->nr_buf_size ) + +#define NETMAP_RING_NEXT(r, i) \ + ((i)+1 == (r)->num_slots ? 0 : (i) + 1 ) + +#define NETMAP_RING_FIRST_RESERVED(r) \ + ( (r)->cur < (r)->reserved ? \ + (r)->cur + (r)->num_slots - (r)->reserved : \ + (r)->cur - (r)->reserved ) + +/* + * Return 1 if the given tx ring is empty. + */ +#define NETMAP_TX_RING_EMPTY(r) ((r)->avail >= (r)->num_slots - 1) + +#endif /* _NET_NETMAP_USER_H_ */ diff --git a/examples/packet_ordering/Makefile b/examples/packet_ordering/Makefile new file mode 100644 index 00000000..9e080a30 --- /dev/null +++ b/examples/packet_ordering/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-ivshmem-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = packet_ordering + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/packet_ordering/main.c b/examples/packet_ordering/main.c new file mode 100644 index 00000000..15bb900c --- /dev/null +++ b/examples/packet_ordering/main.c @@ -0,0 +1,756 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <signal.h> +#include <getopt.h> + +#include <rte_eal.h> +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_lcore.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_mempool.h> +#include <rte_ring.h> +#include <rte_reorder.h> + +#define RX_DESC_PER_QUEUE 128 +#define TX_DESC_PER_QUEUE 512 + +#define MAX_PKTS_BURST 32 +#define REORDER_BUFFER_SIZE 8192 +#define MBUF_PER_POOL 65535 +#define MBUF_POOL_CACHE_SIZE 250 + +#define RING_SIZE 16384 + +/* uncomment below line to enable debug logs */ +/* #define DEBUG */ + +#ifdef DEBUG +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...) 
do {} while (0) +#endif + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_REORDERAPP RTE_LOGTYPE_USER1 + +unsigned int portmask; +unsigned int disable_reorder; +volatile uint8_t quit_signal; + +static struct rte_mempool *mbuf_pool; + +static struct rte_eth_conf port_conf_default; + +struct worker_thread_args { + struct rte_ring *ring_in; + struct rte_ring *ring_out; +}; + +struct send_thread_args { + struct rte_ring *ring_in; + struct rte_reorder_buffer *buffer; +}; + +volatile struct app_stats { + struct { + uint64_t rx_pkts; + uint64_t enqueue_pkts; + uint64_t enqueue_failed_pkts; + } rx __rte_cache_aligned; + + struct { + uint64_t dequeue_pkts; + uint64_t enqueue_pkts; + uint64_t enqueue_failed_pkts; + } wkr __rte_cache_aligned; + + struct { + uint64_t dequeue_pkts; + /* Too early pkts transmitted directly w/o reordering */ + uint64_t early_pkts_txtd_woro; + /* Too early pkts failed from direct transmit */ + uint64_t early_pkts_tx_failed_woro; + uint64_t ro_tx_pkts; + uint64_t ro_tx_failed_pkts; + } tx __rte_cache_aligned; +} app_stats; + +/** + * Get the last enabled lcore ID + * + * @return + * The last enabled lcore ID. + */ +static unsigned int +get_last_lcore_id(void) +{ + int i; + + for (i = RTE_MAX_LCORE - 1; i >= 0; i--) + if (rte_lcore_is_enabled(i)) + return i; + return 0; +} + +/** + * Get the previous enabled lcore ID + * @param id + * The current lcore ID + * @return + * The previous enabled lcore ID or the current lcore + * ID if it is the first available core. 
+ */ +static unsigned int +get_previous_lcore_id(unsigned int id) +{ + int i; + + for (i = id - 1; i >= 0; i--) + if (rte_lcore_is_enabled(i)) + return i; + return id; +} + +static inline void +pktmbuf_free_bulk(struct rte_mbuf *mbuf_table[], unsigned n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + rte_pktmbuf_free(mbuf_table[i]); +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n", + prgname); +} + +static int +parse_portmask(const char *portmask) +{ + unsigned long pm; + char *end = NULL; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt; + int option_index; + char **argvopt; + char *prgname = argv[0]; + static struct option lgopts[] = { + {"disable-reorder", 0, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:", + lgopts, &option_index)) != EOF) { + switch (opt) { + /* portmask */ + case 'p': + portmask = parse_portmask(optarg); + if (portmask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + /* long options */ + case 0: + if (!strcmp(lgopts[option_index].name, "disable-reorder")) { + printf("reorder disabled\n"); + disable_reorder = 1; + } + break; + default: + print_usage(prgname); + return -1; + } + } + if (optind <= 1) { + print_usage(prgname); + return -1; + } + + argv[optind-1] = prgname; + optind = 0; /* reset getopt lib */ + return 0; +} + +/* + * Tx buffer error callback + */ +static void +flush_tx_error_callback(struct rte_mbuf **unsent, uint16_t count, + void *userdata __rte_unused) { + + /* free the mbufs which failed from transmit */ + 
app_stats.tx.ro_tx_failed_pkts += count; + LOG_DEBUG(REORDERAPP, "%s:Packet loss with tx_burst\n", __func__); + pktmbuf_free_bulk(unsent, count); + +} + +static inline int +free_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[]) { + const uint8_t nb_ports = rte_eth_dev_count(); + unsigned port_id; + + /* initialize buffers for all ports */ + for (port_id = 0; port_id < nb_ports; port_id++) { + /* skip ports that are not enabled */ + if ((portmask & (1 << port_id)) == 0) + continue; + + rte_free(tx_buffer[port_id]); + } + return 0; +} + +static inline int +configure_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[]) +{ + const uint8_t nb_ports = rte_eth_dev_count(); + unsigned port_id; + int ret; + + /* initialize buffers for all ports */ + for (port_id = 0; port_id < nb_ports; port_id++) { + /* skip ports that are not enabled */ + if ((portmask & (1 << port_id)) == 0) + continue; + + /* Initialize TX buffers */ + tx_buffer[port_id] = rte_zmalloc_socket("tx_buffer", + RTE_ETH_TX_BUFFER_SIZE(MAX_PKTS_BURST), 0, + rte_eth_dev_socket_id(port_id)); + if (tx_buffer[port_id] == NULL) + rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n", + (unsigned) port_id); + + rte_eth_tx_buffer_init(tx_buffer[port_id], MAX_PKTS_BURST); + + ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[port_id], + flush_tx_error_callback, NULL); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot set error callback for " + "tx buffer on port %u\n", (unsigned) port_id); + } + return 0; +} + +static inline int +configure_eth_port(uint8_t port_id) +{ + struct ether_addr addr; + const uint16_t rxRings = 1, txRings = 1; + const uint8_t nb_ports = rte_eth_dev_count(); + int ret; + uint16_t q; + + if (port_id > nb_ports) + return -1; + + ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default); + if (ret != 0) + return ret; + + for (q = 0; q < rxRings; q++) { + ret = rte_eth_rx_queue_setup(port_id, q, RX_DESC_PER_QUEUE, + rte_eth_dev_socket_id(port_id), NULL, + 
mbuf_pool); + if (ret < 0) + return ret; + } + + for (q = 0; q < txRings; q++) { + ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE, + rte_eth_dev_socket_id(port_id), NULL); + if (ret < 0) + return ret; + } + + ret = rte_eth_dev_start(port_id); + if (ret < 0) + return ret; + + rte_eth_macaddr_get(port_id, &addr); + printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port_id, + addr.addr_bytes[0], addr.addr_bytes[1], + addr.addr_bytes[2], addr.addr_bytes[3], + addr.addr_bytes[4], addr.addr_bytes[5]); + + rte_eth_promiscuous_enable(port_id); + + return 0; +} + +static void +print_stats(void) +{ + const uint8_t nb_ports = rte_eth_dev_count(); + unsigned i; + struct rte_eth_stats eth_stats; + + printf("\nRX thread stats:\n"); + printf(" - Pkts rxd: %"PRIu64"\n", + app_stats.rx.rx_pkts); + printf(" - Pkts enqd to workers ring: %"PRIu64"\n", + app_stats.rx.enqueue_pkts); + + printf("\nWorker thread stats:\n"); + printf(" - Pkts deqd from workers ring: %"PRIu64"\n", + app_stats.wkr.dequeue_pkts); + printf(" - Pkts enqd to tx ring: %"PRIu64"\n", + app_stats.wkr.enqueue_pkts); + printf(" - Pkts enq to tx failed: %"PRIu64"\n", + app_stats.wkr.enqueue_failed_pkts); + + printf("\nTX stats:\n"); + printf(" - Pkts deqd from tx ring: %"PRIu64"\n", + app_stats.tx.dequeue_pkts); + printf(" - Ro Pkts transmitted: %"PRIu64"\n", + app_stats.tx.ro_tx_pkts); + printf(" - Ro Pkts tx failed: %"PRIu64"\n", + app_stats.tx.ro_tx_failed_pkts); + printf(" - Pkts transmitted w/o reorder: %"PRIu64"\n", + app_stats.tx.early_pkts_txtd_woro); + printf(" - Pkts tx failed w/o reorder: %"PRIu64"\n", + app_stats.tx.early_pkts_tx_failed_woro); + + for (i = 0; i < nb_ports; i++) { + rte_eth_stats_get(i, &eth_stats); + printf("\nPort %u stats:\n", i); + printf(" - Pkts in: %"PRIu64"\n", eth_stats.ipackets); + printf(" - Pkts out: %"PRIu64"\n", eth_stats.opackets); + printf(" - In Errs: %"PRIu64"\n", eth_stats.ierrors); + printf(" - Out Errs: 
%"PRIu64"\n", eth_stats.oerrors); + printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf); + } +} + +static void +int_handler(int sig_num) +{ + printf("Exiting on signal %d\n", sig_num); + quit_signal = 1; +} + +/** + * This thread receives mbufs from the port and affects them an internal + * sequence number to keep track of their order of arrival through an + * mbuf structure. + * The mbufs are then passed to the worker threads via the rx_to_workers + * ring. + */ +static int +rx_thread(struct rte_ring *ring_out) +{ + const uint8_t nb_ports = rte_eth_dev_count(); + uint32_t seqn = 0; + uint16_t i, ret = 0; + uint16_t nb_rx_pkts; + uint8_t port_id; + struct rte_mbuf *pkts[MAX_PKTS_BURST]; + + RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__, + rte_lcore_id()); + + while (!quit_signal) { + + for (port_id = 0; port_id < nb_ports; port_id++) { + if ((portmask & (1 << port_id)) != 0) { + + /* receive packets */ + nb_rx_pkts = rte_eth_rx_burst(port_id, 0, + pkts, MAX_PKTS_BURST); + if (nb_rx_pkts == 0) { + LOG_DEBUG(REORDERAPP, + "%s():Received zero packets\n", __func__); + continue; + } + app_stats.rx.rx_pkts += nb_rx_pkts; + + /* mark sequence number */ + for (i = 0; i < nb_rx_pkts; ) + pkts[i++]->seqn = seqn++; + + /* enqueue to rx_to_workers ring */ + ret = rte_ring_enqueue_burst(ring_out, (void *) pkts, + nb_rx_pkts); + app_stats.rx.enqueue_pkts += ret; + if (unlikely(ret < nb_rx_pkts)) { + app_stats.rx.enqueue_failed_pkts += + (nb_rx_pkts-ret); + pktmbuf_free_bulk(&pkts[ret], nb_rx_pkts - ret); + } + } + } + } + return 0; +} + +/** + * This thread takes bursts of packets from the rx_to_workers ring and + * Changes the input port value to output port value. 
It then feeds them to the + * workers_to_tx ring. + */ +static int +worker_thread(void *args_ptr) +{ + const uint8_t nb_ports = rte_eth_dev_count(); + uint16_t i, ret = 0; + uint16_t burst_size = 0; + struct worker_thread_args *args; + struct rte_mbuf *burst_buffer[MAX_PKTS_BURST] = { NULL }; + struct rte_ring *ring_in, *ring_out; + const unsigned xor_val = (nb_ports > 1); + + args = (struct worker_thread_args *) args_ptr; + ring_in = args->ring_in; + ring_out = args->ring_out; + + RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__, + rte_lcore_id()); + + while (!quit_signal) { + + /* dequeue the mbufs from rx_to_workers ring */ + burst_size = rte_ring_dequeue_burst(ring_in, + (void *)burst_buffer, MAX_PKTS_BURST); + if (unlikely(burst_size == 0)) + continue; + + __sync_fetch_and_add(&app_stats.wkr.dequeue_pkts, burst_size); + + /* just do some operation on mbuf */ + for (i = 0; i < burst_size;) + burst_buffer[i++]->port ^= xor_val; + + /* enqueue the modified mbufs to workers_to_tx ring */ + ret = rte_ring_enqueue_burst(ring_out, (void *)burst_buffer, burst_size); + __sync_fetch_and_add(&app_stats.wkr.enqueue_pkts, ret); + if (unlikely(ret < burst_size)) { + /* Return the mbufs to their respective pool, dropping packets */ + __sync_fetch_and_add(&app_stats.wkr.enqueue_failed_pkts, + (int)burst_size - ret); + pktmbuf_free_bulk(&burst_buffer[ret], burst_size - ret); + } + } + return 0; +} + +/** + * Dequeue mbufs from the workers_to_tx ring and reorder them before + * transmitting. 
+ */ +static int +send_thread(struct send_thread_args *args) +{ + int ret; + unsigned int i, dret; + uint16_t nb_dq_mbufs; + uint8_t outp; + unsigned sent; + struct rte_mbuf *mbufs[MAX_PKTS_BURST]; + struct rte_mbuf *rombufs[MAX_PKTS_BURST] = {NULL}; + static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + + RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__, rte_lcore_id()); + + configure_tx_buffers(tx_buffer); + + while (!quit_signal) { + + /* deque the mbufs from workers_to_tx ring */ + nb_dq_mbufs = rte_ring_dequeue_burst(args->ring_in, + (void *)mbufs, MAX_PKTS_BURST); + + if (unlikely(nb_dq_mbufs == 0)) + continue; + + app_stats.tx.dequeue_pkts += nb_dq_mbufs; + + for (i = 0; i < nb_dq_mbufs; i++) { + /* send dequeued mbufs for reordering */ + ret = rte_reorder_insert(args->buffer, mbufs[i]); + + if (ret == -1 && rte_errno == ERANGE) { + /* Too early pkts should be transmitted out directly */ + LOG_DEBUG(REORDERAPP, "%s():Cannot reorder early packet " + "direct enqueuing to TX\n", __func__); + outp = mbufs[i]->port; + if ((portmask & (1 << outp)) == 0) { + rte_pktmbuf_free(mbufs[i]); + continue; + } + if (rte_eth_tx_burst(outp, 0, (void *)mbufs[i], 1) != 1) { + rte_pktmbuf_free(mbufs[i]); + app_stats.tx.early_pkts_tx_failed_woro++; + } else + app_stats.tx.early_pkts_txtd_woro++; + } else if (ret == -1 && rte_errno == ENOSPC) { + /** + * Early pkts just outside of window should be dropped + */ + rte_pktmbuf_free(mbufs[i]); + } + } + + /* + * drain MAX_PKTS_BURST of reordered + * mbufs for transmit + */ + dret = rte_reorder_drain(args->buffer, rombufs, MAX_PKTS_BURST); + for (i = 0; i < dret; i++) { + + struct rte_eth_dev_tx_buffer *outbuf; + uint8_t outp1; + + outp1 = rombufs[i]->port; + /* skip ports that are not enabled */ + if ((portmask & (1 << outp1)) == 0) { + rte_pktmbuf_free(rombufs[i]); + continue; + } + + outbuf = tx_buffer[outp1]; + sent = rte_eth_tx_buffer(outp1, 0, outbuf, rombufs[i]); + if (sent) + 
app_stats.tx.ro_tx_pkts += sent; + } + } + + free_tx_buffers(tx_buffer); + + return 0; +} + +/** + * Dequeue mbufs from the workers_to_tx ring and transmit them + */ +static int +tx_thread(struct rte_ring *ring_in) +{ + uint32_t i, dqnum; + uint8_t outp; + unsigned sent; + struct rte_mbuf *mbufs[MAX_PKTS_BURST]; + struct rte_eth_dev_tx_buffer *outbuf; + static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + + RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__, + rte_lcore_id()); + + configure_tx_buffers(tx_buffer); + + while (!quit_signal) { + + /* dequeue the mbufs from workers_to_tx ring */ + dqnum = rte_ring_dequeue_burst(ring_in, + (void *)mbufs, MAX_PKTS_BURST); + + if (unlikely(dqnum == 0)) + continue; + + app_stats.tx.dequeue_pkts += dqnum; + + for (i = 0; i < dqnum; i++) { + outp = mbufs[i]->port; + /* skip ports that are not enabled */ + if ((portmask & (1 << outp)) == 0) { + rte_pktmbuf_free(mbufs[i]); + continue; + } + + outbuf = tx_buffer[outp]; + sent = rte_eth_tx_buffer(outp, 0, outbuf, mbufs[i]); + if (sent) + app_stats.tx.ro_tx_pkts += sent; + } + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int ret; + unsigned nb_ports; + unsigned int lcore_id, last_lcore_id, master_lcore_id; + uint8_t port_id; + uint8_t nb_ports_available; + struct worker_thread_args worker_args = {NULL, NULL}; + struct send_thread_args send_args = {NULL, NULL}; + struct rte_ring *rx_to_workers; + struct rte_ring *workers_to_tx; + + /* catch ctrl-c so we can print on exit */ + signal(SIGINT, int_handler); + + /* Initialize EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + return -1; + + argc -= ret; + argv += ret; + + /* Parse the application specific arguments */ + ret = parse_args(argc, argv); + if (ret < 0) + return -1; + + /* Check if we have enough cores */ + if (rte_lcore_count() < 3) + rte_exit(EXIT_FAILURE, "Error, This application needs at " + "least 3 logical cores to run:\n" + "1 lcore for packet RX\n" + "1 lcore for 
packet TX\n" + "and at least 1 lcore for worker threads\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n"); + if (nb_ports != 1 && (nb_ports & 1)) + rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except " + "when using a single port\n"); + + mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL, + MBUF_POOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, + rte_socket_id()); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno)); + + nb_ports_available = nb_ports; + + /* initialize all ports */ + for (port_id = 0; port_id < nb_ports; port_id++) { + /* skip ports that are not enabled */ + if ((portmask & (1 << port_id)) == 0) { + printf("\nSkipping disabled port %d\n", port_id); + nb_ports_available--; + continue; + } + /* init port */ + printf("Initializing port %u... done\n", (unsigned) port_id); + + if (configure_eth_port(port_id) != 0) + rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n", + port_id); + } + + if (!nb_ports_available) { + rte_exit(EXIT_FAILURE, + "All available ports are disabled. 
Please set portmask.\n"); + } + + /* Create rings for inter core communication */ + rx_to_workers = rte_ring_create("rx_to_workers", RING_SIZE, rte_socket_id(), + RING_F_SP_ENQ); + if (rx_to_workers == NULL) + rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno)); + + workers_to_tx = rte_ring_create("workers_to_tx", RING_SIZE, rte_socket_id(), + RING_F_SC_DEQ); + if (workers_to_tx == NULL) + rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno)); + + if (!disable_reorder) { + send_args.buffer = rte_reorder_create("PKT_RO", rte_socket_id(), + REORDER_BUFFER_SIZE); + if (send_args.buffer == NULL) + rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno)); + } + + last_lcore_id = get_last_lcore_id(); + master_lcore_id = rte_get_master_lcore(); + + worker_args.ring_in = rx_to_workers; + worker_args.ring_out = workers_to_tx; + + /* Start worker_thread() on all the available slave cores but the last 1 */ + for (lcore_id = 0; lcore_id <= get_previous_lcore_id(last_lcore_id); lcore_id++) + if (rte_lcore_is_enabled(lcore_id) && lcore_id != master_lcore_id) + rte_eal_remote_launch(worker_thread, (void *)&worker_args, + lcore_id); + + if (disable_reorder) { + /* Start tx_thread() on the last slave core */ + rte_eal_remote_launch((lcore_function_t *)tx_thread, workers_to_tx, + last_lcore_id); + } else { + send_args.ring_in = workers_to_tx; + /* Start send_thread() on the last slave core */ + rte_eal_remote_launch((lcore_function_t *)send_thread, + (void *)&send_args, last_lcore_id); + } + + /* Start rx_thread() on the master core */ + rx_thread(rx_to_workers); + + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + print_stats(); + return 0; +} diff --git a/examples/performance-thread/Makefile b/examples/performance-thread/Makefile new file mode 100644 index 00000000..d19f8489 --- /dev/null +++ b/examples/performance-thread/Makefile @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2015 Intel Corporation. All rights reserved. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_ARCH),"x86_64") +$(error This application is only supported for x86_64 targets) +endif + +DIRS-y += l3fwd-thread +DIRS-y += pthread_shim + + +include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/performance-thread/common/arch/x86/ctx.c b/examples/performance-thread/common/arch/x86/ctx.c new file mode 100644 index 00000000..1e8e2717 --- /dev/null +++ b/examples/performance-thread/common/arch/x86/ctx.c @@ -0,0 +1,93 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * https://github.com/halayli/lthread which carries the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + + + +#if defined(__x86_64__) +__asm__ ( +".text\n" +".p2align 4,,15\n" +".globl ctx_switch\n" +".globl _ctx_switch\n" +"ctx_switch:\n" +"_ctx_switch:\n" +" movq %rsp, 0(%rsi) # save stack_pointer\n" +" movq %rbp, 8(%rsi) # save frame_pointer\n" +" movq (%rsp), %rax # save insn_pointer\n" +" movq %rax, 16(%rsi)\n" +" movq %rbx, 24(%rsi)\n # save rbx,r12-r15\n" +" movq 24(%rdi), %rbx\n" +" movq %r15, 56(%rsi)\n" +" movq %r14, 48(%rsi)\n" +" movq 48(%rdi), %r14\n" +" movq 56(%rdi), %r15\n" +" movq %r13, 40(%rsi)\n" +" movq %r12, 32(%rsi)\n" +" movq 32(%rdi), %r12\n" +" movq 40(%rdi), %r13\n" +" movq 0(%rdi), %rsp # restore stack_pointer\n" +" movq 16(%rdi), %rax # restore insn_pointer\n" +" movq 8(%rdi), %rbp # restore frame_pointer\n" +" movq %rax, (%rsp)\n" +" ret\n" + ); +#else +#pragma GCC error "__x86_64__ is not defined" +#endif diff --git a/examples/performance-thread/common/arch/x86/ctx.h b/examples/performance-thread/common/arch/x86/ctx.h new file mode 100644 index 00000000..03860508 --- /dev/null +++ b/examples/performance-thread/common/arch/x86/ctx.h @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef CTX_H +#define CTX_H + +/* + * CPU context registers + */ +struct ctx { + void *rsp; /* 0 */ + void *rbp; /* 8 */ + void *rip; /* 16 */ + void *rbx; /* 24 */ + void *r12; /* 32 */ + void *r13; /* 40 */ + void *r14; /* 48 */ + void *r15; /* 56 */ +}; + + +void +ctx_switch(struct ctx *new_ctx, struct ctx *curr_ctx); + + +#endif /* CTX_H */ diff --git a/examples/performance-thread/common/common.mk b/examples/performance-thread/common/common.mk new file mode 100644 index 00000000..d3de5fc6 --- /dev/null +++ b/examples/performance-thread/common/common.mk @@ -0,0 +1,42 @@ +# +# BSD LICENSE +# +# Copyright(c) 2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# list the C files belonging to the lthread subsystem; these are common to all lthread apps +SRCS-y += ../common/lthread.c \ + ../common/lthread_sched.c \ + ../common/lthread_cond.c \ + ../common/lthread_tls.c \ + ../common/lthread_mutex.c \ + ../common/lthread_diag.c \ + ../common/arch/x86/ctx.c + +INCLUDES += -I$(RTE_SDK)/examples/performance-thread/common/ -I$(RTE_SDK)/examples/performance-thread/common/arch/x86/ diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c new file mode 100644 index 00000000..8fbff737 --- /dev/null +++ b/examples/performance-thread/common/lthread.c @@ -0,0 +1,529 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software is derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define RTE_MEM 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stddef.h> +#include <limits.h> +#include <inttypes.h> +#include <unistd.h> +#include <pthread.h> +#include <fcntl.h> +#include <sys/time.h> +#include <sys/mman.h> + +#include <rte_log.h> +#include <ctx.h> + +#include "lthread_api.h" +#include "lthread.h" +#include "lthread_timer.h" +#include "lthread_tls.h" +#include "lthread_objcache.h" +#include "lthread_diag.h" + + +/* + * This function gets called after an lthread function has returned. + */ +void _lthread_exit_handler(struct lthread *lt) +{ + + lt->state |= BIT(ST_LT_EXITED); + + if (!(lt->state & BIT(ST_LT_DETACH))) { + /* thread is not explicitly detached: + * it must be joinable, so we call lthread_exit(). 
+ */ + lthread_exit(NULL); + } + + /* if we get here the thread is detached so we can reschedule it, + * allowing the scheduler to free it + */ + _reschedule(); +} + + +/* + * Free resources allocated to an lthread + */ +void _lthread_free(struct lthread *lt) +{ + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_FREE, lt, 0); + + /* invoke any user TLS destructor functions */ + _lthread_tls_destroy(lt); + + /* free memory allocated for TLS defined using RTE_PER_LTHREAD macros */ + if (sizeof(void *) < (uint64_t)RTE_PER_LTHREAD_SECTION_SIZE) + _lthread_objcache_free(lt->tls->root_sched->per_lthread_cache, + lt->per_lthread_data); + + /* free pthread style TLS memory */ + _lthread_objcache_free(lt->tls->root_sched->tls_cache, lt->tls); + + /* free the stack */ + _lthread_objcache_free(lt->stack_container->root_sched->stack_cache, + lt->stack_container); + + /* now free the thread */ + _lthread_objcache_free(lt->root_sched->lthread_cache, lt); + +} + +/* + * Allocate a stack and maintain a cache of stacks + */ +struct lthread_stack *_stack_alloc(void) +{ + struct lthread_stack *s; + + s = _lthread_objcache_alloc((THIS_SCHED)->stack_cache); + LTHREAD_ASSERT(s != NULL); + + s->root_sched = THIS_SCHED; + s->stack_size = LTHREAD_MAX_STACK_SIZE; + return s; +} + +/* + * Execute a ctx by invoking the start function + * On return call an exit handler if the user has provided one + */ +static void _lthread_exec(void *arg) +{ + struct lthread *lt = (struct lthread *)arg; + + /* invoke the contexts function */ + lt->fun(lt->arg); + /* do exit handling */ + if (lt->exit_handler != NULL) + lt->exit_handler(lt); +} + +/* + * Initialize an lthread + * Set its function, args, and exit handler + */ +void +_lthread_init(struct lthread *lt, + lthread_func_t fun, void *arg, lthread_exit_func exit_handler) +{ + + /* set ctx func and args */ + lt->fun = fun; + lt->arg = arg; + lt->exit_handler = exit_handler; + + /* set initial state */ + lt->birth = _sched_now(); + lt->state = BIT(ST_LT_INIT); + 
lt->join = LT_JOIN_INITIAL; +} + +/* + * set the lthread stack + */ +void _lthread_set_stack(struct lthread *lt, void *stack, size_t stack_size) +{ + char *stack_top = (char *)stack + stack_size; + void **s = (void **)stack_top; + + /* set stack */ + lt->stack = stack; + lt->stack_size = stack_size; + + /* set initial context */ + s[-3] = NULL; + s[-2] = (void *)lt; + lt->ctx.rsp = (void *)(stack_top - (4 * sizeof(void *))); + lt->ctx.rbp = (void *)(stack_top - (3 * sizeof(void *))); + lt->ctx.rip = (void *)_lthread_exec; +} + +/* + * Create an lthread on the current scheduler + * If there is no current scheduler on this pthread then first create one + */ +int +lthread_create(struct lthread **new_lt, int lcore_id, + lthread_func_t fun, void *arg) +{ + if ((new_lt == NULL) || (fun == NULL)) + return POSIX_ERRNO(EINVAL); + + if (lcore_id < 0) + lcore_id = rte_lcore_id(); + else if (lcore_id > LTHREAD_MAX_LCORES) + return POSIX_ERRNO(EINVAL); + + struct lthread *lt = NULL; + + if (THIS_SCHED == NULL) { + THIS_SCHED = _lthread_sched_create(0); + if (THIS_SCHED == NULL) { + perror("Failed to create scheduler"); + return POSIX_ERRNO(EAGAIN); + } + } + + /* allocate a thread structure */ + lt = _lthread_objcache_alloc((THIS_SCHED)->lthread_cache); + if (lt == NULL) + return POSIX_ERRNO(EAGAIN); + + bzero(lt, sizeof(struct lthread)); + lt->root_sched = THIS_SCHED; + + /* set the function args and exit handler */ + _lthread_init(lt, fun, arg, _lthread_exit_handler); + + /* put it in the ready queue */ + *new_lt = lt; + + if (lcore_id < 0) + lcore_id = rte_lcore_id(); + + DIAG_CREATE_EVENT(lt, LT_DIAG_LTHREAD_CREATE); + + rte_wmb(); + _ready_queue_insert(_lthread_sched_get(lcore_id), lt); + return 0; +} + +/* + * Schedules lthread to sleep for `nsecs` + * setting the lthread state to ST_LT_SLEEPING. + * lthread state is cleared upon resumption or expiry. 
+ */ +static inline void _lthread_sched_sleep(struct lthread *lt, uint64_t nsecs) +{ + uint64_t state = lt->state; + uint64_t clks = _ns_to_clks(nsecs); + + if (clks) { + _timer_start(lt, clks); + lt->state = state | BIT(ST_LT_SLEEPING); + } + DIAG_EVENT(lt, LT_DIAG_LTHREAD_SLEEP, clks, 0); + _suspend(); +} + + + +/* + * Cancels any running timer. + * This can be called multiple times on the same lthread regardless if it was + * sleeping or not. + */ +int _lthread_desched_sleep(struct lthread *lt) +{ + uint64_t state = lt->state; + + if (state & BIT(ST_LT_SLEEPING)) { + _timer_stop(lt); + state &= (CLEARBIT(ST_LT_SLEEPING) & CLEARBIT(ST_LT_EXPIRED)); + lt->state = state | BIT(ST_LT_READY); + return 1; + } + return 0; +} + +/* + * set user data pointer in an lthread + */ +void lthread_set_data(void *data) +{ + if (sizeof(void *) == RTE_PER_LTHREAD_SECTION_SIZE) + THIS_LTHREAD->per_lthread_data = data; +} + +/* + * Retrieve user data pointer from an lthread + */ +void *lthread_get_data(void) +{ + return THIS_LTHREAD->per_lthread_data; +} + +/* + * Return the current lthread handle + */ +struct lthread *lthread_current(void) +{ + struct lthread_sched *sched = THIS_SCHED; + + if (sched) + return sched->current_lthread; + return NULL; +} + + + +/* + * Tasklet to cancel a thread + */ +static void +_cancel(void *arg) +{ + struct lthread *lt = (struct lthread *) arg; + + lt->state |= BIT(ST_LT_CANCELLED); + lthread_detach(); +} + + +/* + * Mark the specified as canceled + */ +int lthread_cancel(struct lthread *cancel_lt) +{ + struct lthread *lt; + + if ((cancel_lt == NULL) || (cancel_lt == THIS_LTHREAD)) + return POSIX_ERRNO(EINVAL); + + DIAG_EVENT(cancel_lt, LT_DIAG_LTHREAD_CANCEL, cancel_lt, 0); + + if (cancel_lt->sched != THIS_SCHED) { + + /* spawn task-let to cancel the thread */ + lthread_create(<, + cancel_lt->sched->lcore_id, + _cancel, + cancel_lt); + return 0; + } + cancel_lt->state |= BIT(ST_LT_CANCELLED); + return 0; +} + +/* + * Suspend the current lthread for 
specified time + */ +void lthread_sleep(uint64_t nsecs) +{ + struct lthread *lt = THIS_LTHREAD; + + _lthread_sched_sleep(lt, nsecs); + +} + +/* + * Suspend the current lthread for specified time + */ +void lthread_sleep_clks(uint64_t clks) +{ + struct lthread *lt = THIS_LTHREAD; + uint64_t state = lt->state; + + if (clks) { + _timer_start(lt, clks); + lt->state = state | BIT(ST_LT_SLEEPING); + } + DIAG_EVENT(lt, LT_DIAG_LTHREAD_SLEEP, clks, 0); + _suspend(); +} + +/* + * Requeue the current thread to the back of the ready queue + */ +void lthread_yield(void) +{ + struct lthread *lt = THIS_LTHREAD; + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_YIELD, 0, 0); + + _ready_queue_insert(THIS_SCHED, lt); + ctx_switch(&(THIS_SCHED)->ctx, <->ctx); +} + +/* + * Exit the current lthread + * If a thread is joining pass the user pointer to it + */ +void lthread_exit(void *ptr) +{ + struct lthread *lt = THIS_LTHREAD; + + /* if thread is detached (this is not valid) just exit */ + if (lt->state & BIT(ST_LT_DETACH)) + return; + + /* There is a race between lthread_join() and lthread_exit() + * - if exit before join then we suspend and resume on join + * - if join before exit then we resume the joining thread + */ + if ((lt->join == LT_JOIN_INITIAL) + && rte_atomic64_cmpset(<->join, LT_JOIN_INITIAL, + LT_JOIN_EXITING)) { + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_EXIT, 1, 0); + _suspend(); + /* set the exit value */ + if ((ptr != NULL) && (lt->lt_join->lt_exit_ptr != NULL)) + *(lt->lt_join->lt_exit_ptr) = ptr; + + /* let the joining thread know we have set the exit value */ + lt->join = LT_JOIN_EXIT_VAL_SET; + } else { + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_EXIT, 0, 0); + /* set the exit value */ + if ((ptr != NULL) && (lt->lt_join->lt_exit_ptr != NULL)) + *(lt->lt_join->lt_exit_ptr) = ptr; + /* let the joining thread know we have set the exit value */ + lt->join = LT_JOIN_EXIT_VAL_SET; + _ready_queue_insert(lt->lt_join->sched, + (struct lthread *)lt->lt_join); + } + + + /* wait until the joinging thread 
has collected the exit value */ + while (lt->join != LT_JOIN_EXIT_VAL_READ) + _reschedule(); + + /* reset join state */ + lt->join = LT_JOIN_INITIAL; + + /* detach it so its resources can be released */ + lt->state |= (BIT(ST_LT_DETACH) | BIT(ST_LT_EXITED)); +} + +/* + * Join an lthread + * Suspend until the joined thread returns + */ +int lthread_join(struct lthread *lt, void **ptr) +{ + if (lt == NULL) + return POSIX_ERRNO(EINVAL); + + struct lthread *current = THIS_LTHREAD; + uint64_t lt_state = lt->state; + + /* invalid to join a detached thread, or a thread that is joined */ + if ((lt_state & BIT(ST_LT_DETACH)) || (lt->join == LT_JOIN_THREAD_SET)) + return POSIX_ERRNO(EINVAL); + /* pointer to the joining thread and a poingter to return a value */ + lt->lt_join = current; + current->lt_exit_ptr = ptr; + /* There is a race between lthread_join() and lthread_exit() + * - if join before exit we suspend and will resume when exit is called + * - if exit before join we resume the exiting thread + */ + if ((lt->join == LT_JOIN_INITIAL) + && rte_atomic64_cmpset(<->join, LT_JOIN_INITIAL, + LT_JOIN_THREAD_SET)) { + + DIAG_EVENT(current, LT_DIAG_LTHREAD_JOIN, lt, 1); + _suspend(); + } else { + DIAG_EVENT(current, LT_DIAG_LTHREAD_JOIN, lt, 0); + _ready_queue_insert(lt->sched, lt); + } + + /* wait for exiting thread to set return value */ + while (lt->join != LT_JOIN_EXIT_VAL_SET) + _reschedule(); + + /* collect the return value */ + if (ptr != NULL) + *ptr = *current->lt_exit_ptr; + + /* let the exiting thread proceed to exit */ + lt->join = LT_JOIN_EXIT_VAL_READ; + return 0; +} + + +/* + * Detach current lthread + * A detached thread cannot be joined + */ +void lthread_detach(void) +{ + struct lthread *lt = THIS_LTHREAD; + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_DETACH, 0, 0); + + uint64_t state = lt->state; + + lt->state = state | BIT(ST_LT_DETACH); +} + +/* + * Set function name of an lthread + * this is a debug aid + */ +void lthread_set_funcname(const char *f) +{ + struct 
lthread *lt = THIS_LTHREAD; + + strncpy(lt->funcname, f, sizeof(lt->funcname)); + lt->funcname[sizeof(lt->funcname)-1] = 0; +} diff --git a/examples/performance-thread/common/lthread.h b/examples/performance-thread/common/lthread.h new file mode 100644 index 00000000..8c77af82 --- /dev/null +++ b/examples/performance-thread/common/lthread.h @@ -0,0 +1,99 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * Some portions of this software is derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef LTHREAD_H_ +#define LTHREAD_H_ + +#include <rte_per_lcore.h> + +#include "lthread_api.h" +#include "lthread_diag.h" + +struct lthread; +struct lthread_sched; + +/* function to be called when a context function returns */ +typedef void (*lthread_exit_func) (struct lthread *); + +void _lthread_exit_handler(struct lthread *lt); + +void lthread_set_funcname(const char *f); + +void _lthread_sched_busy_sleep(struct lthread *lt, uint64_t nsecs); + +int _lthread_desched_sleep(struct lthread *lt); + +void _lthread_free(struct lthread *lt); + +struct lthread_sched *_lthread_sched_get(int lcore_id); + +struct lthread_stack *_stack_alloc(void); + +struct +lthread_sched *_lthread_sched_create(size_t stack_size); + +void +_lthread_init(struct lthread *lt, + lthread_func_t fun, void *arg, lthread_exit_func exit_handler); + +void _lthread_set_stack(struct lthread *lt, void *stack, size_t stack_size); + +#endif /* LTHREAD_H_ */ diff --git a/examples/performance-thread/common/lthread_api.h b/examples/performance-thread/common/lthread_api.h new file mode 100644 index 00000000..ec976103 --- /dev/null +++ b/examples/performance-thread/common/lthread_api.h @@ -0,0 +1,832 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software may have been derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * @file lthread_api.h + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This file contains the public API for the L-thread subsystem + * + * The L_thread subsystem provides a simple cooperative scheduler to + * enable arbitrary functions to run as cooperative threads within a + * single P-thread. + * + * The subsystem provides a P-thread like API that is intended to assist in + * reuse of legacy code written for POSIX p_threads. + * + * The L-thread subsystem relies on cooperative multitasking, as such + * an L-thread must possess frequent rescheduling points. Often these + * rescheduling points are provided transparently when the application + * invokes an L-thread API. + * + * In some applications it is possible that the program may enter a loop the + * exit condition for which depends on the action of another thread or a + * response from hardware. In such a case it is necessary to yield the thread + * periodically in the loop body, to allow other threads an opportunity to + * run. This can be done by inserting a call to lthread_yield() or + * lthread_sleep(n) in the body of the loop. + * + * If the application makes expensive / blocking system calls or does other + * work that would take an inordinate amount of time to complete, this will + * stall the cooperative scheduler resulting in very poor performance. 
+ * + * In such cases an L-thread can be migrated temporarily to another scheduler + * running in a different P-thread on another core. When the expensive or + * blocking operation is completed it can be migrated back to the original + * scheduler. In this way other threads can continue to run on the original + * scheduler and will be completely unaffected by the blocking behaviour. + * To migrate an L-thread to another scheduler the API lthread_set_affinity() + * is provided. + * + * If L-threads that share data are running on the same core it is possible + * to design programs where mutual exclusion mechanisms to protect shared data + * can be avoided. This is due to the fact that the cooperative threads cannot + * preempt each other. + * + * There are two cases where mutual exclusion mechanisms are necessary. + * + * a) Where the L-threads sharing data are running on different cores. + * b) Where code must yield while updating data shared with another thread. + * + * The L-thread subsystem provides a set of mutex APIs to help with such + * scenarios, however excessive reliance on on these will impact performance + * and is best avoided if possible. + * + * L-threads can synchronise using a fast condition variable implementation + * that supports signal and broadcast. An L-thread running on any core can + * wait on a condition. + * + * L-threads can have L-thread local storage with an API modelled on either the + * P-thread get/set specific API or using PER_LTHREAD macros modelled on the + * RTE_PER_LCORE macros. Alternatively a simple user data pointer may be set + * and retrieved from a thread. 
+ */ +#ifndef LTHREAD_H +#define LTHREAD_H + +#include <stdint.h> +#include <sys/socket.h> +#include <fcntl.h> +#include <netinet/in.h> + +#include <rte_cycles.h> + + +struct lthread; +struct lthread_cond; +struct lthread_mutex; + +struct lthread_condattr; +struct lthread_mutexattr; + +typedef void (*lthread_func_t) (void *); + +/* + * Define the size of stack for an lthread + * Then this is the size that will be allocated on lthread creation + * This is a fixed size and will not grow. + */ +#define LTHREAD_MAX_STACK_SIZE (1024*64) + +/** + * Define the maximum number of TLS keys that can be created + * + */ +#define LTHREAD_MAX_KEYS 1024 + +/** + * Define the maximum number of attempts to destroy an lthread's + * TLS data on thread exit + */ +#define LTHREAD_DESTRUCTOR_ITERATIONS 4 + + +/** + * Define the maximum number of lcores that will support lthreads + */ +#define LTHREAD_MAX_LCORES RTE_MAX_LCORE + +/** + * How many lthread objects to pre-allocate as the system grows + * applies to lthreads + stacks, TLS, mutexs, cond vars. + * + * @see _lthread_alloc() + * @see _cond_alloc() + * @see _mutex_alloc() + * + */ +#define LTHREAD_PREALLOC 100 + +/** + * Set the number of schedulers in the system. + * + * This function may optionally be called before starting schedulers. + * + * If the number of schedulers is not set, or set to 0 then each scheduler + * will begin scheduling lthreads immediately it is started. + + * If the number of schedulers is set to greater than 0, then each scheduler + * will wait until all schedulers have started before beginning to schedule + * lthreads. + * + * If an application wishes to have threads migrate between cores using + * lthread_set_affinity(), or join threads running on other cores using + * lthread_join(), then it is prudent to set the number of schedulers to ensure + * that all schedulers are initialised beforehand. 
+ * + * @param num + * the number of schedulers in the system + * @return + * the number of schedulers in the system + */ +int lthread_num_schedulers_set(int num); + +/** + * Return the number of schedulers currently running + * @return + * the number of schedulers in the system + */ +int lthread_active_schedulers(void); + +/** + * Shutdown the specified scheduler + * + * This function tells the specified scheduler to + * exit if/when there is no more work to do. + * + * Note that although the scheduler will stop + * resources are not freed. + * + * @param lcore + * The lcore of the scheduler to shutdown + * + * @return + * none + */ +void lthread_scheduler_shutdown(unsigned lcore); + +/** + * Shutdown all schedulers + * + * This function tells all schedulers including the current scheduler to + * exit if/when there is no more work to do. + * + * Note that although the schedulers will stop + * resources are not freed. + * + * @return + * none + */ +void lthread_scheduler_shutdown_all(void); + +/** + * Run the lthread scheduler + * + * Runs the lthread scheduler. + * This function returns only if/when all lthreads have exited. + * This function must be the main loop of an EAL thread. + * + * @return + * none + */ + +void lthread_run(void); + +/** + * Create an lthread + * + * Creates an lthread and places it in the ready queue on a particular + * lcore. + * + * If no scheduler exists yet on the curret lcore then one is created. 
+ * + * @param new_lt + * Pointer to an lthread pointer that will be initialized + * @param lcore + * the lcore the thread should be started on or the current lcore + * -1 the current lcore + * 0 - LTHREAD_MAX_LCORES any other lcore + * @param func + * Pointer to the function for the thread to run + * @param arg + * Pointer to args that will be passed to the thread + * + * @return + * 0 success + * EAGAIN no resources available + * EINVAL NULL thread or function pointer, or lcore_id out of range + */ +int +lthread_create(struct lthread **new_lt, + int lcore, lthread_func_t func, void *arg); + +/** + * Cancel an lthread + * + * Cancels an lthread and causes it to be terminated + * If the lthread is detached it will be freed immediately + * otherwise its resources will not be released until it is joined. + * + * @param lt + * Pointer to an lthread that will be cancelled + * + * @return + * 0 success + * EINVAL thread was NULL or is the calling thread + */ +int lthread_cancel(struct lthread *lt); + +/** + * Join an lthread + * + * Joins the current thread with the specified lthread, and waits for that + * thread to exit. + * Passes an optional pointer to collect returned data. + * + * @param lt + * Pointer to the lthread to be joined + * @param ptr + * Pointer to pointer to collect returned data + * + * @return + * 0 success + * EINVAL lthread could not be joined. + */ +int lthread_join(struct lthread *lt, void **ptr); + +/** + * Detach an lthread + * + * Detaches the current thread + * On exit a detached lthread will be freed immediately and will not wait + * to be joined. The default state for a thread is not detached. + * + * @return + * none + */ +void lthread_detach(void); + +/** + * Exit an lthread + * + * Terminate the current thread, optionally return data. + * The data may be collected by lthread_join() + * + * After calling this function the lthread will be suspended until it is + * joined. After it is joined then its resources will be freed. 
+ * + * @param ptr + * Pointer to pointer to data to be returned + * + * @return + * none + */ +void lthread_exit(void *val); + +/** + * Cause the current lthread to sleep for n nanoseconds + * + * The current thread will be suspended until the specified time has elapsed + * or has been exceeded. + * + * Execution will switch to the next lthread that is ready to run + * + * @param nsecs + * Number of nanoseconds to sleep + * + * @return + * none + */ +void lthread_sleep(uint64_t nsecs); + +/** + * Cause the current lthread to sleep for n cpu clock ticks + * + * The current thread will be suspended until the specified time has elapsed + * or has been exceeded. + * + * Execution will switch to the next lthread that is ready to run + * + * @param clks + * Number of clock ticks to sleep + * + * @return + * none + */ +void lthread_sleep_clks(uint64_t clks); + +/** + * Yield the current lthread + * + * The current thread will yield and execution will switch to the + * next lthread that is ready to run + * + * @return + * none + */ +void lthread_yield(void); + +/** + * Migrate the current thread to another scheduler + * + * This function migrates the current thread to another scheduler. + * Execution will switch to the next lthread that is ready to run on the + * current scheduler. The current thread will be resumed on the new scheduler. 
+ * + * @param lcore + * The lcore to migrate to + * + * @return + * 0 success we are now running on the specified core + * EINVAL the destination lcore was not valid + */ +int lthread_set_affinity(unsigned lcore); + +/** + * Return the current lthread + * + * Returns the current lthread + * + * @return + * pointer to the current lthread + */ +struct lthread +*lthread_current(void); + +/** + * Associate user data with an lthread + * + * This function sets a user data pointer in the current lthread + * The pointer can be retrieved with lthread_get_data() + * It is the users responsibility to allocate and free any data referenced + * by the user pointer. + * + * @param data + * pointer to user data + * + * @return + * none + */ +void lthread_set_data(void *data); + +/** + * Get user data for the current lthread + * + * This function returns a user data pointer for the current lthread + * The pointer must first be set with lthread_set_data() + * It is the users responsibility to allocate and free any data referenced + * by the user pointer. + * + * @return + * pointer to user data + */ +void +*lthread_get_data(void); + +struct lthread_key; +typedef void (*tls_destructor_func) (void *); + +/** + * Create a key for lthread TLS + * + * This function is modelled on pthread_key_create + * It creates a thread-specific data key visible to all lthreads on the + * current scheduler. + * + * Key values may be used to locate thread-specific data. + * The same key value may be used by different threads, the values bound + * to the key by lthread_setspecific() are maintained on a per-thread + * basis and persist for the life of the calling thread. + * + * An optional destructor function may be associated with each key value. + * At thread exit, if a key value has a non-NULL destructor pointer, and the + * thread has a non-NULL value associated with the key, the function pointed + * to is called with the current associated value as its sole argument. 
+ * + * @param key + * Pointer to the key to be created + * @param destructor + * Pointer to destructor function + * + * @return + * 0 success + * EINVAL the key ptr was NULL + * EAGAIN no resources available + */ +int lthread_key_create(unsigned int *key, tls_destructor_func destructor); + +/** + * Delete key for lthread TLS + * + * This function is modelled on pthread_key_delete(). + * It deletes a thread-specific data key previously returned by + * lthread_key_create(). + * The thread-specific data values associated with the key need not be NULL + * at the time that lthread_key_delete is called. + * It is the responsibility of the application to free any application + * storage or perform any cleanup actions for data structures related to the + * deleted key. This cleanup can be done either before or after + * lthread_key_delete is called. + * + * @param key + * The key to be deleted + * + * @return + * 0 Success + * EINVAL the key was invalid + */ +int lthread_key_delete(unsigned int key); + +/** + * Get lthread TLS + * + * This function is modelled on pthread_get_specific(). + * It returns the value currently bound to the specified key on behalf of the + * calling thread. Calling lthread_getspecific() with a key value not + * obtained from lthread_key_create() or after key has been deleted with + * lthread_key_delete() will result in undefined behaviour. + * lthread_getspecific() may be called from a thread-specific data destructor + * function. + * + * @param key + * The key for which data is requested + * + * @return + * Pointer to the thread specific data associated with that key + * or NULL if no data has been set. + */ +void +*lthread_getspecific(unsigned int key); + +/** + * Set lthread TLS + * + * This function is modelled on pthread_set_sepcific() + * It associates a thread-specific value with a key obtained via a previous + * call to lthread_key_create(). + * Different threads may bind different values to the same key. 
These values + * are typically pointers to dynamically allocated memory that have been + * reserved by the calling thread. Calling lthread_setspecific with a key + * value not obtained from lthread_key_create or after the key has been + * deleted with lthread_key_delete will result in undefined behaviour. + * + * @param key + * The key for which data is to be set + * @param value + * Pointer to the user data + * + * @return + * 0 success + * EINVAL the key was invalid + */ + +int lthread_setspecific(unsigned int key, const void *value); + +/** + * The macros below provide an alternative mechanism to access lthread local + * storage. + * + * The macros can be used to declare, define and access per lthread local + * storage in a similar way to the RTE_PER_LCORE macros which control storage + * local to an lcore. + * + * Memory for per lthread variables declared in this way is allocated when the + * lthread is created and a pointer to this memory is stored in the lthread. + * The per lthread variables are accessed via the pointer + the offset of the + * particular variable. + * + * The total size of per lthread storage, and the variable offsets are found by + * defining the variables in a unique global memory section, the start and end + * of which are known. This global memory section is used only in the + * computation of the addresses of the lthread variables, and is never actually + * used to store any data. + * + * Due to the fact that variables declared this way may be scattered across + * many files, the start and end of the section and variable offsets are only + * known after linking, thus the computation of section size and variable + * addresses is performed at run time. + * + * These macros are primarily provided to aid porting of code that makes use + * of the existing RTE_PER_LCORE macros. 
In principle it would be more efficient + * to gather all lthread local variables into a single structure and + * set/retrieve a pointer to that struct using the alternative + * lthread_data_set/get APIs. + * + * These macros are mutually exclusive with the lthread_data_set/get APIs. + * If you define storage using these macros then the lthread_data_set/get APIs + * will not perform as expected, the lthread_data_set API does nothing, and the + * lthread_data_get API returns the start of global section. + * + */ +/* start and end of per lthread section */ +extern char __start_per_lt; +extern char __stop_per_lt; + + +#define RTE_DEFINE_PER_LTHREAD(type, name) \ +__typeof__(type)__attribute((section("per_lt"))) per_lt_##name + +/** + * Macro to declare an extern per lthread variable "var" of type "type" + */ +#define RTE_DECLARE_PER_LTHREAD(type, name) \ +extern __typeof__(type)__attribute((section("per_lt"))) per_lt_##name + +/** + * Read/write the per-lcore variable value + */ +#define RTE_PER_LTHREAD(name) ((typeof(per_lt_##name) *)\ +((char *)lthread_get_data() +\ +((char *) &per_lt_##name - &__start_per_lt))) + +/** + * Initialize a mutex + * + * This function provides a mutual exclusion device, the need for which + * can normally be avoided in a cooperative multitasking environment. + * It is provided to aid porting of legacy code originally written for + * preemptive multitasking environments such as pthreads. + * + * A mutex may be unlocked (not owned by any thread), or locked (owned by + * one thread). + * + * A mutex can never be owned by more than one thread simultaneously. + * A thread attempting to lock a mutex that is already locked by another + * thread is suspended until the owning thread unlocks the mutex. + * + * lthread_mutex_init() initializes the mutex object pointed to by mutex + * Optional mutex attributes specified in mutexattr, are reserved for future + * use and are currently ignored. 
+ * + * If a thread calls lthread_mutex_lock() on the mutex, then if the mutex + * is currently unlocked, it becomes locked and owned by the calling + * thread, and lthread_mutex_lock returns immediately. If the mutex is + * already locked by another thread, lthread_mutex_lock suspends the calling + * thread until the mutex is unlocked. + * + * lthread_mutex_trylock behaves identically to lthread_mutex_lock, except + * that it does not block the calling thread if the mutex is already locked + * by another thread. + * + * lthread_mutex_unlock() unlocks the specified mutex. The mutex is assumed + * to be locked and owned by the calling thread. + * + * lthread_mutex_destroy() destroys a mutex object, freeing its resources. + * The mutex must be unlocked with nothing blocked on it before calling + * lthread_mutex_destroy. + * + * @param name + * Optional pointer to string describing the mutex + * @param mutex + * Pointer to pointer to the mutex to be initialized + * @param attr + * Pointer to attribute - unused, reserved + * + * @return + * 0 success + * EINVAL mutex was not a valid pointer + * EAGAIN insufficient resources + */ + +int +lthread_mutex_init(char *name, struct lthread_mutex **mutex, + const struct lthread_mutexattr *attr); + +/** + * Destroy a mutex + * + * This function destroys the specified mutex freeing its resources. + * The mutex must be unlocked before calling lthread_mutex_destroy. + * + * @see lthread_mutex_init() + * + * @param mutex + * Pointer to the mutex to be destroyed + * + * @return + * 0 success + * EINVAL mutex was not an initialized mutex + * EBUSY mutex was still in use + */ +int lthread_mutex_destroy(struct lthread_mutex *mutex); + +/** + * Lock a mutex + * + * This function attempts to lock a mutex. + * If a thread calls lthread_mutex_lock() on the mutex, then if the mutex + * is currently unlocked, it becomes locked and owned by the calling + * thread, and lthread_mutex_lock returns immediately. 
If the mutex is + * already locked by another thread, lthread_mutex_lock suspends the calling + * thread until the mutex is unlocked. + * + * @see lthread_mutex_init() + * + * @param mutex + * Pointer to the mutex to be locked + * + * @return + * 0 success + * EINVAL mutex was not an initialized mutex + * EDEADLOCK the mutex was already owned by the calling thread + */ + +int lthread_mutex_lock(struct lthread_mutex *mutex); + +/** + * Try to lock a mutex + * + * This function attempts to lock a mutex. + * lthread_mutex_trylock behaves identically to lthread_mutex_lock, except + * that it does not block the calling thread if the mutex is already locked + * by another thread. + * + * + * @see lthread_mutex_init() + * + * @param mutex + * Pointer to the mutex to be locked + * + * @return + * 0 success + * EINVAL mutex was not an initialized mutex + * EBUSY the mutex was already locked by another thread + */ +int lthread_mutex_trylock(struct lthread_mutex *mutex); + +/** + * Unlock a mutex + * + * This function attempts to unlock the specified mutex. The mutex is assumed + * to be locked and owned by the calling thread. + * + * The oldest of any threads blocked on the mutex is made ready and may + * compete with any other running thread to gain the mutex; if it fails it will + * be blocked again. + * + * @param mutex + * Pointer to the mutex to be unlocked + * + * @return + * 0 mutex was unlocked + * EINVAL mutex was not an initialized mutex + * EPERM the mutex was not owned by the calling thread + */ + +int lthread_mutex_unlock(struct lthread_mutex *mutex); + +/** + * Initialize a condition variable + * + * This function initializes a condition variable. + * + * Condition variables can be used to communicate changes in the state of data + * shared between threads.
+ * + * @see lthread_cond_wait() + * + * @param name + * Pointer to optional string describing the condition variable + * @param c + * Pointer to pointer to the condition variable to be initialized + * @param attr + * Pointer to optional attribute reserved for future use, currently ignored + * + * @return + * 0 success + * EINVAL c was not a valid pointer + * EAGAIN insufficient resources + */ +int +lthread_cond_init(char *name, struct lthread_cond **c, + const struct lthread_condattr *attr); + +/** + * Destroy a condition variable + * + * This function destroys a condition variable that was created with + * lthread_cond_init() and releases its resources. + * + * @param cond + * Pointer to the condition variable to be destroyed + * + * @return + * 0 Success + * EBUSY condition variable was still in use + * EINVAL was not an initialised condition variable + */ +int lthread_cond_destroy(struct lthread_cond *cond); + +/** + * Wait on a condition variable + * + * The function blocks the current thread waiting on the condition variable + * specified by c. The waiting thread unblocks only after another thread + * calls lthread_cond_signal, or lthread_cond_broadcast, specifying the + * same condition variable. + * + * @param c + * Pointer to the condition variable to be waited on + * + * @param reserved + * reserved for future use + * + * @return + * 0 The condition was signalled ( Success ) + * EINVAL was not an initialised condition variable + */ +int lthread_cond_wait(struct lthread_cond *c, uint64_t reserved); + +/** + * Signal a condition variable + * + * The function unblocks one thread waiting for the condition variable cond. + * If no threads are waiting on cond, the lthread_cond_signal() function + * has no effect.
+ * + * @param c + * Pointer to the condition variable to be signalled + * + * @return + * 0 The condition was signalled ( Success ) + * EINVAL was not an initialised condition variable + */ +int lthread_cond_signal(struct lthread_cond *c); + +/** + * Broadcast a condition variable + * + * The function unblocks all threads waiting for the condition variable c. + * If no threads are waiting on c, the lthread_cond_broadcast() + * function has no effect. + * + * @param c + * Pointer to the condition variable to be signalled + * + * @return + * 0 The condition was signalled ( Success ) + * EINVAL was not an initialised condition variable + */ +int lthread_cond_broadcast(struct lthread_cond *c); + +#endif /* LTHREAD_H */ diff --git a/examples/performance-thread/common/lthread_cond.c b/examples/performance-thread/common/lthread_cond.c new file mode 100644 index 00000000..96fcce04 --- /dev/null +++ b/examples/performance-thread/common/lthread_cond.c @@ -0,0 +1,239 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software may have been derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stddef.h> +#include <limits.h> +#include <inttypes.h> +#include <unistd.h> +#include <pthread.h> +#include <fcntl.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <errno.h> + +#include <rte_log.h> +#include <rte_common.h> + +#include "lthread_api.h" +#include "lthread_diag_api.h" +#include "lthread_diag.h" +#include "lthread_int.h" +#include "lthread_sched.h" +#include "lthread_queue.h" +#include "lthread_objcache.h" +#include "lthread_timer.h" +#include "lthread_mutex.h" +#include "lthread_cond.h" + +/* + * Create a condition variable + */ +int +lthread_cond_init(char *name, struct lthread_cond **cond, + __rte_unused const struct lthread_condattr *attr) +{ + struct lthread_cond *c; + + if (cond == NULL) + return POSIX_ERRNO(EINVAL); + + /* allocate a condition variable from cache */ + c = _lthread_objcache_alloc((THIS_SCHED)->cond_cache); + + if (c == NULL) + return POSIX_ERRNO(EAGAIN); + + c->blocked = _lthread_queue_create("blocked"); + if (c->blocked == NULL) { + _lthread_objcache_free((THIS_SCHED)->cond_cache, (void *)c); + return POSIX_ERRNO(EAGAIN); + } + + if (name == NULL) + strncpy(c->name, "no name", sizeof(c->name)); + else + strncpy(c->name, name, sizeof(c->name)); + c->name[sizeof(c->name)-1] = 0; + + c->root_sched = THIS_SCHED; + + (*cond) = c; + DIAG_CREATE_EVENT((*cond), LT_DIAG_COND_CREATE); + return 0; +} + +/* + * 
Destroy a condition variable + */ +int lthread_cond_destroy(struct lthread_cond *c) +{ + if (c == NULL) { + DIAG_EVENT(c, LT_DIAG_COND_DESTROY, c, POSIX_ERRNO(EINVAL)); + return POSIX_ERRNO(EINVAL); + } + + /* try to free it */ + if (_lthread_queue_destroy(c->blocked) < 0) { + /* queue in use */ + DIAG_EVENT(c, LT_DIAG_COND_DESTROY, c, POSIX_ERRNO(EBUSY)); + return POSIX_ERRNO(EBUSY); + } + + /* okay free it */ + _lthread_objcache_free(c->root_sched->cond_cache, c); + DIAG_EVENT(c, LT_DIAG_COND_DESTROY, c, 0); + return 0; +} + +/* + * Wait on a condition variable + */ +int lthread_cond_wait(struct lthread_cond *c, __rte_unused uint64_t reserved) +{ + struct lthread *lt = THIS_LTHREAD; + + if (c == NULL) { + DIAG_EVENT(c, LT_DIAG_COND_WAIT, c, POSIX_ERRNO(EINVAL)); + return POSIX_ERRNO(EINVAL); + } + + + DIAG_EVENT(c, LT_DIAG_COND_WAIT, c, 0); + + /* queue the current thread in the blocked queue + * this will be written when we return to the scheduler + * to ensure that the current thread context is saved + * before any signal could result in it being dequeued and + * resumed + */ + lt->pending_wr_queue = c->blocked; + _suspend(); + + /* the condition happened */ + return 0; +} + +/* + * Signal a condition variable + * attempt to resume any blocked thread + */ +int lthread_cond_signal(struct lthread_cond *c) +{ + struct lthread *lt; + + if (c == NULL) { + DIAG_EVENT(c, LT_DIAG_COND_SIGNAL, c, POSIX_ERRNO(EINVAL)); + return POSIX_ERRNO(EINVAL); + } + + lt = _lthread_queue_remove(c->blocked); + + if (lt != NULL) { + /* okay wake up this thread */ + DIAG_EVENT(c, LT_DIAG_COND_SIGNAL, c, lt); + _ready_queue_insert((struct lthread_sched *)lt->sched, lt); + } + return 0; +} + +/* + * Broadcast a condition variable + */ +int lthread_cond_broadcast(struct lthread_cond *c) +{ + struct lthread *lt; + + if (c == NULL) { + DIAG_EVENT(c, LT_DIAG_COND_BROADCAST, c, POSIX_ERRNO(EINVAL)); + return POSIX_ERRNO(EINVAL); + } + + DIAG_EVENT(c, LT_DIAG_COND_BROADCAST, c, 0); + do { + /* 
drain the queue waking everybody */ + lt = _lthread_queue_remove(c->blocked); + + if (lt != NULL) { + DIAG_EVENT(c, LT_DIAG_COND_BROADCAST, c, lt); + /* wake up */ + _ready_queue_insert((struct lthread_sched *)lt->sched, + lt); + } + } while (!_lthread_queue_empty(c->blocked)); + _reschedule(); + DIAG_EVENT(c, LT_DIAG_COND_BROADCAST, c, 0); + return 0; +} + +/* + * return the diagnostic ref val stored in a condition var + */ +uint64_t +lthread_cond_diag_ref(struct lthread_cond *c) +{ + if (c == NULL) + return 0; + return c->diag_ref; +} diff --git a/examples/performance-thread/common/lthread_cond.h b/examples/performance-thread/common/lthread_cond.h new file mode 100644 index 00000000..5bd02a7d --- /dev/null +++ b/examples/performance-thread/common/lthread_cond.h @@ -0,0 +1,77 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software may have been derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef LTHREAD_COND_H_
+#define LTHREAD_COND_H_
+
+#include "lthread_queue.h"
+
+#define MAX_COND_NAME_SIZE 64	/* size of name[], terminator included */
+
+struct lthread_cond {
+	struct lthread_queue *blocked;	/* lthreads waiting on this cond var */
+	struct lthread_sched *root_sched;	/* scheduler whose cond_cache owns the object */
+	int count;	/* NOTE(review): not referenced by lthread_cond.c - confirm use */
+	char name[MAX_COND_NAME_SIZE];	/* description given to lthread_cond_init() */
+	uint64_t diag_ref; /* optional ref to user diag data */
+} __rte_cache_aligned;
+
+#endif /* LTHREAD_COND_H_ */
diff --git a/examples/performance-thread/common/lthread_diag.c b/examples/performance-thread/common/lthread_diag.c
new file mode 100644
index 00000000..bce1a0c3
--- /dev/null
+++ b/examples/performance-thread/common/lthread_diag.c
@@ -0,0 +1,323 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_log.h> +#include <rte_common.h> + +#include "lthread_diag.h" +#include "lthread_queue.h" +#include "lthread_pool.h" +#include "lthread_objcache.h" +#include "lthread_sched.h" +#include "lthread_diag_api.h" + + +/* dummy ref value of default diagnostic callback */ +static uint64_t dummy_ref; + +#define DIAG_SCHED_STATS_FORMAT \ +"core %d\n%33s %12s %12s %12s %12s\n" + +#define DIAG_CACHE_STATS_FORMAT \ +"%20s %12lu %12lu %12lu %12lu %12lu\n" + +#define DIAG_QUEUE_STATS_FORMAT \ +"%20s %12lu %12lu %12lu\n" + + +/* + * texts used in diagnostic events, + * corresponding diagnostic mask bit positions are given as comment + */ +const char *diag_event_text[] = { + "LTHREAD_CREATE ", /* 00 */ + "LTHREAD_EXIT ", /* 01 */ + "LTHREAD_JOIN ", /* 02 */ + "LTHREAD_CANCEL ", /* 03 */ + "LTHREAD_DETACH ", /* 04 */ + "LTHREAD_FREE ", /* 05 */ + "LTHREAD_SUSPENDED ", /* 06 */ + "LTHREAD_YIELD ", /* 07 */ + "LTHREAD_RESCHEDULED", /* 08 */ + "LTHREAD_SLEEP ", /* 09 */ + "LTHREAD_RESUMED ", /* 10 */ + "LTHREAD_AFFINITY ", /* 11 */ + "LTHREAD_TMR_START ", /* 12 */ + "LTHREAD_TMR_DELETE ", /* 13 */ + "LTHREAD_TMR_EXPIRED", /* 14 */ + "COND_CREATE ", /* 15 */ + "COND_DESTROY ", 
/* 16 */ + "COND_WAIT ", /* 17 */ + "COND_SIGNAL ", /* 18 */ + "COND_BROADCAST ", /* 19 */ + "MUTEX_CREATE ", /* 20 */ + "MUTEX_DESTROY ", /* 21 */ + "MUTEX_LOCK ", /* 22 */ + "MUTEX_TRYLOCK ", /* 23 */ + "MUTEX_BLOCKED ", /* 24 */ + "MUTEX_UNLOCKED ", /* 25 */ + "SCHED_CREATE ", /* 26 */ + "SCHED_SHUTDOWN " /* 27 */ +}; + + +/* + * set diagnostic ,ask + */ +void lthread_diagnostic_set_mask(DIAG_USED uint64_t mask) +{ +#if LTHREAD_DIAG + diag_mask = mask; +#else + RTE_LOG(INFO, LTHREAD, + "LTHREAD_DIAG is not set, see lthread_diag_api.h\n"); +#endif +} + + +/* + * Check consistency of the scheduler stats + * Only sensible run after the schedulers are stopped + * Count the number of objects lying in caches and queues + * and available in the qnode pool. + * This should be equal to the total capacity of all + * qnode pools. + */ +void +_sched_stats_consistency_check(void); +void +_sched_stats_consistency_check(void) +{ +#if LTHREAD_DIAG + int i; + struct lthread_sched *sched; + uint64_t count = 0; + uint64_t capacity = 0; + + for (i = 0; i < LTHREAD_MAX_LCORES; i++) { + sched = schedcore[i]; + if (sched == NULL) + continue; + + /* each of these queues consumes a stub node */ + count += 8; + count += DIAG_COUNT(sched->ready, size); + count += DIAG_COUNT(sched->pready, size); + count += DIAG_COUNT(sched->lthread_cache, available); + count += DIAG_COUNT(sched->stack_cache, available); + count += DIAG_COUNT(sched->tls_cache, available); + count += DIAG_COUNT(sched->per_lthread_cache, available); + count += DIAG_COUNT(sched->cond_cache, available); + count += DIAG_COUNT(sched->mutex_cache, available); + + /* the node pool does not consume a stub node */ + if (sched->qnode_pool->fast_alloc != NULL) + count++; + count += DIAG_COUNT(sched->qnode_pool, available); + + capacity += DIAG_COUNT(sched->qnode_pool, capacity); + } + if (count != capacity) { + RTE_LOG(CRIT, LTHREAD, + "Scheduler caches are inconsistent\n"); + } else { + RTE_LOG(INFO, LTHREAD, + "Scheduler caches are 
ok\n"); + } +#endif +} + + +#if LTHREAD_DIAG +/* + * Display node pool stats + */ +static inline void +_qnode_pool_display(DIAG_USED struct qnode_pool *p) +{ + + printf(DIAG_CACHE_STATS_FORMAT, + p->name, + DIAG_COUNT(p, rd), + DIAG_COUNT(p, wr), + DIAG_COUNT(p, available), + DIAG_COUNT(p, prealloc), + DIAG_COUNT(p, capacity)); + fflush(stdout); +} +#endif + + +#if LTHREAD_DIAG +/* + * Display queue stats + */ +static inline void +_lthread_queue_display(DIAG_USED struct lthread_queue *q) +{ +#if DISPLAY_OBJCACHE_QUEUES + printf(DIAG_QUEUE_STATS_FORMAT, + q->name, + DIAG_COUNT(q, rd), + DIAG_COUNT(q, wr), + DIAG_COUNT(q, size)); + fflush(stdout); +#else + printf("%s: queue stats disabled\n", + q->name); + +#endif +} +#endif + +#if LTHREAD_DIAG +/* + * Display objcache stats + */ +static inline void +_objcache_display(DIAG_USED struct lthread_objcache *c) +{ + + printf(DIAG_CACHE_STATS_FORMAT, + c->name, + DIAG_COUNT(c, rd), + DIAG_COUNT(c, wr), + DIAG_COUNT(c, available), + DIAG_COUNT(c, prealloc), + DIAG_COUNT(c, capacity)); + _lthread_queue_display(c->q); + fflush(stdout); +} +#endif + +/* + * Display sched stats + */ +void +lthread_sched_stats_display(void) +{ +#if LTHREAD_DIAG + int i; + struct lthread_sched *sched; + + for (i = 0; i < LTHREAD_MAX_LCORES; i++) { + sched = schedcore[i]; + if (sched != NULL) { + printf(DIAG_SCHED_STATS_FORMAT, + sched->lcore_id, + "rd", + "wr", + "present", + "nb preallocs", + "capacity"); + _lthread_queue_display(sched->ready); + _lthread_queue_display(sched->pready); + _qnode_pool_display(sched->qnode_pool); + _objcache_display(sched->lthread_cache); + _objcache_display(sched->stack_cache); + _objcache_display(sched->tls_cache); + _objcache_display(sched->per_lthread_cache); + _objcache_display(sched->cond_cache); + _objcache_display(sched->mutex_cache); + fflush(stdout); + } + } + _sched_stats_consistency_check(); +#else + RTE_LOG(INFO, LTHREAD, + "lthread diagnostics disabled\n" + "hint - set LTHREAD_DIAG in 
lthread_diag_api.h\n"); +#endif +} + +/* + * Defafult diagnostic callback + */ +static uint64_t +_lthread_diag_default_cb(uint64_t time, struct lthread *lt, int diag_event, + uint64_t diag_ref, const char *text, uint64_t p1, uint64_t p2) +{ + uint64_t _p2; + int lcore = (int) rte_lcore_id(); + + switch (diag_event) { + case LT_DIAG_LTHREAD_CREATE: + case LT_DIAG_MUTEX_CREATE: + case LT_DIAG_COND_CREATE: + _p2 = dummy_ref; + break; + default: + _p2 = p2; + break; + } + + printf("%"PRIu64" %d %8.8lx %8.8lx %s %8.8lx %8.8lx\n", + time, + lcore, + (uint64_t) lt, + diag_ref, + text, + p1, + _p2); + + return dummy_ref++; +} + +/* + * plug in default diag callback with mask off + */ +void _lthread_diag_ctor(void)__attribute__((constructor)); +void _lthread_diag_ctor(void) +{ + diag_cb = _lthread_diag_default_cb; + diag_mask = 0; +} + + +/* + * enable diagnostics + */ +void lthread_diagnostic_enable(DIAG_USED diag_callback cb, + DIAG_USED uint64_t mask) +{ +#if LTHREAD_DIAG + if (cb == NULL) + diag_cb = _lthread_diag_default_cb; + else + diag_cb = cb; + diag_mask = mask; +#else + RTE_LOG(INFO, LTHREAD, + "LTHREAD_DIAG is not set, see lthread_diag_api.h\n"); +#endif +} diff --git a/examples/performance-thread/common/lthread_diag.h b/examples/performance-thread/common/lthread_diag.h new file mode 100644 index 00000000..2877d311 --- /dev/null +++ b/examples/performance-thread/common/lthread_diag.h @@ -0,0 +1,132 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LTHREAD_DIAG_H_ +#define LTHREAD_DIAG_H_ + +#include <stdint.h> +#include <inttypes.h> + +#include <rte_log.h> +#include <rte_common.h> + +#include "lthread_api.h" +#include "lthread_diag_api.h" + +extern diag_callback diag_cb; + +extern const char *diag_event_text[]; +extern uint64_t diag_mask; + +/* max size of name strings */ +#define LT_MAX_NAME_SIZE 64 + +#if LTHREAD_DIAG +#define DISPLAY_OBJCACHE_QUEUES 1 + +/* + * Generate a diagnostic trace or event in the case where an object is created. + * + * The value returned by the callback is stored in the object. 
+ * + * @ param obj + * pointer to the object that was created + * @ param ev + * the event code + * + */ +#define DIAG_CREATE_EVENT(obj, ev) do { \ + struct lthread *ct = RTE_PER_LCORE(this_sched)->current_lthread;\ + if ((BIT(ev) & diag_mask) && (ev < LT_DIAG_EVENT_MAX)) { \ + (obj)->diag_ref = (diag_cb)(rte_rdtsc(), \ + ct, \ + (ev), \ + 0, \ + diag_event_text[(ev)], \ + (uint64_t)obj, \ + 0); \ + } \ +} while (0) + +/* + * Generate a diagnostic trace event. + * + * @ param obj + * pointer to the lthread, cond or mutex object + * @ param ev + * the event code + * @ param p1 + * object specific value ( see lthread_diag_api.h ) + * @ param p2 + * object specific value ( see lthread_diag_api.h ) + */ +#define DIAG_EVENT(obj, ev, p1, p2) do { \ + struct lthread *ct = RTE_PER_LCORE(this_sched)->current_lthread;\ + if ((BIT(ev) & diag_mask) && (ev < LT_DIAG_EVENT_MAX)) { \ + (diag_cb)(rte_rdtsc(), \ + ct, \ + ev, \ + (obj)->diag_ref, \ + diag_event_text[(ev)], \ + (uint64_t)(p1), \ + (uint64_t)(p2)); \ + } \ +} while (0) + +#define DIAG_COUNT_DEFINE(x) rte_atomic64_t count_##x +#define DIAG_COUNT_INIT(o, x) rte_atomic64_init(&((o)->count_##x)) +#define DIAG_COUNT_INC(o, x) rte_atomic64_inc(&((o)->count_##x)) +#define DIAG_COUNT_DEC(o, x) rte_atomic64_dec(&((o)->count_##x)) +#define DIAG_COUNT(o, x) rte_atomic64_read(&((o)->count_##x)) + +#define DIAG_USED + +#else + +/* no diagnostics configured */ + +#define DISPLAY_OBJCACHE_QUEUES 0 + +#define DIAG_CREATE_EVENT(obj, ev) +#define DIAG_EVENT(obj, ev, p1, p) + +#define DIAG_COUNT_DEFINE(x) +#define DIAG_COUNT_INIT(o, x) do {} while (0) +#define DIAG_COUNT_INC(o, x) do {} while (0) +#define DIAG_COUNT_DEC(o, x) do {} while (0) +#define DIAG_COUNT(o, x) 0 + +#define DIAG_USED __rte_unused + +#endif /* LTHREAD_DIAG */ +#endif /* LTHREAD_DIAG_H_ */ diff --git a/examples/performance-thread/common/lthread_diag_api.h b/examples/performance-thread/common/lthread_diag_api.h new file mode 100644 index 00000000..7ee514f8 --- 
/dev/null +++ b/examples/performance-thread/common/lthread_diag_api.h @@ -0,0 +1,325 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef LTHREAD_DIAG_API_H_ +#define LTHREAD_DIAG_API_H_ + +#include <stdint.h> +#include <inttypes.h> + +/* + * Enable diagnostics + * 0 = conditionally compiled out + * 1 = compiled in and maskable at run time, see below for details + */ +#define LTHREAD_DIAG 0 + +/** + * + * @file lthread_diag_api.h + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * lthread diagnostic interface + * + * If enabled via configuration file option ( tbd ) the lthread subsystem + * can generate selected trace information, either RTE_LOG (INFO) messages, + * or else invoke a user supplied callback function when any of the events + * listed below occur. + * + * Reporting of events can be selectively masked, the bit position in the + * mask is determined by the corresponding event identifier listed below. + * + * Diagnostics are enabled by registering the callback function and mask + * using the API lthread_diagnostic_enable(). + * + * Various interesting parameters are passed to the callback, including the + * time in cpu clks, the lthread id, the diagnostic event id, a user ref value, + * event text string, object being traced, and two context dependent parameters + * (p1 and p2). The meaning of the two parameters p1 and p2 depends on + * the specific event. + * + * The events LT_DIAG_LTHREAD_CREATE, LT_DIAG_MUTEX_CREATE and + * LT_DIAG_COND_CREATE are implicitly enabled if the event mask includes any of + * the LT_DIAG_LTHREAD_XXX, LT_DIAG_MUTEX_XXX or LT_DIAG_COND_XXX events + * respectively. + * + * These create events may also be included in the mask discreetly if it is + * desired to monitor only create events. 
+ * + * @param time + * The time in cpu clks at which the event occurred + * + * @param lt + * The current lthread + * + * @param diag_event + * The diagnostic event id (bit position in the mask) + * + * @param diag_ref + * + * For LT_DIAG_LTHREAD_CREATE, LT_DIAG_MUTEX_CREATE or LT_DIAG_COND_CREATE + * this parameter is not used and set to 0. + * For all other events diag_ref contains the user ref value returned by the + * callback function when the traced object was created. + * + * The diag_ref values assigned to mutex and cond var can be retrieved + * using the APIs lthread_mutex_diag_ref(), and lthread_cond_diag_ref() + * respectively. + * + * @param text + * The event text string + * + * @param p1 + * see below + * + * @param p2 + * see below + * + * @returns + * For LT_DIAG_LTHREAD_CREATE, LT_DIAG_MUTEX_CREATE or LT_DIAG_COND_CREATE + * expects a user diagnostic ref value that will be saved in the lthread, mutex + * or cond var. + * + * For all other events return value is ignored. + * + * LT_DIAG_SCHED_CREATE - Invoked when a scheduler is created + * p1 = the scheduler that was created + * p2 = not used + * return value will be ignored + * + * LT_DIAG_SCHED_SHUTDOWN - Invoked when a shutdown request is received + * p1 = the scheduler to be shutdown + * p2 = not used + * return value will be ignored + * + * LT_DIAG_LTHREAD_CREATE - Invoked when a thread is created + * p1 = the lthread that was created + * p2 = not used + * return value will be stored in the lthread + * + * LT_DIAG_LTHREAD_EXIT - Invoked when a lthread exits + * p2 = 0 if the thread was already joined + * p2 = 1 if the thread was not already joined + * return val ignored + * + * LT_DIAG_LTHREAD_JOIN - Invoked when an lthread is joined + * p1 = the lthread that is being joined + * p2 = 0 if the thread was already exited + * p2 = 1 if the thread was not already exited + * return val ignored + * + * LT_DIAG_LTHREAD_CANCELLED - Invoked when an lthread is cancelled + * p1 = not used + * p2 = not used + * return val ignored + * + * LT_DIAG_LTHREAD_DETACH -
Invoked when an lthread is detached + * p1 = not used + * p2 = not used + * return val ignored + * + * LT_DIAG_LTHREAD_FREE - Invoked when an lthread is freed + * p1 = not used + * p2 = not used + * return val ignored + * + * LT_DIAG_LTHREAD_SUSPENDED - Invoked when an lthread is suspended + * p1 = not used + * p2 = not used + * return val ignored + * + * LT_DIAG_LTHREAD_YIELD - Invoked when an lthread explicitly yields + * p1 = not used + * p2 = not used + * return val ignored + * + * LT_DIAG_LTHREAD_RESCHEDULED - Invoked when an lthread is rescheduled + * p1 = not used + * p2 = not used + * return val ignored + * + * LT_DIAG_LTHREAD_RESUMED - Invoked when an lthread is resumed + * p1 = not used + * p2 = not used + * return val ignored + * + * LT_DIAG_LTHREAD_AFFINITY - Invoked when an lthread is affinitised + * p1 = the destination lcore_id + * p2 = not used + * return val ignored + * + * LT_DIAG_LTHREAD_TMR_START - Invoked when an lthread starts a timer + * p1 = address of timer node + * p2 = the timeout value + * return val ignored + * + * LT_DIAG_LTHREAD_TMR_DELETE - Invoked when an lthread deletes a timer + * p1 = address of the timer node + * p2 = 0 if the timer was successfully deleted, otherwise not used + * return val ignored + * + * LT_DIAG_LTHREAD_TMR_EXPIRED - Invoked when an lthread timer expires + * p1 = address of scheduler the timer expired on + * p2 = the thread associated with the timer + * return val ignored + * + * LT_DIAG_COND_CREATE - Invoked when a condition variable is created + * p1 = address of cond var that was created + * p2 = not used + * return diag ref value will be stored in the condition variable + * + * LT_DIAG_COND_DESTROY - Invoked when a condition variable is destroyed + * p1 = not used + * p2 = not used + * return val ignored + * + * LT_DIAG_COND_WAIT - Invoked when an lthread waits on a cond var + * p1 = the address of the condition variable + * p2 = not used + * return val ignored + * + * LT_DIAG_COND_SIGNAL - Invoked 
when an lthread signals a cond var + * p1 = the address of the cond var + * p2 = the lthread that was signalled, or error code + * return val ignored + * + * LT_DIAG_COND_BROADCAST - Invoked when an lthread broadcasts a cond var + * p1 = the address of the condition variable + * p2 = the lthread(s) that are signalled, or error code + * return val ignored + * + * LT_DIAG_MUTEX_CREATE - Invoked when a mutex is created + * p1 = address of mutex + * p2 = not used + * return diag ref value will be stored in the mutex variable + * + * LT_DIAG_MUTEX_DESTROY - Invoked when a mutex is destroyed + * p1 = address of mutex + * p2 = function return value + * return val ignored + * + * LT_DIAG_MUTEX_LOCK - Invoked when a mutex lock is obtained + * p1 = address of mutex + * p2 = function return value + * return val ignored + * + * LT_DIAG_MUTEX_BLOCKED - Invoked when an lthread blocks on a mutex + * p1 = address of mutex + * p2 = the lthread that is about to block + * return val ignored + * + * LT_DIAG_MUTEX_TRYLOCK - Invoked when a mutex try lock is attempted + * p1 = address of mutex + * p2 = the function return value + * return val ignored + * + * LT_DIAG_MUTEX_UNLOCKED - Invoked when a mutex is unlocked + * p1 = address of mutex + * p2 = the lthread that was unblocked, or error code + * return val ignored + */ +typedef uint64_t (*diag_callback) (uint64_t time, struct lthread *lt, + int diag_event, uint64_t diag_ref, + const char *text, uint64_t p1, uint64_t p2); + +/* + * Set user diagnostic callback and mask + * If the callback function pointer is NULL the default + * callback handler will be restored. 
+ */ +void lthread_diagnostic_enable(diag_callback cb, uint64_t diag_mask); + +/* + * Set diagnostic mask + */ +void lthread_diagnostic_set_mask(uint64_t mask); + +/* + * lthread diagnostic event identifiers + * + * Each enumerator is the bit position of the event in the diagnostic mask; + * the mask value for each event is given in the comment beside it. + */ +enum lthread_diag_ev { + /* bits 0 - 14 lthread flag group */ + LT_DIAG_LTHREAD_CREATE, /* 00 mask 0x00000001 */ + LT_DIAG_LTHREAD_EXIT, /* 01 mask 0x00000002 */ + LT_DIAG_LTHREAD_JOIN, /* 02 mask 0x00000004 */ + LT_DIAG_LTHREAD_CANCEL, /* 03 mask 0x00000008 */ + LT_DIAG_LTHREAD_DETACH, /* 04 mask 0x00000010 */ + LT_DIAG_LTHREAD_FREE, /* 05 mask 0x00000020 */ + LT_DIAG_LTHREAD_SUSPENDED, /* 06 mask 0x00000040 */ + LT_DIAG_LTHREAD_YIELD, /* 07 mask 0x00000080 */ + LT_DIAG_LTHREAD_RESCHEDULED, /* 08 mask 0x00000100 */ + LT_DIAG_LTHREAD_SLEEP, /* 09 mask 0x00000200 */ + LT_DIAG_LTHREAD_RESUMED, /* 10 mask 0x00000400 */ + LT_DIAG_LTHREAD_AFFINITY, /* 11 mask 0x00000800 */ + LT_DIAG_LTHREAD_TMR_START, /* 12 mask 0x00001000 */ + LT_DIAG_LTHREAD_TMR_DELETE, /* 13 mask 0x00002000 */ + LT_DIAG_LTHREAD_TMR_EXPIRED, /* 14 mask 0x00004000 */ + /* bits 15 - 19 condition variable flag group */ + LT_DIAG_COND_CREATE, /* 15 mask 0x00008000 */ + LT_DIAG_COND_DESTROY, /* 16 mask 0x00010000 */ + LT_DIAG_COND_WAIT, /* 17 mask 0x00020000 */ + LT_DIAG_COND_SIGNAL, /* 18 mask 0x00040000 */ + LT_DIAG_COND_BROADCAST, /* 19 mask 0x00080000 */ + /* bits 20 - 25 mutex flag group */ + LT_DIAG_MUTEX_CREATE, /* 20 mask 0x00100000 */ + LT_DIAG_MUTEX_DESTROY, /* 21 mask 0x00200000 */ + LT_DIAG_MUTEX_LOCK, /* 22 mask 0x00400000 */ + LT_DIAG_MUTEX_TRYLOCK, /* 23 mask 0x00800000 */ + LT_DIAG_MUTEX_BLOCKED, /* 24 mask 0x01000000 */ + LT_DIAG_MUTEX_UNLOCKED, /* 25 mask 0x02000000 */ + /* bits 26 - 27 scheduler flag group */ + LT_DIAG_SCHED_CREATE, /* 26 mask 0x04000000 */ + LT_DIAG_SCHED_SHUTDOWN, /* 27 mask 0x08000000 */ + LT_DIAG_EVENT_MAX +}; + +#define LT_DIAG_ALL 0xffffffffffffffff + + +/* + * Display scheduler stats + */ +void +lthread_sched_stats_display(void); + +/* + * return the 
diagnostic ref val stored in a condition var + */ +uint64_t +lthread_cond_diag_ref(struct lthread_cond *c); + +/* + * return the diagnostic ref val stored in a mutex + */ +uint64_t +lthread_mutex_diag_ref(struct lthread_mutex *m); + +#endif /* LTHREAD_DIAG_API_H_ */ diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h new file mode 100644 index 00000000..c8357f4a --- /dev/null +++ b/examples/performance-thread/common/lthread_int.h @@ -0,0 +1,212 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software may have been derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef LTHREAD_INT_H +#include <lthread_api.h> +#define LTHREAD_INT_H + +#include <stdint.h> +#include <sys/time.h> +#include <sys/types.h> +#include <errno.h> +#include <pthread.h> +#include <time.h> + +#include <rte_cycles.h> +#include <rte_per_lcore.h> +#include <rte_timer.h> +#include <rte_ring.h> +#include <rte_atomic_64.h> +#include <rte_spinlock.h> +#include <ctx.h> + +#include <lthread_api.h> +#include "lthread.h" +#include "lthread_diag.h" +#include "lthread_tls.h" + +struct lthread; +struct lthread_sched; +struct lthread_cond; +struct lthread_mutex; +struct lthread_key; + +struct key_pool; +struct qnode; +struct qnode_pool; +struct lthread_sched; +struct lthread_tls; + + +#define BIT(x) (1 << (x)) +#define CLEARBIT(x) ~(1 << (x)) + +#define POSIX_ERRNO(x) (x) + +#define MAX_LTHREAD_NAME_SIZE 64 + +#define RTE_LOGTYPE_LTHREAD RTE_LOGTYPE_USER1 + + +/* define some shorthand for current scheduler and current thread */ +#define THIS_SCHED RTE_PER_LCORE(this_sched) +#define THIS_LTHREAD RTE_PER_LCORE(this_sched)->current_lthread + +/* + * Definition of an scheduler struct + */ +struct lthread_sched { + struct ctx ctx; /* cpu context */ + uint64_t birth; /* time created */ + struct lthread *current_lthread; /* running thread */ + unsigned lcore_id; /* this sched lcore */ + int run_flag; /* sched shutdown */ + uint64_t nb_blocked_threads; /* blocked threads */ + struct lthread_queue *ready; /* local ready queue */ + struct lthread_queue 
*pready; /* peer ready queue */ + struct lthread_objcache *lthread_cache; /* free lthreads */ + struct lthread_objcache *stack_cache; /* free stacks */ + struct lthread_objcache *per_lthread_cache; /* free per lthread */ + struct lthread_objcache *tls_cache; /* free TLS */ + struct lthread_objcache *cond_cache; /* free cond vars */ + struct lthread_objcache *mutex_cache; /* free mutexes */ + struct qnode_pool *qnode_pool; /* pool of queue nodes */ + struct key_pool *key_pool; /* pool of free TLS keys */ + size_t stack_size; + uint64_t diag_ref; /* diag ref */ +} __rte_cache_aligned; + +RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched); + + +/* + * State for an lthread + */ +enum lthread_st { + ST_LT_INIT, /* initial state */ + ST_LT_READY, /* lthread is ready to run */ + ST_LT_SLEEPING, /* lthread is sleeping */ + ST_LT_EXPIRED, /* lthread timeout has expired */ + ST_LT_EXITED, /* lthread has exited and needs cleanup */ + ST_LT_DETACH, /* lthread frees on exit*/ + ST_LT_CANCELLED, /* lthread has been cancelled */ +}; + +/* + * lthread sub states for exit/join + */ +enum join_st { + LT_JOIN_INITIAL, /* initial state */ + LT_JOIN_EXITING, /* thread is exiting */ + LT_JOIN_THREAD_SET, /* joining thread has been set */ + LT_JOIN_EXIT_VAL_SET, /* exiting thread has set ret val */ + LT_JOIN_EXIT_VAL_READ, /* joining thread has collected ret val */ +}; + +/* defnition of an lthread stack object */ +struct lthread_stack { + uint8_t stack[LTHREAD_MAX_STACK_SIZE]; + size_t stack_size; + struct lthread_sched *root_sched; +} __rte_cache_aligned; + +/* + * Definition of an lthread + */ +struct lthread { + struct ctx ctx; /* cpu context */ + + uint64_t state; /* current lthread state */ + + struct lthread_sched *sched; /* current scheduler */ + void *stack; /* ptr to actual stack */ + size_t stack_size; /* current stack_size */ + size_t last_stack_size; /* last yield stack_size */ + lthread_func_t fun; /* func ctx is running */ + void *arg; /* func args passed to func */ 
+ void *per_lthread_data; /* per lthread user data */ + lthread_exit_func exit_handler; /* called when thread exits */ + uint64_t birth; /* time lthread was born */ + struct lthread_queue *pending_wr_queue; /* deferred queue to write */ + struct lthread *lt_join; /* lthread to join on */ + uint64_t join; /* state for joining */ + void **lt_exit_ptr; /* exit ptr for lthread_join */ + struct lthread_sched *root_sched; /* thread was created here*/ + struct queue_node *qnode; /* node when in a queue */ + struct rte_timer tim; /* sleep timer */ + struct lthread_tls *tls; /* keys in use by the thread */ + struct lthread_stack *stack_container; /* stack */ + char funcname[MAX_LTHREAD_NAME_SIZE]; /* thread func name */ + uint64_t diag_ref; /* ref to user diag data */ +} __rte_cache_aligned; + +/* + * Assert + */ +#if LTHREAD_DIAG +#define LTHREAD_ASSERT(expr) do { \ + if (!(expr)) \ + rte_panic("line%d\tassert \"" #expr "\" failed\n", __LINE__);\ +} while (0) +#else +#define LTHREAD_ASSERT(expr) do {} while (0) +#endif + +#endif /* LTHREAD_INT_H */ diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c new file mode 100644 index 00000000..af8b82d2 --- /dev/null +++ b/examples/performance-thread/common/lthread_mutex.c @@ -0,0 +1,254 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stddef.h> +#include <limits.h> +#include <inttypes.h> +#include <unistd.h> +#include <pthread.h> +#include <fcntl.h> +#include <sys/time.h> +#include <sys/mman.h> + +#include <rte_per_lcore.h> +#include <rte_log.h> +#include <rte_spinlock.h> +#include <rte_common.h> + +#include "lthread_api.h" +#include "lthread_int.h" +#include "lthread_mutex.h" +#include "lthread_sched.h" +#include "lthread_queue.h" +#include "lthread_objcache.h" +#include "lthread_diag.h" + +/* + * Create a mutex + */ +int +lthread_mutex_init(char *name, struct lthread_mutex **mutex, + __rte_unused const struct lthread_mutexattr *attr) +{ + struct lthread_mutex *m; + + if (mutex == NULL) + return POSIX_ERRNO(EINVAL); + + + m = _lthread_objcache_alloc((THIS_SCHED)->mutex_cache); + if (m == NULL) + return POSIX_ERRNO(EAGAIN); + + m->blocked = _lthread_queue_create("blocked queue"); + if (m->blocked == NULL) { + _lthread_objcache_free((THIS_SCHED)->mutex_cache, m); + return POSIX_ERRNO(EAGAIN); + } + + if (name == NULL) + strncpy(m->name, "no name", sizeof(m->name)); + else + strncpy(m->name, name, sizeof(m->name)); + m->name[sizeof(m->name)-1] = 0; + + m->root_sched = THIS_SCHED; + m->owner = NULL; + + rte_atomic64_init(&m->count); + + DIAG_CREATE_EVENT(m, LT_DIAG_MUTEX_CREATE); + /* success */ + (*mutex) = m; + return 0; +} + +/* + * Destroy a mutex + */ +int lthread_mutex_destroy(struct lthread_mutex *m) +{ + if ((m == NULL) || (m->blocked == NULL)) { + DIAG_EVENT(m, LT_DIAG_MUTEX_DESTROY, m, POSIX_ERRNO(EINVAL)); + return POSIX_ERRNO(EINVAL); + } + + if (m->owner == NULL) { + /* try to delete the blocked queue */ + if (_lthread_queue_destroy(m->blocked) < 0) { + DIAG_EVENT(m, LT_DIAG_MUTEX_DESTROY, + m, POSIX_ERRNO(EBUSY)); + return POSIX_ERRNO(EBUSY); + } + + /* free the mutex to cache */ + _lthread_objcache_free(m->root_sched->mutex_cache, m); + DIAG_EVENT(m, LT_DIAG_MUTEX_DESTROY, m, 0); + 
return 0; + } + /* can't do its still in use */ + DIAG_EVENT(m, LT_DIAG_MUTEX_DESTROY, m, POSIX_ERRNO(EBUSY)); + return POSIX_ERRNO(EBUSY); +} + +/* + * Try to obtain a mutex + */ +int lthread_mutex_lock(struct lthread_mutex *m) +{ + struct lthread *lt = THIS_LTHREAD; + + if ((m == NULL) || (m->blocked == NULL)) { + DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, POSIX_ERRNO(EINVAL)); + return POSIX_ERRNO(EINVAL); + } + + /* allow no recursion */ + if (m->owner == lt) { + DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, POSIX_ERRNO(EDEADLK)); + return POSIX_ERRNO(EDEADLK); + } + + for (;;) { + rte_atomic64_inc(&m->count); + do { + if (rte_atomic64_cmpset + ((uint64_t *) &m->owner, 0, (uint64_t) lt)) { + /* happy days, we got the lock */ + DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, 0); + return 0; + } + /* spin due to race with unlock when + * nothing was blocked + */ + } while ((rte_atomic64_read(&m->count) == 1) && + (m->owner == NULL)); + + /* queue the current thread in the blocked queue + * we defer this to after we return to the scheduler + * to ensure that the current thread context is saved + * before unlock could result in it being dequeued and + * resumed + */ + DIAG_EVENT(m, LT_DIAG_MUTEX_BLOCKED, m, lt); + lt->pending_wr_queue = m->blocked; + /* now relinquish cpu */ + _suspend(); + /* resumed, must loop and compete for the lock again */ + } + LTHREAD_ASSERT(0); + return 0; +} + +/* try to lock a mutex but dont block */ +int lthread_mutex_trylock(struct lthread_mutex *m) +{ + struct lthread *lt = THIS_LTHREAD; + + if ((m == NULL) || (m->blocked == NULL)) { + DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EINVAL)); + return POSIX_ERRNO(EINVAL); + } + + if (m->owner == lt) { + /* no recursion */ + DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EDEADLK)); + return POSIX_ERRNO(EDEADLK); + } + + rte_atomic64_inc(&m->count); + if (rte_atomic64_cmpset + ((uint64_t *) &m->owner, (uint64_t) NULL, (uint64_t) lt)) { + /* got the lock */ + DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, 0); + 
return 0; + } + + /* failed so return busy */ + rte_atomic64_dec(&m->count); + DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EBUSY)); + return POSIX_ERRNO(EBUSY); +} + +/* + * Unlock a mutex + */ +int lthread_mutex_unlock(struct lthread_mutex *m) +{ + struct lthread *lt = THIS_LTHREAD; + struct lthread *unblocked; + + if ((m == NULL) || (m->blocked == NULL)) { + DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, POSIX_ERRNO(EINVAL)); + return POSIX_ERRNO(EINVAL); + } + + /* fail if its owned */ + if (m->owner != lt || m->owner == NULL) { + DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, POSIX_ERRNO(EPERM)); + return POSIX_ERRNO(EPERM); + } + + rte_atomic64_dec(&m->count); + /* if there are blocked threads then make one ready */ + while (rte_atomic64_read(&m->count) > 0) { + unblocked = _lthread_queue_remove(m->blocked); + + if (unblocked != NULL) { + rte_atomic64_dec(&m->count); + DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, unblocked); + LTHREAD_ASSERT(unblocked->sched != NULL); + _ready_queue_insert((struct lthread_sched *) + unblocked->sched, unblocked); + break; + } + } + /* release the lock */ + m->owner = NULL; + return 0; +} + +/* + * return the diagnostic ref val stored in a mutex + */ +uint64_t +lthread_mutex_diag_ref(struct lthread_mutex *m) +{ + if (m == NULL) + return 0; + return m->diag_ref; +} diff --git a/examples/performance-thread/common/lthread_mutex.h b/examples/performance-thread/common/lthread_mutex.h new file mode 100644 index 00000000..4d30b2e7 --- /dev/null +++ b/examples/performance-thread/common/lthread_mutex.h @@ -0,0 +1,52 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef LTHREAD_MUTEX_H_ +#define LTHREAD_MUTEX_H_ + +#include "lthread_queue.h" + + +#define MAX_MUTEX_NAME_SIZE 64 + +/* + * Definition of an lthread mutex + */ +struct lthread_mutex { + struct lthread *owner; /* lthread holding the lock, NULL when unlocked */ + rte_atomic64_t count; /* incremented by each lock attempt, decremented on unlock and when a waiter is dequeued */ + struct lthread_queue *blocked __rte_cache_aligned; /* lthreads suspended waiting for the lock */ + struct lthread_sched *root_sched; /* scheduler the mutex was created on; its mutex_cache receives the mutex on destroy */ + char name[MAX_MUTEX_NAME_SIZE]; /* mutex name, "no name" if none was given */ + uint64_t diag_ref; /* optional ref to user diag data */ +} __rte_cache_aligned; + +#endif /* LTHREAD_MUTEX_H_ */ diff --git a/examples/performance-thread/common/lthread_objcache.h b/examples/performance-thread/common/lthread_objcache.h new file mode 100644 index 00000000..d7e35825 --- /dev/null +++ b/examples/performance-thread/common/lthread_objcache.h @@ -0,0 +1,158 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. 
All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef LTHREAD_OBJCACHE_H_ +#define LTHREAD_OBJCACHE_H_ + +#include <string.h> + +#include <rte_per_lcore.h> +#include <rte_malloc.h> +#include <rte_memory.h> + +#include "lthread_int.h" +#include "lthread_diag.h" +#include "lthread_queue.h" + + +RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched); + +struct lthread_objcache { + struct lthread_queue *q; + size_t obj_size; + int prealloc_size; + char name[LT_MAX_NAME_SIZE]; + + DIAG_COUNT_DEFINE(rd); + DIAG_COUNT_DEFINE(wr); + DIAG_COUNT_DEFINE(prealloc); + DIAG_COUNT_DEFINE(capacity); + DIAG_COUNT_DEFINE(available); +}; + +/* + * Create a cache + */ +static inline struct +lthread_objcache *_lthread_objcache_create(const char *name, + size_t obj_size, + int prealloc_size) +{ + struct lthread_objcache *c = + rte_malloc_socket(NULL, sizeof(struct lthread_objcache), + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + if (c == NULL) + return NULL; + + c->q = _lthread_queue_create("cache queue"); + if (c->q == NULL) { + rte_free(c); + return NULL; + } + c->obj_size = obj_size; + c->prealloc_size = prealloc_size; + + if (name != NULL) + strncpy(c->name, name, LT_MAX_NAME_SIZE); + c->name[sizeof(c->name)-1] = 0; + + DIAG_COUNT_INIT(c, rd); + DIAG_COUNT_INIT(c, wr); + DIAG_COUNT_INIT(c, prealloc); + DIAG_COUNT_INIT(c, capacity); + DIAG_COUNT_INIT(c, available); + return c; +} + +/* + * Destroy an objcache + */ +static inline int +_lthread_objcache_destroy(struct lthread_objcache *c) +{ + if (_lthread_queue_destroy(c->q) == 0) { + rte_free(c); + return 0; + } + return -1; +} + +/* + * Allocate an object from an object cache + */ +static inline void * +_lthread_objcache_alloc(struct lthread_objcache *c) +{ + int i; + void *data; + struct lthread_queue *q = c->q; + size_t obj_size = c->obj_size; + int prealloc_size = c->prealloc_size; + + data = _lthread_queue_remove(q); + + if (data == NULL) { + DIAG_COUNT_INC(c, prealloc); + for (i = 0; i < prealloc_size; i++) { + data = + rte_zmalloc_socket(NULL, obj_size, + 
RTE_CACHE_LINE_SIZE, + rte_socket_id()); + if (data == NULL) + return NULL; + + DIAG_COUNT_INC(c, available); + DIAG_COUNT_INC(c, capacity); + _lthread_queue_insert_mp(q, data); + } + data = _lthread_queue_remove(q); + } + DIAG_COUNT_INC(c, rd); + DIAG_COUNT_DEC(c, available); + return data; +} + +/* + * free an object to a cache + */ +static inline void +_lthread_objcache_free(struct lthread_objcache *c, void *obj) +{ + DIAG_COUNT_INC(c, wr); + DIAG_COUNT_INC(c, available); + _lthread_queue_insert_mp(c->q, obj); +} + + + +#endif /* LTHREAD_OBJCACHE_H_ */ diff --git a/examples/performance-thread/common/lthread_pool.h b/examples/performance-thread/common/lthread_pool.h new file mode 100644 index 00000000..a5f32515 --- /dev/null +++ b/examples/performance-thread/common/lthread_pool.h @@ -0,0 +1,332 @@ +/* + *- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software is derived from the producer + * consumer queues described by Dmitry Vyukov and published here + * http://www.1024cores.net + * + * Copyright (c) 2010-2011 Dmitry Vyukov. All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY DMITRY VYUKOV "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL DMITRY VYUKOV OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of Dmitry Vyukov. + */ + +#ifndef LTHREAD_POOL_H_ +#define LTHREAD_POOL_H_ + +#include <rte_malloc.h> +#include <rte_per_lcore.h> +#include <rte_log.h> + +#include "lthread_int.h" +#include "lthread_diag.h" + +/* + * This file implements pool of queue nodes used by the queue implemented + * in lthread_queue.h. + * + * The pool is an intrusive lock free MPSC queue. + * + * The pool is created empty and populated lazily, i.e. on first attempt to + * allocate a the pool. + * + * Whenever the pool is empty more nodes are added to the pool + * The number of nodes preallocated in this way is a parameter of + * _qnode_pool_create. Freeing an object returns it to the pool. + * + * Each lthread scheduler maintains its own pool of nodes. L-threads must always + * allocate from this local pool ( because it is a single consumer queue ). + * L-threads can free nodes to any pool (because it is a multi producer queue) + * This enables threads that have affined to a different scheduler to free + * nodes safely. 
+ */ + +struct qnode; +struct qnode_cache; + +/* + * define intermediate node + */ +struct qnode { + struct qnode *next; + void *data; + struct qnode_pool *pool; +} __rte_cache_aligned; + +/* + * a pool structure + */ +struct qnode_pool { + struct qnode *head; + struct qnode *stub; + struct qnode *fast_alloc; + struct qnode *tail __rte_cache_aligned; + int pre_alloc; + char name[LT_MAX_NAME_SIZE]; + + DIAG_COUNT_DEFINE(rd); + DIAG_COUNT_DEFINE(wr); + DIAG_COUNT_DEFINE(available); + DIAG_COUNT_DEFINE(prealloc); + DIAG_COUNT_DEFINE(capacity); +} __rte_cache_aligned; + +/* + * Create a pool of qnodes + */ + +static inline struct qnode_pool * +_qnode_pool_create(const char *name, int prealloc_size) { + + struct qnode_pool *p = rte_malloc_socket(NULL, + sizeof(struct qnode_pool), + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + + LTHREAD_ASSERT(p); + + p->stub = rte_malloc_socket(NULL, + sizeof(struct qnode), + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + + LTHREAD_ASSERT(p->stub); + + if (name != NULL) + strncpy(p->name, name, LT_MAX_NAME_SIZE); + p->name[sizeof(p->name)-1] = 0; + + p->stub->pool = p; + p->stub->next = NULL; + p->tail = p->stub; + p->head = p->stub; + p->pre_alloc = prealloc_size; + + DIAG_COUNT_INIT(p, rd); + DIAG_COUNT_INIT(p, wr); + DIAG_COUNT_INIT(p, available); + DIAG_COUNT_INIT(p, prealloc); + DIAG_COUNT_INIT(p, capacity); + + return p; +} + + +/* + * Insert a node into the pool + */ +static inline void __attribute__ ((always_inline)) +_qnode_pool_insert(struct qnode_pool *p, struct qnode *n) +{ + n->next = NULL; + struct qnode *prev = n; + /* We insert at the head */ + prev = (struct qnode *) __sync_lock_test_and_set((uint64_t *)&p->head, + (uint64_t) prev); + /* there is a window of inconsistency until prev next is set */ + /* which is why remove must retry */ + prev->next = (n); +} + +/* + * Remove a node from the pool + * + * There is a race with _qnode_pool_insert() whereby the queue could appear + * empty during a concurrent insert, this is 
handled by retrying + * + * The queue uses a stub node, which must be swung as the queue becomes + * empty, this requires an insert of the stub, which means that removing the + * last item from the queue incurs the penalty of an atomic exchange. Since the + * pool is maintained with a bulk pre-allocation the cost of this is amortised. + */ +static inline struct qnode *__attribute__ ((always_inline)) +_pool_remove(struct qnode_pool *p) +{ + struct qnode *head; + struct qnode *tail = p->tail; + struct qnode *next = tail->next; + + /* we remove from the tail */ + if (tail == p->stub) { + if (next == NULL) + return NULL; + /* advance the tail */ + p->tail = next; + tail = next; + next = next->next; + } + if (likely(next != NULL)) { + p->tail = next; + return tail; + } + + head = p->head; + if (tail == head) + return NULL; + + /* swing stub node */ + _qnode_pool_insert(p, p->stub); + + next = tail->next; + if (next) { + p->tail = next; + return tail; + } + return NULL; +} + + +/* + * This adds a retry to the _pool_remove function + * defined above + */ +static inline struct qnode *__attribute__ ((always_inline)) +_qnode_pool_remove(struct qnode_pool *p) +{ + struct qnode *n; + + do { + n = _pool_remove(p); + if (likely(n != NULL)) + return n; + + rte_compiler_barrier(); + } while ((p->head != p->tail) && + (p->tail != p->stub)); + return NULL; +} + +/* + * Allocate a node from the pool + * If the pool is empty add mode nodes + */ +static inline struct qnode *__attribute__ ((always_inline)) +_qnode_alloc(void) +{ + struct qnode_pool *p = (THIS_SCHED)->qnode_pool; + int prealloc_size = p->pre_alloc; + struct qnode *n; + int i; + + if (likely(p->fast_alloc != NULL)) { + n = p->fast_alloc; + p->fast_alloc = NULL; + return n; + } + + n = _qnode_pool_remove(p); + + if (unlikely(n == NULL)) { + DIAG_COUNT_INC(p, prealloc); + for (i = 0; i < prealloc_size; i++) { + n = rte_malloc_socket(NULL, + sizeof(struct qnode), + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + if (n == NULL) + 
return NULL; + + DIAG_COUNT_INC(p, available); + DIAG_COUNT_INC(p, capacity); + + n->pool = p; + _qnode_pool_insert(p, n); + } + n = _qnode_pool_remove(p); + } + n->pool = p; + DIAG_COUNT_INC(p, rd); + DIAG_COUNT_DEC(p, available); + return n; +} + + + +/* +* free a queue node to the per scheduler pool from which it came +*/ +static inline void __attribute__ ((always_inline)) +_qnode_free(struct qnode *n) +{ + struct qnode_pool *p = n->pool; + + + if (unlikely(p->fast_alloc != NULL) || + unlikely(n->pool != (THIS_SCHED)->qnode_pool)) { + DIAG_COUNT_INC(p, wr); + DIAG_COUNT_INC(p, available); + _qnode_pool_insert(p, n); + return; + } + p->fast_alloc = n; +} + +/* + * Destroy an qnode pool + * queue must be empty when this is called + */ +static inline int +_qnode_pool_destroy(struct qnode_pool *p) +{ + rte_free(p->stub); + rte_free(p); + return 0; +} + + +#endif /* LTHREAD_POOL_H_ */ diff --git a/examples/performance-thread/common/lthread_queue.h b/examples/performance-thread/common/lthread_queue.h new file mode 100644 index 00000000..0c395167 --- /dev/null +++ b/examples/performance-thread/common/lthread_queue.h @@ -0,0 +1,302 @@ +/* + *- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software is derived from the producer + * consumer queues described by Dmitry Vyukov and published here + * http://www.1024cores.net + * + * Copyright (c) 2010-2011 Dmitry Vyukov. All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY DMITRY VYUKOV "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL DMITRY VYUKOV OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of Dmitry Vyukov. + */ + +#ifndef LTHREAD_QUEUE_H_ +#define LTHREAD_QUEUE_H_ + +#include <string.h> + +#include <rte_prefetch.h> +#include <rte_per_lcore.h> + +#include "lthread_int.h" +#include "lthread.h" +#include "lthread_diag.h" +#include "lthread_pool.h" + +struct lthread_queue; + +/* + * This file implements an unbounded FIFO queue based on a lock free + * linked list. + * + * The queue is non-intrusive in that it uses intermediate nodes, and does + * not require these nodes to be inserted into the object being placed + * in the queue. + * + * This is slightly more efficient than the very similar queue in lthread_pool + * in that it does not have to swing a stub node as the queue becomes empty. + * + * The queue access functions allocate and free intermediate node + * transparently from/to a per scheduler pool ( see lthread_pool.h ). 
+ * + * The queue provides both MPSC and SPSC insert methods + */ + +/* + * define a queue of lthread nodes + */ +struct lthread_queue { + struct qnode *head; + struct qnode *tail __rte_cache_aligned; + struct lthread_queue *p; + char name[LT_MAX_NAME_SIZE]; + + DIAG_COUNT_DEFINE(rd); + DIAG_COUNT_DEFINE(wr); + DIAG_COUNT_DEFINE(size); + +} __rte_cache_aligned; + + + +static inline struct lthread_queue * +_lthread_queue_create(const char *name) +{ + struct qnode *stub; + struct lthread_queue *new_queue; + + new_queue = rte_malloc_socket(NULL, sizeof(struct lthread_queue), + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + if (new_queue == NULL) + return NULL; + + /* allocated stub node */ + stub = _qnode_alloc(); + LTHREAD_ASSERT(stub); + + if (name != NULL) + strncpy(new_queue->name, name, sizeof(new_queue->name)); + new_queue->name[sizeof(new_queue->name)-1] = 0; + + /* initialize queue as empty */ + stub->next = NULL; + new_queue->head = stub; + new_queue->tail = stub; + + DIAG_COUNT_INIT(new_queue, rd); + DIAG_COUNT_INIT(new_queue, wr); + DIAG_COUNT_INIT(new_queue, size); + + return new_queue; +} + +/** + * Return true if the queue is empty + */ +static inline int __attribute__ ((always_inline)) +_lthread_queue_empty(struct lthread_queue *q) +{ + return q->tail == q->head; +} + + + +/** + * Destroy a queue + * fail if queue is not empty + */ +static inline int _lthread_queue_destroy(struct lthread_queue *q) +{ + if (q == NULL) + return -1; + + if (!_lthread_queue_empty(q)) + return -1; + + _qnode_free(q->head); + rte_free(q); + return 0; +} + +RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched); + +/* + * Insert a node into a queue + * this implementation is multi producer safe + */ +static inline struct qnode *__attribute__ ((always_inline)) +_lthread_queue_insert_mp(struct lthread_queue + *q, void *data) +{ + struct qnode *prev; + struct qnode *n = _qnode_alloc(); + + if (n == NULL) + return NULL; + + /* set object in node */ + n->data = data; + n->next = 
NULL; + + /* this is an MPSC method, perform a locked update */ + prev = n; + prev = + (struct qnode *)__sync_lock_test_and_set((uint64_t *) &(q)->head, + (uint64_t) prev); + /* there is a window of inconsistency until prev next is set, + * which is why remove must retry + */ + prev->next = n; + + DIAG_COUNT_INC(q, wr); + DIAG_COUNT_INC(q, size); + + return n; +} + +/* + * Insert an node into a queue in single producer mode + * this implementation is NOT mult producer safe + */ +static inline struct qnode *__attribute__ ((always_inline)) +_lthread_queue_insert_sp(struct lthread_queue + *q, void *data) +{ + /* allocate a queue node */ + struct qnode *prev; + struct qnode *n = _qnode_alloc(); + + if (n == NULL) + return NULL; + + /* set data in node */ + n->data = data; + n->next = NULL; + + /* this is an SPSC method, no need for locked exchange operation */ + prev = q->head; + prev->next = q->head = n; + + DIAG_COUNT_INC(q, wr); + DIAG_COUNT_INC(q, size); + + return n; +} + +/* + * Remove a node from a queue + */ +static inline void *__attribute__ ((always_inline)) +_lthread_queue_poll(struct lthread_queue *q) +{ + void *data = NULL; + struct qnode *tail = q->tail; + struct qnode *next = (struct qnode *)tail->next; + /* + * There is a small window of inconsistency between producer and + * consumer whereby the queue may appear empty if consumer and + * producer access it at the same time. 
+ * The consumer must handle this by retrying + */ + + if (likely(next != NULL)) { + q->tail = next; + tail->data = next->data; + data = tail->data; + + /* free the node */ + _qnode_free(tail); + + DIAG_COUNT_INC(q, rd); + DIAG_COUNT_DEC(q, size); + return data; + } + return NULL; +} + +/* + * Remove a node from a queue + */ +static inline void *__attribute__ ((always_inline)) +_lthread_queue_remove(struct lthread_queue *q) +{ + void *data = NULL; + + /* + * There is a small window of inconsistency between producer and + * consumer whereby the queue may appear empty if consumer and + * producer access it at the same time. We handle this by retrying + */ + do { + data = _lthread_queue_poll(q); + + if (likely(data != NULL)) { + + DIAG_COUNT_INC(q, rd); + DIAG_COUNT_DEC(q, size); + return data; + } + rte_compiler_barrier(); + } while (unlikely(!_lthread_queue_empty(q))); + return NULL; +} + + +#endif /* LTHREAD_QUEUE_H_ */ diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c new file mode 100644 index 00000000..7c40bc05 --- /dev/null +++ b/examples/performance-thread/common/lthread_sched.c @@ -0,0 +1,599 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software is derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#define RTE_MEM 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stddef.h> +#include <limits.h> +#include <inttypes.h> +#include <unistd.h> +#include <pthread.h> +#include <fcntl.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <sched.h> + +#include <rte_prefetch.h> +#include <rte_per_lcore.h> +#include <rte_atomic.h> +#include <rte_atomic_64.h> +#include <rte_log.h> +#include <rte_common.h> +#include <rte_branch_prediction.h> + +#include "lthread_api.h" +#include "lthread_int.h" +#include "lthread_sched.h" +#include "lthread_objcache.h" +#include "lthread_timer.h" +#include "lthread_mutex.h" +#include "lthread_cond.h" +#include "lthread_tls.h" +#include "lthread_diag.h" + +/* + * This file implements the lthread scheduler + * The scheduler is the function lthread_run() + * This must be run as the main loop of an EAL thread. 
+ * + * Currently once a scheduler is created it cannot be destroyed + * When a scheduler shuts down it is assumed that the application is terminating + */ + +static rte_atomic16_t num_schedulers; +static rte_atomic16_t active_schedulers; + +/* one scheduler per lcore */ +RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL; + +struct lthread_sched *schedcore[LTHREAD_MAX_LCORES]; + +diag_callback diag_cb; + +uint64_t diag_mask; + + +/* constructor */ +void lthread_sched_ctor(void) __attribute__ ((constructor)); +void lthread_sched_ctor(void) +{ + memset(schedcore, 0, sizeof(schedcore)); + rte_atomic16_init(&num_schedulers); + rte_atomic16_set(&num_schedulers, 1); + rte_atomic16_init(&active_schedulers); + rte_atomic16_set(&active_schedulers, 0); + diag_cb = NULL; +} + + +enum sched_alloc_phase { + SCHED_ALLOC_OK, + SCHED_ALLOC_QNODE_POOL, + SCHED_ALLOC_READY_QUEUE, + SCHED_ALLOC_PREADY_QUEUE, + SCHED_ALLOC_LTHREAD_CACHE, + SCHED_ALLOC_STACK_CACHE, + SCHED_ALLOC_PERLT_CACHE, + SCHED_ALLOC_TLS_CACHE, + SCHED_ALLOC_COND_CACHE, + SCHED_ALLOC_MUTEX_CACHE, +}; + +static int +_lthread_sched_alloc_resources(struct lthread_sched *new_sched) +{ + int alloc_status; + + do { + /* Initialize per scheduler queue node pool */ + alloc_status = SCHED_ALLOC_QNODE_POOL; + new_sched->qnode_pool = + _qnode_pool_create("qnode pool", LTHREAD_PREALLOC); + if (new_sched->qnode_pool == NULL) + break; + + /* Initialize per scheduler local ready queue */ + alloc_status = SCHED_ALLOC_READY_QUEUE; + new_sched->ready = _lthread_queue_create("ready queue"); + if (new_sched->ready == NULL) + break; + + /* Initialize per scheduler local peer ready queue */ + alloc_status = SCHED_ALLOC_PREADY_QUEUE; + new_sched->pready = _lthread_queue_create("pready queue"); + if (new_sched->pready == NULL) + break; + + /* Initialize per scheduler local free lthread cache */ + alloc_status = SCHED_ALLOC_LTHREAD_CACHE; + new_sched->lthread_cache = + _lthread_objcache_create("lthread cache", + 
sizeof(struct lthread), + LTHREAD_PREALLOC); + if (new_sched->lthread_cache == NULL) + break; + + /* Initialize per scheduler local free stack cache */ + alloc_status = SCHED_ALLOC_STACK_CACHE; + new_sched->stack_cache = + _lthread_objcache_create("stack_cache", + sizeof(struct lthread_stack), + LTHREAD_PREALLOC); + if (new_sched->stack_cache == NULL) + break; + + /* Initialize per scheduler local free per lthread data cache */ + alloc_status = SCHED_ALLOC_PERLT_CACHE; + new_sched->per_lthread_cache = + _lthread_objcache_create("per_lt cache", + RTE_PER_LTHREAD_SECTION_SIZE, + LTHREAD_PREALLOC); + if (new_sched->per_lthread_cache == NULL) + break; + + /* Initialize per scheduler local free tls cache */ + alloc_status = SCHED_ALLOC_TLS_CACHE; + new_sched->tls_cache = + _lthread_objcache_create("TLS cache", + sizeof(struct lthread_tls), + LTHREAD_PREALLOC); + if (new_sched->tls_cache == NULL) + break; + + /* Initialize per scheduler local free cond var cache */ + alloc_status = SCHED_ALLOC_COND_CACHE; + new_sched->cond_cache = + _lthread_objcache_create("cond cache", + sizeof(struct lthread_cond), + LTHREAD_PREALLOC); + if (new_sched->cond_cache == NULL) + break; + + /* Initialize per scheduler local free mutex cache */ + alloc_status = SCHED_ALLOC_MUTEX_CACHE; + new_sched->mutex_cache = + _lthread_objcache_create("mutex cache", + sizeof(struct lthread_mutex), + LTHREAD_PREALLOC); + if (new_sched->mutex_cache == NULL) + break; + + alloc_status = SCHED_ALLOC_OK; + } while (0); + + /* roll back on any failure */ + switch (alloc_status) { + case SCHED_ALLOC_MUTEX_CACHE: + _lthread_objcache_destroy(new_sched->cond_cache); + /* fall through */ + case SCHED_ALLOC_COND_CACHE: + _lthread_objcache_destroy(new_sched->tls_cache); + /* fall through */ + case SCHED_ALLOC_TLS_CACHE: + _lthread_objcache_destroy(new_sched->per_lthread_cache); + /* fall through */ + case SCHED_ALLOC_PERLT_CACHE: + _lthread_objcache_destroy(new_sched->stack_cache); + /* fall through */ + case 
SCHED_ALLOC_STACK_CACHE: + _lthread_objcache_destroy(new_sched->lthread_cache); + /* fall through */ + case SCHED_ALLOC_LTHREAD_CACHE: + _lthread_queue_destroy(new_sched->pready); + /* fall through */ + case SCHED_ALLOC_PREADY_QUEUE: + _lthread_queue_destroy(new_sched->ready); + /* fall through */ + case SCHED_ALLOC_READY_QUEUE: + _qnode_pool_destroy(new_sched->qnode_pool); + /* fall through */ + case SCHED_ALLOC_QNODE_POOL: + /* fall through */ + case SCHED_ALLOC_OK: + break; + } + return alloc_status; +} + + +/* + * Create a scheduler on the current lcore + */ +struct lthread_sched *_lthread_sched_create(size_t stack_size) +{ + int status; + struct lthread_sched *new_sched; + unsigned lcoreid = rte_lcore_id(); + + LTHREAD_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE); + + if (stack_size == 0) + stack_size = LTHREAD_MAX_STACK_SIZE; + + new_sched = + rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched), + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + if (new_sched == NULL) { + RTE_LOG(CRIT, LTHREAD, + "Failed to allocate memory for scheduler\n"); + return NULL; + } + + _lthread_key_pool_init(); + + new_sched->stack_size = stack_size; + new_sched->birth = rte_rdtsc(); + THIS_SCHED = new_sched; + + status = _lthread_sched_alloc_resources(new_sched); + if (status != SCHED_ALLOC_OK) { + RTE_LOG(CRIT, LTHREAD, + "Failed to allocate resources for scheduler code = %d\n", + status); + rte_free(new_sched); + return NULL; + } + + bzero(&new_sched->ctx, sizeof(struct ctx)); + + new_sched->lcore_id = lcoreid; + + schedcore[lcoreid] = new_sched; + + new_sched->run_flag = 1; + + DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0); + + rte_wmb(); + return new_sched; +} + +/* + * Set the number of schedulers in the system + */ +int lthread_num_schedulers_set(int num) +{ + rte_atomic16_set(&num_schedulers, num); + return (int)rte_atomic16_read(&num_schedulers); +} + +/* + * Return the number of schedulers active + */ +int lthread_active_schedulers(void) +{ + return 
(int)rte_atomic16_read(&active_schedulers); +} + + +/** + * shutdown the scheduler running on the specified lcore + */ +void lthread_scheduler_shutdown(unsigned lcoreid) +{ + uint64_t coreid = (uint64_t) lcoreid; + + if (coreid < LTHREAD_MAX_LCORES) { + if (schedcore[coreid] != NULL) + schedcore[coreid]->run_flag = 0; + } +} + +/** + * shutdown all schedulers + */ +void lthread_scheduler_shutdown_all(void) +{ + uint64_t i; + + /* + * give time for all schedulers to have started + * Note we use sched_yield() rather than pthread_yield() to allow + * for the possibility of a pthread wrapper on lthread_yield(), + * something that is not possible unless the scheduler is running. + */ + while (rte_atomic16_read(&active_schedulers) < + rte_atomic16_read(&num_schedulers)) + sched_yield(); + + for (i = 0; i < LTHREAD_MAX_LCORES; i++) { + if (schedcore[i] != NULL) + schedcore[i]->run_flag = 0; + } +} + +/* + * Resume a suspended lthread + */ +static inline void +_lthread_resume(struct lthread *lt) __attribute__ ((always_inline)); +static inline void _lthread_resume(struct lthread *lt) +{ + struct lthread_sched *sched = THIS_SCHED; + struct lthread_stack *s; + uint64_t state = lt->state; +#if LTHREAD_DIAG + int init = 0; +#endif + + sched->current_lthread = lt; + + if (state & (BIT(ST_LT_CANCELLED) | BIT(ST_LT_EXITED))) { + /* if detached we can free the thread now */ + if (state & BIT(ST_LT_DETACH)) { + _lthread_free(lt); + sched->current_lthread = NULL; + return; + } + } + + if (state & BIT(ST_LT_INIT)) { + /* first time this thread has been run */ + /* assign thread to this scheduler */ + lt->sched = THIS_SCHED; + + /* allocate stack */ + s = _stack_alloc(); + + lt->stack_container = s; + _lthread_set_stack(lt, s->stack, s->stack_size); + + /* allocate memory for TLS used by this thread */ + _lthread_tls_alloc(lt); + + lt->state = BIT(ST_LT_READY); +#if LTHREAD_DIAG + init = 1; +#endif + } + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESUMED, init, lt); + + /* switch to the new 
thread */ + ctx_switch(<->ctx, &sched->ctx); + + /* If posting to a queue that could be read by another lcore + * we defer the queue write till now to ensure the context has been + * saved before the other core tries to resume it + * This applies to blocking on mutex, cond, and to set_affinity + */ + if (lt->pending_wr_queue != NULL) { + struct lthread_queue *dest = lt->pending_wr_queue; + + lt->pending_wr_queue = NULL; + + /* queue the current thread to the specified queue */ + _lthread_queue_insert_mp(dest, lt); + } + + sched->current_lthread = NULL; +} + +/* + * Handle sleep timer expiry +*/ +void +_sched_timer_cb(struct rte_timer *tim, void *arg) +{ + struct lthread *lt = (struct lthread *) arg; + uint64_t state = lt->state; + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_EXPIRED, <->tim, 0); + + rte_timer_stop(tim); + + if (lt->state & BIT(ST_LT_CANCELLED)) + (THIS_SCHED)->nb_blocked_threads--; + + lt->state = state | BIT(ST_LT_EXPIRED); + _lthread_resume(lt); + lt->state = state & CLEARBIT(ST_LT_EXPIRED); +} + + + +/* + * Returns 0 if there is a pending job in scheduler or 1 if done and can exit. + */ +static inline int _lthread_sched_isdone(struct lthread_sched *sched) +{ + return (sched->run_flag == 0) && + (_lthread_queue_empty(sched->ready)) && + (_lthread_queue_empty(sched->pready)) && + (sched->nb_blocked_threads == 0); +} + +/* + * Wait for all schedulers to start + */ +static inline void _lthread_schedulers_sync_start(void) +{ + rte_atomic16_inc(&active_schedulers); + + /* wait for lthread schedulers + * Note we use sched_yield() rather than pthread_yield() to allow + * for the possibility of a pthread wrapper on lthread_yield(), + * something that is not possible unless the scheduler is running. 
+ */ + while (rte_atomic16_read(&active_schedulers) < + rte_atomic16_read(&num_schedulers)) + sched_yield(); + +} + +/* + * Wait for all schedulers to stop + */ +static inline void _lthread_schedulers_sync_stop(void) +{ + rte_atomic16_dec(&active_schedulers); + rte_atomic16_dec(&num_schedulers); + + /* wait for schedulers + * Note we use sched_yield() rather than pthread_yield() to allow + * for the possibility of a pthread wrapper on lthread_yield(), + * something that is not possible unless the scheduler is running. + */ + while (rte_atomic16_read(&active_schedulers) > 0) + sched_yield(); + +} + + +/* + * Run the lthread scheduler + * This loop is the heart of the system + */ +void lthread_run(void) +{ + + struct lthread_sched *sched = THIS_SCHED; + struct lthread *lt = NULL; + + RTE_LOG(INFO, LTHREAD, + "starting scheduler %p on lcore %u phys core %u\n", + sched, rte_lcore_id(), + rte_lcore_index(rte_lcore_id())); + + /* if more than one, wait for all schedulers to start */ + _lthread_schedulers_sync_start(); + + + /* + * This is the main scheduling loop + * So long as there are tasks in existence we run this loop. + * We check for:- + * expired timers, + * the local ready queue, + * and the peer ready queue, + * + * and resume lthreads ad infinitum. 
+ */ + while (!_lthread_sched_isdone(sched)) { + + rte_timer_manage(); + + lt = _lthread_queue_poll(sched->ready); + if (lt != NULL) + _lthread_resume(lt); + lt = _lthread_queue_poll(sched->pready); + if (lt != NULL) + _lthread_resume(lt); + } + + + /* if more than one wait for all schedulers to stop */ + _lthread_schedulers_sync_stop(); + + (THIS_SCHED) = NULL; + + RTE_LOG(INFO, LTHREAD, + "stopping scheduler %p on lcore %u phys core %u\n", + sched, rte_lcore_id(), + rte_lcore_index(rte_lcore_id())); + fflush(stdout); +} + +/* + * Return the scheduler for this lcore + * + */ +struct lthread_sched *_lthread_sched_get(int lcore_id) +{ + if (lcore_id > LTHREAD_MAX_LCORES) + return NULL; + return schedcore[lcore_id]; +} + +/* + * migrate the current thread to another scheduler running + * on the specified lcore. + */ +int lthread_set_affinity(unsigned lcoreid) +{ + struct lthread *lt = THIS_LTHREAD; + struct lthread_sched *dest_sched; + + if (unlikely(lcoreid > LTHREAD_MAX_LCORES)) + return POSIX_ERRNO(EINVAL); + + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_AFFINITY, lcoreid, 0); + + dest_sched = schedcore[lcoreid]; + + if (unlikely(dest_sched == NULL)) + return POSIX_ERRNO(EINVAL); + + if (likely(dest_sched != THIS_SCHED)) { + lt->sched = dest_sched; + lt->pending_wr_queue = dest_sched->pready; + _affinitize(); + return 0; + } + return 0; +} diff --git a/examples/performance-thread/common/lthread_sched.h b/examples/performance-thread/common/lthread_sched.h new file mode 100644 index 00000000..4ce56c27 --- /dev/null +++ b/examples/performance-thread/common/lthread_sched.h @@ -0,0 +1,152 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software is derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef LTHREAD_SCHED_H_ +#define LTHREAD_SCHED_H_ + +#include "lthread_int.h" +#include "lthread_queue.h" +#include "lthread_objcache.h" +#include "lthread_diag.h" +#include "ctx.h" + +/* + * insert an lthread into a queue + */ +static inline void +_ready_queue_insert(struct lthread_sched *sched, struct lthread *lt) +{ + if (sched == THIS_SCHED) + _lthread_queue_insert_sp((THIS_SCHED)->ready, lt); + else + _lthread_queue_insert_mp(sched->pready, lt); +} + +/* + * remove an lthread from a queue + */ +static inline struct lthread *_ready_queue_remove(struct lthread_queue *q) +{ + return _lthread_queue_remove(q); +} + +/** + * Return true if the ready queue is empty + */ +static inline int _ready_queue_empty(struct lthread_queue *q) +{ + return _lthread_queue_empty(q); +} + +static inline uint64_t _sched_now(void) +{ + uint64_t now = rte_rdtsc(); + + if (now > (THIS_SCHED)->birth) + return now - (THIS_SCHED)->birth; + if (now < (THIS_SCHED)->birth) + return (THIS_SCHED)->birth - now; + /* never return 0 because this means sleep forever */ + return 1; +} + +static inline void +_affinitize(void) __attribute__ ((always_inline)); +static inline void +_affinitize(void) +{ + struct lthread *lt = THIS_LTHREAD; + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_SUSPENDED, 0, 0); + ctx_switch(&(THIS_SCHED)->ctx, <->ctx); +} + +static inline void +_suspend(void) __attribute__ ((always_inline)); +static inline void +_suspend(void) +{ + struct lthread *lt = THIS_LTHREAD; + + (THIS_SCHED)->nb_blocked_threads++; + DIAG_EVENT(lt, LT_DIAG_LTHREAD_SUSPENDED, 0, 0); + ctx_switch(&(THIS_SCHED)->ctx, <->ctx); + (THIS_SCHED)->nb_blocked_threads--; +} + +static inline void +_reschedule(void) __attribute__ ((always_inline)); +static inline void +_reschedule(void) +{ + struct lthread *lt = THIS_LTHREAD; + + DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESCHEDULED, 0, 0); + _ready_queue_insert(THIS_SCHED, lt); + ctx_switch(&(THIS_SCHED)->ctx, <->ctx); +} + +extern struct lthread_sched *schedcore[]; +void 
_sched_timer_cb(struct rte_timer *tim, void *arg); +void _sched_shutdown(__rte_unused void *arg); + + +#endif /* LTHREAD_SCHED_H_ */ diff --git a/examples/performance-thread/common/lthread_timer.h b/examples/performance-thread/common/lthread_timer.h new file mode 100644 index 00000000..b5e6fb0e --- /dev/null +++ b/examples/performance-thread/common/lthread_timer.h @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef LTHREAD_TIMER_H_ +#define LTHREAD_TIMER_H_ + +#include "lthread_int.h" +#include "lthread_sched.h" + + +static inline uint64_t +_ns_to_clks(uint64_t ns) +{ + unsigned __int128 clkns = rte_get_tsc_hz(); + + clkns *= ns; + clkns /= 1000000000; + return (uint64_t) clkns; +} + + +static inline void +_timer_start(struct lthread *lt, uint64_t clks) +{ + if (clks > 0) { + DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_START, <->tim, clks); + rte_timer_init(<->tim); + rte_timer_reset(<->tim, + clks, + SINGLE, + rte_lcore_id(), + _sched_timer_cb, + (void *)lt); + } +} + + +static inline void +_timer_stop(struct lthread *lt) +{ + if (lt != NULL) { + DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_DELETE, <->tim, 0); + rte_timer_stop(<->tim); + } +} + + +#endif /* LTHREAD_TIMER_H_ */ diff --git a/examples/performance-thread/common/lthread_tls.c b/examples/performance-thread/common/lthread_tls.c new file mode 100644 index 00000000..43cda4ff --- /dev/null +++ b/examples/performance-thread/common/lthread_tls.c @@ -0,0 +1,253 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <limits.h> +#include <inttypes.h> +#include <unistd.h> +#include <pthread.h> +#include <fcntl.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <execinfo.h> +#include <sched.h> + +#include <rte_malloc.h> +#include <rte_log.h> +#include <rte_ring.h> +#include <rte_atomic_64.h> + +#include "lthread_tls.h" +#include "lthread_queue.h" +#include "lthread_objcache.h" +#include "lthread_sched.h" + +static struct rte_ring *key_pool; +static uint64_t key_pool_init; + +/* needed to cause section start and end to be defined */ +RTE_DEFINE_PER_LTHREAD(void *, dummy); + +static struct lthread_key key_table[LTHREAD_MAX_KEYS]; + +void lthread_tls_ctor(void) __attribute__((constructor)); + +void lthread_tls_ctor(void) +{ + key_pool = NULL; + key_pool_init = 0; +} + +/* + * Initialize a pool of keys + * These are unique tokens that can be obtained by threads + * calling lthread_key_create() + */ +void _lthread_key_pool_init(void) +{ + static struct rte_ring *pool; + struct lthread_key *new_key; + char name[MAX_LTHREAD_NAME_SIZE]; + + bzero(key_table, sizeof(key_table)); + + /* only one lcore should do this */ + if (rte_atomic64_cmpset(&key_pool_init, 0, 1)) { + + snprintf(name, + MAX_LTHREAD_NAME_SIZE, + "lthread_key_pool_%d", + getpid()); + + pool = rte_ring_create(name, + LTHREAD_MAX_KEYS, 0, 0); + LTHREAD_ASSERT(pool); + + int i; + + for (i = 1; i < LTHREAD_MAX_KEYS; i++) { + new_key = &key_table[i]; + rte_ring_mp_enqueue((struct rte_ring *)pool, + (void *)new_key); + } + key_pool = pool; + } + /* other lcores wait here till done */ + while (key_pool == NULL) { + rte_compiler_barrier(); + sched_yield(); + }; +} + +/* + * Create a key + * this means getting a key from the the pool + */ +int lthread_key_create(unsigned int *key, tls_destructor_func destructor) +{ + if (key == NULL) + return POSIX_ERRNO(EINVAL); + + struct lthread_key *new_key; + + if 
(rte_ring_mc_dequeue((struct rte_ring *)key_pool, (void **)&new_key) + == 0) { + new_key->destructor = destructor; + *key = (new_key - key_table); + + return 0; + } + return POSIX_ERRNO(EAGAIN); +} + + +/* + * Delete a key + */ +int lthread_key_delete(unsigned int k) +{ + struct lthread_key *key; + + key = (struct lthread_key *) &key_table[k]; + + if (k > LTHREAD_MAX_KEYS) + return POSIX_ERRNO(EINVAL); + + key->destructor = NULL; + rte_ring_mp_enqueue((struct rte_ring *)key_pool, + (void *)key); + return 0; +} + + + +/* + * Break association for all keys in use by this thread + * invoke the destructor if available. + * Since a destructor can create keys we could enter an infinite loop + * therefore we give up after LTHREAD_DESTRUCTOR_ITERATIONS + * the behavior is modelled on pthread + */ +void _lthread_tls_destroy(struct lthread *lt) +{ + int i, k; + int nb_keys; + void *data; + + for (i = 0; i < LTHREAD_DESTRUCTOR_ITERATIONS; i++) { + + for (k = 1; k < LTHREAD_MAX_KEYS; k++) { + + /* no keys in use ? */ + nb_keys = lt->tls->nb_keys_inuse; + if (nb_keys == 0) + return; + + /* this key not in use ? 
*/ + if (lt->tls->data[k] == NULL) + continue; + + /* remove this key */ + data = lt->tls->data[k]; + lt->tls->data[k] = NULL; + lt->tls->nb_keys_inuse = nb_keys-1; + + /* invoke destructor */ + if (key_table[k].destructor != NULL) + key_table[k].destructor(data); + } + } +} + +/* + * Return the pointer associated with a key + * If the key is no longer valid return NULL + */ +void +*lthread_getspecific(unsigned int k) +{ + + if (k > LTHREAD_MAX_KEYS) + return NULL; + + return THIS_LTHREAD->tls->data[k]; +} + +/* + * Set a value against a key + * If the key is no longer valid return an error + * when storing value + */ +int lthread_setspecific(unsigned int k, const void *data) +{ + if (k > LTHREAD_MAX_KEYS) + return POSIX_ERRNO(EINVAL); + + int n = THIS_LTHREAD->tls->nb_keys_inuse; + + /* discard const qualifier */ + char *p = (char *) (uintptr_t) data; + + + if (data != NULL) { + if (THIS_LTHREAD->tls->data[k] == NULL) + THIS_LTHREAD->tls->nb_keys_inuse = n+1; + } + + THIS_LTHREAD->tls->data[k] = (void *) p; + return 0; +} + +/* + * Allocate data for TLS cache +*/ +void _lthread_tls_alloc(struct lthread *lt) +{ + struct lthread_tls *tls; + + tls = _lthread_objcache_alloc((THIS_SCHED)->tls_cache); + + LTHREAD_ASSERT(tls != NULL); + + tls->root_sched = (THIS_SCHED); + lt->tls = tls; + + /* allocate data for TLS varaiables using RTE_PER_LTHREAD macros */ + if (sizeof(void *) < (uint64_t)RTE_PER_LTHREAD_SECTION_SIZE) { + lt->per_lthread_data = + _lthread_objcache_alloc((THIS_SCHED)->per_lthread_cache); + } +} diff --git a/examples/performance-thread/common/lthread_tls.h b/examples/performance-thread/common/lthread_tls.h new file mode 100644 index 00000000..86cbfadc --- /dev/null +++ b/examples/performance-thread/common/lthread_tls.h @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef LTHREAD_TLS_H_ +#define LTHREAD_TLS_H_ + +#include "lthread_api.h" + +#define RTE_PER_LTHREAD_SECTION_SIZE \ +(&__stop_per_lt - &__start_per_lt) + +struct lthread_key { + tls_destructor_func destructor; +}; + +struct lthread_tls { + void *data[LTHREAD_MAX_KEYS]; + int nb_keys_inuse; + struct lthread_sched *root_sched; +}; + +void _lthread_tls_destroy(struct lthread *lt); +void _lthread_key_pool_init(void); +void _lthread_tls_alloc(struct lthread *lt); + + +#endif /* LTHREAD_TLS_H_ */ diff --git a/examples/performance-thread/l3fwd-thread/Makefile b/examples/performance-thread/l3fwd-thread/Makefile new file mode 100644 index 00000000..d8fe5e68 --- /dev/null +++ b/examples/performance-thread/l3fwd-thread/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l3fwd-thread + +# all source are stored in SRCS-y +SRCS-y := main.c + +include $(RTE_SDK)/examples/performance-thread/common/common.mk + +CFLAGS += -O3 -g $(USER_FLAGS) $(INCLUDES) $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +#ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +#endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c new file mode 100644 index 00000000..15c0a4de --- /dev/null +++ b/examples/performance-thread/l3fwd-thread/main.c @@ -0,0 +1,3651 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_vect.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_string_fns.h> + +#include <cmdline_parse.h> +#include <cmdline_parse_etheraddr.h> + +#include <lthread_api.h> + +#define APP_LOOKUP_EXACT_MATCH 0 +#define APP_LOOKUP_LPM 1 +#define DO_RFC_1812_CHECKS + +/* Enable cpu-load stats 0-off, 1-on */ +#define APP_CPU_LOAD 1 + +#ifndef APP_LOOKUP_METHOD +#define APP_LOOKUP_METHOD APP_LOOKUP_LPM +#endif + +/* + * When set to zero, simple forwarding path is enabled. + * When set to one, optimized forwarding path is enabled. + * Note that LPM optimisation path uses SSE4.1 instructions.
+ */ +#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__)) +#define ENABLE_MULTI_BUFFER_OPTIMIZE 0 +#else +#define ENABLE_MULTI_BUFFER_OPTIMIZE 1 +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +#include <rte_hash.h> +#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +#include <rte_lpm.h> +#include <rte_lpm6.h> +#else +#error "APP_LOOKUP_METHOD set to incorrect value" +#endif + +#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 + +#define MAX_JUMBO_PKT_LEN 9600 + +#define IPV6_ADDR_LEN 16 + +#define MEMPOOL_CACHE_SIZE 256 + +/* + * This expression is used to calculate the number of mbufs needed depending on + * user input, taking into account memory for rx and tx hardware rings, cache + * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that + * NB_MBUF never goes below a minimum value of 8192 + */ + +#define NB_MBUF RTE_MAX(\ + (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ + nb_ports*nb_lcores*MAX_PKT_BURST + \ + nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ + nb_lcores*MEMPOOL_CACHE_SIZE), \ + (unsigned)8192) + +#define MAX_PKT_BURST 32 +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* + * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send. + */ +#define MAX_TX_BURST (MAX_PKT_BURST / 2) +#define BURST_SIZE MAX_TX_BURST + +#define NB_SOCKETS 8 + +/* Configure how many packets ahead to prefetch, when reading packets */ +#define PREFETCH_OFFSET 3 + +/* Used to mark destination port as 'invalid'. 
*/ +#define BAD_PORT ((uint16_t)-1) + +#define FWDSTEP 4 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 128 +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + +/* ethernet addresses of ports */ +static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; +static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +static __m128i val_eth[RTE_MAX_ETHPORTS]; + +/* replace first 12B of the ethernet header. */ +#define MASK_ETH 0x3f + +/* mask of enabled ports */ +static uint32_t enabled_port_mask; +static int promiscuous_on; /**< $et in promiscuous mode off by default. */ +static int numa_on = 1; /**< NUMA is enabled by default. */ + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +static int ipv6; /**< ipv6 is false by default. */ +#endif + +#if (APP_CPU_LOAD == 1) + +#define MAX_CPU RTE_MAX_LCORE +#define CPU_LOAD_TIMEOUT_US (5 * 1000 * 1000) /**< Timeout for collecting 5s */ + +#define CPU_PROCESS 0 +#define CPU_POLL 1 +#define MAX_CPU_COUNTER 2 + +struct cpu_load { + uint16_t n_cpu; + uint64_t counter; + uint64_t hits[MAX_CPU_COUNTER][MAX_CPU]; +} __rte_cache_aligned; + +static struct cpu_load cpu_load; +static int cpu_load_lcore_id = -1; + +#define SET_CPU_BUSY(thread, counter) \ + thread->conf.busy[counter] = 1 + +#define SET_CPU_IDLE(thread, counter) \ + thread->conf.busy[counter] = 0 + +#define IS_CPU_BUSY(thread, counter) \ + (thread->conf.busy[counter] > 0) + +#else + +#define SET_CPU_BUSY(thread, counter) +#define SET_CPU_IDLE(thread, counter) +#define IS_CPU_BUSY(thread, counter) 0 + +#endif + +struct mbuf_table { + uint16_t len; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +struct lcore_rx_queue { + uint8_t port_id; + uint8_t queue_id; +} __rte_cache_aligned; + +#define MAX_RX_QUEUE_PER_LCORE 16 +#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS +#define MAX_RX_QUEUE_PER_PORT 128 + +#define MAX_LCORE_PARAMS 1024 +struct 
rx_thread_params { + uint8_t port_id; + uint8_t queue_id; + uint8_t lcore_id; + uint8_t thread_id; +} __rte_cache_aligned; + +static struct rx_thread_params rx_thread_params_array[MAX_LCORE_PARAMS]; +static struct rx_thread_params rx_thread_params_array_default[] = { + {0, 0, 2, 0}, + {0, 1, 2, 1}, + {0, 2, 2, 2}, + {1, 0, 2, 3}, + {1, 1, 2, 4}, + {1, 2, 2, 5}, + {2, 0, 2, 6}, + {3, 0, 3, 7}, + {3, 1, 3, 8}, +}; + +static struct rx_thread_params *rx_thread_params = + rx_thread_params_array_default; +static uint16_t nb_rx_thread_params = RTE_DIM(rx_thread_params_array_default); + +struct tx_thread_params { + uint8_t lcore_id; + uint8_t thread_id; +} __rte_cache_aligned; + +static struct tx_thread_params tx_thread_params_array[MAX_LCORE_PARAMS]; +static struct tx_thread_params tx_thread_params_array_default[] = { + {4, 0}, + {5, 1}, + {6, 2}, + {7, 3}, + {8, 4}, + {9, 5}, + {10, 6}, + {11, 7}, + {12, 8}, +}; + +static struct tx_thread_params *tx_thread_params = + tx_thread_params_array_default; +static uint16_t nb_tx_thread_params = RTE_DIM(tx_thread_params_array_default); + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 1, /**< IP checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_TCP, + }, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +static struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#include <rte_hash_crc.h> +#define DEFAULT_HASH_FUNC rte_hash_crc +#else +#include <rte_jhash.h> +#define DEFAULT_HASH_FUNC rte_jhash +#endif + +struct ipv4_5tuple { + uint32_t ip_dst; + uint32_t 
ip_src; + uint16_t port_dst; + uint16_t port_src; + uint8_t proto; +} __attribute__((__packed__)); + +union ipv4_5tuple_host { + struct { + uint8_t pad0; + uint8_t proto; + uint16_t pad1; + uint32_t ip_src; + uint32_t ip_dst; + uint16_t port_src; + uint16_t port_dst; + }; + __m128i xmm; +}; + +#define XMM_NUM_IN_IPV6_5TUPLE 3 + +struct ipv6_5tuple { + uint8_t ip_dst[IPV6_ADDR_LEN]; + uint8_t ip_src[IPV6_ADDR_LEN]; + uint16_t port_dst; + uint16_t port_src; + uint8_t proto; +} __attribute__((__packed__)); + +union ipv6_5tuple_host { + struct { + uint16_t pad0; + uint8_t proto; + uint8_t pad1; + uint8_t ip_src[IPV6_ADDR_LEN]; + uint8_t ip_dst[IPV6_ADDR_LEN]; + uint16_t port_src; + uint16_t port_dst; + uint64_t reserve; + }; + __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE]; +}; + +struct ipv4_l3fwd_route { + struct ipv4_5tuple key; + uint8_t if_out; +}; + +struct ipv6_l3fwd_route { + struct ipv6_5tuple key; + uint8_t if_out; +}; + +static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { + {{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1), 101, 11, IPPROTO_TCP}, 0}, + {{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1), 102, 12, IPPROTO_TCP}, 1}, + {{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1), 101, 11, IPPROTO_TCP}, 2}, + {{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1), 102, 12, IPPROTO_TCP}, 3}, +}; + +static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { + {{ + {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, + {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, + 0x05}, + 101, 11, IPPROTO_TCP}, 0}, + + {{ + {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, + {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, + 0x05}, + 102, 12, IPPROTO_TCP}, 1}, + + {{ + {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, + {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, + 0x05}, + 101, 11, IPPROTO_TCP}, 2}, + + {{ + {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 
0}, + {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, + 0x05}, + 102, 12, IPPROTO_TCP}, 3}, +}; + +typedef struct rte_hash lookup_struct_t; +static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; +static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; + +#ifdef RTE_ARCH_X86_64 +/* default to 4 million hash entries (approx) */ +#define L3FWD_HASH_ENTRIES (1024*1024*4) +#else +/* 32-bit has less address-space for hugepage memory, limit to 1M entries */ +#define L3FWD_HASH_ENTRIES (1024*1024*1) +#endif +#define HASH_ENTRY_NUMBER_DEFAULT 4 + +static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT; + +static inline uint32_t +ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len, + uint32_t init_val) +{ + const union ipv4_5tuple_host *k; + uint32_t t; + const uint32_t *p; + + k = data; + t = k->proto; + p = (const uint32_t *)&k->port_src; + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + init_val = rte_hash_crc_4byte(t, init_val); + init_val = rte_hash_crc_4byte(k->ip_src, init_val); + init_val = rte_hash_crc_4byte(k->ip_dst, init_val); + init_val = rte_hash_crc_4byte(*p, init_val); +#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + init_val = rte_jhash_1word(t, init_val); + init_val = rte_jhash_1word(k->ip_src, init_val); + init_val = rte_jhash_1word(k->ip_dst, init_val); + init_val = rte_jhash_1word(*p, init_val); +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + return init_val; +} + +static inline uint32_t +ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, + uint32_t init_val) +{ + const union ipv6_5tuple_host *k; + uint32_t t; + const uint32_t *p; +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3; + const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3; +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + + k = data; + t = k->proto; + p = (const uint32_t *)&k->port_src; + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + ip_src0 = (const uint32_t *) k->ip_src; + ip_src1 = (const uint32_t *)(k->ip_src + 4); 
+ ip_src2 = (const uint32_t *)(k->ip_src + 8); + ip_src3 = (const uint32_t *)(k->ip_src + 12); + ip_dst0 = (const uint32_t *) k->ip_dst; + ip_dst1 = (const uint32_t *)(k->ip_dst + 4); + ip_dst2 = (const uint32_t *)(k->ip_dst + 8); + ip_dst3 = (const uint32_t *)(k->ip_dst + 12); + init_val = rte_hash_crc_4byte(t, init_val); + init_val = rte_hash_crc_4byte(*ip_src0, init_val); + init_val = rte_hash_crc_4byte(*ip_src1, init_val); + init_val = rte_hash_crc_4byte(*ip_src2, init_val); + init_val = rte_hash_crc_4byte(*ip_src3, init_val); + init_val = rte_hash_crc_4byte(*ip_dst0, init_val); + init_val = rte_hash_crc_4byte(*ip_dst1, init_val); + init_val = rte_hash_crc_4byte(*ip_dst2, init_val); + init_val = rte_hash_crc_4byte(*ip_dst3, init_val); + init_val = rte_hash_crc_4byte(*p, init_val); +#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + init_val = rte_jhash_1word(t, init_val); + init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); + init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); + init_val = rte_jhash_1word(*p, init_val); +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + return init_val; +} + +#define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array) +#define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array) + +static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; +static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; + +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +struct ipv4_l3fwd_route { + uint32_t ip; + uint8_t depth; + uint8_t if_out; +}; + +struct ipv6_l3fwd_route { + uint8_t ip[16]; + uint8_t depth; + uint8_t if_out; +}; + +static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { + {IPv4(1, 1, 1, 0), 24, 0}, + {IPv4(2, 1, 1, 0), 24, 1}, + {IPv4(3, 1, 1, 0), 24, 2}, + {IPv4(4, 1, 1, 0), 24, 3}, + {IPv4(5, 1, 1, 0), 24, 4}, + {IPv4(6, 1, 1, 0), 24, 5}, + {IPv4(7, 1, 1, 0), 24, 6}, + {IPv4(8, 1, 1, 0), 24, 7}, +}; + +static struct ipv6_l3fwd_route 
ipv6_l3fwd_route_array[] = { + {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0}, + {{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1}, + {{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2}, + {{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3}, + {{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4}, + {{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5}, + {{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6}, + {{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7}, +}; + +#define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array) +#define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array) + +#define IPV4_L3FWD_LPM_MAX_RULES 1024 +#define IPV6_L3FWD_LPM_MAX_RULES 1024 +#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16) + +typedef struct rte_lpm lookup_struct_t; +typedef struct rte_lpm6 lookup6_struct_t; +static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; +static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; +#endif + +struct lcore_conf { + lookup_struct_t *ipv4_lookup_struct; +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) + lookup6_struct_t *ipv6_lookup_struct; +#else + lookup_struct_t *ipv6_lookup_struct; +#endif + void *data; +} __rte_cache_aligned; + +static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; +RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf); + +#define MAX_RX_QUEUE_PER_THREAD 16 +#define MAX_TX_PORT_PER_THREAD RTE_MAX_ETHPORTS +#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS +#define MAX_RX_QUEUE_PER_PORT 128 + +#define MAX_RX_THREAD 1024 +#define MAX_TX_THREAD 1024 +#define MAX_THREAD (MAX_RX_THREAD + MAX_TX_THREAD) + +/** + * Producers and consumers threads configuration + */ +static int lthreads_on = 1; /**< Use lthreads for processing*/ + +rte_atomic16_t rx_counter; /**< Number of spawned rx threads */ +rte_atomic16_t tx_counter; /**< Number of spawned tx threads */ + +struct thread_conf { + uint16_t lcore_id; /**< Initial lcore for rx thread */ + uint16_t cpu_id; /**< Cpu id for 
cpu load stats counter */ + uint16_t thread_id; /**< Thread ID */ + +#if (APP_CPU_LOAD > 0) + int busy[MAX_CPU_COUNTER]; +#endif +}; + +struct thread_rx_conf { + struct thread_conf conf; + + uint16_t n_rx_queue; + struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + + uint16_t n_ring; /**< Number of output rings */ + struct rte_ring *ring[RTE_MAX_LCORE]; + struct lthread_cond *ready[RTE_MAX_LCORE]; + +#if (APP_CPU_LOAD > 0) + int busy[MAX_CPU_COUNTER]; +#endif +} __rte_cache_aligned; + +uint16_t n_rx_thread; +struct thread_rx_conf rx_thread[MAX_RX_THREAD]; + +struct thread_tx_conf { + struct thread_conf conf; + + uint16_t tx_queue_id[RTE_MAX_LCORE]; + struct mbuf_table tx_mbufs[RTE_MAX_LCORE]; + + struct rte_ring *ring; + struct lthread_cond **ready; + +} __rte_cache_aligned; + +uint16_t n_tx_thread; +struct thread_tx_conf tx_thread[MAX_TX_THREAD]; + +/* Send burst of packets on an output interface */ +static inline int +send_burst(struct thread_tx_conf *qconf, uint16_t n, uint8_t port) +{ + struct rte_mbuf **m_table; + int ret; + uint16_t queueid; + + queueid = qconf->tx_queue_id[port]; + m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; + + ret = rte_eth_tx_burst(port, queueid, m_table, n); + if (unlikely(ret < n)) { + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < n); + } + + return 0; +} + +/* Enqueue a single packet, and send burst if queue is filled */ +static inline int +send_single_packet(struct rte_mbuf *m, uint8_t port) +{ + uint16_t len; + struct thread_tx_conf *qconf; + + if (lthreads_on) + qconf = (struct thread_tx_conf *)lthread_get_data(); + else + qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data; + + len = qconf->tx_mbufs[port].len; + qconf->tx_mbufs[port].m_table[len] = m; + len++; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + send_burst(qconf, MAX_PKT_BURST, port); + len = 0; + } + + qconf->tx_mbufs[port].len = len; + return 0; +} + +#if ((APP_LOOKUP_METHOD == 
APP_LOOKUP_LPM) && \ + (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) +static inline __attribute__((always_inline)) void +send_packetsx4(uint8_t port, + struct rte_mbuf *m[], uint32_t num) +{ + uint32_t len, j, n; + struct thread_tx_conf *qconf; + + if (lthreads_on) + qconf = (struct thread_tx_conf *)lthread_get_data(); + else + qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data; + + len = qconf->tx_mbufs[port].len; + + /* + * If TX buffer for that queue is empty, and we have enough packets, + * then send them straightway. + */ + if (num >= MAX_TX_BURST && len == 0) { + n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); + if (unlikely(n < num)) { + do { + rte_pktmbuf_free(m[n]); + } while (++n < num); + } + return; + } + + /* + * Put packets into TX buffer for that queue. + */ + + n = len + num; + n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num; + + j = 0; + switch (n % FWDSTEP) { + while (j < n) { + case 0: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + case 3: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + case 2: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + case 1: + qconf->tx_mbufs[port].m_table[len + j] = m[j]; + j++; + } + } + + len += n; + + /* enough pkts to be sent */ + if (unlikely(len == MAX_PKT_BURST)) { + + send_burst(qconf, MAX_PKT_BURST, port); + + /* copy rest of the packets into the TX buffer. */ + len = num - n; + j = 0; + switch (len % FWDSTEP) { + while (j < len) { + case 0: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + case 3: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + case 2: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + case 1: + qconf->tx_mbufs[port].m_table[j] = m[n + j]; + j++; + } + } + } + + qconf->tx_mbufs[port].len = len; +} +#endif /* APP_LOOKUP_LPM */ + +#ifdef DO_RFC_1812_CHECKS +static inline int +is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len) +{ + /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ + /* + * 1. 
The packet length reported by the Link Layer must be large + * enough to hold the minimum length legal IP datagram (20 bytes). + */ + if (link_len < sizeof(struct ipv4_hdr)) + return -1; + + /* 2. The IP checksum must be correct. */ + /* this is checked in H/W */ + + /* + * 3. The IP version number must be 4. If the version number is not 4 + * then the packet may be another version of IP, such as IPng or + * ST-II. + */ + if (((pkt->version_ihl) >> 4) != 4) + return -3; + /* + * 4. The IP header length field must be large enough to hold the + * minimum length legal IP datagram (20 bytes = 5 words). + */ + if ((pkt->version_ihl & 0xf) < 5) + return -4; + + /* + * 5. The IP total length field must be large enough to hold the IP + * datagram header, whose length is specified in the IP header length + * field. + */ + if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) + return -5; + + return 0; +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + +static __m128i mask0; +static __m128i mask1; +static __m128i mask2; +static inline uint8_t +get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, + lookup_struct_t *ipv4_l3fwd_lookup_struct) +{ + int ret = 0; + union ipv4_5tuple_host key; + + ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live); + __m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr)); + /* Get 5 tuple: dst port, src port, dst IP address, src IP address and + protocol */ + key.xmm = _mm_and_si128(data, mask0); + /* Find destination port */ + ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); + return (uint8_t)((ret < 0) ? 
portid : ipv4_l3fwd_out_if[ret]); +} + +static inline uint8_t +get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, + lookup_struct_t *ipv6_l3fwd_lookup_struct) +{ + int ret = 0; + union ipv6_5tuple_host key; + + ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len); + __m128i data0 = _mm_loadu_si128((__m128i *)(ipv6_hdr)); + __m128i data1 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) + + sizeof(__m128i))); + __m128i data2 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) + + sizeof(__m128i) + sizeof(__m128i))); + /* Get part of 5 tuple: src IP address lower 96 bits and protocol */ + key.xmm[0] = _mm_and_si128(data0, mask1); + /* Get part of 5 tuple: dst IP address lower 96 bits and src IP address + higher 32 bits */ + key.xmm[1] = data1; + /* Get part of 5 tuple: dst port and src port and dst IP address higher + 32 bits */ + key.xmm[2] = _mm_and_si128(data2, mask2); + + /* Find destination port */ + ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); + return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) + +static inline uint8_t +get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, + lookup_struct_t *ipv4_l3fwd_lookup_struct) +{ + uint32_t next_hop; + + return (uint8_t)((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, + rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr), + &next_hop) == 0) ? next_hop : portid); +} + +static inline uint8_t +get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, + lookup6_struct_t *ipv6_l3fwd_lookup_struct) +{ + uint8_t next_hop; + + return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct, + ((struct ipv6_hdr *)ipv6_hdr)->dst_addr, &next_hop) == 0) ? 
+ next_hop : portid); +} +#endif + +static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid) + __attribute__((unused)); + +#if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \ + (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) + +#define MASK_ALL_PKTS 0xff +#define EXCLUDE_1ST_PKT 0xfe +#define EXCLUDE_2ND_PKT 0xfd +#define EXCLUDE_3RD_PKT 0xfb +#define EXCLUDE_4TH_PKT 0xf7 +#define EXCLUDE_5TH_PKT 0xef +#define EXCLUDE_6TH_PKT 0xdf +#define EXCLUDE_7TH_PKT 0xbf +#define EXCLUDE_8TH_PKT 0x7f + +static inline void +simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid) +{ + struct ether_hdr *eth_hdr[8]; + struct ipv4_hdr *ipv4_hdr[8]; + uint8_t dst_port[8]; + int32_t ret[8]; + union ipv4_5tuple_host key[8]; + __m128i data[8]; + + eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *); + eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *); + eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *); + eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *); + eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *); + eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *); + eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *); + eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *); + + /* Handle IPv4 headers.*/ + ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *, + sizeof(struct 
ether_hdr)); + +#ifdef DO_RFC_1812_CHECKS + /* Check to make sure the packet is valid (RFC1812) */ + uint8_t valid_mask = MASK_ALL_PKTS; + + if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) { + rte_pktmbuf_free(m[0]); + valid_mask &= EXCLUDE_1ST_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) { + rte_pktmbuf_free(m[1]); + valid_mask &= EXCLUDE_2ND_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) { + rte_pktmbuf_free(m[2]); + valid_mask &= EXCLUDE_3RD_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) { + rte_pktmbuf_free(m[3]); + valid_mask &= EXCLUDE_4TH_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) { + rte_pktmbuf_free(m[4]); + valid_mask &= EXCLUDE_5TH_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) { + rte_pktmbuf_free(m[5]); + valid_mask &= EXCLUDE_6TH_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) { + rte_pktmbuf_free(m[6]); + valid_mask &= EXCLUDE_7TH_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) { + rte_pktmbuf_free(m[7]); + valid_mask &= EXCLUDE_8TH_PKT; + } + if (unlikely(valid_mask != MASK_ALL_PKTS)) { + if (valid_mask == 0) + return; + + uint8_t i = 0; + + /* Forward the packets that passed the checks one at a time. */ + for (i = 0; i < 8; i++) { + if ((0x1 << i) & valid_mask) + l3fwd_simple_forward(m[i], portid); + } + + /* + * Every packet has now been either freed or forwarded; + * do not fall through to the 8-packet path below, which would + * touch the freed mbufs and send the valid ones a second time. + */ + return; + } +#endif /* End of #ifdef DO_RFC_1812_CHECKS */ + + data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], 
__m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + + key[0].xmm = _mm_and_si128(data[0], mask0); + key[1].xmm = _mm_and_si128(data[1], mask0); + key[2].xmm = _mm_and_si128(data[2], mask0); + key[3].xmm = _mm_and_si128(data[3], mask0); + key[4].xmm = _mm_and_si128(data[4], mask0); + key[5].xmm = _mm_and_si128(data[5], mask0); + key[6].xmm = _mm_and_si128(data[6], mask0); + key[7].xmm = _mm_and_si128(data[7], mask0); + + const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], + &key[4], &key[5], &key[6], &key[7]}; + + rte_hash_lookup_multi(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, + &key_array[0], 8, ret); + dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]); + dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]); + dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]); + dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]); + dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]); + dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]); + dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]); + dst_port[7] = (uint8_t) ((ret[7] < 0) ? 
portid : ipv4_l3fwd_out_if[ret[7]]); + + if (dst_port[0] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[0]) == 0) + dst_port[0] = portid; + if (dst_port[1] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[1]) == 0) + dst_port[1] = portid; + if (dst_port[2] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[2]) == 0) + dst_port[2] = portid; + if (dst_port[3] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[3]) == 0) + dst_port[3] = portid; + if (dst_port[4] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[4]) == 0) + dst_port[4] = portid; + if (dst_port[5] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[5]) == 0) + dst_port[5] = portid; + if (dst_port[6] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[6]) == 0) + dst_port[6] = portid; + if (dst_port[7] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[7]) == 0) + dst_port[7] = portid; + +#ifdef DO_RFC_1812_CHECKS + /* Update time to live and header checksum */ + --(ipv4_hdr[0]->time_to_live); + --(ipv4_hdr[1]->time_to_live); + --(ipv4_hdr[2]->time_to_live); + --(ipv4_hdr[3]->time_to_live); + ++(ipv4_hdr[0]->hdr_checksum); + ++(ipv4_hdr[1]->hdr_checksum); + ++(ipv4_hdr[2]->hdr_checksum); + ++(ipv4_hdr[3]->hdr_checksum); + --(ipv4_hdr[4]->time_to_live); + --(ipv4_hdr[5]->time_to_live); + --(ipv4_hdr[6]->time_to_live); + --(ipv4_hdr[7]->time_to_live); + ++(ipv4_hdr[4]->hdr_checksum); + ++(ipv4_hdr[5]->hdr_checksum); + ++(ipv4_hdr[6]->hdr_checksum); + ++(ipv4_hdr[7]->hdr_checksum); +#endif + + /* dst addr */ + *(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]]; + *(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]]; + *(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]]; + *(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]]; + *(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]]; + *(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]]; + *(uint64_t *)&eth_hdr[6]->d_addr = 
dest_eth_addr[dst_port[6]]; + *(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]]; + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr); + + send_single_packet(m[0], (uint8_t)dst_port[0]); + send_single_packet(m[1], (uint8_t)dst_port[1]); + send_single_packet(m[2], (uint8_t)dst_port[2]); + send_single_packet(m[3], (uint8_t)dst_port[3]); + send_single_packet(m[4], (uint8_t)dst_port[4]); + send_single_packet(m[5], (uint8_t)dst_port[5]); + send_single_packet(m[6], (uint8_t)dst_port[6]); + send_single_packet(m[7], (uint8_t)dst_port[7]); + +} + +static inline void get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0, + __m128i mask1, union ipv6_5tuple_host *key) +{ + __m128i tmpdata0 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, + __m128i *, sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len))); + __m128i tmpdata1 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, + __m128i *, sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i))); + __m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, + __m128i *, sizeof(struct ether_hdr) + + offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i) + + sizeof(__m128i))); + key->xmm[0] = _mm_and_si128(tmpdata0, mask0); + key->xmm[1] = tmpdata1; + key->xmm[2] = _mm_and_si128(tmpdata2, mask1); +} + +static inline void +simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid) +{ + int32_t ret[8]; + uint8_t dst_port[8]; + struct ether_hdr *eth_hdr[8]; + union ipv6_5tuple_host key[8]; + + 
__attribute__((unused)) struct ipv6_hdr *ipv6_hdr[8]; + + eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *); + eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *); + eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *); + eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *); + eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *); + eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *); + eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *); + eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *); + + /* Handle IPv6 headers.*/ + ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + get_ipv6_5tuple(m[0], mask1, mask2, &key[0]); + get_ipv6_5tuple(m[1], mask1, mask2, &key[1]); + get_ipv6_5tuple(m[2], mask1, mask2, &key[2]); + get_ipv6_5tuple(m[3], mask1, mask2, &key[3]); + get_ipv6_5tuple(m[4], mask1, mask2, &key[4]); + get_ipv6_5tuple(m[5], mask1, mask2, &key[5]); + get_ipv6_5tuple(m[6], mask1, mask2, &key[6]); + get_ipv6_5tuple(m[7], mask1, mask2, &key[7]); + + const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], + &key[4], &key[5], &key[6], &key[7]}; + + /* Look up all 8 keys: ret[0..7] are each read below. */ + rte_hash_lookup_multi(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct, + &key_array[0], 8, ret); + dst_port[0] = (uint8_t) ((ret[0] < 0) ? 
portid : ipv6_l3fwd_out_if[ret[0]]); + dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv6_l3fwd_out_if[ret[1]]); + dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv6_l3fwd_out_if[ret[2]]); + dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv6_l3fwd_out_if[ret[3]]); + dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv6_l3fwd_out_if[ret[4]]); + dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv6_l3fwd_out_if[ret[5]]); + dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv6_l3fwd_out_if[ret[6]]); + dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid : ipv6_l3fwd_out_if[ret[7]]); + + if (dst_port[0] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[0]) == 0) + dst_port[0] = portid; + if (dst_port[1] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[1]) == 0) + dst_port[1] = portid; + if (dst_port[2] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[2]) == 0) + dst_port[2] = portid; + if (dst_port[3] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[3]) == 0) + dst_port[3] = portid; + if (dst_port[4] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[4]) == 0) + dst_port[4] = portid; + if (dst_port[5] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[5]) == 0) + dst_port[5] = portid; + if (dst_port[6] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[6]) == 0) + dst_port[6] = portid; + if (dst_port[7] >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port[7]) == 0) + dst_port[7] = portid; + + /* dst addr */ + *(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]]; + *(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]]; + *(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]]; + *(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]]; + *(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]]; + *(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]]; + *(uint64_t *)&eth_hdr[6]->d_addr = dest_eth_addr[dst_port[6]]; + *(uint64_t *)&eth_hdr[7]->d_addr = 
dest_eth_addr[dst_port[7]]; + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr); + + send_single_packet(m[0], (uint8_t)dst_port[0]); + send_single_packet(m[1], (uint8_t)dst_port[1]); + send_single_packet(m[2], (uint8_t)dst_port[2]); + send_single_packet(m[3], (uint8_t)dst_port[3]); + send_single_packet(m[4], (uint8_t)dst_port[4]); + send_single_packet(m[5], (uint8_t)dst_port[5]); + send_single_packet(m[6], (uint8_t)dst_port[6]); + send_single_packet(m[7], (uint8_t)dst_port[7]); + +} +#endif /* APP_LOOKUP_METHOD */ + +static inline __attribute__((always_inline)) void +l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ipv4_hdr; + uint8_t dst_port; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + /* Handle IPv4 headers.*/ + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + sizeof(struct ether_hdr)); + +#ifdef DO_RFC_1812_CHECKS + /* Check to make sure the packet is valid (RFC1812) */ + if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { + rte_pktmbuf_free(m); + return; + } +#endif + + dst_port = get_ipv4_dst_port(ipv4_hdr, portid, + RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct); + if (dst_port >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + +#ifdef DO_RFC_1812_CHECKS + /* Update time to live and header checksum */ + --(ipv4_hdr->time_to_live); + ++(ipv4_hdr->hdr_checksum); +#endif + /* dst addr */ + *(uint64_t 
*)&eth_hdr->d_addr = dest_eth_addr[dst_port]; + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(m, dst_port); + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + /* Handle IPv6 headers.*/ + struct ipv6_hdr *ipv6_hdr; + + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + dst_port = get_ipv6_dst_port(ipv6_hdr, portid, + RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct); + + if (dst_port >= RTE_MAX_ETHPORTS || + (enabled_port_mask & 1 << dst_port) == 0) + dst_port = portid; + + /* dst addr */ + *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port]; + + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(m, dst_port); + } else + /* Free the mbuf that contains non-IPV4/IPV6 packet */ + rte_pktmbuf_free(m); +} + +#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ + (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) +#ifdef DO_RFC_1812_CHECKS + +#define IPV4_MIN_VER_IHL 0x45 +#define IPV4_MAX_VER_IHL 0x4f +#define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL) + +/* Minimum value of IPV4 total length (20B) in network byte order. */ +#define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8) + +/* + * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: + * - The IP version number must be 4. + * - The IP header length field must be large enough to hold the + * minimum length legal IP datagram (20 bytes = 5 words). + * - The IP total length field must be large enough to hold the IP + * datagram header, whose length is specified in the IP header length + * field. + * If we encounter invalid IPV4 packet, then set destination port for it + * to BAD_PORT value. 
+ */ +static inline __attribute__((always_inline)) void +rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint32_t *dp, uint32_t ptype) +{ + uint8_t ihl; + + if (RTE_ETH_IS_IPV4_HDR(ptype)) { + ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; + + ipv4_hdr->time_to_live--; + ipv4_hdr->hdr_checksum++; + + if (ihl > IPV4_MAX_VER_IHL_DIFF || + ((uint8_t)ipv4_hdr->total_length == 0 && + ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) { + dp[0] = BAD_PORT; + } + } +} + +#else +/* No-op stub; takes the same three arguments as the real function. */ +#define rfc1812_process(mb, dp, ptype) do { } while (0) +#endif /* DO_RFC_1812_CHECKS */ +#endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */ + + +#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ + (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) + +/* + * Return the LPM next hop for pkt: the IPv4 table is searched with dst_ipv4, + * the IPv6 table with the destination address read from the packet. + * portid is returned when the lookup misses or the packet is neither + * IPv4 nor IPv6. + */ +static inline __attribute__((always_inline)) uint16_t +get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint8_t portid) +{ + uint32_t next_hop_ipv4; + uint8_t next_hop_ipv6; + struct ipv6_hdr *ipv6_hdr; + struct ether_hdr *eth_hdr; + + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { + /* Non-zero result means no route: fall back to incoming port. */ + if (rte_lpm_lookup(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, + dst_ipv4, &next_hop_ipv4) != 0) + next_hop_ipv4 = portid; + return next_hop_ipv4; + } + + if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + if (rte_lpm6_lookup(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct, + ipv6_hdr->dst_addr, &next_hop_ipv6) != 0) + next_hop_ipv6 = portid; + return next_hop_ipv6; + } + + /* Neither IPv4 nor IPv6. */ + return portid; +} + +static inline void +process_packet(struct rte_mbuf *pkt, uint32_t *dst_port, uint8_t portid) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ipv4_hdr; + uint32_t dst_ipv4; + uint16_t dp; + __m128i te, ve; + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + + dst_ipv4 = ipv4_hdr->dst_addr; + dst_ipv4 = rte_be_to_cpu_32(dst_ipv4); + dp = get_dst_port(pkt, dst_ipv4, portid); + + te = 
_mm_load_si128((__m128i *)eth_hdr); + ve = val_eth[dp]; + + dst_port[0] = dp; + rfc1812_process(ipv4_hdr, dst_port, pkt->packet_type); + + te = _mm_blend_epi16(te, ve, MASK_ETH); + _mm_store_si128((__m128i *)eth_hdr, te); +} + +/* + * Read packet_type and destination IPV4 addresses from 4 mbufs. + */ +static inline void +processx4_step1(struct rte_mbuf *pkt[FWDSTEP], + __m128i *dip, + uint32_t *ipv4_flag) +{ + struct ipv4_hdr *ipv4_hdr; + struct ether_hdr *eth_hdr; + uint32_t x0, x1, x2, x3; + + eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + x0 = ipv4_hdr->dst_addr; + ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4; + + eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + x1 = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[1]->packet_type; + + eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + x2 = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[2]->packet_type; + + eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + x3 = ipv4_hdr->dst_addr; + ipv4_flag[0] &= pkt[3]->packet_type; + + dip[0] = _mm_set_epi32(x3, x2, x1, x0); +} + +/* + * Lookup into LPM for destination port. + * If lookup fails, use incoming port (portid) as destination port. + */ +static inline void +processx4_step2(__m128i dip, + uint32_t ipv4_flag, + uint32_t portid, + struct rte_mbuf *pkt[FWDSTEP], + uint32_t dprt[FWDSTEP]) +{ + rte_xmm_t dst; + const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, + 4, 5, 6, 7, 0, 1, 2, 3); + + /* Byte swap 4 IPV4 addresses. */ + dip = _mm_shuffle_epi8(dip, bswap_mask); + + /* if all 4 packets are IPV4. 
*/ + if (likely(ipv4_flag)) { + rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dip, + dprt, portid); + } else { + dst.x = dip; + dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid); + dprt[1] = get_dst_port(pkt[1], dst.u32[1], portid); + dprt[2] = get_dst_port(pkt[2], dst.u32[2], portid); + dprt[3] = get_dst_port(pkt[3], dst.u32[3], portid); + } +} + +/* + * Update source and destination MAC addresses in the ethernet header. + * Perform RFC1812 checks and updates for IPV4 packets. + */ +static inline void +processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint32_t dst_port[FWDSTEP]) +{ + __m128i te[FWDSTEP]; + __m128i ve[FWDSTEP]; + __m128i *p[FWDSTEP]; + + p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *); + p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *); + p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *); + p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *); + + ve[0] = val_eth[dst_port[0]]; + te[0] = _mm_load_si128(p[0]); + + ve[1] = val_eth[dst_port[1]]; + te[1] = _mm_load_si128(p[1]); + + ve[2] = val_eth[dst_port[2]]; + te[2] = _mm_load_si128(p[2]); + + ve[3] = val_eth[dst_port[3]]; + te[3] = _mm_load_si128(p[3]); + + /* Update first 12 bytes, keep rest bytes intact. 
*/ + te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH); + te[1] = _mm_blend_epi16(te[1], ve[1], MASK_ETH); + te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH); + te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH); + + _mm_store_si128(p[0], te[0]); + _mm_store_si128(p[1], te[1]); + _mm_store_si128(p[2], te[2]); + _mm_store_si128(p[3], te[3]); + + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1), + &dst_port[0], pkt[0]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1), + &dst_port[1], pkt[1]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1), + &dst_port[2], pkt[2]->packet_type); + rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1), + &dst_port[3], pkt[3]->packet_type); +} + +/* + * We group consecutive packets with the same destionation port into one burst. + * To avoid extra latency this is done together with some other packet + * processing, but after we made a final decision about packet's destination. + * To do this we maintain: + * pnum - array of number of consecutive packets with the same dest port for + * each packet in the input burst. + * lp - pointer to the last updated element in the pnum. + * dlp - dest port value lp corresponds to. + */ + +#define GRPSZ (1 << FWDSTEP) +#define GRPMSK (GRPSZ - 1) + +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ + if (likely((dlp) == (dcp)[(idx)])) { \ + (lp)[0]++; \ + } else { \ + (dlp) = (dcp)[idx]; \ + (lp) = (pn) + (idx); \ + (lp)[0] = 1; \ + } \ +} while (0) + +/* + * Group consecutive packets with the same destination port in bursts of 4. + * Suppose we have array of destionation ports: + * dst_port[] = {a, b, c, d,, e, ... } + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. + * We doing 4 comparisions at once and the result is 4 bit mask. + * This mask is used as an index into prebuild array of pnum values. 
+ */ +static inline uint16_t * +port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) +{ + static const struct { + uint64_t pnum; /* prebuild 4 values for pnum[]. */ + int32_t idx; /* index for new last updated elemnet. */ + uint16_t lpv; /* add value to the last updated element. */ + } gptbl[GRPSZ] = { + { + /* 0: a != b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010001), + .idx = 4, + .lpv = 0, + }, + { + /* 1: a == b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010002), + .idx = 4, + .lpv = 1, + }, + { + /* 2: a != b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020001), + .idx = 4, + .lpv = 0, + }, + { + /* 3: a == b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020003), + .idx = 4, + .lpv = 2, + }, + { + /* 4: a != b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010001), + .idx = 4, + .lpv = 0, + }, + { + /* 5: a == b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010002), + .idx = 4, + .lpv = 1, + }, + { + /* 6: a != b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030001), + .idx = 4, + .lpv = 0, + }, + { + /* 7: a == b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030004), + .idx = 4, + .lpv = 3, + }, + { + /* 8: a != b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010001), + .idx = 3, + .lpv = 0, + }, + { + /* 9: a == b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010002), + .idx = 3, + .lpv = 1, + }, + { + /* 0xa: a != b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020001), + .idx = 3, + .lpv = 0, + }, + { + /* 0xb: a == b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020003), + .idx = 3, + .lpv = 2, + }, + { + /* 0xc: a != b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010001), + .idx = 2, + .lpv = 0, + }, + { + /* 0xd: a == b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010002), + .idx = 2, + .lpv = 1, + }, + { + /* 0xe: a != b, b == c, c == d, d == e */ + 
.pnum = UINT64_C(0x0002000300040001), + .idx = 1, + .lpv = 0, + }, + { + /* 0xf: a == b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040005), + .idx = 0, + .lpv = 4, + }, + }; + + union { + uint16_t u16[FWDSTEP + 1]; + uint64_t u64; + } *pnum = (void *)pn; + + int32_t v; + + dp1 = _mm_cmpeq_epi16(dp1, dp2); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + v = _mm_movemask_ps((__m128)dp1); + + /* update last port counter. */ + lp[0] += gptbl[v].lpv; + + /* if dest port value has changed. */ + if (v != GRPMSK) { + lp = pnum->u16 + gptbl[v].idx; + lp[0] = 1; + pnum->u64 = gptbl[v].pnum; + } + + return lp; +} + +#endif /* APP_LOOKUP_METHOD */ + +static void +process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx, + uint8_t portid) { + + int j; + +#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ + (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) + int32_t k; + uint16_t dlp; + uint16_t *lp; + uint32_t dst_port[MAX_PKT_BURST]; + __m128i dip[MAX_PKT_BURST / FWDSTEP]; + uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; + uint16_t pnum[MAX_PKT_BURST + 1]; +#endif + + +#if (ENABLE_MULTI_BUFFER_OPTIMIZE == 1) +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + { + /* + * Send nb_rx - nb_rx%8 packets + * in groups of 8. 
+ */ + int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8); + + for (j = 0; j < n; j += 8) { + uint32_t pkt_type = + pkts_burst[j]->packet_type & + pkts_burst[j+1]->packet_type & + pkts_burst[j+2]->packet_type & + pkts_burst[j+3]->packet_type & + pkts_burst[j+4]->packet_type & + pkts_burst[j+5]->packet_type & + pkts_burst[j+6]->packet_type & + pkts_burst[j+7]->packet_type; + if (pkt_type & RTE_PTYPE_L3_IPV4) { + simple_ipv4_fwd_8pkts(&pkts_burst[j], portid); + } else if (pkt_type & + RTE_PTYPE_L3_IPV6) { + simple_ipv6_fwd_8pkts(&pkts_burst[j], portid); + } else { + l3fwd_simple_forward(pkts_burst[j], portid); + l3fwd_simple_forward(pkts_burst[j+1], portid); + l3fwd_simple_forward(pkts_burst[j+2], portid); + l3fwd_simple_forward(pkts_burst[j+3], portid); + l3fwd_simple_forward(pkts_burst[j+4], portid); + l3fwd_simple_forward(pkts_burst[j+5], portid); + l3fwd_simple_forward(pkts_burst[j+6], portid); + l3fwd_simple_forward(pkts_burst[j+7], portid); + } + } + for (; j < nb_rx ; j++) + l3fwd_simple_forward(pkts_burst[j], portid); + } +#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) + + k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); + for (j = 0; j != k; j += FWDSTEP) + processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP], + &ipv4_flag[j / FWDSTEP]); + + k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); + for (j = 0; j != k; j += FWDSTEP) + processx4_step2(dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP], + portid, &pkts_burst[j], &dst_port[j]); + + /* + * Finish packet processing and group consecutive + * packets with the same destination port. + */ + k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); + if (k != 0) { + __m128i dp1, dp2; + + lp = pnum; + lp[0] = 1; + + processx4_step3(pkts_burst, dst_port); + + /* dp1: <d[0], d[1], d[2], d[3], ... > */ + dp1 = _mm_loadu_si128((__m128i *)dst_port); + + for (j = FWDSTEP; j != k; j += FWDSTEP) { + processx4_step3(&pkts_burst[j], &dst_port[j]); + + /* + * dp2: + * <d[j-3], d[j-2], d[j-1], d[j], ... 
> + */ + dp2 = _mm_loadu_si128( + (__m128i *)&dst_port[j - FWDSTEP + 1]); + lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); + + /* + * dp1: + * <d[j], d[j+1], d[j+2], d[j+3], ... > + */ + dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) * + sizeof(dst_port[0])); + } + + /* + * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... > + */ + dp2 = _mm_shufflelo_epi16(dp1, 0xf9); + lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); + + /* + * remove values added by the last repeated + * dst port. + */ + lp[0]--; + dlp = dst_port[j - 1]; + } else { + /* set dlp and lp to the never used values. */ + dlp = BAD_PORT - 1; + lp = pnum + MAX_PKT_BURST; + } + + /* Process up to last 3 packets one by one. */ + switch (nb_rx % FWDSTEP) { + case 3: + process_packet(pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + case 2: + process_packet(pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + case 1: + process_packet(pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); + j++; + } + + /* + * Send packets out, through destination port. + * Consecuteve pacekts with the same destination port + * are already grouped together. + * If destination port for the packet equals BAD_PORT, + * then free the packet without sending it out. 
+ */ + for (j = 0; j < nb_rx; j += k) { + + int32_t m; + uint16_t pn; + + pn = dst_port[j]; + k = pnum[j]; + + if (likely(pn != BAD_PORT)) + send_packetsx4(pn, pkts_burst + j, k); + else + for (m = j; m != j + k; m++) + rte_pktmbuf_free(pkts_burst[m]); + + } + +#endif /* APP_LOOKUP_METHOD */ +#else /* ENABLE_MULTI_BUFFER_OPTIMIZE == 0 */ + + /* Prefetch first packets */ + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *)); + + /* Prefetch and forward already prefetched packets */ + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ + j + PREFETCH_OFFSET], void *)); + l3fwd_simple_forward(pkts_burst[j], portid); + } + + /* Forward remaining prefetched packets */ + for (; j < nb_rx; j++) + l3fwd_simple_forward(pkts_burst[j], portid); + +#endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */ + +} + +#if (APP_CPU_LOAD > 0) + +/* + * CPU-load stats collector + */ +static int +cpu_load_collector(__rte_unused void *arg) { + unsigned i, j, k; + uint64_t hits; + uint64_t prev_tsc, diff_tsc, cur_tsc; + uint64_t total[MAX_CPU] = { 0 }; + unsigned min_cpu = MAX_CPU; + unsigned max_cpu = 0; + unsigned cpu_id; + int busy_total = 0; + int busy_flag = 0; + + unsigned int n_thread_per_cpu[MAX_CPU] = { 0 }; + struct thread_conf *thread_per_cpu[MAX_CPU][MAX_THREAD]; + + struct thread_conf *thread_conf; + + const uint64_t interval_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / + US_PER_S * CPU_LOAD_TIMEOUT_US; + + prev_tsc = 0; + /* + * Wait for all threads + */ + + printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread, + n_tx_thread); + + while (rte_atomic16_read(&rx_counter) < n_rx_thread) + rte_pause(); + + while (rte_atomic16_read(&tx_counter) < n_tx_thread) + rte_pause(); + + for (i = 0; i < n_rx_thread; i++) { + + thread_conf = &rx_thread[i].conf; + cpu_id = thread_conf->cpu_id; + thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf; + + if (cpu_id > max_cpu) + max_cpu = 
cpu_id; + if (cpu_id < min_cpu) + min_cpu = cpu_id; + } + for (i = 0; i < n_tx_thread; i++) { + + thread_conf = &tx_thread[i].conf; + cpu_id = thread_conf->cpu_id; + thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf; + + if (thread_conf->cpu_id > max_cpu) + max_cpu = thread_conf->cpu_id; + if (thread_conf->cpu_id < min_cpu) + min_cpu = thread_conf->cpu_id; + } + + while (1) { + + cpu_load.counter++; + for (i = min_cpu; i <= max_cpu; i++) { + for (j = 0; j < MAX_CPU_COUNTER; j++) { + for (k = 0; k < n_thread_per_cpu[i]; k++) + if (thread_per_cpu[i][k]->busy[j]) { + busy_flag = 1; + break; + } + if (busy_flag) { + cpu_load.hits[j][i]++; + busy_total = 1; + busy_flag = 0; + } + } + + if (busy_total) { + total[i]++; + busy_total = 0; + } + } + + cur_tsc = rte_rdtsc(); + + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > interval_tsc)) { + + printf("\033c"); + + printf("Cpu usage for %d rx threads and %d tx threads:\n\n", + n_rx_thread, n_tx_thread); + + printf("cpu# proc%% poll%% overhead%%\n\n"); + + for (i = min_cpu; i <= max_cpu; i++) { + hits = 0; + printf("CPU %d:", i); + for (j = 0; j < MAX_CPU_COUNTER; j++) { + printf("%7" PRIu64 "", + cpu_load.hits[j][i] * 100 / cpu_load.counter); + hits += cpu_load.hits[j][i]; + cpu_load.hits[j][i] = 0; + } + printf("%7" PRIu64 "\n", + 100 - total[i] * 100 / cpu_load.counter); + total[i] = 0; + } + cpu_load.counter = 0; + + prev_tsc = cur_tsc; + } + + } +} +#endif /* APP_CPU_LOAD */ + +/* + * Null processing lthread loop + * + * This loop is used to start empty scheduler on lcore. 
+ */ +static void +lthread_null(__rte_unused void *args) +{ + int lcore_id = rte_lcore_id(); + + RTE_LOG(INFO, L3FWD, "Starting scheduler on lcore %d.\n", lcore_id); + lthread_exit(NULL); +} + +/* main processing loop */ +static void +lthread_tx_per_ring(void *dummy) +{ + int nb_rx; + uint8_t portid; + struct rte_ring *ring; + struct thread_tx_conf *tx_conf; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct lthread_cond *ready; + + tx_conf = (struct thread_tx_conf *)dummy; + ring = tx_conf->ring; + ready = *tx_conf->ready; + + lthread_set_data((void *)tx_conf); + + /* + * Move this lthread to lcore + */ + lthread_set_affinity(tx_conf->conf.lcore_id); + + RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id()); + + nb_rx = 0; + rte_atomic16_inc(&tx_counter); + while (1) { + + /* + * Read packet from ring + */ + SET_CPU_BUSY(tx_conf, CPU_POLL); + nb_rx = rte_ring_sc_dequeue_burst(ring, (void **)pkts_burst, + MAX_PKT_BURST); + SET_CPU_IDLE(tx_conf, CPU_POLL); + + if (nb_rx > 0) { + SET_CPU_BUSY(tx_conf, CPU_PROCESS); + portid = pkts_burst[0]->port; + process_burst(pkts_burst, nb_rx, portid); + SET_CPU_IDLE(tx_conf, CPU_PROCESS); + lthread_yield(); + } else + lthread_cond_wait(ready, 0); + + } +} + +/* + * Main tx-lthreads spawner lthread. + * + * This lthread is used to spawn one new lthread per ring from producers. 
+ * + */ +static void +lthread_tx(void *args) +{ + struct lthread *lt; + + unsigned lcore_id; + uint8_t portid; + struct thread_tx_conf *tx_conf; + + tx_conf = (struct thread_tx_conf *)args; + lthread_set_data((void *)tx_conf); + + /* + * Move this lthread to the selected lcore + */ + lthread_set_affinity(tx_conf->conf.lcore_id); + + /* + * Spawn tx readers (one per input ring) + */ + lthread_create(&lt, tx_conf->conf.lcore_id, lthread_tx_per_ring, + (void *)tx_conf); + + lcore_id = rte_lcore_id(); + + RTE_LOG(INFO, L3FWD, "Entering Tx main loop on lcore %u\n", lcore_id); + + tx_conf->conf.cpu_id = sched_getcpu(); + while (1) { + + lthread_sleep(BURST_TX_DRAIN_US * 1000); + + /* + * TX burst queue drain + */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + if (tx_conf->tx_mbufs[portid].len == 0) + continue; + SET_CPU_BUSY(tx_conf, CPU_PROCESS); + send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid); + SET_CPU_IDLE(tx_conf, CPU_PROCESS); + tx_conf->tx_mbufs[portid].len = 0; + } + + } +} + +static void +lthread_rx(void *dummy) +{ + int ret; + uint16_t nb_rx; + int i; + uint8_t portid, queueid; + int worker_id; + int len[RTE_MAX_LCORE] = { 0 }; + int old_len, new_len; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct thread_rx_conf *rx_conf; + + rx_conf = (struct thread_rx_conf *)dummy; + lthread_set_data((void *)rx_conf); + + /* + * Move this lthread to lcore + */ + lthread_set_affinity(rx_conf->conf.lcore_id); + + if (rx_conf->n_rx_queue == 0) { + RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", rte_lcore_id()); + return; + } + + RTE_LOG(INFO, L3FWD, "Entering main Rx loop on lcore %u\n", rte_lcore_id()); + + for (i = 0; i < rx_conf->n_rx_queue; i++) { + + portid = rx_conf->rx_queue_list[i].port_id; + queueid = rx_conf->rx_queue_list[i].queue_id; + RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", + rte_lcore_id(), portid, queueid); + } + + /* + * Init all condition variables (one per rx thread) + */ + for (i = 0; i <
rx_conf->n_rx_queue; i++) + lthread_cond_init(NULL, &rx_conf->ready[i], NULL); + + worker_id = 0; + + rx_conf->conf.cpu_id = sched_getcpu(); + rte_atomic16_inc(&rx_counter); + while (1) { + + /* + * Read packet from RX queues + */ + for (i = 0; i < rx_conf->n_rx_queue; ++i) { + portid = rx_conf->rx_queue_list[i].port_id; + queueid = rx_conf->rx_queue_list[i].queue_id; + + SET_CPU_BUSY(rx_conf, CPU_POLL); + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, + MAX_PKT_BURST); + SET_CPU_IDLE(rx_conf, CPU_POLL); + + if (nb_rx != 0) { + worker_id = (worker_id + 1) % rx_conf->n_ring; + old_len = len[worker_id]; + + SET_CPU_BUSY(rx_conf, CPU_PROCESS); + ret = rte_ring_sp_enqueue_burst( + rx_conf->ring[worker_id], + (void **) pkts_burst, + nb_rx); + + new_len = old_len + ret; + + if (new_len >= BURST_SIZE) { + lthread_cond_signal(rx_conf->ready[worker_id]); + new_len = 0; + } + + len[worker_id] = new_len; + + if (unlikely(ret < nb_rx)) { + uint32_t k; + + for (k = ret; k < nb_rx; k++) { + struct rte_mbuf *m = pkts_burst[k]; + + rte_pktmbuf_free(m); + } + } + SET_CPU_IDLE(rx_conf, CPU_PROCESS); + } + + lthread_yield(); + } + } +} + +/* + * Start scheduler with initial lthread on lcore + * + * This lthread loop spawns all rx and tx lthreads on master lcore + */ + +static void +lthread_spawner(__rte_unused void *arg) { + struct lthread *lt[MAX_THREAD]; + int i; + int n_thread = 0; + + printf("Entering lthread_spawner\n"); + + /* + * Create producers (rx threads) on default lcore + */ + for (i = 0; i < n_rx_thread; i++) { + rx_thread[i].conf.thread_id = i; + lthread_create(&lt[n_thread], -1, lthread_rx, + (void *)&rx_thread[i]); + n_thread++; + } + + /* + * Wait for all producers. Since some producers can be started on the same + * scheduler as this lthread, yielding is required to let them run and + * prevent deadlock here.
+ */ + while (rte_atomic16_read(&rx_counter) < n_rx_thread) + lthread_sleep(100000); + + /* + * Create consumers (tx threads) on default lcore_id + */ + for (i = 0; i < n_tx_thread; i++) { + tx_thread[i].conf.thread_id = i; + lthread_create(&lt[n_thread], -1, lthread_tx, + (void *)&tx_thread[i]); + n_thread++; + } + + /* + * Wait for all threads to finish + */ + for (i = 0; i < n_thread; i++) + lthread_join(lt[i], NULL); + +} + +/* + * Start master scheduler with initial lthread spawning rx and tx lthreads + * (main_lthread_master). + */ +static int +lthread_master_spawner(__rte_unused void *arg) { + struct lthread *lt; + int lcore_id = rte_lcore_id(); + + RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; + lthread_create(&lt, -1, lthread_spawner, NULL); + lthread_run(); + + return 0; +} + +/* + * Start scheduler on lcore. + */ +static int +sched_spawner(__rte_unused void *arg) { + struct lthread *lt; + int lcore_id = rte_lcore_id(); + +#if (APP_CPU_LOAD) + if (lcore_id == cpu_load_lcore_id) { + cpu_load_collector(arg); + return 0; + } +#endif /* APP_CPU_LOAD */ + + RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; + lthread_create(&lt, -1, lthread_null, NULL); + lthread_run(); + + return 0; +} + +/* main processing loop */ +static int +pthread_tx(void *dummy) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + uint64_t prev_tsc, diff_tsc, cur_tsc; + int nb_rx; + uint8_t portid; + struct thread_tx_conf *tx_conf; + + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / + US_PER_S * BURST_TX_DRAIN_US; + + prev_tsc = 0; + + tx_conf = (struct thread_tx_conf *)dummy; + + RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id()); + + tx_conf->conf.cpu_id = sched_getcpu(); + rte_atomic16_inc(&tx_counter); + while (1) { + + cur_tsc = rte_rdtsc(); + + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + /* + * This could be optimized (use queueid instead of + * portid), but it is not called so
often + */ + SET_CPU_BUSY(tx_conf, CPU_PROCESS); + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + if (tx_conf->tx_mbufs[portid].len == 0) + continue; + send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid); + tx_conf->tx_mbufs[portid].len = 0; + } + SET_CPU_IDLE(tx_conf, CPU_PROCESS); + + prev_tsc = cur_tsc; + } + + /* + * Read packet from ring + */ + SET_CPU_BUSY(tx_conf, CPU_POLL); + nb_rx = rte_ring_sc_dequeue_burst(tx_conf->ring, + (void **)pkts_burst, MAX_PKT_BURST); + SET_CPU_IDLE(tx_conf, CPU_POLL); + + if (unlikely(nb_rx == 0)) { + sched_yield(); + continue; + } + + SET_CPU_BUSY(tx_conf, CPU_PROCESS); + portid = pkts_burst[0]->port; + process_burst(pkts_burst, nb_rx, portid); + SET_CPU_IDLE(tx_conf, CPU_PROCESS); + + } +} + +static int +pthread_rx(void *dummy) +{ + int i; + int worker_id; + uint32_t n; + uint32_t nb_rx; + unsigned lcore_id; + uint8_t portid, queueid; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + + struct thread_rx_conf *rx_conf; + + lcore_id = rte_lcore_id(); + rx_conf = (struct thread_rx_conf *)dummy; + + if (rx_conf->n_rx_queue == 0) { + RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id); + return 0; + } + + RTE_LOG(INFO, L3FWD, "entering main rx loop on lcore %u\n", lcore_id); + + for (i = 0; i < rx_conf->n_rx_queue; i++) { + + portid = rx_conf->rx_queue_list[i].port_id; + queueid = rx_conf->rx_queue_list[i].queue_id; + RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", + lcore_id, portid, queueid); + } + + worker_id = 0; + rx_conf->conf.cpu_id = sched_getcpu(); + rte_atomic16_inc(&rx_counter); + while (1) { + + /* + * Read packet from RX queues + */ + for (i = 0; i < rx_conf->n_rx_queue; ++i) { + portid = rx_conf->rx_queue_list[i].port_id; + queueid = rx_conf->rx_queue_list[i].queue_id; + + SET_CPU_BUSY(rx_conf, CPU_POLL); + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, + MAX_PKT_BURST); + SET_CPU_IDLE(rx_conf, CPU_POLL); + + if (nb_rx == 0) { + sched_yield(); + continue; + } + + 
SET_CPU_BUSY(rx_conf, CPU_PROCESS); + worker_id = (worker_id + 1) % rx_conf->n_ring; + n = rte_ring_sp_enqueue_burst(rx_conf->ring[worker_id], + (void **)pkts_burst, nb_rx); + + if (unlikely(n != nb_rx)) { + uint32_t k; + + for (k = n; k < nb_rx; k++) { + struct rte_mbuf *m = pkts_burst[k]; + + rte_pktmbuf_free(m); + } + } + + SET_CPU_IDLE(rx_conf, CPU_PROCESS); + + } + } +} + +/* + * P-Thread spawner. + */ +static int +pthread_run(__rte_unused void *arg) { + int lcore_id = rte_lcore_id(); + int i; + + for (i = 0; i < n_rx_thread; i++) + if (rx_thread[i].conf.lcore_id == lcore_id) { + printf("Start rx thread on %d...\n", lcore_id); + RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; + RTE_PER_LCORE(lcore_conf)->data = (void *)&rx_thread[i]; + pthread_rx((void *)&rx_thread[i]); + return 0; + } + + for (i = 0; i < n_tx_thread; i++) + if (tx_thread[i].conf.lcore_id == lcore_id) { + printf("Start tx thread on %d...\n", lcore_id); + RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; + RTE_PER_LCORE(lcore_conf)->data = (void *)&tx_thread[i]; + pthread_tx((void *)&tx_thread[i]); + return 0; + } + +#if (APP_CPU_LOAD) + if (lcore_id == cpu_load_lcore_id) + cpu_load_collector(arg); +#endif /* APP_CPU_LOAD */ + + return 0; +} + +static int +check_lcore_params(void) +{ + uint8_t queue, lcore; + uint16_t i; + int socketid; + + for (i = 0; i < nb_rx_thread_params; ++i) { + queue = rx_thread_params[i].queue_id; + if (queue >= MAX_RX_QUEUE_PER_PORT) { + printf("invalid queue number: %hhu\n", queue); + return -1; + } + lcore = rx_thread_params[i].lcore_id; + if (!rte_lcore_is_enabled(lcore)) { + printf("error: lcore %hhu is not enabled in lcore mask\n", lcore); + return -1; + } + socketid = rte_lcore_to_socket_id(lcore); + if ((socketid != 0) && (numa_on == 0)) + printf("warning: lcore %hhu is on socket %d with numa off\n", + lcore, socketid); + } + return 0; +} + +static int +check_port_config(const unsigned nb_ports) +{ + unsigned portid; + uint16_t i; + + for (i = 0; i < 
nb_rx_thread_params; ++i) { + portid = rx_thread_params[i].port_id; + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("port %u is not enabled in port mask\n", portid); + return -1; + } + if (portid >= nb_ports) { + printf("port %u is not present on the board\n", portid); + return -1; + } + } + return 0; +} + +static uint8_t +get_port_n_rx_queues(const uint8_t port) +{ + int queue = -1; + uint16_t i; + + for (i = 0; i < nb_rx_thread_params; ++i) + if (rx_thread_params[i].port_id == port && + rx_thread_params[i].queue_id > queue) + queue = rx_thread_params[i].queue_id; + + return (uint8_t)(++queue); +} + +static int +init_rx_rings(void) +{ + unsigned socket_io; + struct thread_rx_conf *rx_conf; + struct thread_tx_conf *tx_conf; + unsigned rx_thread_id, tx_thread_id; + char name[256]; + struct rte_ring *ring = NULL; + + for (tx_thread_id = 0; tx_thread_id < n_tx_thread; tx_thread_id++) { + + tx_conf = &tx_thread[tx_thread_id]; + + printf("Connecting tx-thread %d with rx-thread %d\n", tx_thread_id, + tx_conf->conf.thread_id); + + rx_thread_id = tx_conf->conf.thread_id; + if (rx_thread_id > n_tx_thread) { + printf("connection from tx-thread %u to rx-thread %u fails " + "(rx-thread not defined)\n", tx_thread_id, rx_thread_id); + return -1; + } + + rx_conf = &rx_thread[rx_thread_id]; + socket_io = rte_lcore_to_socket_id(rx_conf->conf.lcore_id); + + snprintf(name, sizeof(name), "app_ring_s%u_rx%u_tx%u", + socket_io, rx_thread_id, tx_thread_id); + + ring = rte_ring_create(name, 1024 * 4, socket_io, + RING_F_SP_ENQ | RING_F_SC_DEQ); + + if (ring == NULL) { + rte_panic("Cannot create ring to connect rx-thread %u " + "with tx-thread %u\n", rx_thread_id, tx_thread_id); + } + + rx_conf->ring[rx_conf->n_ring] = ring; + + tx_conf->ring = ring; + tx_conf->ready = &rx_conf->ready[rx_conf->n_ring]; + + rx_conf->n_ring++; + } + return 0; +} + +static int +init_rx_queues(void) +{ + uint16_t i, nb_rx_queue; + uint8_t thread; + + n_rx_thread = 0; + + for (i = 0; i < 
nb_rx_thread_params; ++i) { + thread = rx_thread_params[i].thread_id; + nb_rx_queue = rx_thread[thread].n_rx_queue; + + if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { + printf("error: too many queues (%u) for thread: %u\n", + (unsigned)nb_rx_queue + 1, (unsigned)thread); + return -1; + } + + rx_thread[thread].conf.thread_id = thread; + rx_thread[thread].conf.lcore_id = rx_thread_params[i].lcore_id; + rx_thread[thread].rx_queue_list[nb_rx_queue].port_id = + rx_thread_params[i].port_id; + rx_thread[thread].rx_queue_list[nb_rx_queue].queue_id = + rx_thread_params[i].queue_id; + rx_thread[thread].n_rx_queue++; + + if (thread >= n_rx_thread) + n_rx_thread = thread + 1; + + } + return 0; +} + +static int +init_tx_threads(void) +{ + int i; + + n_tx_thread = 0; + for (i = 0; i < nb_tx_thread_params; ++i) { + tx_thread[n_tx_thread].conf.thread_id = tx_thread_params[i].thread_id; + tx_thread[n_tx_thread].conf.lcore_id = tx_thread_params[i].lcore_id; + n_tx_thread++; + } + return 0; +} + +/* display usage */ +static void +print_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK -P" + " [--rx (port,queue,lcore,thread)[,(port,queue,lcore,thread]]" + " [--tx (lcore,thread)[,(lcore,thread]]" + " [--enable-jumbo [--max-pkt-len PKTLEN]]\n" + " -p PORTMASK: hexadecimal bitmask of ports to configure\n" + " -P : enable promiscuous mode\n" + " --rx (port,queue,lcore,thread): rx queues configuration\n" + " --tx (lcore,thread): tx threads configuration\n" + " --stat-lcore LCORE: use lcore for stat collector\n" + " --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n" + " --no-numa: optional, disable numa awareness\n" + " --ipv6: optional, specify it if running ipv6 packets\n" + " --enable-jumbo: enable jumbo frame" + " which max packet len is PKTLEN in decimal (64-9600)\n" + " --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n" + " --no-lthreads: turn off lthread model\n", + prgname); +} + +static int 
parse_max_pkt_len(const char *pktlen) +{ + char *end = NULL; + unsigned long len; + + /* parse decimal string */ + len = strtoul(pktlen, &end, 10); + if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (len == 0) + return -1; + + return len; +} + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) +static int +parse_hash_entry_number(const char *hash_entry_num) +{ + char *end = NULL; + unsigned long hash_en; + + /* parse hexadecimal string */ + hash_en = strtoul(hash_entry_num, &end, 16); + if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (hash_en == 0) + return -1; + + return hash_en; +} +#endif + +static int +parse_rx_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + enum fieldnames { + FLD_PORT = 0, + FLD_QUEUE, + FLD_LCORE, + FLD_THREAD, + _NUM_FLD + }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int i; + unsigned size; + + nb_rx_thread_params = 0; + + while ((p = strchr(p0, '(')) != NULL) { + ++p; + p0 = strchr(p, ')'); + if (p0 == NULL) + return -1; + + size = p0 - p; + if (size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++) { + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + if (nb_rx_thread_params >= MAX_LCORE_PARAMS) { + printf("exceeded max number of rx params: %hu\n", + nb_rx_thread_params); + return -1; + } + rx_thread_params_array[nb_rx_thread_params].port_id = + (uint8_t)int_fld[FLD_PORT]; + 
rx_thread_params_array[nb_rx_thread_params].queue_id = + (uint8_t)int_fld[FLD_QUEUE]; + rx_thread_params_array[nb_rx_thread_params].lcore_id = + (uint8_t)int_fld[FLD_LCORE]; + rx_thread_params_array[nb_rx_thread_params].thread_id = + (uint8_t)int_fld[FLD_THREAD]; + ++nb_rx_thread_params; + } + rx_thread_params = rx_thread_params_array; + return 0; +} + +static int +parse_tx_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + enum fieldnames { + FLD_LCORE = 0, + FLD_THREAD, + _NUM_FLD + }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int i; + unsigned size; + + nb_tx_thread_params = 0; + + while ((p = strchr(p0, '(')) != NULL) { + ++p; + p0 = strchr(p, ')'); + if (p0 == NULL) + return -1; + + size = p0 - p; + if (size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++) { + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + if (nb_tx_thread_params >= MAX_LCORE_PARAMS) { + printf("exceeded max number of tx params: %hu\n", + nb_tx_thread_params); + return -1; + } + tx_thread_params_array[nb_tx_thread_params].lcore_id = + (uint8_t)int_fld[FLD_LCORE]; + tx_thread_params_array[nb_tx_thread_params].thread_id = + (uint8_t)int_fld[FLD_THREAD]; + ++nb_tx_thread_params; + } + tx_thread_params = tx_thread_params_array; + + return 0; +} + +#if (APP_CPU_LOAD > 0) +static int +parse_stat_lcore(const char *stat_lcore) +{ + char *end = NULL; + unsigned long lcore_id; + + lcore_id = strtoul(stat_lcore, &end, 10); + if ((stat_lcore[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + return lcore_id; +} +#endif + +static void +parse_eth_dest(const char *optarg) +{ + uint8_t portid; + char *port_end; + uint8_t c, *dest, peer_addr[6]; + + errno = 0; + portid = strtoul(optarg, &port_end, 10); + if (errno 
!= 0 || port_end == optarg || *port_end++ != ',') + rte_exit(EXIT_FAILURE, + "Invalid eth-dest: %s", optarg); + if (portid >= RTE_MAX_ETHPORTS) + rte_exit(EXIT_FAILURE, + "eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n", + portid, RTE_MAX_ETHPORTS); + + if (cmdline_parse_etheraddr(NULL, port_end, + &peer_addr, sizeof(peer_addr)) < 0) + rte_exit(EXIT_FAILURE, + "Invalid ethernet address: %s\n", + port_end); + dest = (uint8_t *)&dest_eth_addr[portid]; + for (c = 0; c < 6; c++) + dest[c] = peer_addr[c]; + *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; +} + +#define CMD_LINE_OPT_RX_CONFIG "rx" +#define CMD_LINE_OPT_TX_CONFIG "tx" +#define CMD_LINE_OPT_STAT_LCORE "stat-lcore" +#define CMD_LINE_OPT_ETH_DEST "eth-dest" +#define CMD_LINE_OPT_NO_NUMA "no-numa" +#define CMD_LINE_OPT_IPV6 "ipv6" +#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo" +#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num" +#define CMD_LINE_OPT_NO_LTHREADS "no-lthreads" + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + static struct option lgopts[] = { + {CMD_LINE_OPT_RX_CONFIG, 1, 0, 0}, + {CMD_LINE_OPT_TX_CONFIG, 1, 0, 0}, + {CMD_LINE_OPT_STAT_LCORE, 1, 0, 0}, + {CMD_LINE_OPT_ETH_DEST, 1, 0, 0}, + {CMD_LINE_OPT_NO_NUMA, 0, 0, 0}, + {CMD_LINE_OPT_IPV6, 0, 0, 0}, + {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0}, + {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0}, + {CMD_LINE_OPT_NO_LTHREADS, 0, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:P", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + print_usage(prgname); + return -1; + } + break; + case 'P': + printf("Promiscuous mode selected\n"); + promiscuous_on = 1; + break; + + /* long options */ + case 0: + if 
(!strncmp(lgopts[option_index].name, CMD_LINE_OPT_RX_CONFIG, + sizeof(CMD_LINE_OPT_RX_CONFIG))) { + ret = parse_rx_config(optarg); + if (ret) { + printf("invalid rx-config\n"); + print_usage(prgname); + return -1; + } + } + + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_TX_CONFIG, + sizeof(CMD_LINE_OPT_TX_CONFIG))) { + ret = parse_tx_config(optarg); + if (ret) { + printf("invalid tx-config\n"); + print_usage(prgname); + return -1; + } + } + +#if (APP_CPU_LOAD > 0) + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_STAT_LCORE, + sizeof(CMD_LINE_OPT_STAT_LCORE))) { + cpu_load_lcore_id = parse_stat_lcore(optarg); + } +#endif + + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ETH_DEST, + sizeof(CMD_LINE_OPT_ETH_DEST))) + parse_eth_dest(optarg); + + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA, + sizeof(CMD_LINE_OPT_NO_NUMA))) { + printf("numa is disabled\n"); + numa_on = 0; + } + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6, + sizeof(CMD_LINE_OPT_IPV6))) { + printf("ipv6 is specified\n"); + ipv6 = 1; + } +#endif + + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_LTHREADS, + sizeof(CMD_LINE_OPT_NO_LTHREADS))) { + printf("l-threads model is disabled\n"); + lthreads_on = 0; + } + + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO, + sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) { + struct option lenopts = {"max-pkt-len", required_argument, 0, + 0}; + + printf("jumbo frame is enabled - disabling simple TX path\n"); + port_conf.rxmode.jumbo_frame = 1; + + /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */ + if (0 == getopt_long(argc, argvopt, "", &lenopts, + &option_index)) { + + ret = parse_max_pkt_len(optarg); + if ((ret < 64) || (ret > MAX_JUMBO_PKT_LEN)) { + printf("invalid packet length\n"); + print_usage(prgname); + return -1; + } + port_conf.rxmode.max_rx_pkt_len = ret; + } + printf("set jumbo frame max packet length to %u\n", + (unsigned 
int)port_conf.rxmode.max_rx_pkt_len); + } +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM, + sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) { + ret = parse_hash_entry_number(optarg); + if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) { + hash_entry_number = ret; + } else { + printf("invalid hash entry number\n"); + print_usage(prgname); + return -1; + } + } +#endif + break; + + default: + print_usage(prgname); + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +print_ethaddr(const char *name, const struct ether_addr *eth_addr) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); +} + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) + +static void convert_ipv4_5tuple(struct ipv4_5tuple *key1, + union ipv4_5tuple_host *key2) +{ + key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst); + key2->ip_src = rte_cpu_to_be_32(key1->ip_src); + key2->port_dst = rte_cpu_to_be_16(key1->port_dst); + key2->port_src = rte_cpu_to_be_16(key1->port_src); + key2->proto = key1->proto; + key2->pad0 = 0; + key2->pad1 = 0; +} + +static void convert_ipv6_5tuple(struct ipv6_5tuple *key1, + union ipv6_5tuple_host *key2) +{ + uint32_t i; + + for (i = 0; i < 16; i++) { + key2->ip_dst[i] = key1->ip_dst[i]; + key2->ip_src[i] = key1->ip_src[i]; + } + key2->port_dst = rte_cpu_to_be_16(key1->port_dst); + key2->port_src = rte_cpu_to_be_16(key1->port_src); + key2->proto = key1->proto; + key2->pad0 = 0; + key2->pad1 = 0; + key2->reserve = 0; +} + +#define BYTE_VALUE_MAX 256 +#define ALL_32_BITS 0xffffffff +#define BIT_8_TO_15 0x0000ff00 +static inline void +populate_ipv4_few_flow_into_table(const struct rte_hash *h) +{ + uint32_t i; + int32_t ret; + uint32_t array_len = RTE_DIM(ipv4_l3fwd_route_array); + + mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, 
BIT_8_TO_15); + for (i = 0; i < array_len; i++) { + struct ipv4_l3fwd_route entry; + union ipv4_5tuple_host newkey; + + entry = ipv4_l3fwd_route_array[i]; + convert_ipv4_5tuple(&entry.key, &newkey); + ret = rte_hash_add_key(h, (void *)&newkey); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32 + " to the l3fwd hash.\n", i); + } + ipv4_l3fwd_out_if[ret] = entry.if_out; + } + printf("Hash: Adding 0x%" PRIx32 " keys\n", array_len); +} + +#define BIT_16_TO_23 0x00ff0000 +static inline void +populate_ipv6_few_flow_into_table(const struct rte_hash *h) +{ + uint32_t i; + int32_t ret; + uint32_t array_len = RTE_DIM(ipv6_l3fwd_route_array); + + mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23); + mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS); + for (i = 0; i < array_len; i++) { + struct ipv6_l3fwd_route entry; + union ipv6_5tuple_host newkey; + + entry = ipv6_l3fwd_route_array[i]; + convert_ipv6_5tuple(&entry.key, &newkey); + ret = rte_hash_add_key(h, (void *)&newkey); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32 + " to the l3fwd hash.\n", i); + } + ipv6_l3fwd_out_if[ret] = entry.if_out; + } + printf("Hash: Adding 0x%" PRIx32 "keys\n", array_len); +} + +#define NUMBER_PORT_USED 4 +static inline void +populate_ipv4_many_flow_into_table(const struct rte_hash *h, + unsigned int nr_flow) +{ + unsigned i; + + mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15); + + for (i = 0; i < nr_flow; i++) { + struct ipv4_l3fwd_route entry; + union ipv4_5tuple_host newkey; + uint8_t a = (uint8_t)((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX); + uint8_t b = (uint8_t)(((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) % + BYTE_VALUE_MAX); + uint8_t c = (uint8_t)((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX * + BYTE_VALUE_MAX)); + /* Create the ipv4 exact match flow */ + memset(&entry, 0, sizeof(entry)); + switch (i & (NUMBER_PORT_USED - 1)) { + case 0: + entry = ipv4_l3fwd_route_array[0]; + 
entry.key.ip_dst = IPv4(101, c, b, a); + break; + case 1: + entry = ipv4_l3fwd_route_array[1]; + entry.key.ip_dst = IPv4(201, c, b, a); + break; + case 2: + entry = ipv4_l3fwd_route_array[2]; + entry.key.ip_dst = IPv4(111, c, b, a); + break; + case 3: + entry = ipv4_l3fwd_route_array[3]; + entry.key.ip_dst = IPv4(211, c, b, a); + break; + }; + convert_ipv4_5tuple(&entry.key, &newkey); + int32_t ret = rte_hash_add_key(h, (void *)&newkey); + + if (ret < 0) + rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i); + + ipv4_l3fwd_out_if[ret] = (uint8_t)entry.if_out; + + } + printf("Hash: Adding 0x%x keys\n", nr_flow); +} + +static inline void +populate_ipv6_many_flow_into_table(const struct rte_hash *h, + unsigned int nr_flow) +{ + unsigned i; + + mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23); + mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS); + for (i = 0; i < nr_flow; i++) { + struct ipv6_l3fwd_route entry; + union ipv6_5tuple_host newkey; + + uint8_t a = (uint8_t) ((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX); + uint8_t b = (uint8_t) (((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) % + BYTE_VALUE_MAX); + uint8_t c = (uint8_t) ((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX * + BYTE_VALUE_MAX)); + + /* Create the ipv6 exact match flow */ + memset(&entry, 0, sizeof(entry)); + switch (i & (NUMBER_PORT_USED - 1)) { + case 0: + entry = ipv6_l3fwd_route_array[0]; + break; + case 1: + entry = ipv6_l3fwd_route_array[1]; + break; + case 2: + entry = ipv6_l3fwd_route_array[2]; + break; + case 3: + entry = ipv6_l3fwd_route_array[3]; + break; + }; + entry.key.ip_dst[13] = c; + entry.key.ip_dst[14] = b; + entry.key.ip_dst[15] = a; + convert_ipv6_5tuple(&entry.key, &newkey); + int32_t ret = rte_hash_add_key(h, (void *)&newkey); + + if (ret < 0) + rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i); + + ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out; + + } + printf("Hash: Adding 0x%x keys\n", nr_flow); +} + +static void +setup_hash(int socketid) +{ + struct 
rte_hash_parameters ipv4_l3fwd_hash_params = { + .name = NULL, + .entries = L3FWD_HASH_ENTRIES, + .key_len = sizeof(union ipv4_5tuple_host), + .hash_func = ipv4_hash_crc, + .hash_func_init_val = 0, + }; + + struct rte_hash_parameters ipv6_l3fwd_hash_params = { + .name = NULL, + .entries = L3FWD_HASH_ENTRIES, + .key_len = sizeof(union ipv6_5tuple_host), + .hash_func = ipv6_hash_crc, + .hash_func_init_val = 0, + }; + + char s[64]; + + /* create ipv4 hash */ + snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); + ipv4_l3fwd_hash_params.name = s; + ipv4_l3fwd_hash_params.socket_id = socketid; + ipv4_l3fwd_lookup_struct[socketid] = + rte_hash_create(&ipv4_l3fwd_hash_params); + if (ipv4_l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " + "socket %d\n", socketid); + + /* create ipv6 hash */ + snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); + ipv6_l3fwd_hash_params.name = s; + ipv6_l3fwd_hash_params.socket_id = socketid; + ipv6_l3fwd_lookup_struct[socketid] = + rte_hash_create(&ipv6_l3fwd_hash_params); + if (ipv6_l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " + "socket %d\n", socketid); + + if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) { + /* For testing hash matching with a large number of flows we + * generate millions of IP 5-tuples with an incremented dst + * address to initialize the hash table. 
*/ + if (ipv6 == 0) { + /* populate the ipv4 hash */ + populate_ipv4_many_flow_into_table( + ipv4_l3fwd_lookup_struct[socketid], hash_entry_number); + } else { + /* populate the ipv6 hash */ + populate_ipv6_many_flow_into_table( + ipv6_l3fwd_lookup_struct[socketid], hash_entry_number); + } + } else { + /* Use data in ipv4/ipv6 l3fwd lookup table directly to initialize + * the hash table */ + if (ipv6 == 0) { + /* populate the ipv4 hash */ + populate_ipv4_few_flow_into_table( + ipv4_l3fwd_lookup_struct[socketid]); + } else { + /* populate the ipv6 hash */ + populate_ipv6_few_flow_into_table( + ipv6_l3fwd_lookup_struct[socketid]); + } + } +} +#endif + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) +static void +setup_lpm(int socketid) +{ + struct rte_lpm6_config config; + struct rte_lpm_config lpm_ipv4_config; + unsigned i; + int ret; + char s[64]; + + /* create the LPM table */ + snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); + lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; + lpm_ipv4_config.number_tbl8s = 256; + lpm_ipv4_config.flags = 0; + ipv4_l3fwd_lookup_struct[socketid] = + rte_lpm_create(s, socketid, &lpm_ipv4_config); + if (ipv4_l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" + " on socket %d\n", socketid); + + /* populate the LPM table */ + for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { + + /* skip unused ports */ + if ((1 << ipv4_l3fwd_route_array[i].if_out & + enabled_port_mask) == 0) + continue; + + ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], + ipv4_l3fwd_route_array[i].ip, + ipv4_l3fwd_route_array[i].depth, + ipv4_l3fwd_route_array[i].if_out); + + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " + "l3fwd LPM table on socket %d\n", + i, socketid); + } + + printf("LPM: Adding route 0x%08x / %d (%d)\n", + (unsigned)ipv4_l3fwd_route_array[i].ip, + ipv4_l3fwd_route_array[i].depth, + ipv4_l3fwd_route_array[i].if_out); + } + + /* create the LPM6 table */ + 
snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); + + config.max_rules = IPV6_L3FWD_LPM_MAX_RULES; + config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; + config.flags = 0; + ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid, + &config); + if (ipv6_l3fwd_lookup_struct[socketid] == NULL) + rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" + " on socket %d\n", socketid); + + /* populate the LPM table */ + for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { + + /* skip unused ports */ + if ((1 << ipv6_l3fwd_route_array[i].if_out & + enabled_port_mask) == 0) + continue; + + ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid], + ipv6_l3fwd_route_array[i].ip, + ipv6_l3fwd_route_array[i].depth, + ipv6_l3fwd_route_array[i].if_out); + + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " + "l3fwd LPM table on socket %d\n", + i, socketid); + } + + printf("LPM: Adding route %s / %d (%d)\n", + "IPV6", + ipv6_l3fwd_route_array[i].depth, + ipv6_l3fwd_route_array[i].if_out); + } +} +#endif + +static int +init_mem(unsigned nb_mbuf) +{ + struct lcore_conf *qconf; + int socketid; + unsigned lcore_id; + char s[64]; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socketid = rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + if (socketid >= NB_SOCKETS) { + rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n", + socketid, lcore_id, NB_SOCKETS); + } + if (pktmbuf_pool[socketid] == NULL) { + snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); + pktmbuf_pool[socketid] = + rte_pktmbuf_pool_create(s, nb_mbuf, + MEMPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, socketid); + if (pktmbuf_pool[socketid] == NULL) + rte_exit(EXIT_FAILURE, + "Cannot init mbuf pool on socket %d\n", socketid); + else + printf("Allocated mbuf pool on socket %d\n", socketid); + +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) + setup_lpm(socketid); +#else + 
setup_hash(socketid); +#endif + } + qconf = &lcore_conf[lcore_id]; + qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; + qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; + } + return 0; +} + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == ETH_LINK_DOWN) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +int +main(int argc, char **argv) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; + int ret; + int i; + unsigned nb_ports; + uint16_t queueid; + unsigned lcore_id; + uint32_t n_tx_queue, nb_lcores; + uint8_t portid, nb_rx_queue, queue, socketid; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + argc -= ret; + argv += ret; + + /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + dest_eth_addr[portid] = ETHER_LOCAL_ADMIN_ADDR + + ((uint64_t)portid << 40); + *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; + } + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); + + if (check_lcore_params() < 0) + rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); + + printf("Initializing rx-queues...\n"); + ret = init_rx_queues(); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_rx_queues failed\n"); + + printf("Initializing tx-threads...\n"); + ret = init_tx_threads(); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_tx_threads failed\n"); + + printf("Initializing rings...\n"); + ret = init_rx_rings(); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_rx_rings failed\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports > 
RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + if (check_port_config(nb_ports) < 0) + rte_exit(EXIT_FAILURE, "check_port_config failed\n"); + + nb_lcores = rte_lcore_count(); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + + /* init port */ + printf("Initializing port %d ... ", portid); + fflush(stdout); + + nb_rx_queue = get_port_n_rx_queues(portid); + n_tx_queue = nb_lcores; + if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) + n_tx_queue = MAX_TX_QUEUE_PER_PORT; + printf("Creating queues: nb_rxq=%d nb_txq=%u... ", + nb_rx_queue, (unsigned)n_tx_queue); + ret = rte_eth_dev_configure(portid, nb_rx_queue, + (uint16_t)n_tx_queue, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", + ret, portid); + + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); + print_ethaddr(" Address:", &ports_eth_addr[portid]); + printf(", "); + print_ethaddr("Destination:", + (const struct ether_addr *)&dest_eth_addr[portid]); + printf(", "); + + /* + * prepare src MACs for each port. 
+ */ + ether_addr_copy(&ports_eth_addr[portid], + (struct ether_addr *)(val_eth + portid) + 1); + + /* init memory */ + ret = init_mem(NB_MBUF); + if (ret < 0) + rte_exit(EXIT_FAILURE, "init_mem failed\n"); + + /* init one TX queue per couple (lcore,port) */ + queueid = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + if (numa_on) + socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); + fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + if (port_conf.rxmode.jumbo_frame) + txconf->txq_flags = 0; + ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, + socketid, txconf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + + tx_thread[lcore_id].tx_queue_id[portid] = queueid; + queueid++; + } + printf("\n"); + } + + for (i = 0; i < n_rx_thread; i++) { + lcore_id = rx_thread[i].conf.lcore_id; + + if (rte_lcore_is_enabled(lcore_id) == 0) { + rte_exit(EXIT_FAILURE, + "Cannot start Rx thread on lcore %u: lcore disabled\n", + lcore_id + ); + } + + printf("\nInitializing rx queues for Rx thread %d on lcore %u ... 
", + i, lcore_id); + fflush(stdout); + + /* init RX queues */ + for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) { + portid = rx_thread[i].rx_queue_list[queue].port_id; + queueid = rx_thread[i].rx_queue_list[queue].queue_id; + + if (numa_on) + socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); + else + socketid = 0; + + printf("rxq=%d,%d,%d ", portid, queueid, socketid); + fflush(stdout); + + ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, + socketid, + NULL, + pktmbuf_pool[socketid]); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, " + "port=%d\n", ret, portid); + } + } + + printf("\n"); + + /* start ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + + /* + * If enabled, put device in promiscuous mode. + * This allows IO forwarding mode to forward packets + * to itself through 2 cross-connected ports of the + * target machine. 
+ */ + if (promiscuous_on) + rte_eth_promiscuous_enable(portid); + } + + check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); + + if (lthreads_on) { + printf("Starting L-Threading Model\n"); + +#if (APP_CPU_LOAD > 0) + if (cpu_load_lcore_id > 0) + /* Use one lcore for cpu load collector */ + nb_lcores--; +#endif + + lthread_num_schedulers_set(nb_lcores); + rte_eal_mp_remote_launch(sched_spawner, NULL, SKIP_MASTER); + lthread_master_spawner(NULL); + + } else { + printf("Starting P-Threading Model\n"); + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(pthread_run, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + } + + return 0; +} diff --git a/examples/performance-thread/l3fwd-thread/test.sh b/examples/performance-thread/l3fwd-thread/test.sh new file mode 100755 index 00000000..b7718b62 --- /dev/null +++ b/examples/performance-thread/l3fwd-thread/test.sh @@ -0,0 +1,149 @@ +#!/bin/bash + +case "$1" in + + ###################### + # 1 L-core per pcore # + ###################### + + "1.1") + echo "1.1 1 L-core per pcore (N=2)" + + ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(1,0,0,0)" \ + --tx="(1,0)" \ + --stat-lcore 2 \ + --no-lthread + + ;; + + "1.2") + echo "1.2 1 L-core per pcore (N=4)" + + ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(1,0,1,1)" \ + --tx="(2,0)(3,1)" \ + --stat-lcore 4 \ + --no-lthread + ;; + + "1.3") + echo "1.3 1 L-core per pcore (N=8)" + + ./build/l3fwd-thread -c 1ff -n 2 -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(0,1,1,1)(1,0,2,2)(1,1,3,3)" \ + --tx="(4,0)(5,1)(6,2)(7,3)" \ + --stat-lcore 8 \ + --no-lthread + ;; + + "1.4") + echo "1.3 1 L-core per pcore (N=16)" + + ./build/l3fwd-thread -c 3ffff -n 2 -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + 
--rx="(0,0,0,0)(0,1,1,1)(0,2,2,2)(0,3,3,3)(1,0,4,4)(1,1,5,5)(1,2,6,6)(1,3,7,7)" \ + --tx="(8,0)(9,1)(10,2)(11,3)(12,4)(13,5)(14,6)(15,7)" \ + --stat-lcore 16 \ + --no-lthread + ;; + + + ###################### + # N L-core per pcore # + ###################### + + "2.1") + echo "2.1 N L-core per pcore (N=2)" + + ./build/l3fwd-thread -c ff -n 2 --lcores="2,(0-1)@0" -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(1,0,0,0)" \ + --tx="(1,0)" \ + --stat-lcore 2 \ + --no-lthread + + ;; + + "2.2") + echo "2.2 N L-core per pcore (N=4)" + + ./build/l3fwd-thread -c ff -n 2 --lcores="(0-3)@0,4" -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(1,0,1,1)" \ + --tx="(2,0)(3,1)" \ + --stat-lcore 4 \ + --no-lthread + ;; + + "2.3") + echo "2.3 N L-core per pcore (N=8)" + + ./build/l3fwd-thread -c 3ffff -n 2 --lcores="(0-7)@0,8" -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(0,1,1,1)(1,0,2,2)(1,1,3,3)" \ + --tx="(4,0)(5,1)(6,2)(7,3)" \ + --stat-lcore 8 \ + --no-lthread + ;; + + "2.4") + echo "2.3 N L-core per pcore (N=16)" + + ./build/l3fwd-thread -c 3ffff -n 2 --lcores="(0-15)@0,16" -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(0,1,1,1)(0,2,2,2)(0,3,3,3)(1,0,4,4)(1,1,5,5)(1,2,6,6)(1,3,7,7)" \ + --tx="(8,0)(9,1)(10,2)(11,3)(12,4)(13,5)(14,6)(15,7)" \ + --stat-lcore 16 \ + --no-lthread + ;; + + + ######################### + # N L-threads per pcore # + ######################### + + "3.1") + echo "3.1 N L-threads per pcore (N=2)" + + ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(1,0,0,0)" \ + --tx="(0,0)" \ + --stat-lcore 1 + ;; + + "3.2") + echo "3.2 N L-threads per pcore (N=4)" + + ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(1,0,0,1)" \ + --tx="(0,0)(0,1)" \ + --stat-lcore 1 + ;; + + "3.3") + echo "3.2 N L-threads per pcore (N=8)" + + ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \ + 
--enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(0,1,0,1)(1,0,0,2)(1,1,0,3)" \ + --tx="(0,0)(0,1)(0,2)(0,3)" \ + --stat-lcore 1 + ;; + + "3.4") + echo "3.2 N L-threads per pcore (N=16)" + + ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \ + --enable-jumbo --max-pkt-len 1500 \ + --rx="(0,0,0,0)(0,1,0,1)(0,2,0,2)(0,0,0,3)(1,0,0,4)(1,1,0,5)(1,2,0,6)(1,3,0,7)" \ + --tx="(0,0)(0,1)(0,2)(0,3)(0,4)(0,5)(0,6)(0,7)" \ + --stat-lcore 1 + ;; + +esac diff --git a/examples/performance-thread/pthread_shim/Makefile b/examples/performance-thread/pthread_shim/Makefile new file mode 100644 index 00000000..86ac657c --- /dev/null +++ b/examples/performance-thread/pthread_shim/Makefile @@ -0,0 +1,60 @@ +# BSD LICENSE +# +# Copyright(c) 2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = lthread_pthread_shim + +# all source are stored in SRCS-y +SRCS-y := main.c pthread_shim.c +INCLUDES := -I$(RTE_SDK)/$(RTE_TARGET)/include -I$(SRCDIR) +include $(RTE_SDK)/examples/performance-thread/common/common.mk + +CFLAGS += -g -O3 $(USER_FLAGS) $(INCLUDES) +CFLAGS += $(WERROR_FLAGS) + +LDFLAGS += -lpthread + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/performance-thread/pthread_shim/main.c b/examples/performance-thread/pthread_shim/main.c new file mode 100644 index 00000000..f0357218 --- /dev/null +++ b/examples/performance-thread/pthread_shim/main.c @@ -0,0 +1,287 @@ + +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <unistd.h> +#include <sched.h> +#include <pthread.h> + +#include <rte_common.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_timer.h> + +#include "lthread_api.h" +#include "lthread_diag_api.h" +#include "pthread_shim.h" + +#define DEBUG_APP 0 +#define HELLOW_WORLD_MAX_LTHREADS 10 + +__thread int print_count; +__thread pthread_mutex_t print_lock; + +__thread pthread_mutex_t exit_lock; +__thread pthread_cond_t exit_cond; + +/* + * A simple thread that demonstrates use of a mutex, a condition + * variable, thread local storage, explicit yield, and thread exit. + * + * The thread uses a mutex to protect a shared counter which is incremented + * and then it waits on condition variable before exiting. + * + * The thread argument is stored in and retrieved from TLS, using + * the pthread key create, get and set specific APIs. + * + * The thread yields while holding the mutex, to provide opportunity + * for other threads to contend. 
+ * + * All of the pthread API functions used by this thread are actually + * resolved to corresponding lthread functions by the pthread shim + * implemented in pthread_shim.c + */ +void *helloworld_pthread(void *arg); +void *helloworld_pthread(void *arg) +{ + pthread_key_t key; + + /* create a key for TLS */ + pthread_key_create(&key, NULL); + + /* store the arg in TLS */ + pthread_setspecific(key, arg); + + /* grab lock and increment shared counter */ + pthread_mutex_lock(&print_lock); + print_count++; + + /* yield thread to give opportunity for lock contention */ + pthread_yield(); + + /* retrieve arg from TLS */ + uint64_t thread_no = (uint64_t) pthread_getspecific(key); + + printf("Hello - lcore = %d count = %d thread_no = %d thread_id = %p\n", + sched_getcpu(), + print_count, + (int) thread_no, + (void *)pthread_self()); + + /* release the lock */ + pthread_mutex_unlock(&print_lock); + + /* + * wait on condition variable + * before exiting + */ + pthread_mutex_lock(&exit_lock); + pthread_cond_wait(&exit_cond, &exit_lock); + pthread_mutex_unlock(&exit_lock); + + /* exit */ + pthread_exit((void *) thread_no); +} + + +/* + * This is the initial thread + * + * It demonstrates pthread, mutex and condition variable creation, + * broadcast and pthread join APIs. + * + * This initial thread must always start life as an lthread. + * + * This thread creates many more threads then waits a short time + * before signalling them to exit using a broadcast. 
+ * + * All of the pthread API functions used by this thread are actually + * resolved to corresponding lthread functions by the pthread shim + * implemented in pthread_shim.c + * + * After all threads have finished the lthread scheduler is shutdown + * and normal pthread operation is restored + */ +__thread pthread_t tid[HELLOW_WORLD_MAX_LTHREADS]; + +static void initial_lthread(void *args); +static void initial_lthread(void *args __attribute__((unused))) +{ + int lcore = (int) rte_lcore_id(); + /* + * + * We can now enable pthread API override + * and start to use the pthread APIs + */ + pthread_override_set(1); + + uint64_t i; + + /* initialize mutex for shared counter */ + print_count = 0; + pthread_mutex_init(&print_lock, NULL); + + /* initialize mutex and condition variable controlling thread exit */ + pthread_mutex_init(&exit_lock, NULL); + pthread_cond_init(&exit_cond, NULL); + + /* spawn a number of threads */ + for (i = 0; i < HELLOW_WORLD_MAX_LTHREADS; i++) { + + /* + * Not strictly necessary but + * for the sake of this example + * use an attribute to pass the desired lcore + */ + pthread_attr_t attr; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(lcore, &cpuset); + pthread_attr_init(&attr); + pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + + /* create the thread */ + pthread_create(&tid[i], &attr, helloworld_pthread, (void *) i); + } + + /* wait for 1s to allow threads + * to block on the condition variable + * N.B. nanosleep() is resolved to lthread_sleep() + * by the shim. 
+ */ + struct timespec time; + + time.tv_sec = 1; + time.tv_nsec = 0; + nanosleep(&time, NULL); + + /* wake up all the threads */ + pthread_cond_broadcast(&exit_cond); + + /* wait for them to finish */ + for (i = 0; i < HELLOW_WORLD_MAX_LTHREADS; i++) { + + uint64_t thread_no; + + pthread_join(tid[i], (void *) &thread_no); + if (thread_no != i) + printf("error on thread exit\n"); + } + + pthread_cond_destroy(&exit_cond); + pthread_mutex_destroy(&print_lock); + pthread_mutex_destroy(&exit_lock); + + /* shutdown the lthread scheduler */ + lthread_scheduler_shutdown(rte_lcore_id()); + lthread_detach(); +} + + + +/* This thread creates a single initial lthread + * and then runs the scheduler + * An instance of this thread is created on each thread + * in the core mask + */ +static int +lthread_scheduler(void *args); +static int +lthread_scheduler(void *args __attribute__((unused))) +{ + /* create initial thread */ + struct lthread *lt; + + lthread_create(&lt, -1, initial_lthread, (void *) NULL); + + /* run the lthread scheduler */ + lthread_run(); + + /* restore genuine pthread operation */ + pthread_override_set(0); + return 0; +} + +int main(int argc, char **argv) +{ + int num_sched = 0; + + /* basic DPDK initialization is all that is necessary to run lthreads*/ + int ret = rte_eal_init(argc, argv); + + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); + + /* enable timer subsystem */ + rte_timer_subsystem_init(); + +#if DEBUG_APP + lthread_diagnostic_set_mask(LT_DIAG_ALL); +#endif + + /* create a scheduler on every core in the core mask + * and launch an initial lthread that will spawn many more.
+ */ + unsigned lcore_id; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id)) + num_sched++; + } + + /* set the number of schedulers, this forces all schedulers synchronize + * before entering their main loop + */ + lthread_num_schedulers_set(num_sched); + + /* launch all threads */ + rte_eal_mp_remote_launch(lthread_scheduler, (void *)NULL, CALL_MASTER); + + /* wait for threads to stop */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_wait_lcore(lcore_id); + } + return 0; +} diff --git a/examples/performance-thread/pthread_shim/pthread_shim.c b/examples/performance-thread/pthread_shim/pthread_shim.c new file mode 100644 index 00000000..93c3216d --- /dev/null +++ b/examples/performance-thread/pthread_shim/pthread_shim.c @@ -0,0 +1,719 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <errno.h> +#define __USE_GNU +#include <sched.h> +#include <dlfcn.h> + +#include <rte_log.h> + +#include "lthread_api.h" +#include "pthread_shim.h" + +#define RTE_LOGTYPE_PTHREAD_SHIM RTE_LOGTYPE_USER3 + +#define POSIX_ERRNO(x) (x) + +/* + * this flag determines at run time whether we override pthread + * calls and map them to equivalent lthread calls + * or whether we call the standard pthread function + */ +static __thread int override; + + +/* + * this structure contains function pointers that will be + * initialised to the loaded address of the real + * pthread library API functions + */ +struct pthread_lib_funcs { +int (*f_pthread_barrier_destroy) + (pthread_barrier_t *); +int (*f_pthread_barrier_init) + (pthread_barrier_t *, const pthread_barrierattr_t *, unsigned); +int (*f_pthread_barrier_wait) + (pthread_barrier_t *); +int (*f_pthread_cond_broadcast) + (pthread_cond_t *); +int (*f_pthread_cond_destroy) + (pthread_cond_t *); +int (*f_pthread_cond_init) + (pthread_cond_t *, const pthread_condattr_t *); +int (*f_pthread_cond_signal) + (pthread_cond_t *); +int (*f_pthread_cond_timedwait) + (pthread_cond_t *, pthread_mutex_t *, const struct timespec *); +int (*f_pthread_cond_wait) + (pthread_cond_t *, pthread_mutex_t *); +int (*f_pthread_create) + (pthread_t *, const pthread_attr_t *, void *(*)(void *), void *); +int (*f_pthread_detach) + (pthread_t); +int
(*f_pthread_equal) + (pthread_t, pthread_t); +void (*f_pthread_exit) + (void *); +void * (*f_pthread_getspecific) + (pthread_key_t); +int (*f_pthread_getcpuclockid) + (pthread_t, clockid_t *); +int (*f_pthread_join) + (pthread_t, void **); +int (*f_pthread_key_create) + (pthread_key_t *, void (*) (void *)); +int (*f_pthread_key_delete) + (pthread_key_t); +int (*f_pthread_mutex_destroy) + (pthread_mutex_t *__mutex); +int (*f_pthread_mutex_init) + (pthread_mutex_t *__mutex, const pthread_mutexattr_t *); +int (*f_pthread_mutex_lock) + (pthread_mutex_t *__mutex); +int (*f_pthread_mutex_trylock) + (pthread_mutex_t *__mutex); +int (*f_pthread_mutex_timedlock) + (pthread_mutex_t *__mutex, const struct timespec *); +int (*f_pthread_mutex_unlock) + (pthread_mutex_t *__mutex); +int (*f_pthread_once) + (pthread_once_t *, void (*) (void)); +int (*f_pthread_rwlock_destroy) + (pthread_rwlock_t *__rwlock); +int (*f_pthread_rwlock_init) + (pthread_rwlock_t *__rwlock, const pthread_rwlockattr_t *); +int (*f_pthread_rwlock_rdlock) + (pthread_rwlock_t *__rwlock); +int (*f_pthread_rwlock_timedrdlock) + (pthread_rwlock_t *__rwlock, const struct timespec *); +int (*f_pthread_rwlock_timedwrlock) + (pthread_rwlock_t *__rwlock, const struct timespec *); +int (*f_pthread_rwlock_tryrdlock) + (pthread_rwlock_t *__rwlock); +int (*f_pthread_rwlock_trywrlock) + (pthread_rwlock_t *__rwlock); +int (*f_pthread_rwlock_unlock) + (pthread_rwlock_t *__rwlock); +int (*f_pthread_rwlock_wrlock) + (pthread_rwlock_t *__rwlock); +pthread_t (*f_pthread_self) + (void); +int (*f_pthread_setspecific) + (pthread_key_t, const void *); +int (*f_pthread_spin_init) + (pthread_spinlock_t *__spin, int); +int (*f_pthread_spin_destroy) + (pthread_spinlock_t *__spin); +int (*f_pthread_spin_lock) + (pthread_spinlock_t *__spin); +int (*f_pthread_spin_trylock) + (pthread_spinlock_t *__spin); +int (*f_pthread_spin_unlock) + (pthread_spinlock_t *__spin); +int (*f_pthread_cancel) + (pthread_t); +int (*f_pthread_setcancelstate) 
+ (int, int *); +int (*f_pthread_setcanceltype) + (int, int *); +void (*f_pthread_testcancel) + (void); +int (*f_pthread_getschedparam) + (pthread_t pthread, int *, struct sched_param *); +int (*f_pthread_setschedparam) + (pthread_t, int, const struct sched_param *); +int (*f_pthread_yield) + (void); +int (*f_pthread_setaffinity_np) + (pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset); +int (*f_nanosleep) + (const struct timespec *req, struct timespec *rem); +} _sys_pthread_funcs = { + .f_pthread_barrier_destroy = NULL, +}; + + +/* + * this macro obtains the loaded address of a library function + * and saves it. + */ +static void *__libc_dl_handle = RTLD_NEXT; + +#define get_addr_of_loaded_symbol(name) do { \ + char *error_str; \ + _sys_pthread_funcs.f_##name = dlsym(__libc_dl_handle, (#name)); \ + error_str = dlerror(); \ + if (error_str != NULL) { \ + fprintf(stderr, "%s\n", error_str); \ + } \ +} while (0) + + +/* + * The constructor function initialises the + * function pointers for pthread library functions + */ +void +pthread_intercept_ctor(void)__attribute__((constructor)); +void +pthread_intercept_ctor(void) +{ + override = 0; + /* + * Get the original functions + */ + get_addr_of_loaded_symbol(pthread_barrier_destroy); + get_addr_of_loaded_symbol(pthread_barrier_init); + get_addr_of_loaded_symbol(pthread_barrier_wait); + get_addr_of_loaded_symbol(pthread_cond_broadcast); + get_addr_of_loaded_symbol(pthread_cond_destroy); + get_addr_of_loaded_symbol(pthread_cond_init); + get_addr_of_loaded_symbol(pthread_cond_signal); + get_addr_of_loaded_symbol(pthread_cond_timedwait); + get_addr_of_loaded_symbol(pthread_cond_wait); + get_addr_of_loaded_symbol(pthread_create); + get_addr_of_loaded_symbol(pthread_detach); + get_addr_of_loaded_symbol(pthread_equal); + get_addr_of_loaded_symbol(pthread_exit); + get_addr_of_loaded_symbol(pthread_getspecific); + get_addr_of_loaded_symbol(pthread_getcpuclockid); + get_addr_of_loaded_symbol(pthread_join); + 
get_addr_of_loaded_symbol(pthread_key_create); + get_addr_of_loaded_symbol(pthread_key_delete); + get_addr_of_loaded_symbol(pthread_mutex_destroy); + get_addr_of_loaded_symbol(pthread_mutex_init); + get_addr_of_loaded_symbol(pthread_mutex_lock); + get_addr_of_loaded_symbol(pthread_mutex_trylock); + get_addr_of_loaded_symbol(pthread_mutex_timedlock); + get_addr_of_loaded_symbol(pthread_mutex_unlock); + get_addr_of_loaded_symbol(pthread_once); + get_addr_of_loaded_symbol(pthread_rwlock_destroy); + get_addr_of_loaded_symbol(pthread_rwlock_init); + get_addr_of_loaded_symbol(pthread_rwlock_rdlock); + get_addr_of_loaded_symbol(pthread_rwlock_timedrdlock); + get_addr_of_loaded_symbol(pthread_rwlock_timedwrlock); + get_addr_of_loaded_symbol(pthread_rwlock_tryrdlock); + get_addr_of_loaded_symbol(pthread_rwlock_trywrlock); + get_addr_of_loaded_symbol(pthread_rwlock_unlock); + get_addr_of_loaded_symbol(pthread_rwlock_wrlock); + get_addr_of_loaded_symbol(pthread_self); + get_addr_of_loaded_symbol(pthread_setspecific); + get_addr_of_loaded_symbol(pthread_spin_init); + get_addr_of_loaded_symbol(pthread_spin_destroy); + get_addr_of_loaded_symbol(pthread_spin_lock); + get_addr_of_loaded_symbol(pthread_spin_trylock); + get_addr_of_loaded_symbol(pthread_spin_unlock); + get_addr_of_loaded_symbol(pthread_cancel); + get_addr_of_loaded_symbol(pthread_setcancelstate); + get_addr_of_loaded_symbol(pthread_setcanceltype); + get_addr_of_loaded_symbol(pthread_testcancel); + get_addr_of_loaded_symbol(pthread_getschedparam); + get_addr_of_loaded_symbol(pthread_setschedparam); + get_addr_of_loaded_symbol(pthread_yield); + get_addr_of_loaded_symbol(pthread_setaffinity_np); + get_addr_of_loaded_symbol(nanosleep); +} + + +/* + * Enable/Disable pthread override + * state + * 0 disable + * 1 enable + */ +void pthread_override_set(int state) +{ + override = state; +} + + +/* + * Return pthread override state + * return + * 0 disable + * 1 enable + */ +int pthread_override_get(void) +{ + return 
override; +} + +/* + * This macro is used to catch and log + * invocation of stubs for unimplemented pthread + * API functions. + */ +#define NOT_IMPLEMENTED do { \ + if (override) { \ + RTE_LOG(WARNING, \ + PTHREAD_SHIM, \ + "WARNING %s NOT IMPLEMENTED\n", \ + __func__); \ + } \ +} while (0) + +/* + * pthread API override functions follow + * Note in this example code only a subset of functions are + * implemented. + * + * The stub functions provided will issue a warning log + * message if an unimplemented function is invoked + * + */ + +int pthread_barrier_destroy(pthread_barrier_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_barrier_destroy(a); +} + +int +pthread_barrier_init(pthread_barrier_t *a, + const pthread_barrierattr_t *b, unsigned c) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_barrier_init(a, b, c); +} + +int pthread_barrier_wait(pthread_barrier_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_barrier_wait(a); +} + +int pthread_cond_broadcast(pthread_cond_t *cond) +{ + if (override) { + + lthread_cond_broadcast(*(struct lthread_cond **)cond); + return 0; + } + return _sys_pthread_funcs.f_pthread_cond_broadcast(cond); +} + +int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + if (override) + return lthread_mutex_destroy(*(struct lthread_mutex **)mutex); + return _sys_pthread_funcs.f_pthread_mutex_destroy(mutex); +} + +int pthread_cond_destroy(pthread_cond_t *cond) +{ + if (override) + return lthread_cond_destroy(*(struct lthread_cond **)cond); + return _sys_pthread_funcs.f_pthread_cond_destroy(cond); +} + +int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) +{ + if (override) + return lthread_cond_init(NULL, + (struct lthread_cond **)cond, + (const struct lthread_condattr *) attr); + return _sys_pthread_funcs.f_pthread_cond_init(cond, attr); +} + +int pthread_cond_signal(pthread_cond_t *cond) +{ + if (override) { + lthread_cond_signal(*(struct lthread_cond **)cond); + return 0; 
+ } + return _sys_pthread_funcs.f_pthread_cond_signal(cond); +} + +int +pthread_cond_timedwait(pthread_cond_t *__restrict cond, + pthread_mutex_t *__restrict mutex, + const struct timespec *__restrict time) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_cond_timedwait(cond, mutex, time); +} + +int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + if (override) { + pthread_mutex_unlock(mutex); + int rv = lthread_cond_wait(*(struct lthread_cond **)cond, 0); + + pthread_mutex_lock(mutex); + return rv; + } + return _sys_pthread_funcs.f_pthread_cond_wait(cond, mutex); +} + +/* + * When overriding, create an lthread instead of a pthread. + * A non-NULL attr must carry an affinity mask with exactly one CPU set, + * and that CPU must be below LTHREAD_MAX_LCORES, otherwise EINVAL is + * returned. A NULL attr passes lcore -1 to lthread_create(). + */ +int +pthread_create(pthread_t *__restrict tid, + const pthread_attr_t *__restrict attr, + void *(func) (void *), + void *__restrict arg) +{ + if (override) { + int lcore = -1; + + if (attr != NULL) { + /* determine CPU being requested */ + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + pthread_attr_getaffinity_np(attr, + sizeof(cpu_set_t), + &cpuset); + + if (CPU_COUNT(&cpuset) != 1) + return POSIX_ERRNO(EINVAL); + + for (lcore = 0; lcore < LTHREAD_MAX_LCORES; lcore++) { + if (!CPU_ISSET(lcore, &cpuset)) + continue; + break; + } + /* check requested core is allowed, as pthread_setaffinity_np() does */ + if (lcore == LTHREAD_MAX_LCORES) + return POSIX_ERRNO(EINVAL); + } + return lthread_create((struct lthread **)tid, lcore, + (void (*)(void *))func, arg); + } + return _sys_pthread_funcs.f_pthread_create(tid, attr, func, arg); +} + +/* + * When overriding, only the calling lthread can detach itself. + * A request to detach any other thread is logged as not implemented + * and still reports success (0), as before. + */ +int pthread_detach(pthread_t tid) +{ + if (override) { + struct lthread *lt = (struct lthread *)tid; + + if (lt == lthread_current()) { + lthread_detach(); + return 0; + } + NOT_IMPLEMENTED; + return 0; + } + return _sys_pthread_funcs.f_pthread_detach(tid); +} + +int pthread_equal(pthread_t a, pthread_t b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_equal(a, b); +} + +void pthread_exit_override(void *v) +{ + if (override) { + lthread_exit(v); + return; + } + _sys_pthread_funcs.f_pthread_exit(v); +} + +void +*pthread_getspecific(pthread_key_t key) +{ + if (override) + return lthread_getspecific((unsigned int) key); + return _sys_pthread_funcs.f_pthread_getspecific(key); +} + +int
pthread_getcpuclockid(pthread_t a, clockid_t *b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_getcpuclockid(a, b); +} + +int pthread_join(pthread_t tid, void **val) +{ + if (override) + return lthread_join((struct lthread *)tid, val); + return _sys_pthread_funcs.f_pthread_join(tid, val); +} + +int pthread_key_create(pthread_key_t *keyptr, void (*dtor) (void *)) +{ + if (override) + return lthread_key_create((unsigned int *)keyptr, dtor); + return _sys_pthread_funcs.f_pthread_key_create(keyptr, dtor); +} + +int pthread_key_delete(pthread_key_t key) +{ + if (override) { + lthread_key_delete((unsigned int) key); + return 0; + } + return _sys_pthread_funcs.f_pthread_key_delete(key); +} + + +int +pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr) +{ + if (override) + return lthread_mutex_init(NULL, + (struct lthread_mutex **)mutex, + (const struct lthread_mutexattr *)attr); + return _sys_pthread_funcs.f_pthread_mutex_init(mutex, attr); +} + +int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + if (override) + return lthread_mutex_lock(*(struct lthread_mutex **)mutex); + return _sys_pthread_funcs.f_pthread_mutex_lock(mutex); +} + +int pthread_mutex_trylock(pthread_mutex_t *mutex) +{ + if (override) + return lthread_mutex_trylock(*(struct lthread_mutex **)mutex); + return _sys_pthread_funcs.f_pthread_mutex_trylock(mutex); +} + +int pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_mutex_timedlock(mutex, b); +} + +int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + if (override) + return lthread_mutex_unlock(*(struct lthread_mutex **)mutex); + return _sys_pthread_funcs.f_pthread_mutex_unlock(mutex); +} + +int pthread_once(pthread_once_t *a, void (b) (void)) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_once(a, b); +} + +int pthread_rwlock_destroy(pthread_rwlock_t *a) +{ + NOT_IMPLEMENTED; + return 
_sys_pthread_funcs.f_pthread_rwlock_destroy(a); +} + +int pthread_rwlock_init(pthread_rwlock_t *a, const pthread_rwlockattr_t *b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_rwlock_init(a, b); +} + +int pthread_rwlock_rdlock(pthread_rwlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_rwlock_rdlock(a); +} + +int pthread_rwlock_timedrdlock(pthread_rwlock_t *a, const struct timespec *b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_rwlock_timedrdlock(a, b); +} + +int pthread_rwlock_timedwrlock(pthread_rwlock_t *a, const struct timespec *b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_rwlock_timedwrlock(a, b); +} + +int pthread_rwlock_tryrdlock(pthread_rwlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_rwlock_tryrdlock(a); +} + +int pthread_rwlock_trywrlock(pthread_rwlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_rwlock_trywrlock(a); +} + +int pthread_rwlock_unlock(pthread_rwlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_rwlock_unlock(a); +} + +int pthread_rwlock_wrlock(pthread_rwlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_rwlock_wrlock(a); +} + +int pthread_yield(void) +{ + if (override) { + lthread_yield(); + return 0; + } + return _sys_pthread_funcs.f_pthread_yield(); + +} + +pthread_t pthread_self(void) +{ + if (override) + return (pthread_t) lthread_current(); + return _sys_pthread_funcs.f_pthread_self(); +} + +int pthread_setspecific(pthread_key_t key, const void *data) +{ + if (override) { + int rv = lthread_setspecific((unsigned int)key, data); + return rv; + } + return _sys_pthread_funcs.f_pthread_setspecific(key, data); +} + +int pthread_spin_init(pthread_spinlock_t *a, int b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_spin_init(a, b); +} + +int pthread_spin_destroy(pthread_spinlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_spin_destroy(a); +} + +int 
pthread_spin_lock(pthread_spinlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_spin_lock(a); +} + +int pthread_spin_trylock(pthread_spinlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_spin_trylock(a); +} + +int pthread_spin_unlock(pthread_spinlock_t *a) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_spin_unlock(a); +} + +/* + * When overriding, tid is the lthread handle stored by pthread_create(), + * so it is cast directly (as pthread_join() and pthread_detach() do) + * rather than dereferenced. + */ +int pthread_cancel(pthread_t tid) +{ + if (override) { + lthread_cancel((struct lthread *)tid); + return 0; + } + return _sys_pthread_funcs.f_pthread_cancel(tid); +} + +int pthread_setcancelstate(int a, int *b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_setcancelstate(a, b); +} + +int pthread_setcanceltype(int a, int *b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_setcanceltype(a, b); +} + +void pthread_testcancel(void) +{ + NOT_IMPLEMENTED; + _sys_pthread_funcs.f_pthread_testcancel(); +} + + +int pthread_getschedparam(pthread_t tid, int *a, struct sched_param *b) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_getschedparam(tid, a, b); +} + +int pthread_setschedparam(pthread_t a, int b, const struct sched_param *c) +{ + NOT_IMPLEMENTED; + return _sys_pthread_funcs.f_pthread_setschedparam(a, b, c); +} + + +/* + * When overriding, sleep the calling lthread for the requested time. + * rem is not written on this path. + */ +int nanosleep(const struct timespec *req, struct timespec *rem) +{ + if (override) { + /* widen before multiplying so the product is computed in 64 bits */ + uint64_t ns = (uint64_t)req->tv_sec * 1000000000ULL + + (uint64_t)req->tv_nsec; + + lthread_sleep(ns); + return 0; + } + return _sys_pthread_funcs.f_nanosleep(req, rem); +} + +int +pthread_setaffinity_np(pthread_t thread, size_t cpusetsize, + const cpu_set_t *cpuset) +{ + if (override) { + /* we only allow affinity with a single CPU */ + if (CPU_COUNT(cpuset) != 1) + return POSIX_ERRNO(EINVAL); + + /* we only allow the current thread to set its own affinity */ + struct lthread *lt = (struct lthread *)thread; + + if (lthread_current() != lt) + return POSIX_ERRNO(EINVAL); + + /* determine the CPU being requested */ + int i; + + for (i = 0; i < LTHREAD_MAX_LCORES; i++) { + if
(!CPU_ISSET(i, cpuset)) + continue; + break; + } + /* check requested core is allowed */ + if (i == LTHREAD_MAX_LCORES) + return POSIX_ERRNO(EINVAL); + + /* finally we can set affinity to the requested lcore */ + lthread_set_affinity(i); + return 0; + } + return _sys_pthread_funcs.f_pthread_setaffinity_np(thread, cpusetsize, + cpuset); +} diff --git a/examples/performance-thread/pthread_shim/pthread_shim.h b/examples/performance-thread/pthread_shim/pthread_shim.h new file mode 100644 index 00000000..78bbb5ac --- /dev/null +++ b/examples/performance-thread/pthread_shim/pthread_shim.h @@ -0,0 +1,113 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PTHREAD_SHIM_H_ +#define _PTHREAD_SHIM_H_ +#include <pthread.h> + +/* + * This pthread shim is an example that demonstrates how legacy code + * that makes use of POSIX pthread services can make use of lthreads + * with reduced porting effort. + * + * N.B. The example is not a complete implementation, only a subset of + * pthread APIs sufficient to demonstrate the principle of operation + * are implemented. + * + * In general pthread attribute objects do not have equivalent functions + * in lthreads, and are ignored. + * + * There is one exception and that is the use of attr to specify a + * core affinity in calls to pthread_create. + * + * The shim operates as follows:- + * + * On initialisation a constructor function uses dlsym to obtain and + * save the loaded address of the full set of pthread APIs that will + * be overridden. + * + * For each function there is a stub provided that will invoke either + * the genuine pthread library function saved by the constructor, + * or else the corresponding equivalent lthread function. + * + * The stub functions are implemented in pthread_shim.c + * + * The stub will take care of adapting parameters, and police + * any constraints where lthread functionality differs. + * + * The initial thread must always be a pure lthread.
+ * + * The decision whether to invoke the real library function or the lthread + * function is controlled by a per pthread flag that can be switched + * on or off by the pthread_override_set() API described below. Typically + * this should be done as the first action of the initial lthread. + * + * N.B. In general it would be poor practice to revert to invoking a real + * pthread function when running as an lthread, since these may block and + * effectively stall the lthread scheduler. + * + */ + + +/* + * An exiting lthread must not terminate the pthread it is running in + * since this would mean terminating the lthread scheduler. + * We override pthread_exit() with a macro because it is typically declared with + * __attribute__((noreturn)) + */ +void pthread_exit_override(void *v); + +#define pthread_exit(v) do { \ + pthread_exit_override((v)); \ + return NULL; \ +} while (0) + +/* + * Enable/Disable pthread override + * state + * 0 disable + * 1 enable + */ +void pthread_override_set(int state); + + +/* + * Return pthread override state + * return + * 0 disable + * 1 enable + */ +int pthread_override_get(void); + + +#endif /* _PTHREAD_SHIM_H_ */ diff --git a/examples/ptpclient/Makefile b/examples/ptpclient/Makefile new file mode 100644 index 00000000..d241730f --- /dev/null +++ b/examples/ptpclient/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution.
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = ptpclient + +# all source are stored in SRCS-y +SRCS-y := ptpclient.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrt + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/ptpclient/ptpclient.c b/examples/ptpclient/ptpclient.c new file mode 100644 index 00000000..0af4f3b6 --- /dev/null +++ b/examples/ptpclient/ptpclient.c @@ -0,0 +1,780 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This application is a simple Layer 2 PTP v2 client. It shows delta values + * which are used to synchronize the PHC clock. if the "-T 1" parameter is + * passed to the application the Linux kernel clock is also synchronized. 
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <arpa/inet.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <getopt.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+
+/* Values for the PTP messageType field. */
+#define SYNC 0x0
+#define DELAY_REQ 0x1
+#define PDELAY_REQ 0x2
+#define PDELAY_RESP 0x3
+#define FOLLOW_UP 0x8
+#define DELAY_RESP 0x9
+#define PDELAY_RESP_FOLLOW_UP 0xA
+#define ANNOUNCE 0xB
+#define SIGNALING 0xC
+#define MANAGEMENT 0xD
+
+#define NSEC_PER_SEC 1000000000L
+/* Offsets above this many nanoseconds are stepped instead of slewed. */
+#define KERNEL_TIME_ADJUST_LIMIT 20000
+/* EtherType carried by the PTP frames this client handles. */
+#define PTP_PROTOCOL 0x88F7
+
+struct rte_mempool *mbuf_pool;
+uint32_t ptp_enabled_port_mask;
+uint8_t ptp_enabled_port_nb;
+static uint8_t ptp_enabled_ports[RTE_MAX_ETHPORTS];
+
+static const struct rte_eth_conf port_conf_default = {
+	.rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN }
+};
+
+/* Destination MAC address used for the DELAY_REQ frames we transmit. */
+static const struct ether_addr ether_multicast = {
+	.addr_bytes = {0x01, 0x1b, 0x19, 0x0, 0x0, 0x0}
+};
+
+/* Structs used for PTP handling. */
+
+/* On-wire timestamp: 48-bit seconds (msb/lsb) plus nanoseconds, big endian. */
+struct tstamp {
+	uint16_t sec_msb;
+	uint32_t sec_lsb;
+	uint32_t ns;
+} __attribute__((packed));
+
+struct clock_id {
+	uint8_t id[8];
+};
+
+struct port_id {
+	struct clock_id clock_id;
+	uint16_t port_number;
+} __attribute__((packed));
+
+/* Header found at the start of every PTP message, right after Ethernet. */
+struct ptp_header {
+	uint8_t msg_type;
+	uint8_t ver;
+	uint16_t message_length;
+	uint8_t domain_number;
+	uint8_t reserved1;
+	uint8_t flag_field[2];
+	int64_t correction;
+	uint32_t reserved2;
+	struct port_id source_port_id;
+	uint16_t seq_id;
+	uint8_t control;
+	int8_t log_message_interval;
+} __attribute__((packed));
+
+struct sync_msg {
+	struct ptp_header hdr;
+	struct tstamp origin_tstamp;
+} __attribute__((packed));
+
+struct follow_up_msg {
+	struct ptp_header hdr;
+	struct tstamp precise_origin_tstamp;
+	uint8_t suffix[0];
+} __attribute__((packed));
+
+struct delay_req_msg {
+	struct ptp_header hdr;
+	struct tstamp origin_tstamp;
+} __attribute__((packed));
+
+struct delay_resp_msg {
+	struct ptp_header hdr;
+	struct tstamp rx_tstamp;
+	struct port_id req_port_id;
+	uint8_t suffix[0];
+} __attribute__((packed));
+
+/* Overlay of every message layout this client reads or builds. */
+struct ptp_message {
+	union {
+		struct ptp_header header;
+		struct sync_msg sync;
+		struct delay_req_msg delay_req;
+		struct follow_up_msg follow_up;
+		struct delay_resp_msg delay_resp;
+	} __attribute__((packed));
+};
+
+/*
+ * State of the single slave clock instance.
+ * tstamp1..tstamp4 are filled by the FOLLOW_UP, SYNC, DELAY_REQ and
+ * DELAY_RESP handling respectively and combined in delta_eval().
+ */
+struct ptpv2_data_slave_ordinary {
+	struct rte_mbuf *m;
+	struct timespec tstamp1;
+	struct timespec tstamp2;
+	struct timespec tstamp3;
+	struct timespec tstamp4;
+	struct clock_id client_clock_id;
+	struct clock_id master_clock_id;
+	struct timeval new_adj;
+	int64_t delta;
+	uint8_t portid;
+	uint16_t seqID_SYNC;
+	uint16_t seqID_FOLLOWUP;
+	uint8_t ptpset;
+	uint8_t kernel_time_set;
+	uint8_t current_ptp_port;
+};
+
+static struct ptpv2_data_slave_ordinary ptp_data;
+
+/* Convert a timespec to a flat nanosecond count. */
+static inline uint64_t timespec64_to_ns(const struct timespec *ts)
+{
+	return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+/*
+ * Convert a signed nanosecond offset to a timeval.
+ * Negative offsets are normalised so that tv_usec is never negative
+ * (tv_sec is decremented and the remainder made positive).
+ */
+static struct timeval
+ns_to_timeval(int64_t nsec)
+{
+	struct timespec t_spec = {0, 0};
+	struct timeval t_eval = {0, 0};
+	int32_t rem;
+
+	if (nsec == 0)
+		return t_eval;
+	rem = nsec % NSEC_PER_SEC;
+	t_spec.tv_sec = nsec / NSEC_PER_SEC;
+
+	if (rem < 0) {
+		t_spec.tv_sec--;
+		rem += NSEC_PER_SEC;
+	}
+
+	t_spec.tv_nsec = rem;
+	t_eval.tv_sec = t_spec.tv_sec;
+	t_eval.tv_usec = t_spec.tv_nsec / 1000;
+
+	return t_eval;
+}
+
+/*
+ * Initializes a given port using global settings and with the RX buffers
+ * coming from the mbuf_pool passed as a parameter.
+ *
+ * Returns 0 on success, -1 if the port does not exist, or the negative
+ * error code of the first ethdev call that failed.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_conf port_conf = port_conf_default;
+	const uint16_t rx_rings = 1;
+	const uint16_t tx_rings = 1;
+	int retval;
+	uint16_t q;
+
+	if (port >= rte_eth_dev_count())
+		return -1;
+
+	/* Configure the Ethernet device. */
+	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+	if (retval != 0)
+		return retval;
+
+	/* Allocate and set up 1 RX queue per Ethernet port. */
+	for (q = 0; q < rx_rings; q++) {
+		retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+
+		if (retval < 0)
+			return retval;
+	}
+
+	/* Allocate and set up 1 TX queue per Ethernet port. */
+	for (q = 0; q < tx_rings; q++) {
+		/* Setup txq_flags */
+		struct rte_eth_txconf *txconf;
+
+		/*
+		 * Query the device being configured: the first argument is a
+		 * port id, not the queue index.
+		 */
+		rte_eth_dev_info_get(port, &dev_info);
+		txconf = &dev_info.default_txconf;
+		txconf->txq_flags = 0;
+
+		retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+				rte_eth_dev_socket_id(port), txconf);
+		if (retval < 0)
+			return retval;
+	}
+
+	/* Start the Ethernet port. */
+	retval = rte_eth_dev_start(port);
+	if (retval < 0)
+		return retval;
+
+	/*
+	 * Enable timesync timestamping for the Ethernet device. Every later
+	 * step relies on hardware timestamps, so a failure here is fatal
+	 * for this port.
+	 */
+	retval = rte_eth_timesync_enable(port);
+	if (retval < 0) {
+		printf("Timesync enable failed: %d\n", retval);
+		return retval;
+	}
+
+	/* Enable RX in promiscuous mode for the Ethernet device. */
+	rte_eth_promiscuous_enable(port);
+
+	return 0;
+}
+
+/*
+ * Print the four protocol timestamps, the computed master/slave delta and a
+ * comparison of the NIC (PTP) clock against the kernel clock, then clear
+ * the terminal. Also stores the PTP-to-kernel offset in ptp_data->new_adj.
+ */
+static void
+print_clock_info(struct ptpv2_data_slave_ordinary *ptp_data)
+{
+	int64_t nsec;
+	struct timespec net_time, sys_time;
+
+	printf("Master Clock id: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
+		ptp_data->master_clock_id.id[0],
+		ptp_data->master_clock_id.id[1],
+		ptp_data->master_clock_id.id[2],
+		ptp_data->master_clock_id.id[3],
+		ptp_data->master_clock_id.id[4],
+		ptp_data->master_clock_id.id[5],
+		ptp_data->master_clock_id.id[6],
+		ptp_data->master_clock_id.id[7]);
+
+	printf("\nT2 - Slave Clock. %lds %ldns",
+			(ptp_data->tstamp2.tv_sec),
+			(ptp_data->tstamp2.tv_nsec));
+
+	printf("\nT1 - Master Clock. %lds %ldns ",
+			ptp_data->tstamp1.tv_sec,
+			(ptp_data->tstamp1.tv_nsec));
+
+	printf("\nT3 - Slave Clock. %lds %ldns",
+			ptp_data->tstamp3.tv_sec,
+			(ptp_data->tstamp3.tv_nsec));
+
+	printf("\nT4 - Master Clock. %lds %ldns ",
+			ptp_data->tstamp4.tv_sec,
+			(ptp_data->tstamp4.tv_nsec));
+
+	printf("\nDelta between master and slave clocks:%"PRId64"ns\n",
+			ptp_data->delta);
+
+	clock_gettime(CLOCK_REALTIME, &sys_time);
+	rte_eth_timesync_read_time(ptp_data->current_ptp_port, &net_time);
+
+	time_t ts = net_time.tv_sec;
+
+	printf("\n\nComparison between Linux kernel Time and PTP:");
+
+	printf("\nCurrent PTP Time: %.24s %.9ld ns",
+			ctime(&ts), net_time.tv_nsec);
+
+	nsec = (int64_t)timespec64_to_ns(&net_time) -
+			(int64_t)timespec64_to_ns(&sys_time);
+	ptp_data->new_adj = ns_to_timeval(nsec);
+
+	/*
+	 * Print the kernel time sampled above. The offset just stored in
+	 * new_adj must not be overwritten with the wall-clock time, and the
+	 * sub-second part is shown in nanoseconds to match its label.
+	 */
+	time_t tp = sys_time.tv_sec;
+
+	printf("\nCurrent SYS Time: %.24s %.9ld ns",
+			ctime(&tp), sys_time.tv_nsec);
+
+	printf("\nDelta between PTP and Linux Kernel time:%"PRId64"ns\n",
+				nsec);
+
+	printf("[Ctrl+C to quit]\n");
+
+	/* Clear screen and put cursor in column 1, row 1 */
+	printf("\033[2J\033[1;1H");
+}
+
+/*
+ * Compute the signed adjustment, in nanoseconds, to apply to the slave
+ * clock: -((t2 - t1) - (t4 - t3)) / 2.
+ */
+static int64_t
+delta_eval(struct ptpv2_data_slave_ordinary *ptp_data)
+{
+	int64_t delta;
+	uint64_t t1 = 0;
+	uint64_t t2 = 0;
+	uint64_t t3 = 0;
+	uint64_t t4 = 0;
+
+	t1 = timespec64_to_ns(&ptp_data->tstamp1);
+	t2 = timespec64_to_ns(&ptp_data->tstamp2);
+	t3 = timespec64_to_ns(&ptp_data->tstamp3);
+	t4 = timespec64_to_ns(&ptp_data->tstamp4);
+
+	delta = -((int64_t)((t2 - t1) - (t4 - t3))) / 2;
+
+	return delta;
+}
+
+/*
+ * Parse the PTP SYNC message.
+ *
+ * The first SYNC seen latches its sender as the master clock. Only SYNC
+ * messages from that master update the tracked sequence id and the RX
+ * hardware timestamp (tstamp2).
+ */
+static void
+parse_sync(struct ptpv2_data_slave_ordinary *ptp_data, uint16_t rx_tstamp_idx)
+{
+	struct ptp_header *ptp_hdr;
+
+	ptp_hdr = (struct ptp_header *)(rte_pktmbuf_mtod(ptp_data->m, char *)
+			+ sizeof(struct ether_hdr));
+
+	if (ptp_data->ptpset == 0) {
+		rte_memcpy(&ptp_data->master_clock_id,
+				&ptp_hdr->source_port_id.clock_id,
+				sizeof(struct clock_id));
+		ptp_data->ptpset = 1;
+	}
+
+	/* Ignore SYNC messages that do not come from the latched master. */
+	if (memcmp(&ptp_data->master_clock_id,
+			&ptp_hdr->source_port_id.clock_id,
+			sizeof(struct clock_id)) != 0)
+		return;
+
+	ptp_data->seqID_SYNC = rte_be_to_cpu_16(ptp_hdr->seq_id);
+
+	rte_eth_timesync_read_rx_timestamp(ptp_data->portid,
+			&ptp_data->tstamp2, rx_tstamp_idx);
+}
+
+/*
+ * Parse the PTP FOLLOWUP message and send DELAY_REQ to the master clock.
+ *
+ * FOLLOW_UP messages from any clock other than the latched master are
+ * ignored. The precise origin timestamp is stored in tstamp1 and, when the
+ * sequence id matches the last SYNC, a DELAY_REQ is built and transmitted;
+ * its hardware TX timestamp is stored in tstamp3.
+ */
+static void
+parse_fup(struct ptpv2_data_slave_ordinary *ptp_data)
+{
+	struct ether_hdr *eth_hdr;
+	struct ptp_header *ptp_hdr;
+	struct clock_id *client_clkid;
+	struct ptp_message *ptp_msg;
+	struct rte_mbuf *created_pkt;
+	struct tstamp *origin_tstamp;
+	struct ether_addr eth_multicast = ether_multicast;
+	size_t pkt_size;
+	int wait_us;
+	struct rte_mbuf *m = ptp_data->m;
+
+	ptp_hdr = (struct ptp_header *)(rte_pktmbuf_mtod(m, char *)
+			+ sizeof(struct ether_hdr));
+	if (memcmp(&ptp_data->master_clock_id,
+			&ptp_hdr->source_port_id.clock_id,
+			sizeof(struct clock_id)) != 0)
+		return;
+
+	ptp_data->seqID_FOLLOWUP = rte_be_to_cpu_16(ptp_hdr->seq_id);
+	ptp_msg = (struct ptp_message *) (rte_pktmbuf_mtod(m, char *) +
+					  sizeof(struct ether_hdr));
+
+	origin_tstamp = &ptp_msg->follow_up.precise_origin_tstamp;
+	ptp_data->tstamp1.tv_nsec = ntohl(origin_tstamp->ns);
+	ptp_data->tstamp1.tv_sec =
+		((uint64_t)ntohl(origin_tstamp->sec_lsb)) |
+		(((uint64_t)ntohs(origin_tstamp->sec_msb)) << 32);
+
+	/* Only answer the FOLLOW_UP that belongs to the last SYNC seen. */
+	if (ptp_data->seqID_FOLLOWUP != ptp_data->seqID_SYNC)
+		return;
+
+	created_pkt = rte_pktmbuf_alloc(mbuf_pool);
+	if (created_pkt == NULL) {
+		/* Pool exhausted: skip this exchange, the next SYNC retries. */
+		return;
+	}
+
+	pkt_size = sizeof(struct ether_hdr) +
+		sizeof(struct ptp_message);
+	created_pkt->data_len = pkt_size;
+	created_pkt->pkt_len = pkt_size;
+	eth_hdr = rte_pktmbuf_mtod(created_pkt, struct ether_hdr *);
+	rte_eth_macaddr_get(ptp_data->portid, &eth_hdr->s_addr);
+
+	/* Set multicast address 01-1B-19-00-00-00. */
+	ether_addr_copy(&eth_multicast, &eth_hdr->d_addr);
+
+	eth_hdr->ether_type = htons(PTP_PROTOCOL);
+	ptp_msg = (struct ptp_message *)
+		(rte_pktmbuf_mtod(created_pkt, char *) +
+		sizeof(struct ether_hdr));
+
+	/*
+	 * A freshly allocated mbuf holds stale data: clear the whole message
+	 * so that every field not set below goes out as zero.
+	 */
+	memset(ptp_msg, 0, sizeof(struct ptp_message));
+
+	ptp_msg->delay_req.hdr.seq_id = htons(ptp_data->seqID_SYNC);
+	ptp_msg->delay_req.hdr.msg_type = DELAY_REQ;
+	ptp_msg->delay_req.hdr.ver = 2;
+	ptp_msg->delay_req.hdr.control = 1;
+	ptp_msg->delay_req.hdr.log_message_interval = 127;
+	ptp_msg->delay_req.hdr.message_length =
+		htons(sizeof(struct delay_req_msg));
+	/* Answer in the domain the master used for its FOLLOW_UP. */
+	ptp_msg->delay_req.hdr.domain_number = ptp_hdr->domain_number;
+
+	/* Set up clock id: MAC bytes 0-2, 0xFF 0xFE, MAC bytes 3-5. */
+	client_clkid =
+		&ptp_msg->delay_req.hdr.source_port_id.clock_id;
+
+	client_clkid->id[0] = eth_hdr->s_addr.addr_bytes[0];
+	client_clkid->id[1] = eth_hdr->s_addr.addr_bytes[1];
+	client_clkid->id[2] = eth_hdr->s_addr.addr_bytes[2];
+	client_clkid->id[3] = 0xFF;
+	client_clkid->id[4] = 0xFE;
+	client_clkid->id[5] = eth_hdr->s_addr.addr_bytes[3];
+	client_clkid->id[6] = eth_hdr->s_addr.addr_bytes[4];
+	client_clkid->id[7] = eth_hdr->s_addr.addr_bytes[5];
+
+	rte_memcpy(&ptp_data->client_clock_id,
+		   client_clkid,
+		   sizeof(struct clock_id));
+
+	/* Enable flag for hardware timestamping. */
+	created_pkt->ol_flags |= PKT_TX_IEEE1588_TMST;
+
+	/* Read value from NIC to prevent latching with old value. */
+	rte_eth_timesync_read_tx_timestamp(ptp_data->portid,
+			&ptp_data->tstamp3);
+
+	ptp_data->tstamp3.tv_nsec = 0;
+	ptp_data->tstamp3.tv_sec = 0;
+
+	/* Transmit the packet; on failure the mbuf is still ours to free. */
+	if (rte_eth_tx_burst(ptp_data->portid, 0, &created_pkt, 1) == 0) {
+		rte_pktmbuf_free(created_pkt);
+		return;
+	}
+
+	/* Poll for the TX timestamp, 1 us at a time, for at most 1 ms. */
+	wait_us = 0;
+	while ((rte_eth_timesync_read_tx_timestamp(ptp_data->portid,
+			&ptp_data->tstamp3) < 0) && (wait_us < 1000)) {
+		rte_delay_us(1);
+		wait_us++;
+	}
+}
+
+/*
+ * Update the kernel time with the difference between it and the current NIC
+ * time. The computed offset is also stored in ptp_data.new_adj.
+ */
+static inline void
+update_kernel_time(void)
+{
+	int64_t nsec;
+	struct timespec net_time, sys_time;
+
+	clock_gettime(CLOCK_REALTIME, &sys_time);
+	rte_eth_timesync_read_time(ptp_data.current_ptp_port, &net_time);
+
+	nsec = (int64_t)timespec64_to_ns(&net_time) -
+	       (int64_t)timespec64_to_ns(&sys_time);
+
+	ptp_data.new_adj = ns_to_timeval(nsec);
+
+	/*
+	 * If difference between kernel time and system time in NIC is too big
+	 * (more than +/- 20 microseconds), use clock_settime to set directly
+	 * the kernel time, as adjtime is better for small adjustments (takes
+	 * longer to adjust the time).
+	 */
+
+	if (nsec > KERNEL_TIME_ADJUST_LIMIT || nsec < -KERNEL_TIME_ADJUST_LIMIT)
+		clock_settime(CLOCK_REALTIME, &net_time);
+	else
+		adjtime(&ptp_data.new_adj, 0);
+}
+
+/*
+ * Parse the DELAY_RESP message.
+ *
+ * Responses addressed to another requester, or whose sequence id does not
+ * match the last FOLLOW_UP, are ignored. Otherwise tstamp4 is recorded, the
+ * clock delta is computed and applied to the NIC clock and, when enabled on
+ * the command line, to the kernel clock.
+ */
+static void
+parse_drsp(struct ptpv2_data_slave_ordinary *ptp_data)
+{
+	struct rte_mbuf *m = ptp_data->m;
+	struct ptp_message *ptp_msg;
+	struct tstamp *rx_tstamp;
+	uint16_t seq_id;
+
+	ptp_msg = (struct ptp_message *) (rte_pktmbuf_mtod(m, char *) +
+					sizeof(struct ether_hdr));
+	seq_id = rte_be_to_cpu_16(ptp_msg->delay_resp.hdr.seq_id);
+	if (memcmp(&ptp_data->client_clock_id,
+		   &ptp_msg->delay_resp.req_port_id.clock_id,
+		   sizeof(struct clock_id)) == 0) {
+		if (seq_id == ptp_data->seqID_FOLLOWUP) {
+			rx_tstamp = &ptp_msg->delay_resp.rx_tstamp;
+			ptp_data->tstamp4.tv_nsec = ntohl(rx_tstamp->ns);
+			ptp_data->tstamp4.tv_sec =
+				((uint64_t)ntohl(rx_tstamp->sec_lsb)) |
+				(((uint64_t)ntohs(rx_tstamp->sec_msb)) << 32);
+
+			/* Evaluate the delta for adjustment. */
+			ptp_data->delta = delta_eval(ptp_data);
+
+			rte_eth_timesync_adjust_time(ptp_data->portid,
+						     ptp_data->delta);
+
+			ptp_data->current_ptp_port = ptp_data->portid;
+
+			/* Update kernel time if enabled in app parameters. */
+			if (ptp_data->kernel_time_set == 1)
+				update_kernel_time();
+		}
+	}
+}
+
+/* This function processes PTP packets, implementing slave PTP IEEE1588 L2
+ * functionality.
+ *
+ * Frames whose EtherType is not PTP_PROTOCOL, and PTP message types other
+ * than SYNC, FOLLOW_UP and DELAY_RESP, are ignored. The mbuf is not freed
+ * here; it stays owned by the caller.
+ */
+static void
+parse_ptp_frames(uint8_t portid, struct rte_mbuf *m) {
+	struct ptp_header *ptp_hdr;
+	struct ether_hdr *eth_hdr;
+	uint16_t eth_type;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+	eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+	if (eth_type == PTP_PROTOCOL) {
+		ptp_data.m = m;
+		ptp_data.portid = portid;
+		ptp_hdr = (struct ptp_header *)(rte_pktmbuf_mtod(m, char *)
+					+ sizeof(struct ether_hdr));
+
+		switch (ptp_hdr->msg_type) {
+		case SYNC:
+			parse_sync(&ptp_data, m->timesync);
+			break;
+		case FOLLOW_UP:
+			parse_fup(&ptp_data);
+			break;
+		case DELAY_RESP:
+			parse_drsp(&ptp_data);
+			print_clock_info(&ptp_data);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * The lcore main. This is the main thread that does the work: it polls RX
+ * queue 0 of every enabled port, one packet at a time, hands frames flagged
+ * as PTP to parse_ptp_frames() and frees every received mbuf.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+	uint8_t i;
+	uint8_t portid;
+	unsigned nb_rx;
+	struct rte_mbuf *m;
+
+	printf("\nCore %u Waiting for SYNC packets. [Ctrl+C to quit]\n",
+			rte_lcore_id());
+
+	/* Run until the application is quit or killed. */
+
+	while (1) {
+		/*
+		 * Read packet from RX queues. The index into the list of
+		 * enabled ports is kept separate from the port id: reusing one
+		 * variable for both skips ports or never terminates the scan.
+		 */
+		for (i = 0; i < ptp_enabled_port_nb; i++) {
+
+			portid = ptp_enabled_ports[i];
+			nb_rx = rte_eth_rx_burst(portid, 0, &m, 1);
+
+			if (likely(nb_rx == 0))
+				continue;
+
+			if (m->ol_flags & PKT_RX_IEEE1588_PTP)
+				parse_ptp_frames(portid, m);
+
+			rte_pktmbuf_free(m);
+		}
+	}
+}
+
+/* Print the command line synopsis of the application options. */
+static void
+print_usage(const char *prgname)
+{
+	printf("%s [EAL options] -- -p PORTMASK -T VALUE\n"
+		" -T VALUE: 0 - Disable, 1 - Enable Linux Clock"
+		" Synchronization (0 default)\n"
+		" -p PORTMASK: hexadecimal bitmask of ports to configure\n",
+		prgname);
+}
+
+/*
+ * Parse a hexadecimal port mask.
+ *
+ * Returns the mask, or 0 when the string is empty, contains a character
+ * that is not a hexadecimal digit, or encodes an empty mask. 0 is used as
+ * the single error value because the result is stored in an unsigned mask
+ * and the caller rejects exactly that value.
+ */
+static int
+ptp_parse_portmask(const char *portmask)
+{
+	char *end = NULL;
+	unsigned long pm;
+
+	/* Parse the hexadecimal string. */
+	pm = strtoul(portmask, &end, 16);
+
+	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+		return 0;
+
+	return pm;
+}
+
+/*
+ * Parse the -T argument.
+ *
+ * Returns 0 when the value is zero, 1 for any other valid number and -1
+ * when the string is not a valid hexadecimal number.
+ */
+static int
+parse_ptp_kernel(const char *param)
+{
+	char *end = NULL;
+	unsigned long pm;
+
+	/* Parse the hexadecimal string. */
+	pm = strtoul(param, &end, 16);
+
+	if ((param[0] == '\0') || (end == NULL) || (*end != '\0'))
+		return -1;
+	if (pm == 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Parse the commandline arguments.
+ * Returns 0 on success and -1 (after printing the usage) on any invalid or
+ * unknown option.
+ */
+static int
+ptp_parse_args(int argc, char **argv)
+{
+	int opt, ret;
+	char **argvopt;
+	int option_index;
+	char *prgname = argv[0];
+	static struct option lgopts[] = { {NULL, 0, 0, 0} };
+
+	argvopt = argv;
+
+	while ((opt = getopt_long(argc, argvopt, "p:T:",
+				  lgopts, &option_index)) != EOF) {
+
+		switch (opt) {
+
+		/* Portmask. */
+		case 'p':
+			ptp_enabled_port_mask = ptp_parse_portmask(optarg);
+			if (ptp_enabled_port_mask == 0) {
+				printf("invalid portmask\n");
+				print_usage(prgname);
+				return -1;
+			}
+			break;
+		/* Time synchronization. */
+		case 'T':
+			ret = parse_ptp_kernel(optarg);
+			if (ret < 0) {
+				print_usage(prgname);
+				return -1;
+			}
+
+			ptp_data.kernel_time_set = ret;
+			break;
+
+		default:
+			print_usage(prgname);
+			return -1;
+		}
+	}
+
+	argv[optind-1] = prgname;
+
+	optind = 0; /* Reset getopt lib. */
+
+	return 0;
+}
+
+/*
+ * The main function, which does initialization and calls the per-lcore
+ * functions.
+ */
+int
+main(int argc, char *argv[])
+{
+	unsigned nb_ports;
+
+	uint8_t portid;
+
+	/* Initialize the Environment Abstraction Layer (EAL). */
+	int ret = rte_eal_init(argc, argv);
+
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+
+	memset(&ptp_data, '\0', sizeof(struct ptpv2_data_slave_ordinary));
+
+	argc -= ret;
+	argv += ret;
+
+	ret = ptp_parse_args(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Error with PTP initialization\n");
+
+	/* Get the number of Ethernet ports available to the application. */
+	nb_ports = rte_eth_dev_count();
+
+	/* Creates a new mempool in memory to hold the mbufs. */
+	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+		MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+
+	if (mbuf_pool == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+	/* Initialize all ports. */
+	for (portid = 0; portid < nb_ports; portid++) {
+		/* Unsigned shift: the mask is 32 bits wide. */
+		if ((ptp_enabled_port_mask & (1U << portid)) != 0) {
+			if (port_init(portid, mbuf_pool) == 0) {
+				ptp_enabled_ports[ptp_enabled_port_nb] = portid;
+				ptp_enabled_port_nb++;
+			} else {
+				rte_exit(EXIT_FAILURE,
+					 "Cannot init port %"PRIu8 "\n",
+					 portid);
+			}
+		} else
+			printf("Skipping disabled port %u\n", portid);
+	}
+
+	if (ptp_enabled_port_nb == 0) {
+		rte_exit(EXIT_FAILURE,
+			"All available ports are disabled."
+			" Please set portmask.\n");
+	}
+
+	if (rte_lcore_count() > 1)
+		printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
+
+	/* Call lcore_main on the master core only. */
+	lcore_main();
+
+	return 0;
+}
diff --git a/examples/qos_meter/Makefile b/examples/qos_meter/Makefile
new file mode 100644
index 00000000..5113a129
--- /dev/null
+++ b/examples/qos_meter/Makefile
@@ -0,0 +1,56 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = qos_meter + +# all sources are stored in SRCS-y +SRCS-y := main.c rte_policer.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/qos_meter/main.c b/examples/qos_meter/main.c new file mode 100644 index 00000000..b968b001 --- /dev/null +++ b/examples/qos_meter/main.c @@ -0,0 +1,394 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_mempool.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_mbuf.h>
+#include <rte_meter.h>
+
+/*
+ * Traffic metering configuration
+ *
+ */
+#define APP_MODE_FWD                    0
+#define APP_MODE_SRTCM_COLOR_BLIND      1
+#define APP_MODE_SRTCM_COLOR_AWARE      2
+#define APP_MODE_TRTCM_COLOR_BLIND      3
+#define APP_MODE_TRTCM_COLOR_AWARE      4
+
+#define APP_MODE        APP_MODE_SRTCM_COLOR_BLIND
+
+
+#include "main.h"
+
+
+/* Byte offsets, from the start of the frame, of the flow id and colour. */
+#define APP_PKT_FLOW_POS                33
+#define APP_PKT_COLOR_POS               5
+
+
+#if APP_PKT_FLOW_POS > 64 || APP_PKT_COLOR_POS > 64
+#error Byte offset needs to be less than 64
+#endif
+
+/*
+ * Buffer pool configuration
+ *
+ ***/
+#define NB_MBUF             8192
+#define MEMPOOL_CACHE_SIZE  256
+
+static struct rte_mempool *pool = NULL;
+
+/*
+ * NIC configuration
+ *
+ ***/
+static struct rte_eth_conf port_conf = {
+	.rxmode = {
+		.mq_mode	= ETH_MQ_RX_RSS,
+		.max_rx_pkt_len = ETHER_MAX_LEN,
+		.split_hdr_size = 0,
+		.header_split   = 0,
+		.hw_ip_checksum = 1,
+		.hw_vlan_filter = 0,
+		.jumbo_frame    = 0,
+		.hw_strip_crc   = 0,
+	},
+	.rx_adv_conf = {
+		.rss_conf = {
+			.rss_key = NULL,
+			.rss_hf = ETH_RSS_IP,
+		},
+	},
+	.txmode = {
+		/* TX multi-queue mode takes an ETH_MQ_TX_* value. */
+		.mq_mode = ETH_MQ_TX_NONE,
+	},
+};
+
+#define NIC_RX_QUEUE_DESC               128
+#define NIC_TX_QUEUE_DESC               512
+
+#define NIC_RX_QUEUE                    0
+#define NIC_TX_QUEUE                    0
+
+/*
+ * Packet RX/TX
+ *
+ ***/
+#define PKT_RX_BURST_MAX                32
+#define PKT_TX_BURST_MAX                32
+#define TIME_TX_DRAIN                   200000ULL
+
+static uint8_t port_rx;
+static uint8_t port_tx;
+static struct rte_mbuf *pkts_rx[PKT_RX_BURST_MAX];
+struct rte_eth_dev_tx_buffer *tx_buffer;
+
+struct rte_meter_srtcm_params app_srtcm_params[] = {
+	{.cir = 1000000 * 46,  .cbs = 2048, .ebs = 2048},
+};
+
+struct rte_meter_trtcm_params app_trtcm_params[] = {
+	{.cir = 1000000 * 46,  .pir = 1500000 * 46,  .cbs = 2048, .pbs = 2048},
+};
+
+#define APP_FLOWS_MAX  256
+
+FLOW_METER app_flows[APP_FLOWS_MAX];
+
+/*
+ * Configure every entry of app_flows, cycling through the parameter sets
+ * of the metering mode selected by APP_MODE.
+ */
+static void
+app_configure_flow_table(void)
+{
+	uint32_t i, j;
+
+	for (i = 0, j = 0; i < APP_FLOWS_MAX; i ++, j = (j + 1) % RTE_DIM(PARAMS)){
+		FUNC_CONFIG(&app_flows[i], &PARAMS[j]);
+	}
+}
+
+/* Store the policer decision in the colour byte of the packet. */
+static inline void
+app_set_pkt_color(uint8_t *pkt_data, enum policer_action color)
+{
+	pkt_data[APP_PKT_COLOR_POS] = (uint8_t)color;
+}
+
+/*
+ * Meter one packet and apply the policer table.
+ *
+ * Returns the resulting policer action (GREEN, YELLOW, RED or DROP). A
+ * packet whose colour byte is not one of the e_RTE_METER_COLORS values is
+ * dropped without being metered: the byte comes straight from the wire and
+ * must not be used to index policer_table.
+ */
+static inline int
+app_pkt_handle(struct rte_mbuf *pkt, uint64_t time)
+{
+	uint8_t input_color, output_color;
+	uint8_t *pkt_data = rte_pktmbuf_mtod(pkt, uint8_t *);
+	uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
+	uint8_t flow_id = (uint8_t)(pkt_data[APP_PKT_FLOW_POS] & (APP_FLOWS_MAX - 1));
+	enum policer_action action;
+
+	input_color = pkt_data[APP_PKT_COLOR_POS];
+	if (unlikely(input_color >= e_RTE_METER_COLORS))
+		return DROP;
+
+	/* color input is not used for blind modes */
+	output_color = (uint8_t) FUNC_METER(&app_flows[flow_id], time, pkt_len,
+		(enum rte_meter_color) input_color);
+
+	/* Apply policing and set the output color */
+	action = policer_table[input_color][output_color];
+	app_set_pkt_color(pkt_data, action);
+
+	return action;
+}
+
+
+/*
+ * Forwarding loop: receive bursts on port_rx, meter and police each packet,
+ * free the dropped ones and buffer the rest for transmission on port_tx.
+ * The TX buffer is also flushed whenever more than TIME_TX_DRAIN TSC cycles
+ * have elapsed since the previous flush.
+ */
+static __attribute__((noreturn)) int
+main_loop(__attribute__((unused)) void *dummy)
+{
+	uint64_t current_time, last_time = rte_rdtsc();
+	uint32_t lcore_id = rte_lcore_id();
+
+	printf("Core %u: port RX = %d, port TX = %d\n", lcore_id, port_rx, port_tx);
+
+	while (1) {
+		uint64_t time_diff;
+		int i, nb_rx;
+
+		/* Mechanism to avoid stale packets in the output buffer */
+		current_time = rte_rdtsc();
+		time_diff = current_time - last_time;
+		if (unlikely(time_diff > TIME_TX_DRAIN)) {
+			/* Flush tx buffer */
+			rte_eth_tx_buffer_flush(port_tx, NIC_TX_QUEUE, tx_buffer);
+			last_time = current_time;
+		}
+
+		/* Read packet burst from NIC RX */
+		nb_rx = rte_eth_rx_burst(port_rx, NIC_RX_QUEUE, pkts_rx, PKT_RX_BURST_MAX);
+
+		/* Handle packets */
+		for (i = 0; i < nb_rx; i ++) {
+			struct rte_mbuf *pkt = pkts_rx[i];
+
+			/* Handle current packet */
+			if (app_pkt_handle(pkt, current_time) == DROP)
+				rte_pktmbuf_free(pkt);
+			else
+				rte_eth_tx_buffer(port_tx, NIC_TX_QUEUE, tx_buffer, pkt);
+		}
+	}
+}
+
+/* Print the command line synopsis of the application options. */
+static void
+print_usage(const char *prgname)
+{
+	printf ("%s [EAL options] -- -p PORTMASK\n"
+		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n",
+		prgname);
+}
+
+/*
+ * Parse a hexadecimal port mask.
+ *
+ * Returns the mask, or 0 when the string is empty, is not a valid
+ * hexadecimal number or encodes an empty mask. 0 is the single error value
+ * because the caller stores the result in an unsigned variable and rejects
+ * exactly that value.
+ */
+static int
+parse_portmask(const char *portmask)
+{
+	char *end = NULL;
+	unsigned long pm;
+
+	/* parse hexadecimal string */
+	pm = strtoul(portmask, &end, 16);
+	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+		return 0;
+
+	return pm;
+}
+
+/*
+ * Parse the argument given in the command line of the application.
+ *
+ * The lowest bit set in the port mask selects port_rx and the next one
+ * port_tx; a mask with more than two bits set is rejected.
+ * Returns 0 on success and -1 (after printing the usage) otherwise.
+ */
+static int
+parse_args(int argc, char **argv)
+{
+	int opt;
+	char **argvopt;
+	int option_index;
+	char *prgname = argv[0];
+	static struct option lgopts[] = {
+		{NULL, 0, 0, 0}
+	};
+	uint64_t port_mask, i, mask;
+
+	argvopt = argv;
+
+	while ((opt = getopt_long(argc, argvopt, "p:", lgopts, &option_index)) != EOF) {
+		switch (opt) {
+		case 'p':
+			port_mask = parse_portmask(optarg);
+			if (port_mask == 0) {
+				printf("invalid port mask (null port mask)\n");
+				print_usage(prgname);
+				return -1;
+			}
+
+			for (i = 0, mask = 1; i < 64; i ++, mask <<= 1){
+				if (mask & port_mask){
+					port_rx = i;
+					port_mask &= ~ mask;
+					break;
+				}
+			}
+
+			for (i = 0, mask = 1; i < 64; i ++, mask <<= 1){
+				if (mask & port_mask){
+					port_tx = i;
+					port_mask &= ~ mask;
+					break;
+				}
+			}
+
+			if (port_mask != 0) {
+				printf("invalid port mask (more than 2 ports)\n");
+				print_usage(prgname);
+				return -1;
+			}
+			break;
+
+		default:
+			print_usage(prgname);
+			return -1;
+		}
+	}
+
+	if (optind <= 1) {
+		print_usage(prgname);
+		return -1;
+	}
+
+	argv[optind-1] = prgname;
+
+	optind = 0; /* reset getopt lib */
+	return 0;
+}
+
+/*
+ * Initialise the EAL, the buffer pool, both ports and the flow table, then
+ * run main_loop(). Every initialisation failure terminates the application
+ * through rte_exit().
+ */
+int
+main(int argc, char **argv)
+{
+	uint32_t lcore_id;
+	int ret;
+
+	/* EAL init */
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+	argc -= ret;
+	argv += ret;
+	if (rte_lcore_count() != 1) {
+		rte_exit(EXIT_FAILURE, "This application does not accept more than one core. "
+		"Please adjust the \"-c COREMASK\" parameter accordingly.\n");
+	}
+
+	/* Application non-EAL arguments parse */
+	ret = parse_args(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Invalid input arguments\n");
+
+	/* Buffer pool init */
+	pool = rte_pktmbuf_pool_create("pool", NB_MBUF, MEMPOOL_CACHE_SIZE,
+		0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+	if (pool == NULL)
+		rte_exit(EXIT_FAILURE, "Buffer pool creation error\n");
+
+	/* NIC init */
+	ret = rte_eth_dev_configure(port_rx, 1, 1, &port_conf);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Port %d configuration error (%d)\n", port_rx, ret);
+
+	ret = rte_eth_rx_queue_setup(port_rx, NIC_RX_QUEUE, NIC_RX_QUEUE_DESC,
+				rte_eth_dev_socket_id(port_rx),
+				NULL, pool);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Port %d RX queue setup error (%d)\n", port_rx, ret);
+
+	ret = rte_eth_tx_queue_setup(port_rx, NIC_TX_QUEUE, NIC_TX_QUEUE_DESC,
+				rte_eth_dev_socket_id(port_rx),
+				NULL);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Port %d TX queue setup error (%d)\n", port_rx, ret);
+
+	ret = rte_eth_dev_configure(port_tx, 1, 1, &port_conf);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Port %d configuration error (%d)\n", port_tx, ret);
+
+	ret = rte_eth_rx_queue_setup(port_tx, NIC_RX_QUEUE, NIC_RX_QUEUE_DESC,
+				rte_eth_dev_socket_id(port_tx),
+				NULL, pool);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Port %d RX queue setup error (%d)\n", port_tx, ret);
+
+	ret = rte_eth_tx_queue_setup(port_tx, NIC_TX_QUEUE, NIC_TX_QUEUE_DESC,
+				rte_eth_dev_socket_id(port_tx),
+				NULL);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Port %d TX queue setup error (%d)\n", port_tx, ret);
+
+	tx_buffer = rte_zmalloc_socket("tx_buffer",
+			RTE_ETH_TX_BUFFER_SIZE(PKT_TX_BURST_MAX), 0,
+			rte_eth_dev_socket_id(port_tx));
+	if (tx_buffer == NULL)
+		rte_exit(EXIT_FAILURE, "Port %d TX buffer allocation error\n",
+				port_tx);
+
+	rte_eth_tx_buffer_init(tx_buffer, PKT_TX_BURST_MAX);
+
+	ret = rte_eth_dev_start(port_rx);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Port %d start error (%d)\n", port_rx, ret);
+
+	ret = rte_eth_dev_start(port_tx);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Port %d start error (%d)\n", port_tx, ret);
+
+	rte_eth_promiscuous_enable(port_rx);
+
+	rte_eth_promiscuous_enable(port_tx);
+
+	/* App configuration */
+	app_configure_flow_table();
+
+	/* Launch per-lcore init on every lcore */
+	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		if (rte_eal_wait_lcore(lcore_id) < 0)
+			return -1;
+	}
+
+	return 0;
+}
diff --git a/examples/qos_meter/main.h b/examples/qos_meter/main.h
new file mode 100644
index 00000000..530bf69c
--- /dev/null
+++ b/examples/qos_meter/main.h
@@ -0,0 +1,93 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+/*
+ * Policer decisions. The first three values alias the meter colours so
+ * that a decision can be written back as the packet colour; DROP is the
+ * extra value meaning the packet is discarded.
+ */
+enum policer_action {
+	GREEN = e_RTE_METER_GREEN,
+	YELLOW = e_RTE_METER_YELLOW,
+	RED = e_RTE_METER_RED,
+	DROP = 3,
+};
+
+/*
+ * Policer decision table, indexed as [input colour][output colour].
+ * NOTE: this is a definition, not a declaration, so this header can only
+ * be included from a single translation unit.
+ */
+enum policer_action policer_table[e_RTE_METER_COLORS][e_RTE_METER_COLORS] =
+{
+	{ GREEN, RED, RED},
+	{ DROP, YELLOW, RED},
+	{ DROP, DROP, RED}
+};
+
+/*
+ * Per-mode bindings selected by APP_MODE (defined by the includer):
+ *   FUNC_METER(meter, time, pkt_len, input_color) - returns the output colour
+ *   FUNC_CONFIG(meter, params)                    - configures one flow meter
+ *   PARAMS                                        - parameter table to use
+ *   FLOW_METER                                    - per-flow meter state type
+ */
+#if APP_MODE == APP_MODE_FWD
+
+/*
+ * Forward-only mode: the output colour is the input colour (argument d).
+ * The self-assignments only mark the caller's locals as used.
+ */
+#define FUNC_METER(a,b,c,d) (d), flow_id=flow_id, pkt_len=pkt_len, time=time
+#define FUNC_CONFIG(a,b)
+#define PARAMS	app_srtcm_params
+#define FLOW_METER int
+
+#elif APP_MODE == APP_MODE_SRTCM_COLOR_BLIND
+
+#define FUNC_METER(a,b,c,d) rte_meter_srtcm_color_blind_check(a,b,c)
+#define FUNC_CONFIG   rte_meter_srtcm_config
+#define PARAMS        app_srtcm_params
+#define FLOW_METER    struct rte_meter_srtcm
+
+#elif (APP_MODE == APP_MODE_SRTCM_COLOR_AWARE)
+
+#define FUNC_METER    rte_meter_srtcm_color_aware_check
+#define FUNC_CONFIG   rte_meter_srtcm_config
+#define PARAMS        app_srtcm_params
+#define FLOW_METER    struct rte_meter_srtcm
+
+#elif (APP_MODE == APP_MODE_TRTCM_COLOR_BLIND)
+
+#define FUNC_METER(a,b,c,d) rte_meter_trtcm_color_blind_check(a,b,c)
+#define FUNC_CONFIG  rte_meter_trtcm_config
+#define PARAMS       app_trtcm_params
+#define FLOW_METER   struct rte_meter_trtcm
+
+#elif (APP_MODE == APP_MODE_TRTCM_COLOR_AWARE)
+
+#define FUNC_METER   rte_meter_trtcm_color_aware_check
+#define FUNC_CONFIG  rte_meter_trtcm_config
+#define PARAMS       app_trtcm_params
+#define FLOW_METER   struct rte_meter_trtcm
+
+#else
+#error Invalid value for APP_MODE
+#endif
+
+
+
+
+#endif /* _MAIN_H_ */
diff --git a/examples/qos_meter/rte_policer.c b/examples/qos_meter/rte_policer.c
new file mode 100644
index 00000000..35f5f1b2
--- /dev/null
+++ b/examples/qos_meter/rte_policer.c
@@ -0,0 +1,58 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include <stdlib.h> +#include "rte_policer.h" + +int +rte_phb_config(struct rte_phb *phb_table, uint32_t phb_table_index, + enum rte_meter_color pre_meter, enum rte_meter_color post_meter, enum rte_phb_action action) +{ + struct rte_phb *phb = NULL; + + /* User argument checking */ + if (phb_table == NULL) { + return -1; + } + + if ((pre_meter > e_RTE_METER_RED) || (post_meter > e_RTE_METER_RED) || (pre_meter > post_meter)) { + return -2; + } + + /* Set action in PHB table entry */ + phb = &phb_table[phb_table_index]; + phb->actions[pre_meter][post_meter] = action; + + + return 0; +} diff --git a/examples/qos_meter/rte_policer.h b/examples/qos_meter/rte_policer.h new file mode 100644 index 00000000..d2bcafbf --- /dev/null +++ b/examples/qos_meter/rte_policer.h @@ -0,0 +1,64 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_POLICER_H__ +#define __INCLUDE_RTE_POLICER_H__ + +#include <stdint.h> +#include <rte_meter.h> + +enum rte_phb_action { + e_RTE_PHB_ACTION_GREEN = e_RTE_METER_GREEN, + e_RTE_PHB_ACTION_YELLOW = e_RTE_METER_YELLOW, + e_RTE_PHB_ACTION_RED = e_RTE_METER_RED, + e_RTE_PHB_ACTION_DROP = 3, +}; + +struct rte_phb { + enum rte_phb_action actions[e_RTE_METER_COLORS][e_RTE_METER_COLORS]; +}; + +int +rte_phb_config(struct rte_phb *phb_table, uint32_t phb_table_index, + enum rte_meter_color pre_meter, enum rte_meter_color post_meter, enum rte_phb_action action); + +static inline enum rte_phb_action +policer_run(struct rte_phb *phb_table, uint32_t phb_table_index, enum rte_meter_color pre_meter, enum rte_meter_color post_meter) +{ + struct rte_phb *phb = &phb_table[phb_table_index]; + enum rte_phb_action action = phb->actions[pre_meter][post_meter]; + + return action; +} + +#endif diff --git a/examples/qos_sched/Makefile b/examples/qos_sched/Makefile new file mode 100644 index 00000000..f59645f5 --- /dev/null +++ b/examples/qos_sched/Makefile @@ -0,0 +1,60 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Build rules for the qos_sched example application.

# The build cannot locate the DPDK makefiles without RTE_SDK.
ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif

# Default target, can be overridden by command line or environment
RTE_TARGET ?= x86_64-native-linuxapp-gcc

include $(RTE_SDK)/mk/rte.vars.mk

# Outside a linuxapp environment only print a notice and provide an empty
# "all" target, so nothing is built.
ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
$(info This application can only operate in a linuxapp environment, \
please change the definition of the RTE_TARGET environment variable)
all:
else

# binary name
APP = qos_sched

# all source are stored in SRCS-y
SRCS-y := main.c args.c init.c app_thread.c cfg_file.c cmdline.c stats.c

CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
# Per-object flags: args.c and cfg_file.c are built with _GNU_SOURCE
# (NOTE(review): presumably for strnlen(), which both files call - confirm).
CFLAGS_args.o := -D_GNU_SOURCE
CFLAGS_cfg_file.o := -D_GNU_SOURCE

include $(RTE_SDK)/mk/rte.extapp.mk

endif
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c new file mode 100644 index 00000000..3c678cc4 --- /dev/null +++ b/examples/qos_sched/app_thread.c @@ -0,0 +1,293 @@
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> + +#include <rte_log.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_memcpy.h> +#include <rte_byteorder.h> +#include <rte_branch_prediction.h> +#include <rte_sched.h> + +#include "main.h" + +/* + * QoS parameters are encoded as follows: + * Outer VLAN ID defines subport + * Inner VLAN ID defines pipe + * Destination IP 0.0.XXX.0 defines traffic class + * Destination IP host (0.0.0.XXX) defines queue + * Values below define offset to each field from start of frame + */ +#define SUBPORT_OFFSET 7 +#define PIPE_OFFSET 9 +#define TC_OFFSET 20 +#define QUEUE_OFFSET 20 +#define COLOR_OFFSET 19 + +static inline int +get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe, + uint32_t *traffic_class, uint32_t *queue, uint32_t *color) +{ + uint16_t *pdata = rte_pktmbuf_mtod(m, uint16_t *); + + *subport = (rte_be_to_cpu_16(pdata[SUBPORT_OFFSET]) & 0x0FFF) & + (port_params.n_subports_per_port - 1); /* Outer VLAN ID*/ + *pipe = (rte_be_to_cpu_16(pdata[PIPE_OFFSET]) & 0x0FFF) & + (port_params.n_pipes_per_subport - 1); /* Inner VLAN ID */ + *traffic_class = (pdata[QUEUE_OFFSET] & 0x0F) & 
+ (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1); /* Destination IP */ + *queue = ((pdata[QUEUE_OFFSET] >> 8) & 0x0F) & + (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1) ; /* Destination IP */ + *color = pdata[COLOR_OFFSET] & 0x03; /* Destination IP */ + + return 0; +} + +void +app_rx_thread(struct thread_conf **confs) +{ + uint32_t i, nb_rx; + struct rte_mbuf *rx_mbufs[burst_conf.rx_burst] __rte_cache_aligned; + struct thread_conf *conf; + int conf_idx = 0; + + uint32_t subport; + uint32_t pipe; + uint32_t traffic_class; + uint32_t queue; + uint32_t color; + + while ((conf = confs[conf_idx])) { + nb_rx = rte_eth_rx_burst(conf->rx_port, conf->rx_queue, rx_mbufs, + burst_conf.rx_burst); + + if (likely(nb_rx != 0)) { + APP_STATS_ADD(conf->stat.nb_rx, nb_rx); + + for(i = 0; i < nb_rx; i++) { + get_pkt_sched(rx_mbufs[i], + &subport, &pipe, &traffic_class, &queue, &color); + rte_sched_port_pkt_write(rx_mbufs[i], subport, pipe, + traffic_class, queue, (enum rte_meter_color) color); + } + + if (unlikely(rte_ring_sp_enqueue_bulk(conf->rx_ring, + (void **)rx_mbufs, nb_rx) != 0)) { + for(i = 0; i < nb_rx; i++) { + rte_pktmbuf_free(rx_mbufs[i]); + + APP_STATS_ADD(conf->stat.nb_drop, 1); + } + } + } + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + + +/* Send the packet to an output interface + * For performance reason function returns number of packets dropped, not sent, + * so 0 means that all packets were sent successfully + */ + +static inline void +app_send_burst(struct thread_conf *qconf) +{ + struct rte_mbuf **mbufs; + uint32_t n, ret; + + mbufs = (struct rte_mbuf **)qconf->m_table; + n = qconf->n_mbufs; + + do { + ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n); + /* we cannot drop the packets, so re-send */ + /* update number of packets to be sent */ + n -= ret; + mbufs = (struct rte_mbuf **)&mbufs[ret]; + } while (n); +} + + +/* Send the packet to an output interface */ +static void +app_send_packets(struct thread_conf *qconf, 
struct rte_mbuf **mbufs, uint32_t nb_pkt) +{ + uint32_t i, len; + + len = qconf->n_mbufs; + for(i = 0; i < nb_pkt; i++) { + qconf->m_table[len] = mbufs[i]; + len++; + /* enough pkts to be sent */ + if (unlikely(len == burst_conf.tx_burst)) { + qconf->n_mbufs = len; + app_send_burst(qconf); + len = 0; + } + } + + qconf->n_mbufs = len; +} + +void +app_tx_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.qos_dequeue]; + struct thread_conf *conf; + int conf_idx = 0; + int retval; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + while ((conf = confs[conf_idx])) { + retval = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs, + burst_conf.qos_dequeue); + if (likely(retval == 0)) { + app_send_packets(conf, mbufs, burst_conf.qos_dequeue); + + conf->counter = 0; /* reset empty read loop counter */ + } + + conf->counter++; + + /* drain ring and TX queues */ + if (unlikely(conf->counter > drain_tsc)) { + /* now check is there any packets left to be transmitted */ + if (conf->n_mbufs != 0) { + app_send_burst(conf); + + conf->n_mbufs = 0; + } + conf->counter = 0; + } + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + +void +app_worker_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.ring_burst]; + struct thread_conf *conf; + int conf_idx = 0; + + while ((conf = confs[conf_idx])) { + uint32_t nb_pkt; + int retval; + + /* Read packet from the ring */ + retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs, + burst_conf.ring_burst); + if (likely(retval == 0)) { + int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs, + burst_conf.ring_burst); + + APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent); + APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst); + } + + nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs, + burst_conf.qos_dequeue); + if (likely(nb_pkt > 0)) + while (rte_ring_sp_enqueue_bulk(conf->tx_ring, (void 
**)mbufs, nb_pkt) != 0); + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + +void +app_mixed_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.ring_burst]; + struct thread_conf *conf; + int conf_idx = 0; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + while ((conf = confs[conf_idx])) { + uint32_t nb_pkt; + int retval; + + /* Read packet from the ring */ + retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs, + burst_conf.ring_burst); + if (likely(retval == 0)) { + int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs, + burst_conf.ring_burst); + + APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent); + APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst); + } + + + nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs, + burst_conf.qos_dequeue); + if (likely(nb_pkt > 0)) { + app_send_packets(conf, mbufs, nb_pkt); + + conf->counter = 0; /* reset empty read loop counter */ + } + + conf->counter++; + + /* drain ring and TX queues */ + if (unlikely(conf->counter > drain_tsc)) { + + /* now check is there any packets left to be transmitted */ + if (conf->n_mbufs != 0) { + app_send_burst(conf); + + conf->n_mbufs = 0; + } + conf->counter = 0; + } + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c new file mode 100644 index 00000000..3e7fd087 --- /dev/null +++ b/examples/qos_sched/args.c @@ -0,0 +1,485 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <locale.h> +#include <unistd.h> +#include <limits.h> +#include <getopt.h> + +#include <rte_log.h> +#include <rte_eal.h> +#include <rte_lcore.h> +#include <rte_string_fns.h> + +#include "main.h" + +#define APP_NAME "qos_sched" +#define MAX_OPT_VALUES 8 +#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u/topology/" + +static uint32_t app_master_core = 1; +static uint32_t app_numa_mask; +static uint64_t app_used_core_mask = 0; +static uint64_t app_used_port_mask = 0; +static uint64_t app_used_rx_port_mask = 0; +static uint64_t app_used_tx_port_mask = 0; + + +static const char usage[] = + " \n" + " %s <APP PARAMS> \n" + " \n" + "Application mandatory parameters: \n" + " --pfc \"RX PORT, TX PORT, RX LCORE, WT LCORE\" : Packet flow configuration \n" + " multiple pfc can be configured in command line \n" + " \n" + "Application optional parameters: \n" + " --i : run in interactive mode (default value is %u) \n" + " --mst I : master core index (default value is %u) \n" + " --rsz \"A, B, C\" : Ring sizes \n" + " A = Size (in number of buffer descriptors) of each of the NIC RX \n" + " rings read by the I/O RX lcores (default value is %u) \n" + " B = Size (in number of elements) of each of the SW rings used by the\n" + " I/O RX lcores to send packets to worker lcores (default value is\n" + " %u) \n" + " C = Size (in number of buffer descriptors) of each of the NIC TX \n" + " rings written by worker lcores (default value is %u) \n" + " --bsz \"A, B, C, D\": Burst sizes \n" + " A = I/O RX lcore read burst size from NIC RX (default value is %u) \n" + " B = I/O RX lcore write burst size to output SW rings, \n" + " Worker lcore read burst size from input SW rings, \n" + " QoS enqueue size (default value is %u) \n" + " C = QoS dequeue size (default value is %u) \n" + " D = Worker lcore write burst size to NIC TX (default value is %u) \n" + " --msz M : Mempool size (in number of mbufs) 
for each pfc (default %u) \n" + " --rth \"A, B, C\" : RX queue threshold parameters \n" + " A = RX prefetch threshold (default value is %u) \n" + " B = RX host threshold (default value is %u) \n" + " C = RX write-back threshold (default value is %u) \n" + " --tth \"A, B, C\" : TX queue threshold parameters \n" + " A = TX prefetch threshold (default value is %u) \n" + " B = TX host threshold (default value is %u) \n" + " C = TX write-back threshold (default value is %u) \n" + " --cfg FILE : profile configuration to load \n" +; + +/* display usage */ +static void +app_usage(const char *prgname) +{ + printf(usage, prgname, APP_INTERACTIVE_DEFAULT, app_master_core, + APP_RX_DESC_DEFAULT, APP_RING_SIZE, APP_TX_DESC_DEFAULT, + MAX_PKT_RX_BURST, PKT_ENQUEUE, PKT_DEQUEUE, + MAX_PKT_TX_BURST, NB_MBUF, + RX_PTHRESH, RX_HTHRESH, RX_WTHRESH, + TX_PTHRESH, TX_HTHRESH, TX_WTHRESH + ); +} + +static inline int str_is(const char *str, const char *is) +{ + return strcmp(str, is) == 0; +} + +/* returns core mask used by DPDK */ +static uint64_t +app_eal_core_mask(void) +{ + uint32_t i; + uint64_t cm = 0; + struct rte_config *cfg = rte_eal_get_configuration(); + + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (cfg->lcore_role[i] == ROLE_RTE) + cm |= (1ULL << i); + } + + cm |= (1ULL << cfg->master_lcore); + + return cm; +} + + +/* returns total number of cores presented in a system */ +static uint32_t +app_cpu_core_count(void) +{ + int i, len; + char path[PATH_MAX]; + uint32_t ncores = 0; + + for(i = 0; i < RTE_MAX_LCORE; i++) { + len = snprintf(path, sizeof(path), SYS_CPU_DIR, i); + if (len <= 0 || (unsigned)len >= sizeof(path)) + continue; + + if (access(path, F_OK) == 0) + ncores++; + } + + return ncores; +} + +/* returns: + number of values parsed + -1 in case of error +*/ +static int +app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, uint32_t *opt_vals) +{ + char *string; + uint32_t i, n_tokens; + char *tokens[MAX_OPT_VALUES]; + + if (conf_str == NULL || 
opt_vals == NULL || n_vals == 0 || n_vals > MAX_OPT_VALUES) + return -1; + + /* duplicate configuration string before splitting it to tokens */ + string = strdup(conf_str); + if (string == NULL) + return -1; + + n_tokens = rte_strsplit(string, strnlen(string, 32), tokens, n_vals, separator); + + for(i = 0; i < n_tokens; i++) { + opt_vals[i] = (uint32_t)atol(tokens[i]); + } + + free(string); + + return n_tokens; +} + +static int +app_parse_ring_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + ring_conf.rx_size = vals[0]; + ring_conf.ring_size = vals[1]; + ring_conf.tx_size = vals[2]; + + return 0; +} + +static int +app_parse_rth_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + rx_thresh.pthresh = (uint8_t)vals[0]; + rx_thresh.hthresh = (uint8_t)vals[1]; + rx_thresh.wthresh = (uint8_t)vals[2]; + + return 0; +} + +static int +app_parse_tth_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + tx_thresh.pthresh = (uint8_t)vals[0]; + tx_thresh.hthresh = (uint8_t)vals[1]; + tx_thresh.wthresh = (uint8_t)vals[2]; + + return 0; +} + +static int +app_parse_flow_conf(const char *conf_str) +{ + int ret; + uint32_t vals[5]; + struct flow_conf *pconf; + uint64_t mask; + + ret = app_parse_opt_vals(conf_str, ',', 6, vals); + if (ret < 4 || ret > 5) + return ret; + + pconf = &qos_conf[nb_pfc]; + + pconf->rx_port = (uint8_t)vals[0]; + pconf->tx_port = (uint8_t)vals[1]; + pconf->rx_core = (uint8_t)vals[2]; + pconf->wt_core = (uint8_t)vals[3]; + if (ret == 5) + pconf->tx_core = (uint8_t)vals[4]; + else + pconf->tx_core = pconf->wt_core; + + if (pconf->rx_core == pconf->wt_core) { + RTE_LOG(ERR, APP, "pfc %u: rx thread and worker thread cannot share same core\n", nb_pfc); + return -1; + } + + if 
(pconf->rx_port >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, APP, "pfc %u: invalid rx port %"PRIu8" index\n", + nb_pfc, pconf->rx_port); + return -1; + } + if (pconf->tx_port >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, APP, "pfc %u: invalid tx port %"PRIu8" index\n", + nb_pfc, pconf->rx_port); + return -1; + } + + mask = 1lu << pconf->rx_port; + if (app_used_rx_port_mask & mask) { + RTE_LOG(ERR, APP, "pfc %u: rx port %"PRIu8" is used already\n", + nb_pfc, pconf->rx_port); + return -1; + } + app_used_rx_port_mask |= mask; + app_used_port_mask |= mask; + + mask = 1lu << pconf->tx_port; + if (app_used_tx_port_mask & mask) { + RTE_LOG(ERR, APP, "pfc %u: port %"PRIu8" is used already\n", + nb_pfc, pconf->tx_port); + return -1; + } + app_used_tx_port_mask |= mask; + app_used_port_mask |= mask; + + mask = 1lu << pconf->rx_core; + app_used_core_mask |= mask; + + mask = 1lu << pconf->wt_core; + app_used_core_mask |= mask; + + mask = 1lu << pconf->tx_core; + app_used_core_mask |= mask; + + nb_pfc++; + + return 0; +} + +static int +app_parse_burst_conf(const char *conf_str) +{ + int ret; + uint32_t vals[4]; + + ret = app_parse_opt_vals(conf_str, ',', 4, vals); + if (ret != 4) + return ret; + + burst_conf.rx_burst = (uint16_t)vals[0]; + burst_conf.ring_burst = (uint16_t)vals[1]; + burst_conf.qos_dequeue = (uint16_t)vals[2]; + burst_conf.tx_burst = (uint16_t)vals[3]; + + return 0; +} + +/* + * Parses the argument given in the command line of the application, + * calculates mask for used cores and initializes EAL with calculated core mask + */ +int +app_parse_args(int argc, char **argv) +{ + int opt, ret; + int option_index; + const char *optname; + char *prgname = argv[0]; + uint32_t i, nb_lcores; + + static struct option lgopts[] = { + { "pfc", 1, 0, 0 }, + { "mst", 1, 0, 0 }, + { "rsz", 1, 0, 0 }, + { "bsz", 1, 0, 0 }, + { "msz", 1, 0, 0 }, + { "rth", 1, 0, 0 }, + { "tth", 1, 0, 0 }, + { "cfg", 1, 0, 0 }, + { NULL, 0, 0, 0 } + }; + + /* initialize EAL first */ + ret = rte_eal_init(argc, 
argv); + if (ret < 0) + return -1; + + argc -= ret; + argv += ret; + + /* set en_US locale to print big numbers with ',' */ + setlocale(LC_NUMERIC, "en_US.utf-8"); + + while ((opt = getopt_long(argc, argv, "i", + lgopts, &option_index)) != EOF) { + + switch (opt) { + case 'i': + printf("Interactive-mode selected\n"); + interactive = 1; + break; + /* long options */ + case 0: + optname = lgopts[option_index].name; + if (str_is(optname, "pfc")) { + ret = app_parse_flow_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid pipe configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "mst")) { + app_master_core = (uint32_t)atoi(optarg); + break; + } + if (str_is(optname, "rsz")) { + ret = app_parse_ring_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid ring configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "bsz")) { + ret = app_parse_burst_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid burst configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "msz")) { + mp_size = atoi(optarg); + if (mp_size <= 0) { + RTE_LOG(ERR, APP, "Invalid mempool size %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "rth")) { + ret = app_parse_rth_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid RX threshold configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "tth")) { + ret = app_parse_tth_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid TX threshold configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "cfg")) { + cfg_profile = optarg; + break; + } + break; + + default: + app_usage(prgname); + return -1; + } + } + + /* check master core index validity */ + for(i = 0; i <= app_master_core; i++) { + if (app_used_core_mask & (1u << app_master_core)) { + RTE_LOG(ERR, APP, "Master core index is not configured properly\n"); + app_usage(prgname); + return -1; + } + } + app_used_core_mask |= 1u << 
app_master_core; + + if ((app_used_core_mask != app_eal_core_mask()) || + (app_master_core != rte_get_master_lcore())) { + RTE_LOG(ERR, APP, "EAL core mask not configured properly, must be %" PRIx64 + " instead of %" PRIx64 "\n" , app_used_core_mask, app_eal_core_mask()); + return -1; + } + + if (nb_pfc == 0) { + RTE_LOG(ERR, APP, "Packet flow not configured!\n"); + app_usage(prgname); + return -1; + } + + /* sanity check for cores assignment */ + nb_lcores = app_cpu_core_count(); + + for(i = 0; i < nb_pfc; i++) { + if (qos_conf[i].rx_core >= nb_lcores) { + RTE_LOG(ERR, APP, "pfc %u: invalid RX lcore index %u\n", i + 1, + qos_conf[i].rx_core); + return -1; + } + if (qos_conf[i].wt_core >= nb_lcores) { + RTE_LOG(ERR, APP, "pfc %u: invalid WT lcore index %u\n", i + 1, + qos_conf[i].wt_core); + return -1; + } + uint32_t rx_sock = rte_lcore_to_socket_id(qos_conf[i].rx_core); + uint32_t wt_sock = rte_lcore_to_socket_id(qos_conf[i].wt_core); + if (rx_sock != wt_sock) { + RTE_LOG(ERR, APP, "pfc %u: RX and WT must be on the same socket\n", i + 1); + return -1; + } + app_numa_mask |= 1 << rte_lcore_to_socket_id(qos_conf[i].rx_core); + } + + return 0; +} diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c new file mode 100644 index 00000000..94a1a221 --- /dev/null +++ b/examples/qos_sched/cfg_file.c @@ -0,0 +1,342 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <rte_string_fns.h> +#include <rte_sched.h> + +#include "cfg_file.h" +#include "main.h" + + +/** when we resize a file structure, how many extra entries + * for new sections do we add in */ +#define CFG_ALLOC_SECTION_BATCH 8 +/** when we resize a section structure, how many extra entries + * for new entries do we add in */ +#define CFG_ALLOC_ENTRY_BATCH 16 + +int +cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port_params) +{ + const char *entry; + int j; + + if (!cfg || !port_params) + return -1; + + entry = rte_cfgfile_get_entry(cfg, "port", "frame overhead"); + if (entry) + port_params->frame_overhead = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, "port", "number of subports per port"); + if (entry) + port_params->n_subports_per_port = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, "port", "number of pipes per subport"); + if (entry) + port_params->n_pipes_per_subport = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, "port", "queue sizes"); + if (entry) { + char *next; + + for(j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + port_params->qsize[j] = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + +#ifdef RTE_SCHED_RED + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + char str[32]; + + /* Parse WRED min thresholds */ + snprintf(str, sizeof(str), "tc %d wred min", j); + entry = rte_cfgfile_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].min_th + = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED max thresholds */ + snprintf(str, sizeof(str), "tc %d wred max", j); + entry = rte_cfgfile_get_entry(cfg, "red", str); + if (entry) { + char 
*next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].max_th + = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED inverse mark probabilities */ + snprintf(str, sizeof(str), "tc %d wred inv prob", j); + entry = rte_cfgfile_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].maxp_inv + = (uint8_t)strtol(entry, &next, 10); + + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED EWMA filter weights */ + snprintf(str, sizeof(str), "tc %d wred weight", j); + entry = rte_cfgfile_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].wq_log2 + = (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + } +#endif /* RTE_SCHED_RED */ + + return 0; +} + +int +cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe_params) +{ + int i, j; + char *next; + const char *entry; + int profiles; + + if (!cfg || !pipe_params) + return -1; + + profiles = rte_cfgfile_num_sections(cfg, "pipe profile", sizeof("pipe profile") - 1); + port_params.n_pipe_profiles = profiles; + + for (j = 0; j < profiles; j++) { + char pipe_name[32]; + snprintf(pipe_name, sizeof(pipe_name), "pipe profile %d", j); + + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tb rate"); + if (entry) + pipe_params[j].tb_rate = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tb size"); + if (entry) + pipe_params[j].tb_size = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc period"); + if (entry) + pipe_params[j].tc_period = (uint32_t)atoi(entry); + + entry = 
rte_cfgfile_get_entry(cfg, pipe_name, "tc 0 rate"); + if (entry) + pipe_params[j].tc_rate[0] = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 1 rate"); + if (entry) + pipe_params[j].tc_rate[1] = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 2 rate"); + if (entry) + pipe_params[j].tc_rate[2] = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 3 rate"); + if (entry) + pipe_params[j].tc_rate[3] = (uint32_t)atoi(entry); + +#ifdef RTE_SCHED_SUBPORT_TC_OV + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 3 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight = (uint8_t)atoi(entry); +#endif + + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 0 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*0 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 1 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*1 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 2 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*2 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 3 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*3 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + } + return 0; +} + +int +cfg_load_subport(struct rte_cfgfile *cfg, 
struct rte_sched_subport_params *subport_params) +{ + const char *entry; + int i, j, k; + + if (!cfg || !subport_params) + return -1; + + memset(app_pipe_to_profile, -1, sizeof(app_pipe_to_profile)); + + for (i = 0; i < MAX_SCHED_SUBPORTS; i++) { + char sec_name[CFG_NAME_LEN]; + snprintf(sec_name, sizeof(sec_name), "subport %d", i); + + if (rte_cfgfile_has_section(cfg, sec_name)) { + entry = rte_cfgfile_get_entry(cfg, sec_name, "tb rate"); + if (entry) + subport_params[i].tb_rate = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, sec_name, "tb size"); + if (entry) + subport_params[i].tb_size = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, sec_name, "tc period"); + if (entry) + subport_params[i].tc_period = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, sec_name, "tc 0 rate"); + if (entry) + subport_params[i].tc_rate[0] = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, sec_name, "tc 1 rate"); + if (entry) + subport_params[i].tc_rate[1] = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, sec_name, "tc 2 rate"); + if (entry) + subport_params[i].tc_rate[2] = (uint32_t)atoi(entry); + + entry = rte_cfgfile_get_entry(cfg, sec_name, "tc 3 rate"); + if (entry) + subport_params[i].tc_rate[3] = (uint32_t)atoi(entry); + + int n_entries = rte_cfgfile_section_num_entries(cfg, sec_name); + struct rte_cfgfile_entry entries[n_entries]; + + rte_cfgfile_section_entries(cfg, sec_name, entries, n_entries); + + for (j = 0; j < n_entries; j++) { + if (strncmp("pipe", entries[j].name, sizeof("pipe") - 1) == 0) { + int profile; + char *tokens[2] = {NULL, NULL}; + int n_tokens; + int begin, end; + + profile = atoi(entries[j].value); + n_tokens = rte_strsplit(&entries[j].name[sizeof("pipe")], + strnlen(entries[j].name, CFG_NAME_LEN), tokens, 2, '-'); + + begin = atoi(tokens[0]); + if (n_tokens == 2) + end = atoi(tokens[1]); + else + end = begin; + + if (end >= MAX_SCHED_PIPES || begin > end) + return -1; + + for (k = 
begin; k <= end; k++) { + char profile_name[CFG_NAME_LEN]; + + snprintf(profile_name, sizeof(profile_name), + "pipe profile %d", profile); + if (rte_cfgfile_has_section(cfg, profile_name)) + app_pipe_to_profile[i][k] = profile; + else + rte_exit(EXIT_FAILURE, "Wrong pipe profile %s\n", + entries[j].value); + + } + } + } + } + } + + return 0; +} diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h new file mode 100644 index 00000000..cc5a2cd5 --- /dev/null +++ b/examples/qos_sched/cfg_file.h @@ -0,0 +1,46 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CFG_FILE_H__ +#define __CFG_FILE_H__ + +#include <rte_sched.h> +#include <rte_cfgfile.h> + +int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port); + +int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe); + +int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport); + +#endif diff --git a/examples/qos_sched/cmdline.c b/examples/qos_sched/cmdline.c new file mode 100644 index 00000000..f79d5246 --- /dev/null +++ b/examples/qos_sched/cmdline.c @@ -0,0 +1,643 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <termios.h> +#include <inttypes.h> +#include <string.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline_socket.h> +#include <cmdline.h> + +#include "main.h" + +/* *** Help command with introduction. 
*** */ +struct cmd_help_result { + cmdline_fixed_string_t help; +}; + +static void cmd_help_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + cmdline_printf( + cl, + "\n" + "The following commands are currently available:\n\n" + "Control:\n" + " quit : Quit the application.\n" + "\nStatistics:\n" + " stats app : Show app statistics.\n" + " stats port X subport Y : Show stats of a specific subport.\n" + " stats port X subport Y pipe Z : Show stats of a specific pipe.\n" + "\nAverage queue size:\n" + " qavg port X subport Y : Show average queue size per subport.\n" + " qavg port X subport Y tc Z : Show average queue size per subport and TC.\n" + " qavg port X subport Y pipe Z : Show average queue size per pipe.\n" + " qavg port X subport Y pipe Z tc A : Show average queue size per pipe and TC.\n" + " qavg port X subport Y pipe Z tc A q B : Show average queue size of a specific queue.\n" + " qavg [n|period] X : Set number of times and peiod (us).\n\n" + ); + +} + +cmdline_parse_token_string_t cmd_help_help = + TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help"); + +cmdline_parse_inst_t cmd_help = { + .f = cmd_help_parsed, + .data = NULL, + .help_str = "show help", + .tokens = { + (void *)&cmd_help_help, + NULL, + }, +}; + +/* *** QUIT *** */ +struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + cmdline_quit(cl); +} + +cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + +cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, + .data = NULL, + .help_str = "exit application", + .tokens = { + (void *)&cmd_quit_quit, + NULL, + }, +}; + +/* *** SET QAVG PARAMETERS *** */ +struct cmd_setqavg_result { + cmdline_fixed_string_t qavg_string; + cmdline_fixed_string_t param_string; + uint32_t 
number; +}; + +static void cmd_setqavg_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_setqavg_result *res = parsed_result; + + if (!strcmp(res->param_string, "period")) + qavg_period = res->number; + else if (!strcmp(res->param_string, "n")) + qavg_ntimes = res->number; + else + printf("\nUnknown parameter.\n\n"); +} + +cmdline_parse_token_string_t cmd_setqavg_qavg_string = + TOKEN_STRING_INITIALIZER(struct cmd_setqavg_result, qavg_string, + "qavg"); +cmdline_parse_token_string_t cmd_setqavg_param_string = + TOKEN_STRING_INITIALIZER(struct cmd_setqavg_result, param_string, + "period#n"); +cmdline_parse_token_num_t cmd_setqavg_number = + TOKEN_NUM_INITIALIZER(struct cmd_setqavg_result, number, + UINT32); + +cmdline_parse_inst_t cmd_setqavg = { + .f = cmd_setqavg_parsed, + .data = NULL, + .help_str = "Show subport stats.", + .tokens = { + (void *)&cmd_setqavg_qavg_string, + (void *)&cmd_setqavg_param_string, + (void *)&cmd_setqavg_number, + NULL, + }, +}; + +/* *** SHOW APP STATS *** */ +struct cmd_appstats_result { + cmdline_fixed_string_t stats_string; + cmdline_fixed_string_t app_string; +}; + +static void cmd_appstats_parsed(__attribute__((unused)) void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + app_stat(); +} + +cmdline_parse_token_string_t cmd_appstats_stats_string = + TOKEN_STRING_INITIALIZER(struct cmd_appstats_result, stats_string, + "stats"); +cmdline_parse_token_string_t cmd_appstats_app_string = + TOKEN_STRING_INITIALIZER(struct cmd_appstats_result, app_string, + "app"); + +cmdline_parse_inst_t cmd_appstats = { + .f = cmd_appstats_parsed, + .data = NULL, + .help_str = "Show app stats.", + .tokens = { + (void *)&cmd_appstats_stats_string, + (void *)&cmd_appstats_app_string, + NULL, + }, +}; + +/* *** SHOW SUBPORT STATS *** */ +struct cmd_subportstats_result { + cmdline_fixed_string_t stats_string; + 
cmdline_fixed_string_t port_string; + uint8_t port_number; + cmdline_fixed_string_t subport_string; + uint32_t subport_number; +}; + +static void cmd_subportstats_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_subportstats_result *res = parsed_result; + + if (subport_stat(res->port_number, res->subport_number) < 0) + printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n"); +} + +cmdline_parse_token_string_t cmd_subportstats_stats_string = + TOKEN_STRING_INITIALIZER(struct cmd_subportstats_result, stats_string, + "stats"); +cmdline_parse_token_string_t cmd_subportstats_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_subportstats_result, port_string, + "port"); +cmdline_parse_token_string_t cmd_subportstats_subport_string = + TOKEN_STRING_INITIALIZER(struct cmd_subportstats_result, subport_string, + "subport"); +cmdline_parse_token_num_t cmd_subportstats_subport_number = + TOKEN_NUM_INITIALIZER(struct cmd_subportstats_result, subport_number, + UINT32); +cmdline_parse_token_num_t cmd_subportstats_port_number = + TOKEN_NUM_INITIALIZER(struct cmd_subportstats_result, port_number, + UINT8); + +cmdline_parse_inst_t cmd_subportstats = { + .f = cmd_subportstats_parsed, + .data = NULL, + .help_str = "Show subport stats.", + .tokens = { + (void *)&cmd_subportstats_stats_string, + (void *)&cmd_subportstats_port_string, + (void *)&cmd_subportstats_port_number, + (void *)&cmd_subportstats_subport_string, + (void *)&cmd_subportstats_subport_number, + NULL, + }, +}; + +/* *** SHOW PIPE STATS *** */ +struct cmd_pipestats_result { + cmdline_fixed_string_t stats_string; + cmdline_fixed_string_t port_string; + uint8_t port_number; + cmdline_fixed_string_t subport_string; + uint32_t subport_number; + cmdline_fixed_string_t pipe_string; + uint32_t pipe_number; +}; + +static void cmd_pipestats_parsed(void *parsed_result, + __attribute__((unused)) 
struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_pipestats_result *res = parsed_result; + + if (pipe_stat(res->port_number, res->subport_number, res->pipe_number) < 0) + printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n"); +} + +cmdline_parse_token_string_t cmd_pipestats_stats_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipestats_result, stats_string, + "stats"); +cmdline_parse_token_string_t cmd_pipestats_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipestats_result, port_string, + "port"); +cmdline_parse_token_num_t cmd_pipestats_port_number = + TOKEN_NUM_INITIALIZER(struct cmd_pipestats_result, port_number, + UINT8); +cmdline_parse_token_string_t cmd_pipestats_subport_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipestats_result, subport_string, + "subport"); +cmdline_parse_token_num_t cmd_pipestats_subport_number = + TOKEN_NUM_INITIALIZER(struct cmd_pipestats_result, subport_number, + UINT32); +cmdline_parse_token_string_t cmd_pipestats_pipe_string = + TOKEN_STRING_INITIALIZER(struct cmd_pipestats_result, pipe_string, + "pipe"); +cmdline_parse_token_num_t cmd_pipestats_pipe_number = + TOKEN_NUM_INITIALIZER(struct cmd_pipestats_result, pipe_number, + UINT32); + +cmdline_parse_inst_t cmd_pipestats = { + .f = cmd_pipestats_parsed, + .data = NULL, + .help_str = "Show pipe stats.", + .tokens = { + (void *)&cmd_pipestats_stats_string, + (void *)&cmd_pipestats_port_string, + (void *)&cmd_pipestats_port_number, + (void *)&cmd_pipestats_subport_string, + (void *)&cmd_pipestats_subport_number, + (void *)&cmd_pipestats_pipe_string, + (void *)&cmd_pipestats_pipe_number, + NULL, + }, +}; + +/* *** SHOW AVERAGE QUEUE SIZE (QUEUE) *** */ +struct cmd_avg_q_result { + cmdline_fixed_string_t qavg_string; + cmdline_fixed_string_t port_string; + uint8_t port_number; + cmdline_fixed_string_t subport_string; + uint32_t subport_number; + cmdline_fixed_string_t pipe_string; + uint32_t 
pipe_number; + cmdline_fixed_string_t tc_string; + uint8_t tc_number; + cmdline_fixed_string_t q_string; + uint8_t q_number; +}; + +static void cmd_avg_q_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_avg_q_result *res = parsed_result; + + if (qavg_q(res->port_number, res->subport_number, res->pipe_number, res->tc_number, res->q_number) < 0) + printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n"); +} + +cmdline_parse_token_string_t cmd_avg_q_qavg_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, qavg_string, + "qavg"); +cmdline_parse_token_string_t cmd_avg_q_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, port_string, + "port"); +cmdline_parse_token_num_t cmd_avg_q_port_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, port_number, + UINT8); +cmdline_parse_token_string_t cmd_avg_q_subport_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, subport_string, + "subport"); +cmdline_parse_token_num_t cmd_avg_q_subport_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, subport_number, + UINT32); +cmdline_parse_token_string_t cmd_avg_q_pipe_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, pipe_string, + "pipe"); +cmdline_parse_token_num_t cmd_avg_q_pipe_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, pipe_number, + UINT32); +cmdline_parse_token_string_t cmd_avg_q_tc_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, tc_string, + "tc"); +cmdline_parse_token_num_t cmd_avg_q_tc_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, tc_number, + UINT8); +cmdline_parse_token_string_t cmd_avg_q_q_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, q_string, + "q"); +cmdline_parse_token_num_t cmd_avg_q_q_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, q_number, + UINT8); + +cmdline_parse_inst_t cmd_avg_q = { + .f = cmd_avg_q_parsed, + .data 
= NULL, + .help_str = "Show pipe stats.", + .tokens = { + (void *)&cmd_avg_q_qavg_string, + (void *)&cmd_avg_q_port_string, + (void *)&cmd_avg_q_port_number, + (void *)&cmd_avg_q_subport_string, + (void *)&cmd_avg_q_subport_number, + (void *)&cmd_avg_q_pipe_string, + (void *)&cmd_avg_q_pipe_number, + (void *)&cmd_avg_q_tc_string, + (void *)&cmd_avg_q_tc_number, + (void *)&cmd_avg_q_q_string, + (void *)&cmd_avg_q_q_number, + NULL, + }, +}; + +/* *** SHOW AVERAGE QUEUE SIZE (tc/pipe) *** */ +struct cmd_avg_tcpipe_result { + cmdline_fixed_string_t qavg_string; + cmdline_fixed_string_t port_string; + uint8_t port_number; + cmdline_fixed_string_t subport_string; + uint32_t subport_number; + cmdline_fixed_string_t pipe_string; + uint32_t pipe_number; + cmdline_fixed_string_t tc_string; + uint8_t tc_number; +}; + +static void cmd_avg_tcpipe_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_avg_tcpipe_result *res = parsed_result; + + if (qavg_tcpipe(res->port_number, res->subport_number, res->pipe_number, res->tc_number) < 0) + printf ("\nStats not available for these parameters. 
Check that both the port and subport are correct.\n\n"); +} + +cmdline_parse_token_string_t cmd_avg_tcpipe_qavg_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, qavg_string, + "qavg"); +cmdline_parse_token_string_t cmd_avg_tcpipe_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, port_string, + "port"); +cmdline_parse_token_num_t cmd_avg_tcpipe_port_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_tcpipe_result, port_number, + UINT8); +cmdline_parse_token_string_t cmd_avg_tcpipe_subport_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, subport_string, + "subport"); +cmdline_parse_token_num_t cmd_avg_tcpipe_subport_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_tcpipe_result, subport_number, + UINT32); +cmdline_parse_token_string_t cmd_avg_tcpipe_pipe_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, pipe_string, + "pipe"); +cmdline_parse_token_num_t cmd_avg_tcpipe_pipe_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_tcpipe_result, pipe_number, + UINT32); +cmdline_parse_token_string_t cmd_avg_tcpipe_tc_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, tc_string, + "tc"); +cmdline_parse_token_num_t cmd_avg_tcpipe_tc_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_tcpipe_result, tc_number, + UINT8); + +cmdline_parse_inst_t cmd_avg_tcpipe = { + .f = cmd_avg_tcpipe_parsed, + .data = NULL, + .help_str = "Show pipe stats.", + .tokens = { + (void *)&cmd_avg_tcpipe_qavg_string, + (void *)&cmd_avg_tcpipe_port_string, + (void *)&cmd_avg_tcpipe_port_number, + (void *)&cmd_avg_tcpipe_subport_string, + (void *)&cmd_avg_tcpipe_subport_number, + (void *)&cmd_avg_tcpipe_pipe_string, + (void *)&cmd_avg_tcpipe_pipe_number, + (void *)&cmd_avg_tcpipe_tc_string, + (void *)&cmd_avg_tcpipe_tc_number, + NULL, + }, +}; + +/* *** SHOW AVERAGE QUEUE SIZE (pipe) *** */ +struct cmd_avg_pipe_result { + cmdline_fixed_string_t qavg_string; + cmdline_fixed_string_t port_string; + uint8_t port_number; + 
cmdline_fixed_string_t subport_string; + uint32_t subport_number; + cmdline_fixed_string_t pipe_string; + uint32_t pipe_number; +}; + +static void cmd_avg_pipe_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_avg_pipe_result *res = parsed_result; + + if (qavg_pipe(res->port_number, res->subport_number, res->pipe_number) < 0) + printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n"); +} + +cmdline_parse_token_string_t cmd_avg_pipe_qavg_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_pipe_result, qavg_string, + "qavg"); +cmdline_parse_token_string_t cmd_avg_pipe_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_pipe_result, port_string, + "port"); +cmdline_parse_token_num_t cmd_avg_pipe_port_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_pipe_result, port_number, + UINT8); +cmdline_parse_token_string_t cmd_avg_pipe_subport_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_pipe_result, subport_string, + "subport"); +cmdline_parse_token_num_t cmd_avg_pipe_subport_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_pipe_result, subport_number, + UINT32); +cmdline_parse_token_string_t cmd_avg_pipe_pipe_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_pipe_result, pipe_string, + "pipe"); +cmdline_parse_token_num_t cmd_avg_pipe_pipe_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_pipe_result, pipe_number, + UINT32); + +cmdline_parse_inst_t cmd_avg_pipe = { + .f = cmd_avg_pipe_parsed, + .data = NULL, + .help_str = "Show pipe stats.", + .tokens = { + (void *)&cmd_avg_pipe_qavg_string, + (void *)&cmd_avg_pipe_port_string, + (void *)&cmd_avg_pipe_port_number, + (void *)&cmd_avg_pipe_subport_string, + (void *)&cmd_avg_pipe_subport_number, + (void *)&cmd_avg_pipe_pipe_string, + (void *)&cmd_avg_pipe_pipe_number, + NULL, + }, +}; + +/* *** SHOW AVERAGE QUEUE SIZE (tc/subport) *** */ +struct cmd_avg_tcsubport_result { + cmdline_fixed_string_t 
qavg_string; + cmdline_fixed_string_t port_string; + uint8_t port_number; + cmdline_fixed_string_t subport_string; + uint32_t subport_number; + cmdline_fixed_string_t tc_string; + uint8_t tc_number; +}; + +static void cmd_avg_tcsubport_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_avg_tcsubport_result *res = parsed_result; + + if (qavg_tcsubport(res->port_number, res->subport_number, res->tc_number) < 0) + printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n"); +} + +cmdline_parse_token_string_t cmd_avg_tcsubport_qavg_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcsubport_result, qavg_string, + "qavg"); +cmdline_parse_token_string_t cmd_avg_tcsubport_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcsubport_result, port_string, + "port"); +cmdline_parse_token_num_t cmd_avg_tcsubport_port_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_tcsubport_result, port_number, + UINT8); +cmdline_parse_token_string_t cmd_avg_tcsubport_subport_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcsubport_result, subport_string, + "subport"); +cmdline_parse_token_num_t cmd_avg_tcsubport_subport_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_tcsubport_result, subport_number, + UINT32); +cmdline_parse_token_string_t cmd_avg_tcsubport_tc_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_tcsubport_result, tc_string, + "tc"); +cmdline_parse_token_num_t cmd_avg_tcsubport_tc_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_tcsubport_result, tc_number, + UINT8); + +cmdline_parse_inst_t cmd_avg_tcsubport = { + .f = cmd_avg_tcsubport_parsed, + .data = NULL, + .help_str = "Show pipe stats.", + .tokens = { + (void *)&cmd_avg_tcsubport_qavg_string, + (void *)&cmd_avg_tcsubport_port_string, + (void *)&cmd_avg_tcsubport_port_number, + (void *)&cmd_avg_tcsubport_subport_string, + (void *)&cmd_avg_tcsubport_subport_number, + (void 
*)&cmd_avg_tcsubport_tc_string, + (void *)&cmd_avg_tcsubport_tc_number, + NULL, + }, +}; + +/* *** SHOW AVERAGE QUEUE SIZE (subport) *** */ +struct cmd_avg_subport_result { + cmdline_fixed_string_t qavg_string; + cmdline_fixed_string_t port_string; + uint8_t port_number; + cmdline_fixed_string_t subport_string; + uint32_t subport_number; +}; + +static void cmd_avg_subport_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_avg_subport_result *res = parsed_result; + + if (qavg_subport(res->port_number, res->subport_number) < 0) + printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n"); +} + +cmdline_parse_token_string_t cmd_avg_subport_qavg_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_subport_result, qavg_string, + "qavg"); +cmdline_parse_token_string_t cmd_avg_subport_port_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_subport_result, port_string, + "port"); +cmdline_parse_token_num_t cmd_avg_subport_port_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_subport_result, port_number, + UINT8); +cmdline_parse_token_string_t cmd_avg_subport_subport_string = + TOKEN_STRING_INITIALIZER(struct cmd_avg_subport_result, subport_string, + "subport"); +cmdline_parse_token_num_t cmd_avg_subport_subport_number = + TOKEN_NUM_INITIALIZER(struct cmd_avg_subport_result, subport_number, + UINT32); + +cmdline_parse_inst_t cmd_avg_subport = { + .f = cmd_avg_subport_parsed, + .data = NULL, + .help_str = "Show pipe stats.", + .tokens = { + (void *)&cmd_avg_subport_qavg_string, + (void *)&cmd_avg_subport_port_string, + (void *)&cmd_avg_subport_port_number, + (void *)&cmd_avg_subport_subport_string, + (void *)&cmd_avg_subport_subport_number, + NULL, + }, +}; + +/* ******************************************************************************** */ + +/* list of instructions */ +cmdline_parse_ctx_t main_ctx[] = { + (cmdline_parse_inst_t *)&cmd_help, + 
(cmdline_parse_inst_t *)&cmd_setqavg, + (cmdline_parse_inst_t *)&cmd_appstats, + (cmdline_parse_inst_t *)&cmd_subportstats, + (cmdline_parse_inst_t *)&cmd_pipestats, + (cmdline_parse_inst_t *)&cmd_avg_q, + (cmdline_parse_inst_t *)&cmd_avg_tcpipe, + (cmdline_parse_inst_t *)&cmd_avg_pipe, + (cmdline_parse_inst_t *)&cmd_avg_tcsubport, + (cmdline_parse_inst_t *)&cmd_avg_subport, + (cmdline_parse_inst_t *)&cmd_quit, + NULL, +}; + +/* prompt function, called from main on MASTER lcore */ +void +prompt(void) +{ + struct cmdline *cl; + + cl = cmdline_stdin_new(main_ctx, "qos_sched> "); + if (cl == NULL) { + return; + } + cmdline_interact(cl); + cmdline_stdin_exit(cl); +} diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c new file mode 100644 index 00000000..70e12bb4 --- /dev/null +++ b/examples/qos_sched/init.c @@ -0,0 +1,370 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <memory.h> + +#include <rte_log.h> +#include <rte_mbuf.h> +#include <rte_debug.h> +#include <rte_ethdev.h> +#include <rte_mempool.h> +#include <rte_sched.h> +#include <rte_cycles.h> +#include <rte_string_fns.h> +#include <rte_cfgfile.h> + +#include "main.h" +#include "cfg_file.h" + +uint32_t app_numa_mask = 0; +static uint32_t app_inited_port_mask = 0; + +int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES]; + +#define MAX_NAME_LEN 32 + +struct ring_conf ring_conf = { + .rx_size = APP_RX_DESC_DEFAULT, + .ring_size = APP_RING_SIZE, + .tx_size = APP_TX_DESC_DEFAULT, +}; + +struct burst_conf burst_conf = { + .rx_burst = MAX_PKT_RX_BURST, + .ring_burst = PKT_ENQUEUE, + .qos_dequeue = PKT_DEQUEUE, + .tx_burst = MAX_PKT_TX_BURST, +}; + +struct ring_thresh rx_thresh = { + .pthresh = RX_PTHRESH, + .hthresh = RX_HTHRESH, + .wthresh = RX_WTHRESH, +}; + +struct ring_thresh tx_thresh = { + .pthresh = TX_PTHRESH, + .hthresh = TX_HTHRESH, + .wthresh = TX_WTHRESH, +}; + +uint32_t nb_pfc; +const char *cfg_profile = NULL; +int mp_size = NB_MBUF; +struct flow_conf qos_conf[MAX_DATA_STREAMS]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo 
Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_DCB_NONE, + }, +}; + +static int +app_init_port(uint8_t portid, struct rte_mempool *mp) +{ + int ret; + struct rte_eth_link link; + struct rte_eth_rxconf rx_conf; + struct rte_eth_txconf tx_conf; + + /* check if port already initialized (multistream configuration) */ + if (app_inited_port_mask & (1u << portid)) + return 0; + + rx_conf.rx_thresh.pthresh = rx_thresh.pthresh; + rx_conf.rx_thresh.hthresh = rx_thresh.hthresh; + rx_conf.rx_thresh.wthresh = rx_thresh.wthresh; + rx_conf.rx_free_thresh = 32; + rx_conf.rx_drop_en = 0; + + tx_conf.tx_thresh.pthresh = tx_thresh.pthresh; + tx_conf.tx_thresh.hthresh = tx_thresh.hthresh; + tx_conf.tx_thresh.wthresh = tx_thresh.wthresh; + tx_conf.tx_free_thresh = 0; + tx_conf.tx_rs_thresh = 0; + tx_conf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS; + + /* init port */ + RTE_LOG(INFO, APP, "Initializing port %"PRIu8"... ", portid); + fflush(stdout); + ret = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: " + "err=%d, port=%"PRIu8"\n", ret, portid); + + /* init one RX queue */ + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, 0, (uint16_t)ring_conf.rx_size, + rte_eth_dev_socket_id(portid), &rx_conf, mp); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: " + "err=%d, port=%"PRIu8"\n", ret, portid); + + /* init one TX queue */ + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, 0, + (uint16_t)ring_conf.tx_size, rte_eth_dev_socket_id(portid), &tx_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " + "port=%"PRIu8" queue=%d\n", ret, portid, 0); + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_pmd_port_start: " + "err=%d, port=%"PRIu8"\n", ret, portid); + + printf("done: "); + + /* get link status */ + rte_eth_link_get(portid, 
&link); + if (link.link_status) { + printf(" Link Up - speed %u Mbps - %s\n", + (uint32_t) link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + } else { + printf(" Link Down\n"); + } + rte_eth_promiscuous_enable(portid); + + /* mark port as initialized */ + app_inited_port_mask |= 1u << portid; + + return 0; +} + +static struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = { + { + .tb_rate = 1250000000, + .tb_size = 1000000, + + .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, + .tc_period = 10, + }, +}; + +static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT] = { + { /* Profile #0 */ + .tb_rate = 305175, + .tb_size = 1000000, + + .tc_rate = {305175, 305175, 305175, 305175}, + .tc_period = 40, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_weight = 1, +#endif + + .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + }, +}; + +struct rte_sched_port_params port_params = { + .name = "port_scheduler_0", + .socket = 0, /* computed */ + .rate = 0, /* computed */ + .mtu = 6 + 6 + 4 + 4 + 2 + 1500, + .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, + .n_subports_per_port = 1, + .n_pipes_per_subport = 4096, + .qsize = {64, 64, 64, 64}, + .pipe_profiles = pipe_profiles, + .n_pipe_profiles = sizeof(pipe_profiles) / sizeof(struct rte_sched_pipe_params), + +#ifdef RTE_SCHED_RED + .red_params = { + /* Traffic Class 0 Colors Green / Yellow / Red */ + [0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 1 - Colors Green / Yellow / Red */ + [1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 2 - Colors Green / 
Yellow / Red */ + [2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 3 - Colors Green / Yellow / Red */ + [3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9} + } +#endif /* RTE_SCHED_RED */ +}; + +static struct rte_sched_port * +app_init_sched_port(uint32_t portid, uint32_t socketid) +{ + static char port_name[32]; /* static as referenced from global port_params*/ + struct rte_eth_link link; + struct rte_sched_port *port = NULL; + uint32_t pipe, subport; + int err; + + rte_eth_link_get((uint8_t)portid, &link); + + port_params.socket = socketid; + port_params.rate = (uint64_t) link.link_speed * 1000 * 1000 / 8; + snprintf(port_name, sizeof(port_name), "port_%d", portid); + port_params.name = port_name; + + port = rte_sched_port_config(&port_params); + if (port == NULL){ + rte_exit(EXIT_FAILURE, "Unable to config sched port\n"); + } + + for (subport = 0; subport < port_params.n_subports_per_port; subport ++) { + err = rte_sched_subport_config(port, subport, &subport_params[subport]); + if (err) { + rte_exit(EXIT_FAILURE, "Unable to config sched subport %u, err=%d\n", + subport, err); + } + + for (pipe = 0; pipe < port_params.n_pipes_per_subport; pipe ++) { + if (app_pipe_to_profile[subport][pipe] != -1) { + err = rte_sched_pipe_config(port, subport, pipe, + app_pipe_to_profile[subport][pipe]); + if (err) { + rte_exit(EXIT_FAILURE, "Unable to config sched pipe %u " + "for profile %d, err=%d\n", pipe, + app_pipe_to_profile[subport][pipe], err); + } + } + } + } + + return port; +} + +static int +app_load_cfg_profile(const char *profile) +{ + if (profile == NULL) + return 0; + struct rte_cfgfile *file = rte_cfgfile_load(profile, 0); + if 
(file == NULL) + rte_exit(EXIT_FAILURE, "Cannot load configuration profile %s\n", profile); + + cfg_load_port(file, &port_params); + cfg_load_subport(file, subport_params); + cfg_load_pipe(file, pipe_profiles); + + rte_cfgfile_close(file); + + return 0; +} + +int app_init(void) +{ + uint32_t i; + char ring_name[MAX_NAME_LEN]; + char pool_name[MAX_NAME_LEN]; + + if (rte_eth_dev_count() == 0) + rte_exit(EXIT_FAILURE, "No Ethernet port - bye\n"); + + /* load configuration profile */ + if (app_load_cfg_profile(cfg_profile) != 0) + rte_exit(EXIT_FAILURE, "Invalid configuration profile\n"); + + /* Initialize each active flow */ + for(i = 0; i < nb_pfc; i++) { + uint32_t socket = rte_lcore_to_socket_id(qos_conf[i].rx_core); + struct rte_ring *ring; + + snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].rx_core); + ring = rte_ring_lookup(ring_name); + if (ring == NULL) + qos_conf[i].rx_ring = rte_ring_create(ring_name, ring_conf.ring_size, + socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + else + qos_conf[i].rx_ring = ring; + + snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].tx_core); + ring = rte_ring_lookup(ring_name); + if (ring == NULL) + qos_conf[i].tx_ring = rte_ring_create(ring_name, ring_conf.ring_size, + socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + else + qos_conf[i].tx_ring = ring; + + + /* create the mbuf pools for each RX Port */ + snprintf(pool_name, MAX_NAME_LEN, "mbuf_pool%u", i); + qos_conf[i].mbuf_pool = rte_pktmbuf_pool_create(pool_name, + mp_size, burst_conf.rx_burst * 4, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, + rte_eth_dev_socket_id(qos_conf[i].rx_port)); + if (qos_conf[i].mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init mbuf pool for socket %u\n", i); + + app_init_port(qos_conf[i].rx_port, qos_conf[i].mbuf_pool); + app_init_port(qos_conf[i].tx_port, qos_conf[i].mbuf_pool); + + qos_conf[i].sched_port = app_init_sched_port(qos_conf[i].tx_port, socket); + } + + RTE_LOG(INFO, APP, "time stamp clock running at %" PRIu64 " Hz\n", + 
rte_get_timer_hz()); + + RTE_LOG(INFO, APP, "Ring sizes: NIC RX = %u, Mempool = %d SW queue = %u," + "NIC TX = %u\n", ring_conf.rx_size, mp_size, ring_conf.ring_size, + ring_conf.tx_size); + + RTE_LOG(INFO, APP, "Burst sizes: RX read = %hu, RX write = %hu,\n" + " Worker read/QoS enqueue = %hu,\n" + " QoS dequeue = %hu, Worker write = %hu\n", + burst_conf.rx_burst, burst_conf.ring_burst, burst_conf.ring_burst, + burst_conf.qos_dequeue, burst_conf.tx_burst); + + RTE_LOG(INFO, APP, "NIC thresholds RX (p = %hhu, h = %hhu, w = %hhu)," + "TX (p = %hhu, h = %hhu, w = %hhu)\n", + rx_thresh.pthresh, rx_thresh.hthresh, rx_thresh.wthresh, + tx_thresh.pthresh, tx_thresh.hthresh, tx_thresh.wthresh); + + return 0; +} diff --git a/examples/qos_sched/main.c b/examples/qos_sched/main.c new file mode 100644 index 00000000..e16b164d --- /dev/null +++ b/examples/qos_sched/main.c @@ -0,0 +1,254 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <unistd.h> +#include <stdint.h> + +#include <rte_log.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_memcpy.h> +#include <rte_byteorder.h> +#include <rte_branch_prediction.h> + +#include <rte_sched.h> + +#include "main.h" + +#define APP_MODE_NONE 0 +#define APP_RX_MODE 1 +#define APP_WT_MODE 2 +#define APP_TX_MODE 4 + +uint8_t interactive = APP_INTERACTIVE_DEFAULT; +uint32_t qavg_period = APP_QAVG_PERIOD; +uint32_t qavg_ntimes = APP_QAVG_NTIMES; + +/* main processing loop */ +static int +app_main_loop(__attribute__((unused))void *dummy) +{ + uint32_t lcore_id; + uint32_t i, mode; + uint32_t rx_idx = 0; + uint32_t wt_idx = 0; + uint32_t tx_idx = 0; + struct thread_conf *rx_confs[MAX_DATA_STREAMS]; + struct thread_conf *wt_confs[MAX_DATA_STREAMS]; + struct thread_conf *tx_confs[MAX_DATA_STREAMS]; + + memset(rx_confs, 0, sizeof(rx_confs)); + memset(wt_confs, 0, sizeof(wt_confs)); + memset(tx_confs, 0, sizeof(tx_confs)); + + + mode = APP_MODE_NONE; + lcore_id = rte_lcore_id(); + + for (i = 0; i < nb_pfc; i++) { + struct flow_conf *flow = &qos_conf[i]; + + if (flow->rx_core == lcore_id) { + flow->rx_thread.rx_port = flow->rx_port; + flow->rx_thread.rx_ring = flow->rx_ring; + flow->rx_thread.rx_queue = flow->rx_queue; + + rx_confs[rx_idx++] = &flow->rx_thread; + + mode |= APP_RX_MODE; + } + if (flow->tx_core == lcore_id) { + flow->tx_thread.tx_port = 
flow->tx_port; + flow->tx_thread.tx_ring = flow->tx_ring; + flow->tx_thread.tx_queue = flow->tx_queue; + + tx_confs[tx_idx++] = &flow->tx_thread; + + mode |= APP_TX_MODE; + } + if (flow->wt_core == lcore_id) { + flow->wt_thread.rx_ring = flow->rx_ring; + flow->wt_thread.tx_ring = flow->tx_ring; + flow->wt_thread.tx_port = flow->tx_port; + flow->wt_thread.sched_port = flow->sched_port; + + wt_confs[wt_idx++] = &flow->wt_thread; + + mode |= APP_WT_MODE; + } + } + + if (mode == APP_MODE_NONE) { + RTE_LOG(INFO, APP, "lcore %u has nothing to do\n", lcore_id); + return -1; + } + + if (mode == (APP_RX_MODE | APP_WT_MODE)) { + RTE_LOG(INFO, APP, "lcore %u was configured for both RX and WT !!!\n", + lcore_id); + return -1; + } + + RTE_LOG(INFO, APP, "entering main loop on lcore %u\n", lcore_id); + /* initialize mbuf memory */ + if (mode == APP_RX_MODE) { + for (i = 0; i < rx_idx; i++) { + RTE_LOG(INFO, APP, "flow %u lcoreid %u " + "reading port %"PRIu8"\n", + i, lcore_id, rx_confs[i]->rx_port); + } + + app_rx_thread(rx_confs); + } + else if (mode == (APP_TX_MODE | APP_WT_MODE)) { + for (i = 0; i < wt_idx; i++) { + wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *) + * burst_conf.tx_burst, RTE_CACHE_LINE_SIZE); + + if (wt_confs[i]->m_table == NULL) + rte_panic("flow %u unable to allocate memory buffer\n", i); + + RTE_LOG(INFO, APP, "flow %u lcoreid %u sched+write " + "port %"PRIu8"\n", + i, lcore_id, wt_confs[i]->tx_port); + } + + app_mixed_thread(wt_confs); + } + else if (mode == APP_TX_MODE) { + for (i = 0; i < tx_idx; i++) { + tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *) + * burst_conf.tx_burst, RTE_CACHE_LINE_SIZE); + + if (tx_confs[i]->m_table == NULL) + rte_panic("flow %u unable to allocate memory buffer\n", i); + + RTE_LOG(INFO, APP, "flow %u lcoreid %u " + "writing port %"PRIu8"\n", + i, lcore_id, tx_confs[i]->tx_port); + } + + app_tx_thread(tx_confs); + } + else if (mode == APP_WT_MODE){ + for (i = 0; i < wt_idx; i++) 
{ + RTE_LOG(INFO, APP, "flow %u lcoreid %u scheduling \n", i, lcore_id); + } + + app_worker_thread(wt_confs); + } + + return 0; +} + +void +app_stat(void) +{ + uint32_t i; + struct rte_eth_stats stats; + static struct rte_eth_stats rx_stats[MAX_DATA_STREAMS]; + static struct rte_eth_stats tx_stats[MAX_DATA_STREAMS]; + + /* print statistics */ + for(i = 0; i < nb_pfc; i++) { + struct flow_conf *flow = &qos_conf[i]; + + rte_eth_stats_get(flow->rx_port, &stats); + printf("\nRX port %"PRIu8": rx: %"PRIu64 " err: %"PRIu64 + " no_mbuf: %"PRIu64 "\n", + flow->rx_port, + stats.ipackets - rx_stats[i].ipackets, + stats.ierrors - rx_stats[i].ierrors, + stats.rx_nombuf - rx_stats[i].rx_nombuf); + memcpy(&rx_stats[i], &stats, sizeof(stats)); + + rte_eth_stats_get(flow->tx_port, &stats); + printf("TX port %"PRIu8": tx: %" PRIu64 " err: %" PRIu64 "\n", + flow->tx_port, + stats.opackets - tx_stats[i].opackets, + stats.oerrors - tx_stats[i].oerrors); + memcpy(&tx_stats[i], &stats, sizeof(stats)); + + //printf("MP = %d\n", rte_mempool_count(conf->app_pktmbuf_pool)); + +#if APP_COLLECT_STAT + printf("-------+------------+------------+\n"); + printf(" | received | dropped |\n"); + printf("-------+------------+------------+\n"); + printf(" RX | %10" PRIu64 " | %10" PRIu64 " |\n", + flow->rx_thread.stat.nb_rx, + flow->rx_thread.stat.nb_drop); + printf("QOS+TX | %10" PRIu64 " | %10" PRIu64 " | pps: %"PRIu64 " \n", + flow->wt_thread.stat.nb_rx, + flow->wt_thread.stat.nb_drop, + flow->wt_thread.stat.nb_rx - flow->wt_thread.stat.nb_drop); + printf("-------+------------+------------+\n"); + + memset(&flow->rx_thread.stat, 0, sizeof(struct thread_stat)); + memset(&flow->wt_thread.stat, 0, sizeof(struct thread_stat)); +#endif + } +} + +int +main(int argc, char **argv) +{ + int ret; + + ret = app_parse_args(argc, argv); + if (ret < 0) + return -1; + + ret = app_init(); + if (ret < 0) + return -1; + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(app_main_loop, NULL, 
SKIP_MASTER); + + if (interactive) { + sleep(1); + prompt(); + } + else { + /* print statistics every second */ + while(1) { + sleep(1); + app_stat(); + } + } + + return 0; +} diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h new file mode 100644 index 00000000..82aa0fae --- /dev/null +++ b/examples/qos_sched/main.h @@ -0,0 +1,195 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_sched.h> + +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define APP_INTERACTIVE_DEFAULT 0 + +#define APP_RX_DESC_DEFAULT 128 +#define APP_TX_DESC_DEFAULT 256 + +#define APP_RING_SIZE (8*1024) +#define NB_MBUF (2*1024*1024) + +#define MAX_PKT_RX_BURST 64 +#define PKT_ENQUEUE 64 +#define PKT_DEQUEUE 32 +#define MAX_PKT_TX_BURST 64 + +#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ +#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ +#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ + +#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ +#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ +#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ + +#define BURST_TX_DRAIN_US 100 + +#define MAX_DATA_STREAMS (RTE_MAX_LCORE/2) +#define MAX_SCHED_SUBPORTS 8 +#define MAX_SCHED_PIPES 4096 + +#ifndef APP_COLLECT_STAT +#define APP_COLLECT_STAT 1 +#endif + +#if APP_COLLECT_STAT +#define APP_STATS_ADD(stat,val) (stat) += (val) +#else +#define APP_STATS_ADD(stat,val) do {(void) (val);} while (0) +#endif + +#define APP_QAVG_NTIMES 10 +#define APP_QAVG_PERIOD 100 + +struct thread_stat +{ + uint64_t nb_rx; + uint64_t nb_drop; +}; + + +struct thread_conf +{ + uint32_t counter; + uint32_t n_mbufs; + struct rte_mbuf **m_table; + + uint8_t rx_port; + uint8_t tx_port; + uint16_t rx_queue; + uint16_t tx_queue; + struct rte_ring *rx_ring; + struct rte_ring *tx_ring; + struct rte_sched_port *sched_port; + +#if APP_COLLECT_STAT + struct thread_stat stat; +#endif +} __rte_cache_aligned; + + +struct flow_conf +{ + uint32_t rx_core; + uint32_t wt_core; + uint32_t tx_core; + uint8_t rx_port; + uint8_t tx_port; + uint16_t rx_queue; + uint16_t tx_queue; + struct rte_ring *rx_ring; + struct rte_ring *tx_ring; 
+ struct rte_sched_port *sched_port; + struct rte_mempool *mbuf_pool; + + struct thread_conf rx_thread; + struct thread_conf wt_thread; + struct thread_conf tx_thread; +}; + + +struct ring_conf +{ + uint32_t rx_size; + uint32_t ring_size; + uint32_t tx_size; +}; + +struct burst_conf +{ + uint16_t rx_burst; + uint16_t ring_burst; + uint16_t qos_dequeue; + uint16_t tx_burst; +}; + +struct ring_thresh +{ + uint8_t pthresh; /**< Ring prefetch threshold. */ + uint8_t hthresh; /**< Ring host threshold. */ + uint8_t wthresh; /**< Ring writeback threshold. */ +}; + +extern uint8_t interactive; +extern uint32_t qavg_period; +extern uint32_t qavg_ntimes; +extern uint32_t nb_pfc; +extern const char *cfg_profile; +extern int mp_size; +extern struct flow_conf qos_conf[]; +extern int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES]; + +extern struct ring_conf ring_conf; +extern struct burst_conf burst_conf; +extern struct ring_thresh rx_thresh; +extern struct ring_thresh tx_thresh; + +extern struct rte_sched_port_params port_params; + +int app_parse_args(int argc, char **argv); +int app_init(void); + +void prompt(void); +void app_rx_thread(struct thread_conf **qconf); +void app_tx_thread(struct thread_conf **qconf); +void app_worker_thread(struct thread_conf **qconf); +void app_mixed_thread(struct thread_conf **qconf); + +void app_stat(void); +int subport_stat(uint8_t port_id, uint32_t subport_id); +int pipe_stat(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id); +int qavg_q(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc, uint8_t q); +int qavg_tcpipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc); +int qavg_pipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id); +int qavg_tcsubport(uint8_t port_id, uint32_t subport_id, uint8_t tc); +int qavg_subport(uint8_t port_id, uint32_t subport_id); + +#ifdef __cplusplus +} +#endif + +#endif /* _MAIN_H_ */ diff --git a/examples/qos_sched/profile.cfg 
b/examples/qos_sched/profile.cfg new file mode 100644 index 00000000..f5b704cc --- /dev/null +++ b/examples/qos_sched/profile.cfg @@ -0,0 +1,104 @@ +; BSD LICENSE +; +; Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +; This file enables the following hierarchical scheduler configuration for each +; 10GbE output port: +; * Single subport (subport 0): +; - Subport rate set to 100% of port rate +; - Each of the 4 traffic classes has rate set to 100% of port rate +; * 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration: +; - Pipe rate set to 1/4K of port rate +; - Each of the 4 traffic classes has rate set to 100% of pipe rate +; - Within each traffic class, the byte-level WRR weights for the 4 queues +; are set to 1:1:1:1 +; +; For more details, please refer to chapter "Quality of Service (QoS) Framework" +; of Data Plane Development Kit (DPDK) Programmer's Guide. + +; Port configuration +[port] +frame overhead = 24 +number of subports per port = 1 +number of pipes per subport = 4096 +queue sizes = 64 64 64 64 + +; Subport configuration +[subport 0] +tb rate = 1250000000 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 1250000000 ; Bytes per second +tc 1 rate = 1250000000 ; Bytes per second +tc 2 rate = 1250000000 ; Bytes per second +tc 3 rate = 1250000000 ; Bytes per second +tc period = 10 ; Milliseconds + +pipe 0-4095 = 0 ; These pipes are configured with pipe profile 0 + +; Pipe configuration +[pipe profile 0] +tb rate = 305175 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 305175 ; Bytes per second +tc 1 rate = 305175 ; Bytes per second +tc 2 rate = 305175 ; Bytes per second +tc 3 rate = 305175 ; Bytes per second +tc period = 40 ; Milliseconds + +tc 3 oversubscription weight = 1 + +tc 0 wrr weights = 1 1 1 1 +tc 1 wrr weights = 1 1 1 1 +tc 2 wrr weights = 1 1 1 1 +tc 3 wrr weights = 1 1 1 1 + +; RED params per traffic class and color (Green / Yellow / Red) +[red] +tc 0 wred min = 48 40 32 +tc 0 wred max = 64 64 64 +tc 0 wred inv prob = 10 10 10 +tc 0 wred weight = 9 9 9 + +tc 1 wred min = 48 40 32 +tc 1 wred max = 64 64 64 +tc 1 wred inv prob = 10 10 10 +tc 1 wred weight = 9 9 9 + +tc 2 wred min = 48 40 32 +tc 2 wred max = 64 64 
64 +tc 2 wred inv prob = 10 10 10 +tc 2 wred weight = 9 9 9 + +tc 3 wred min = 48 40 32 +tc 3 wred max = 64 64 64 +tc 3 wred inv prob = 10 10 10 +tc 3 wred weight = 9 9 9 diff --git a/examples/qos_sched/profile_ov.cfg b/examples/qos_sched/profile_ov.cfg new file mode 100644 index 00000000..33000df9 --- /dev/null +++ b/examples/qos_sched/profile_ov.cfg @@ -0,0 +1,90 @@ +; BSD LICENSE +; +; Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +; Port configuration +[port] +frame overhead = 24 +number of subports per port = 1 +number of pipes per subport = 32 +queue sizes = 64 64 64 64 + +; Subport configuration +[subport 0] +tb rate = 8400000 ; Bytes per second +tb size = 100000 ; Bytes + +tc 0 rate = 8400000 ; Bytes per second +tc 1 rate = 8400000 ; Bytes per second +tc 2 rate = 8400000 ; Bytes per second +tc 3 rate = 8400000 ; Bytes per second +tc period = 10 ; Milliseconds + +pipe 0-31 = 0 ; These pipes are configured with pipe profile 0 + +; Pipe configuration +[pipe profile 0] +tb rate = 16800000 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 16800000 ; Bytes per second +tc 1 rate = 16800000 ; Bytes per second +tc 2 rate = 16800000 ; Bytes per second +tc 3 rate = 16800000 ; Bytes per second +tc period = 28 ; Milliseconds + +tc 3 oversubscription weight = 1 + +tc 0 wrr weights = 1 1 1 1 +tc 1 wrr weights = 1 1 1 1 +tc 2 wrr weights = 1 1 1 1 +tc 3 wrr weights = 1 1 1 1 + +; RED params per traffic class and color (Green / Yellow / Red) +[red] +tc 0 wred min = 48 40 32 +tc 0 wred max = 64 64 64 +tc 0 wred inv prob = 10 10 10 +tc 0 wred weight = 9 9 9 + +tc 1 wred min = 48 40 32 +tc 1 wred max = 64 64 64 +tc 1 wred inv prob = 10 10 10 +tc 1 wred weight = 9 9 9 + +tc 2 wred min = 48 40 32 +tc 2 wred max = 64 64 64 +tc 2 wred inv prob = 10 10 10 +tc 2 wred weight = 9 9 9 + +tc 3 wred min = 48 40 32 +tc 3 wred max = 64 64 64 +tc 3 wred inv prob = 10 10 10 +tc 3 wred weight = 9 9 9 diff --git a/examples/qos_sched/stats.c b/examples/qos_sched/stats.c new file mode 100644 index 00000000..5c894455 --- /dev/null +++ b/examples/qos_sched/stats.c @@ -0,0 +1,315 @@ +/*- + * * BSD LICENSE + * * + * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * * All rights reserved. 
+ * * + * * Redistribution and use in source and binary forms, with or without + * * modification, are permitted provided that the following conditions + * * are met: + * * + * * * Redistributions of source code must retain the above copyright + * * notice, this list of conditions and the following disclaimer. + * * * Redistributions in binary form must reproduce the above copyright + * * notice, this list of conditions and the following disclaimer in + * * the documentation and/or other materials provided with the + * * distribution. + * * * Neither the name of Intel Corporation nor the names of its + * * contributors may be used to endorse or promote products derived + * * from this software without specific prior written permission. + * * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * */ + +#include <unistd.h> +#include <string.h> + +#include "main.h" + +int +qavg_q(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc, uint8_t q) +{ + struct rte_sched_queue_stats stats; + struct rte_sched_port *port; + uint16_t qlen; + uint32_t queue_id, count, i; + uint32_t average; + + for (i = 0; i < nb_pfc; i++) { + if (qos_conf[i].tx_port == port_id) + break; + } + if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || pipe_id >= port_params.n_pipes_per_subport + || tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE || q >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + return -1; + + port = qos_conf[i].sched_port; + + queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + pipe_id); + queue_id = queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + q); + + average = 0; + + for (count = 0; count < qavg_ntimes; count++) { + rte_sched_queue_read_stats(port, queue_id, &stats, &qlen); + average += qlen; + usleep(qavg_period); + } + + average /= qavg_ntimes; + + printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average); + + return 0; +} + +int +qavg_tcpipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc) +{ + struct rte_sched_queue_stats stats; + struct rte_sched_port *port; + uint16_t qlen; + uint32_t queue_id, count, i; + uint32_t average, part_average; + + for (i = 0; i < nb_pfc; i++) { + if (qos_conf[i].tx_port == port_id) + break; + } + if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || pipe_id >= port_params.n_pipes_per_subport + || tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + return -1; + + port = qos_conf[i].sched_port; + + queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + pipe_id); + + average = 0; + + for (count = 0; count < qavg_ntimes; count++) { + part_average = 0; + for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + 
rte_sched_queue_read_stats(port, queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + i), &stats, &qlen); + part_average += qlen; + } + average += part_average / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; + usleep(qavg_period); + } + + average /= qavg_ntimes; + + printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average); + + return 0; +} + +int +qavg_pipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id) +{ + struct rte_sched_queue_stats stats; + struct rte_sched_port *port; + uint16_t qlen; + uint32_t queue_id, count, i; + uint32_t average, part_average; + + for (i = 0; i < nb_pfc; i++) { + if (qos_conf[i].tx_port == port_id) + break; + } + if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || pipe_id >= port_params.n_pipes_per_subport) + return -1; + + port = qos_conf[i].sched_port; + + queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + pipe_id); + + average = 0; + + for (count = 0; count < qavg_ntimes; count++) { + part_average = 0; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + rte_sched_queue_read_stats(port, queue_id + i, &stats, &qlen); + part_average += qlen; + } + average += part_average / (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + usleep(qavg_period); + } + + average /= qavg_ntimes; + + printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average); + + return 0; +} + +int +qavg_tcsubport(uint8_t port_id, uint32_t subport_id, uint8_t tc) +{ + struct rte_sched_queue_stats stats; + struct rte_sched_port *port; + uint16_t qlen; + uint32_t queue_id, count, i, j; + uint32_t average, part_average; + + for (i = 0; i < nb_pfc; i++) { + if (qos_conf[i].tx_port == port_id) + break; + } + if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + return -1; + + port = qos_conf[i].sched_port; + + average = 0; + + for (count = 0; 
count < qavg_ntimes; count++) { + part_average = 0; + for (i = 0; i < port_params.n_pipes_per_subport; i++) { + queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + i); + + for (j = 0; j < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) { + rte_sched_queue_read_stats(port, queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j), &stats, &qlen); + part_average += qlen; + } + } + + average += part_average / (port_params.n_pipes_per_subport * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + usleep(qavg_period); + } + + average /= qavg_ntimes; + + printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average); + + return 0; +} + +int +qavg_subport(uint8_t port_id, uint32_t subport_id) +{ + struct rte_sched_queue_stats stats; + struct rte_sched_port *port; + uint16_t qlen; + uint32_t queue_id, count, i, j; + uint32_t average, part_average; + + for (i = 0; i < nb_pfc; i++) { + if (qos_conf[i].tx_port == port_id) + break; + } + if (i == nb_pfc || subport_id >= port_params.n_subports_per_port) + return -1; + + port = qos_conf[i].sched_port; + + average = 0; + + for (count = 0; count < qavg_ntimes; count++) { + part_average = 0; + for (i = 0; i < port_params.n_pipes_per_subport; i++) { + queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + i); + + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) { + rte_sched_queue_read_stats(port, queue_id + j, &stats, &qlen); + part_average += qlen; + } + } + + average += part_average / (port_params.n_pipes_per_subport * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + usleep(qavg_period); + } + + average /= qavg_ntimes; + + printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average); + + return 0; +} + +int +subport_stat(uint8_t port_id, uint32_t subport_id) +{ + struct rte_sched_subport_stats stats; + struct 
rte_sched_port *port; + uint32_t tc_ov[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint8_t i; + + for (i = 0; i < nb_pfc; i++) { + if (qos_conf[i].tx_port == port_id) + break; + } + if (i == nb_pfc || subport_id >= port_params.n_subports_per_port) + return -1; + + port = qos_conf[i].sched_port; + memset (tc_ov, 0, sizeof(tc_ov)); + + rte_sched_subport_read_stats(port, subport_id, &stats, tc_ov); + + printf("\n"); + printf("+----+-------------+-------------+-------------+-------------+-------------+\n"); + printf("| TC | Pkts OK |Pkts Dropped | Bytes OK |Bytes Dropped| OV Status |\n"); + printf("+----+-------------+-------------+-------------+-------------+-------------+\n"); + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + printf("| %d | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " |\n", i, + stats.n_pkts_tc[i], stats.n_pkts_tc_dropped[i], + stats.n_bytes_tc[i], stats.n_bytes_tc_dropped[i], tc_ov[i]); + printf("+----+-------------+-------------+-------------+-------------+-------------+\n"); + } + printf("\n"); + + return 0; +} + +int +pipe_stat(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id) +{ + struct rte_sched_queue_stats stats; + struct rte_sched_port *port; + uint16_t qlen; + uint8_t i, j; + uint32_t queue_id; + + for (i = 0; i < nb_pfc; i++) { + if (qos_conf[i].tx_port == port_id) + break; + } + if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || pipe_id >= port_params.n_pipes_per_subport) + return -1; + + port = qos_conf[i].sched_port; + + queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + pipe_id); + + printf("\n"); + printf("+----+-------+-------------+-------------+-------------+-------------+-------------+\n"); + printf("| TC | Queue | Pkts OK |Pkts Dropped | Bytes OK |Bytes Dropped| Length |\n"); + printf("+----+-------+-------------+-------------+-------------+-------------+-------------+\n"); + + for (i 
= 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + for (j = 0; j < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) { + + rte_sched_queue_read_stats(port, queue_id + (i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j), &stats, &qlen); + + printf("| %d | %d | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " | %11i |\n", i, j, + stats.n_pkts, stats.n_pkts_dropped, stats.n_bytes, stats.n_bytes_dropped, qlen); + printf("+----+-------+-------------+-------------+-------------+-------------+-------------+\n"); + } + if (i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1) + printf("+----+-------+-------------+-------------+-------------+-------------+-------------+\n"); + } + printf("\n"); + + return 0; +} diff --git a/examples/quota_watermark/Makefile b/examples/quota_watermark/Makefile new file mode 100644 index 00000000..17fe473b --- /dev/null +++ b/examples/quota_watermark/Makefile @@ -0,0 +1,44 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += qw +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += qwctl + +include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/quota_watermark/include/conf.h b/examples/quota_watermark/include/conf.h new file mode 100644 index 00000000..bdc3bbed --- /dev/null +++ b/examples/quota_watermark/include/conf.h @@ -0,0 +1,48 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CONF_H_ +#define _CONF_H_ + +#define RING_SIZE 1024 +#define MAX_PKT_QUOTA 64 + +#define RX_DESC_PER_QUEUE 128 +#define TX_DESC_PER_QUEUE 512 + +#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE +#define MBUF_PER_POOL 8192 + +#define QUOTA_WATERMARK_MEMZONE_NAME "qw_global_vars" + +#endif /* _CONF_H_ */ diff --git a/examples/quota_watermark/qw/Makefile b/examples/quota_watermark/qw/Makefile new file mode 100644 index 00000000..fac9328d --- /dev/null +++ b/examples/quota_watermark/qw/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = qw + +# all source are stored in SRCS-y +SRCS-y := args.c init.c main.c + +CFLAGS += -O3 -DQW_SOFTWARE_FC +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/quota_watermark/qw/args.c b/examples/quota_watermark/qw/args.c new file mode 100644 index 00000000..408b54d1 --- /dev/null +++ b/examples/quota_watermark/qw/args.c @@ -0,0 +1,104 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_lcore.h> + +#include "args.h" + + +unsigned int portmask = 0; + + +static void +usage(const char *prgname) +{ + fprintf(stderr, "Usage: %s [EAL args] -- -p <portmask>\n" + "-p PORTMASK: hexadecimal bitmask of NIC ports to configure\n", + prgname); +} + +static unsigned long +parse_portmask(const char *portmask_str) +{ + return strtoul(portmask_str, NULL, 16); +} + +static void +check_core_count(void) +{ + if (rte_lcore_count() < 3) + rte_exit(EXIT_FAILURE, "At least 3 cores need to be passed in the coremask\n"); +} + +static void +check_portmask_value(unsigned int portmask) +{ + unsigned int port_nb = 0; + + port_nb = __builtin_popcount(portmask); + + if (port_nb == 0) + rte_exit(EXIT_FAILURE, "At least 2 ports need to be passed in the portmask\n"); + + if (port_nb % 2 != 0) + rte_exit(EXIT_FAILURE, "An even number of ports is required in the portmask\n"); +} + +int +parse_qw_args(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "h:p:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + break; + case 'p': + portmask = parse_portmask(optarg); + break; + default: + usage(argv[0]); + } + } + + check_core_count(); + check_portmask_value(portmask); + + return 0; +} diff --git a/examples/quota_watermark/qw/args.h b/examples/quota_watermark/qw/args.h new file mode 100644 index 00000000..d6d4fb62 --- /dev/null +++ b/examples/quota_watermark/qw/args.h @@ -0,0 +1,41 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ARGS_H_ +#define _ARGS_H_ + +extern unsigned int portmask; + +int parse_qw_args(int argc, char **argv); + +#endif /* _ARGS_H_ */ diff --git a/examples/quota_watermark/qw/init.c b/examples/quota_watermark/qw/init.c new file mode 100644 index 00000000..afc13665 --- /dev/null +++ b/examples/quota_watermark/qw/init.c @@ -0,0 +1,174 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> + +#include <rte_eal.h> + +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_memzone.h> +#include <rte_ring.h> +#include <rte_string_fns.h> + +#include "args.h" +#include "init.h" +#include "main.h" +#include "../include/conf.h" + + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_DCB_NONE, + }, +}; + +static struct rte_eth_fc_conf fc_conf = { + .mode = RTE_FC_TX_PAUSE, + .high_water = 80 * 510 / 100, + .low_water = 60 * 510 / 100, + .pause_time = 1337, + .send_xon = 0, +}; + + +void configure_eth_port(uint8_t port_id) +{ + int ret; + + rte_eth_dev_stop(port_id); + + ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure port %u (error %d)\n", + (unsigned) port_id, ret); + + /* Initialize the port's RX queue */ + ret = rte_eth_rx_queue_setup(port_id, 0, RX_DESC_PER_QUEUE, + rte_eth_dev_socket_id(port_id), + NULL, + mbuf_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Failed to setup RX queue on " + "port %u (error %d)\n", (unsigned) port_id, ret); + + /* Initialize the port's TX queue */ + ret = rte_eth_tx_queue_setup(port_id, 0, TX_DESC_PER_QUEUE, + rte_eth_dev_socket_id(port_id), + NULL); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Failed to setup TX queue on " + "port %u (error %d)\n", (unsigned) port_id, ret); + + /* Initialize the port's flow control */ + ret = rte_eth_dev_flow_ctrl_set(port_id, &fc_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Failed to setup hardware flow control on " + "port %u (error %d)\n", (unsigned) port_id, ret); + + /* Start the 
port */ + ret = rte_eth_dev_start(port_id); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Failed to start port %u (error %d)\n", + (unsigned) port_id, ret); + + /* Put it in promiscuous mode */ + rte_eth_promiscuous_enable(port_id); +} + +void +init_dpdk(void) +{ + if (rte_eth_dev_count() < 2) + rte_exit(EXIT_FAILURE, "Not enough ethernet port available\n"); +} + +void init_ring(int lcore_id, uint8_t port_id) +{ + struct rte_ring *ring; + char ring_name[RTE_RING_NAMESIZE]; + + snprintf(ring_name, RTE_RING_NAMESIZE, + "core%d_port%d", lcore_id, port_id); + ring = rte_ring_create(ring_name, RING_SIZE, rte_socket_id(), + RING_F_SP_ENQ | RING_F_SC_DEQ); + + if (ring == NULL) + rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno)); + + rte_ring_set_water_mark(ring, 80 * RING_SIZE / 100); + + rings[lcore_id][port_id] = ring; +} + +void +pair_ports(void) +{ + uint8_t i, j; + + /* Pair ports with their "closest neighbour" in the portmask */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) + if (is_bit_set(i, portmask)) + for (j = (uint8_t) (i + 1); j < RTE_MAX_ETHPORTS; j++) + if (is_bit_set(j, portmask)) { + port_pairs[i] = j; + port_pairs[j] = i; + i = j; + break; + } +} + +void +setup_shared_variables(void) +{ + const struct rte_memzone *qw_memzone; + + qw_memzone = rte_memzone_reserve(QUOTA_WATERMARK_MEMZONE_NAME, 2 * sizeof(int), + rte_socket_id(), RTE_MEMZONE_2MB); + if (qw_memzone == NULL) + rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno)); + + quota = qw_memzone->addr; + low_watermark = (unsigned int *) qw_memzone->addr + sizeof(int); +} diff --git a/examples/quota_watermark/qw/init.h b/examples/quota_watermark/qw/init.h new file mode 100644 index 00000000..6d0af3ab --- /dev/null +++ b/examples/quota_watermark/qw/init.h @@ -0,0 +1,43 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _INIT_H_ +#define _INIT_H_ + +void configure_eth_port(uint8_t port_id); +void init_dpdk(void); +void init_ring(int lcore_id, uint8_t port_id); +void pair_ports(void); +void setup_shared_variables(void); + +#endif /* _INIT_H_ */ diff --git a/examples/quota_watermark/qw/main.c b/examples/quota_watermark/qw/main.c new file mode 100644 index 00000000..8ed02148 --- /dev/null +++ b/examples/quota_watermark/qw/main.c @@ -0,0 +1,372 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_eal.h> + +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_mbuf.h> +#include <rte_ring.h> + +#include <rte_byteorder.h> + +#include "args.h" +#include "main.h" +#include "init.h" +#include "../include/conf.h" + + +#ifdef QW_SOFTWARE_FC +#define SEND_PAUSE_FRAME(port_id, duration) send_pause_frame(port_id, duration) +#else +#define SEND_PAUSE_FRAME(port_id, duration) do { } while(0) +#endif + +#define ETHER_TYPE_FLOW_CONTROL 0x8808 + +struct ether_fc_frame { + uint16_t opcode; + uint16_t param; +} __attribute__((__packed__)); + + +int *quota; +unsigned int *low_watermark; + +uint8_t port_pairs[RTE_MAX_ETHPORTS]; + +struct rte_ring *rings[RTE_MAX_LCORE][RTE_MAX_ETHPORTS]; +struct rte_mempool *mbuf_pool; + + +static void send_pause_frame(uint8_t port_id, uint16_t duration) +{ + struct rte_mbuf *mbuf; + struct ether_fc_frame *pause_frame; + struct ether_hdr *hdr; + struct ether_addr mac_addr; + + RTE_LOG(DEBUG, USER1, "Sending PAUSE frame (duration=%d) on port %d\n", + duration, port_id); + + /* Get a mbuf from the pool */ + mbuf = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(mbuf == NULL)) + return; + + /* Prepare a PAUSE frame */ + hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + pause_frame = (struct ether_fc_frame *) &hdr[1]; + + rte_eth_macaddr_get(port_id, &mac_addr); + 
ether_addr_copy(&mac_addr, &hdr->s_addr); + + void *tmp = &hdr->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x010000C28001ULL; + + hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_FLOW_CONTROL); + + pause_frame->opcode = rte_cpu_to_be_16(0x0001); + pause_frame->param = rte_cpu_to_be_16(duration); + + mbuf->pkt_len = 60; + mbuf->data_len = 60; + + rte_eth_tx_burst(port_id, 0, &mbuf, 1); +} + +/** + * Get the previous enabled lcore ID + * + * @param lcore_id + * The current lcore ID. + * @return + * The previous enabled lcore_id or -1 if not found. + */ +static unsigned int +get_previous_lcore_id(unsigned int lcore_id) +{ + int i; + + for (i = lcore_id - 1; i >= 0; i--) + if (rte_lcore_is_enabled(i)) + return i; + + return -1; +} + +/** + * Get the last enabled lcore ID + * + * @return + * The last enabled lcore_id. + */ +static unsigned int +get_last_lcore_id(void) +{ + int i; + + for (i = RTE_MAX_LCORE; i >= 0; i--) + if (rte_lcore_is_enabled(i)) + return i; + + return 0; +} + +static void +receive_stage(__attribute__((unused)) void *args) +{ + int i, ret; + + uint8_t port_id; + uint16_t nb_rx_pkts; + + unsigned int lcore_id; + + struct rte_mbuf *pkts[MAX_PKT_QUOTA]; + struct rte_ring *ring; + enum ring_state ring_state[RTE_MAX_ETHPORTS] = { RING_READY }; + + lcore_id = rte_lcore_id(); + + RTE_LOG(INFO, USER1, + "%s() started on core %u\n", __func__, lcore_id); + + while (1) { + + /* Process each port round robin style */ + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) { + + if (!is_bit_set(port_id, portmask)) + continue; + + ring = rings[lcore_id][port_id]; + + if (ring_state[port_id] != RING_READY) { + if (rte_ring_count(ring) > *low_watermark) + continue; + else + ring_state[port_id] = RING_READY; + } + + /* Enqueue received packets on the RX ring */ + nb_rx_pkts = rte_eth_rx_burst(port_id, 0, pkts, (uint16_t) *quota); + ret = rte_ring_enqueue_bulk(ring, (void *) pkts, nb_rx_pkts); + if (ret == -EDQUOT) { + ring_state[port_id] = RING_OVERLOADED; + 
send_pause_frame(port_id, 1337); + } + + else if (ret == -ENOBUFS) { + + /* Return mbufs to the pool, effectively dropping packets */ + for (i = 0; i < nb_rx_pkts; i++) + rte_pktmbuf_free(pkts[i]); + } + } + } +} + +static void +pipeline_stage(__attribute__((unused)) void *args) +{ + int i, ret; + int nb_dq_pkts; + + uint8_t port_id; + + unsigned int lcore_id, previous_lcore_id; + + void *pkts[MAX_PKT_QUOTA]; + struct rte_ring *rx, *tx; + enum ring_state ring_state[RTE_MAX_ETHPORTS] = { RING_READY }; + + lcore_id = rte_lcore_id(); + previous_lcore_id = get_previous_lcore_id(lcore_id); + + RTE_LOG(INFO, USER1, + "%s() started on core %u - processing packets from core %u\n", + __func__, lcore_id, previous_lcore_id); + + while (1) { + + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) { + + if (!is_bit_set(port_id, portmask)) + continue; + + tx = rings[lcore_id][port_id]; + rx = rings[previous_lcore_id][port_id]; + + if (ring_state[port_id] != RING_READY) { + if (rte_ring_count(tx) > *low_watermark) + continue; + else + ring_state[port_id] = RING_READY; + } + + /* Dequeue up to quota mbuf from rx */ + nb_dq_pkts = rte_ring_dequeue_burst(rx, pkts, *quota); + if (unlikely(nb_dq_pkts < 0)) + continue; + + /* Enqueue them on tx */ + ret = rte_ring_enqueue_bulk(tx, pkts, nb_dq_pkts); + if (ret == -EDQUOT) + ring_state[port_id] = RING_OVERLOADED; + + else if (ret == -ENOBUFS) { + + /* Return mbufs to the pool, effectively dropping packets */ + for (i = 0; i < nb_dq_pkts; i++) + rte_pktmbuf_free(pkts[i]); + } + } + } +} + +static void +send_stage(__attribute__((unused)) void *args) +{ + uint16_t nb_dq_pkts; + + uint8_t port_id; + uint8_t dest_port_id; + + unsigned int lcore_id, previous_lcore_id; + + struct rte_ring *tx; + struct rte_mbuf *tx_pkts[MAX_PKT_QUOTA]; + + lcore_id = rte_lcore_id(); + previous_lcore_id = get_previous_lcore_id(lcore_id); + + RTE_LOG(INFO, USER1, + "%s() started on core %u - processing packets from core %u\n", + __func__, lcore_id, 
previous_lcore_id); + + while (1) { + + /* Process each ring round robin style */ + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) { + + if (!is_bit_set(port_id, portmask)) + continue; + + dest_port_id = port_pairs[port_id]; + tx = rings[previous_lcore_id][port_id]; + + if (rte_ring_empty(tx)) + continue; + + /* Dequeue packets from tx and send them */ + nb_dq_pkts = (uint16_t) rte_ring_dequeue_burst(tx, (void *) tx_pkts, *quota); + rte_eth_tx_burst(dest_port_id, 0, tx_pkts, nb_dq_pkts); + + /* TODO: Check if nb_dq_pkts == nb_tx_pkts? */ + } + } +} + +int +main(int argc, char **argv) +{ + int ret; + unsigned int lcore_id, master_lcore_id, last_lcore_id; + + uint8_t port_id; + + rte_set_log_level(RTE_LOG_INFO); + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n"); + + argc -= ret; + argv += ret; + + init_dpdk(); + setup_shared_variables(); + + *quota = 32; + *low_watermark = 60 * RING_SIZE / 100; + + last_lcore_id = get_last_lcore_id(); + master_lcore_id = rte_get_master_lcore(); + + /* Parse the application's arguments */ + ret = parse_qw_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid quota/watermark argument(s)\n"); + + /* Create a pool of mbuf to store packets */ + mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL, 32, 0, + MBUF_DATA_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_panic("%s\n", rte_strerror(rte_errno)); + + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) + if (is_bit_set(port_id, portmask)) { + configure_eth_port(port_id); + init_ring(master_lcore_id, port_id); + } + + pair_ports(); + + /* Start pipeline_connect() on all the available slave lcore but the last */ + for (lcore_id = 0 ; lcore_id < last_lcore_id; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) && lcore_id != master_lcore_id) { + + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) + if (is_bit_set(port_id, portmask)) + init_ring(lcore_id, port_id); + + /* typecast is 
a workaround for GCC 4.3 bug */ + rte_eal_remote_launch((int (*)(void *))pipeline_stage, NULL, lcore_id); + } + } + + /* Start send_stage() on the last slave core */ + /* typecast is a workaround for GCC 4.3 bug */ + rte_eal_remote_launch((int (*)(void *))send_stage, NULL, last_lcore_id); + + /* Start receive_stage() on the master core */ + receive_stage(NULL); + + return 0; +} diff --git a/examples/quota_watermark/qw/main.h b/examples/quota_watermark/qw/main.h new file mode 100644 index 00000000..6b364898 --- /dev/null +++ b/examples/quota_watermark/qw/main.h @@ -0,0 +1,59 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +#include "../include/conf.h" + +enum ring_state { + RING_READY, + RING_OVERLOADED, +}; + +extern int *quota; +extern unsigned int *low_watermark; + +extern uint8_t port_pairs[RTE_MAX_ETHPORTS]; + +extern struct rte_ring *rings[RTE_MAX_LCORE][RTE_MAX_ETHPORTS]; +extern struct rte_mempool *mbuf_pool; + + +static inline int +is_bit_set(int i, unsigned int mask) +{ + return (1 << i) & mask; +} + +#endif /* _MAIN_H_ */ diff --git a/examples/quota_watermark/qwctl/Makefile b/examples/quota_watermark/qwctl/Makefile new file mode 100644 index 00000000..1ca2f1e9 --- /dev/null +++ b/examples/quota_watermark/qwctl/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = qwctl + +# all source are stored in SRCS-y +SRCS-y := commands.c qwctl.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/quota_watermark/qwctl/commands.c b/examples/quota_watermark/qwctl/commands.c new file mode 100644 index 00000000..5348dd3d --- /dev/null +++ b/examples/quota_watermark/qwctl/commands.c @@ -0,0 +1,217 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <termios.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_num.h> +#include <cmdline_parse_string.h> +#include <cmdline.h> + +#include <rte_ring.h> + +#include "qwctl.h" +#include "../include/conf.h" + + +/** + * help command + */ + +struct cmd_help_tokens { + cmdline_fixed_string_t verb; +}; + +cmdline_parse_token_string_t cmd_help_verb = + TOKEN_STRING_INITIALIZER(struct cmd_help_tokens, verb, "help"); + +static void +cmd_help_handler(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + cmdline_printf(cl, "Available commands:\n" + "- help\n" + "- set [ring_name|variable] <value>\n" + "- show [ring_name|variable]\n" + "\n" + "Available variables:\n" + "- low_watermark\n" + "- quota\n" + "- ring names follow the core%%u_port%%u format\n"); +} + +cmdline_parse_inst_t cmd_help = { + .f = cmd_help_handler, + .data = NULL, + .help_str = "show help", + .tokens = { + (void *) &cmd_help_verb, + NULL, + }, +}; + + +/** + * set command + */ + +struct cmd_set_tokens { + cmdline_fixed_string_t verb; + cmdline_fixed_string_t variable; + uint32_t value; +}; + +cmdline_parse_token_string_t cmd_set_verb = + TOKEN_STRING_INITIALIZER(struct cmd_set_tokens, verb, "set"); + +cmdline_parse_token_string_t cmd_set_variable = + TOKEN_STRING_INITIALIZER(struct cmd_set_tokens, variable, NULL); + +cmdline_parse_token_num_t cmd_set_value = + TOKEN_NUM_INITIALIZER(struct cmd_set_tokens, value, UINT32); + +static void +cmd_set_handler(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_set_tokens *tokens = parsed_result; + struct rte_ring *ring; + + if (!strcmp(tokens->variable, "quota")) { + + if (tokens->value > 0 && tokens->value <= MAX_PKT_QUOTA) + *quota = tokens->value; + else + cmdline_printf(cl, "quota must be between 1 and %u\n", MAX_PKT_QUOTA); + 
} + + else if (!strcmp(tokens->variable, "low_watermark")) { + + if (tokens->value <= 100) + *low_watermark = tokens->value * RING_SIZE / 100; + else + cmdline_printf(cl, "low_watermark must be between 0%% and 100%%\n"); + } + + else { + + ring = rte_ring_lookup(tokens->variable); + if (ring == NULL) + cmdline_printf(cl, "Cannot find ring \"%s\"\n", tokens->variable); + else + if (tokens->value >= *low_watermark * 100 / RING_SIZE + && tokens->value <= 100) + rte_ring_set_water_mark(ring, tokens->value * RING_SIZE / 100); + else + cmdline_printf(cl, "ring high watermark must be between %u%% " + "and 100%%\n", *low_watermark * 100 / RING_SIZE); + } +} + +cmdline_parse_inst_t cmd_set = { + .f = cmd_set_handler, + .data = NULL, + .help_str = "Set a variable value", + .tokens = { + (void *) &cmd_set_verb, + (void *) &cmd_set_variable, + (void *) &cmd_set_value, + NULL, + }, +}; + + +/** + * show command + */ + +struct cmd_show_tokens { + cmdline_fixed_string_t verb; + cmdline_fixed_string_t variable; +}; + +cmdline_parse_token_string_t cmd_show_verb = + TOKEN_STRING_INITIALIZER(struct cmd_show_tokens, verb, "show"); + +cmdline_parse_token_string_t cmd_show_variable = + TOKEN_STRING_INITIALIZER(struct cmd_show_tokens, variable, NULL); + + +static void +cmd_show_handler(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_show_tokens *tokens = parsed_result; + struct rte_ring *ring; + + if (!strcmp(tokens->variable, "quota")) + cmdline_printf(cl, "Global quota: %d\n", *quota); + + else if (!strcmp(tokens->variable, "low_watermark")) + cmdline_printf(cl, "Global low_watermark: %u\n", *low_watermark); + + else { + + ring = rte_ring_lookup(tokens->variable); + if (ring == NULL) + cmdline_printf(cl, "Cannot find ring \"%s\"\n", tokens->variable); + else + rte_ring_dump(stdout, ring); + } +} + +cmdline_parse_inst_t cmd_show = { + .f = cmd_show_handler, + .data = NULL, + .help_str = "Show a variable value", + 
.tokens = { + (void *) &cmd_show_verb, + (void *) &cmd_show_variable, + NULL, + }, +}; + + +cmdline_parse_ctx_t qwctl_ctx[] = { + (cmdline_parse_inst_t *)&cmd_help, + (cmdline_parse_inst_t *)&cmd_set, + (cmdline_parse_inst_t *)&cmd_show, + NULL, +}; diff --git a/examples/quota_watermark/qwctl/commands.h b/examples/quota_watermark/qwctl/commands.h new file mode 100644 index 00000000..c010941d --- /dev/null +++ b/examples/quota_watermark/qwctl/commands.h @@ -0,0 +1,41 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _COMMANDS_H_ +#define _COMMANDS_H_ + +#include <cmdline_parse.h> + +extern cmdline_parse_ctx_t qwctl_ctx[]; + +#endif /* _COMMANDS_H_ */ diff --git a/examples/quota_watermark/qwctl/qwctl.c b/examples/quota_watermark/qwctl/qwctl.c new file mode 100644 index 00000000..eb2f618a --- /dev/null +++ b/examples/quota_watermark/qwctl/qwctl.c @@ -0,0 +1,95 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <fcntl.h> +#include <stdio.h> +#include <termios.h> +#include <unistd.h> +#include <sys/mman.h> + +#include <rte_eal.h> + +#include <rte_log.h> +#include <rte_memzone.h> +#include <rte_ring.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_socket.h> +#include <cmdline.h> + + +#include "qwctl.h" +#include "commands.h" +#include "../include/conf.h" + + +int *quota; +unsigned int *low_watermark; + + +static void +setup_shared_variables(void) +{ + const struct rte_memzone *qw_memzone; + + qw_memzone = rte_memzone_lookup(QUOTA_WATERMARK_MEMZONE_NAME); + if (qw_memzone == NULL) + rte_exit(EXIT_FAILURE, "Couldn't find memzone\n"); + + quota = qw_memzone->addr; + low_watermark = (unsigned int *) qw_memzone->addr + sizeof(int); +} + +int main(int argc, char **argv) +{ + int ret; + struct cmdline *cl; + + rte_set_log_level(RTE_LOG_INFO); + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n"); + + setup_shared_variables(); + + cl = cmdline_stdin_new(qwctl_ctx, "qwctl> "); + if (cl == NULL) + rte_exit(EXIT_FAILURE, "Cannot create cmdline instance\n"); + + cmdline_interact(cl); + 
cmdline_stdin_exit(cl); + + return 0; +} diff --git a/examples/quota_watermark/qwctl/qwctl.h b/examples/quota_watermark/qwctl/qwctl.h new file mode 100644 index 00000000..8d146e57 --- /dev/null +++ b/examples/quota_watermark/qwctl/qwctl.h @@ -0,0 +1,40 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +extern int *quota; +extern unsigned int *low_watermark; + +#endif /* _MAIN_H_ */ diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile new file mode 100644 index 00000000..0fafbb72 --- /dev/null +++ b/examples/rxtx_callbacks/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = rxtx_callbacks + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +EXTRA_CFLAGS += -O3 -g -Wfatal-errors + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/rxtx_callbacks/main.c b/examples/rxtx_callbacks/main.c new file mode 100644 index 00000000..048b23f5 --- /dev/null +++ b/examples/rxtx_callbacks/main.c @@ -0,0 +1,225 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <inttypes.h> +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_cycles.h> +#include <rte_lcore.h> +#include <rte_mbuf.h> + +#define RX_RING_SIZE 128 +#define TX_RING_SIZE 512 + +#define NUM_MBUFS 8191 +#define MBUF_CACHE_SIZE 250 +#define BURST_SIZE 32 + +static const struct rte_eth_conf port_conf_default = { + .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, }, +}; + +static unsigned nb_ports; + +static struct { + uint64_t total_cycles; + uint64_t total_pkts; +} latency_numbers; + + +static uint16_t +add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused, + struct rte_mbuf **pkts, uint16_t nb_pkts, + uint16_t max_pkts __rte_unused, void *_ __rte_unused) +{ + unsigned i; + uint64_t now = rte_rdtsc(); + + for (i = 0; i < nb_pkts; i++) + pkts[i]->udata64 = now; + return nb_pkts; +} + +static uint16_t +calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused, + struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused) +{ + uint64_t cycles = 0; + uint64_t now = rte_rdtsc(); + unsigned i; + + for (i = 0; i < nb_pkts; i++) + cycles += now - pkts[i]->udata64; + latency_numbers.total_cycles += cycles; + latency_numbers.total_pkts += nb_pkts; + + if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) { + printf("Latency = %"PRIu64" cycles\n", + latency_numbers.total_cycles / latency_numbers.total_pkts); + latency_numbers.total_cycles = latency_numbers.total_pkts = 0; 
+ } + return nb_pkts; +} + +/* + * Initialises a given port using global settings and with the rx buffers + * coming from the mbuf_pool passed as parameter + */ +static inline int +port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + struct rte_eth_conf port_conf = port_conf_default; + const uint16_t rx_rings = 1, tx_rings = 1; + int retval; + uint16_t q; + + if (port >= rte_eth_dev_count()) + return -1; + + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval != 0) + return retval; + + for (q = 0; q < rx_rings; q++) { + retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + rte_eth_dev_socket_id(port), NULL, mbuf_pool); + if (retval < 0) + return retval; + } + + for (q = 0; q < tx_rings; q++) { + retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + rte_eth_dev_socket_id(port), NULL); + if (retval < 0) + return retval; + } + + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + struct ether_addr addr; + + rte_eth_macaddr_get(port, &addr); + printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port, + addr.addr_bytes[0], addr.addr_bytes[1], + addr.addr_bytes[2], addr.addr_bytes[3], + addr.addr_bytes[4], addr.addr_bytes[5]); + + rte_eth_promiscuous_enable(port); + rte_eth_add_rx_callback(port, 0, add_timestamps, NULL); + rte_eth_add_tx_callback(port, 0, calc_latency, NULL); + + return 0; +} + +/* + * Main thread that does the work, reading from INPUT_PORT + * and writing to OUTPUT_PORT + */ +static __attribute__((noreturn)) void +lcore_main(void) +{ + uint8_t port; + + for (port = 0; port < nb_ports; port++) + if (rte_eth_dev_socket_id(port) > 0 && + rte_eth_dev_socket_id(port) != + (int)rte_socket_id()) + printf("WARNING, port %u is on remote NUMA node to " + "polling thread.\n\tPerformance will " + "not be optimal.\n", port); + + printf("\nCore %u forwarding packets. 
[Ctrl+C to quit]\n", + rte_lcore_id()); + for (;;) { + for (port = 0; port < nb_ports; port++) { + struct rte_mbuf *bufs[BURST_SIZE]; + const uint16_t nb_rx = rte_eth_rx_burst(port, 0, + bufs, BURST_SIZE); + if (unlikely(nb_rx == 0)) + continue; + const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0, + bufs, nb_rx); + if (unlikely(nb_tx < nb_rx)) { + uint16_t buf; + + for (buf = nb_tx; buf < nb_rx; buf++) + rte_pktmbuf_free(bufs[buf]); + } + } + } +} + +/* Main function, does initialisation and calls the per-lcore functions */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool; + uint8_t portid; + + /* init EAL */ + int ret = rte_eal_init(argc, argv); + + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + nb_ports = rte_eth_dev_count(); + if (nb_ports < 2 || (nb_ports & 1)) + rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n"); + + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", + NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) + if (port_init(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n", + portid); + + if (rte_lcore_count() > 1) + printf("\nWARNING: Too much enabled lcores - " + "App uses only 1 lcore\n"); + + /* call lcore_main on master core only */ + lcore_main(); + return 0; +} diff --git a/examples/skeleton/Makefile b/examples/skeleton/Makefile new file mode 100644 index 00000000..4a5d99f1 --- /dev/null +++ b/examples/skeleton/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = basicfwd + +# all source are stored in SRCS-y +SRCS-y := basicfwd.c + +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +EXTRA_CFLAGS += -O3 -g -Wfatal-errors + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/skeleton/basicfwd.c b/examples/skeleton/basicfwd.c new file mode 100644 index 00000000..c89822cb --- /dev/null +++ b/examples/skeleton/basicfwd.c @@ -0,0 +1,211 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <inttypes.h> +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_cycles.h> +#include <rte_lcore.h> +#include <rte_mbuf.h> + +#define RX_RING_SIZE 128 +#define TX_RING_SIZE 512 + +#define NUM_MBUFS 8191 +#define MBUF_CACHE_SIZE 250 +#define BURST_SIZE 32 + +static const struct rte_eth_conf port_conf_default = { + .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN } +}; + +/* basicfwd.c: Basic DPDK skeleton forwarding example. */ + +/* + * Initializes a given port using global settings and with the RX buffers + * coming from the mbuf_pool passed as a parameter. + */ +static inline int +port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + struct rte_eth_conf port_conf = port_conf_default; + const uint16_t rx_rings = 1, tx_rings = 1; + int retval; + uint16_t q; + + if (port >= rte_eth_dev_count()) + return -1; + + /* Configure the Ethernet device. */ + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval != 0) + return retval; + + /* Allocate and set up 1 RX queue per Ethernet port. */ + for (q = 0; q < rx_rings; q++) { + retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + rte_eth_dev_socket_id(port), NULL, mbuf_pool); + if (retval < 0) + return retval; + } + + /* Allocate and set up 1 TX queue per Ethernet port. 
*/ + for (q = 0; q < tx_rings; q++) { + retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + rte_eth_dev_socket_id(port), NULL); + if (retval < 0) + return retval; + } + + /* Start the Ethernet port. */ + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + /* Display the port MAC address. */ + struct ether_addr addr; + rte_eth_macaddr_get(port, &addr); + printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 + " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", + (unsigned)port, + addr.addr_bytes[0], addr.addr_bytes[1], + addr.addr_bytes[2], addr.addr_bytes[3], + addr.addr_bytes[4], addr.addr_bytes[5]); + + /* Enable RX in promiscuous mode for the Ethernet device. */ + rte_eth_promiscuous_enable(port); + + return 0; +} + +/* + * The lcore main. This is the main thread that does the work, reading from + * an input port and writing to an output port. + */ +static __attribute__((noreturn)) void +lcore_main(void) +{ + const uint8_t nb_ports = rte_eth_dev_count(); + uint8_t port; + + /* + * Check that the port is on the same NUMA node as the polling thread + * for best performance. + */ + for (port = 0; port < nb_ports; port++) + if (rte_eth_dev_socket_id(port) > 0 && + rte_eth_dev_socket_id(port) != + (int)rte_socket_id()) + printf("WARNING, port %u is on remote NUMA node to " + "polling thread.\n\tPerformance will " + "not be optimal.\n", port); + + printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n", + rte_lcore_id()); + + /* Run until the application is quit or killed. */ + for (;;) { + /* + * Receive packets on a port and forward them on the paired + * port. The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc. + */ + for (port = 0; port < nb_ports; port++) { + + /* Get burst of RX packets, from first port of pair. */ + struct rte_mbuf *bufs[BURST_SIZE]; + const uint16_t nb_rx = rte_eth_rx_burst(port, 0, + bufs, BURST_SIZE); + + if (unlikely(nb_rx == 0)) + continue; + + /* Send burst of TX packets, to second port of pair. 
*/ + const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0, + bufs, nb_rx); + + /* Free any unsent packets. */ + if (unlikely(nb_tx < nb_rx)) { + uint16_t buf; + for (buf = nb_tx; buf < nb_rx; buf++) + rte_pktmbuf_free(bufs[buf]); + } + } + } +} + +/* + * The main function, which does initialization and calls the per-lcore + * functions. + */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool; + unsigned nb_ports; + uint8_t portid; + + /* Initialize the Environment Abstraction Layer (EAL). */ + int ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + + argc -= ret; + argv += ret; + + /* Check that there is an even number of ports to send/receive on. */ + nb_ports = rte_eth_dev_count(); + if (nb_ports < 2 || (nb_ports & 1)) + rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n"); + + /* Creates a new mempool in memory to hold the mbufs. */ + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports, + MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* Initialize all ports. */ + for (portid = 0; portid < nb_ports; portid++) + if (port_init(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8 "\n", + portid); + + if (rte_lcore_count() > 1) + printf("\nWARNING: Too many lcores enabled. Only 1 used.\n"); + + /* Call lcore_main on the master core only. */ + lcore_main(); + + return 0; +} diff --git a/examples/tep_termination/Makefile b/examples/tep_termination/Makefile new file mode 100644 index 00000000..448e6183 --- /dev/null +++ b/examples/tep_termination/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(error This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +endif + +# binary name +APP = tep_termination + +# all source are stored in SRCS-y +SRCS-y := main.c vxlan_setup.c vxlan.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -D_GNU_SOURCE + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c new file mode 100644 index 00000000..f97d552a --- /dev/null +++ b/examples/tep_termination/main.c @@ -0,0 +1,1275 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arpa/inet.h>
+#include <getopt.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/eventfd.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_malloc.h>
+#include <rte_virtio_net.h>
+
+#include "main.h"
+#include "vxlan.h"
+#include "vxlan_setup.h"
+
+/*
+ * The maximum number of external ports supported. The argument parser
+ * rejects a port mask that selects zero ports or more than this many.
+ */
+#define MAX_SUP_PORTS 1
+
+/**
+ * Calculate the number of buffers needed per port.
+ *
+ * The expansion reads the nb_switching_cores global, so the value is only
+ * meaningful where the macro is used after that global has been set.
+ * MAX_PKT_BURST, MBUF_CACHE_SIZE and the descriptor defaults are defined
+ * further down; that is fine because macros expand at the point of use.
+ */
+#define NUM_MBUFS_PER_PORT ((MAX_QUEUES * RTE_TEST_RX_DESC_DEFAULT) +\
+	(nb_switching_cores * MAX_PKT_BURST) +\
+	(nb_switching_cores * \
+	RTE_TEST_TX_DESC_DEFAULT) +\
+	(nb_switching_cores * MBUF_CACHE_SIZE))
+
+#define MBUF_CACHE_SIZE 128
+#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+
+#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/* Defines how long we wait between retries on RX (microseconds; default of --rx-retry-delay) */
+#define BURST_RX_WAIT_US 15
+
+#define BURST_RX_RETRIES 4 /* Number of retries on RX (default of --rx-retry-num). */
+
+#define JUMBO_FRAME_MAX_SIZE 0x2600 /* 0x2600 == 9728 */
+
+/*
+ * State of virtio device, kept in vhost_dev.ready.
+ * DEVICE_MAC_LEARNING: set by new_device(); while in this state
+ *   switch_worker() calls the tunnel_setup hook on the first guest TX burst.
+ * DEVICE_RX: the only state in which switch_worker() polls the NIC RX queue
+ *   for the device. Nothing in this part of the file assigns it
+ *   (presumably the tunnel setup code does - confirm).
+ * DEVICE_SAFE_REMOVE: set by switch_worker() after tunnel_destroy once it
+ *   sees vdev->remove; destroy_device() spins until it observes this value.
+ */
+#define DEVICE_MAC_LEARNING 0
+#define DEVICE_RX 1
+#define DEVICE_SAFE_REMOVE 2
+
+/*
+ * Config_core_flag status definitions: values of lcore_ll->dev_removal_flag.
+ * destroy_device() writes REQUEST_DEV_REMOVAL and each data core answers
+ * with ACK_DEV_REMOVAL from its polling loop.
+ */
+#define REQUEST_DEV_REMOVAL 1
+#define ACK_DEV_REMOVAL 0
+
+/* Configurable number of RX/TX ring descriptors */
+#define RTE_TEST_RX_DESC_DEFAULT 1024
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+/* Get first 4 bytes in mbuf headroom, i.e. the uint32_t located sizeof(struct rte_mbuf) bytes after the mbuf pointer. */
+#define MBUF_HEADROOM_UINT32(mbuf) (*(uint32_t *)((uint8_t *)(mbuf) \
+	+ sizeof(struct rte_mbuf)))
+
+#define INVALID_PORT_ID 0xFF /* stored in ports[] by check_ports_num() for ids beyond the system port count */
+
+/* Size of buffers used for snprintfs. */
+#define MAX_PRINT_BUFF 6072
+
+/* Maximum character device basename size, including the terminating NUL. */
+#define MAX_BASENAME_SZ 20
+
+/* Maximum long option length for option parsing. */
+#define MAX_LONG_OPT_SZ 64
+
+/* Used to compare MAC addresses (mask covering the low 48 bits). */
+#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL
+
+#define CMD_LINE_OPT_NB_DEVICES "nb-devices"
+#define CMD_LINE_OPT_UDP_PORT "udp-port"
+#define CMD_LINE_OPT_TX_CHECKSUM "tx-checksum"
+#define CMD_LINE_OPT_TSO_SEGSZ "tso-segsz"
+#define CMD_LINE_OPT_FILTER_TYPE "filter-type"
+#define CMD_LINE_OPT_ENCAP "encap"
+#define CMD_LINE_OPT_DECAP "decap"
+#define CMD_LINE_OPT_RX_RETRY "rx-retry"
+#define CMD_LINE_OPT_RX_RETRY_DELAY "rx-retry-delay"
+#define CMD_LINE_OPT_RX_RETRY_NUM "rx-retry-num"
+#define CMD_LINE_OPT_STATS "stats"
+#define CMD_LINE_OPT_DEV_BASENAME "dev-basename"
+
+/* mask of enabled ports (bit i set means port id i was requested with -p) */
+static uint32_t enabled_port_mask;
+
+/* Number of switching cores enabled; used as a divisor by init_data_ll() */
+static uint32_t nb_switching_cores;
+
+/* number of devices/queues to support (--nb-devices) */
+uint16_t nb_devices = 2;
+
+/* max ring descriptor, ixgbe, i40e, e1000 all are 4096. */
+#define MAX_RING_DESC 4096
+
+struct vpool {
+	struct rte_mempool *pool;
+	struct rte_ring *ring;
+	uint32_t buf_size;
+} vpool_array[MAX_QUEUES+MAX_QUEUES];
+
+/* UDP tunneling port (--udp-port) */
+uint16_t udp_port = 4789;
+
+/* enable/disable inner TX checksum (--tx-checksum) */
+uint8_t tx_checksum = 0;
+
+/* TCP segment size (--tso-segsz) */
+uint16_t tso_segsz = 0;
+
+/* enable/disable decapsulation (--decap) */
+uint8_t rx_decap = 1;
+
+/* enable/disable encapsulation (--encap) */
+uint8_t tx_encap = 1;
+
+/* RX filter type for tunneling packet; --filter-type N stores N - 1 here */
+uint8_t filter_idx = 1;
+
+/* overlay packet operation: the hooks the data path calls for tunnel setup/teardown and RX/TX handling */
+struct ol_switch_ops overlay_options = {
+	.port_configure = vxlan_port_init,
+	.tunnel_setup = vxlan_link,
+	.tunnel_destroy = vxlan_unlink,
+	.tx_handle = vxlan_tx_pkts,
+	.rx_handle = vxlan_rx_pkts,
+	.param_handle = NULL,
+};
+
+/* Enable stats. 0 disables them; a non-zero value is also the number of seconds print_stats() sleeps between reports. */
+uint32_t enable_stats = 0;
+/* Enable retries on RX. */
+static uint32_t enable_retry = 1;
+/* Specify timeout (in useconds) between retries on RX. */
+static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
+/* Specify the number of retries on RX. */
+static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
+
+/* Character device basename. Can be set by user. */
+static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";
+
+static unsigned lcore_ids[RTE_MAX_LCORE]; /* searched by switch_worker() to derive its TX queue id */
+uint8_t ports[RTE_MAX_ETHPORTS]; /* port ids selected by the port mask, in ascending order */
+
+static unsigned nb_ports; /**< The number of ports specified in command line */
+
+/* ethernet addresses of ports */
+struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* heads for the main used and free linked lists for the data path. */
+static struct virtio_net_data_ll *ll_root_used;
+static struct virtio_net_data_ll *ll_root_free;
+
+/**
+ * Array of data core structures containing information on
+ * individual core linked lists.
+ */
+static struct lcore_info lcore_info[RTE_MAX_LCORE];
+
+/* Used for queueing bursts of TX packets.
*/ +struct mbuf_table { + unsigned len; + unsigned txq_id; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +/* TX queue for each data core. */ +struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE]; + +struct device_statistics dev_statistics[MAX_DEVICES]; + +/** + * Set character device basename. + */ +static int +us_vhost_parse_basename(const char *q_arg) +{ + /* parse number string */ + if (strlen(q_arg) >= MAX_BASENAME_SZ) + return -1; + else + snprintf((char *)&dev_basename, MAX_BASENAME_SZ, "%s", q_arg); + + return 0; +} + +/** + * Parse the portmask provided at run time. + */ +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +/** + * Parse num options at run time. + */ +static int +parse_num_opt(const char *q_arg, uint32_t max_valid_value) +{ + char *end = NULL; + unsigned long num; + + /* parse unsigned int string */ + num = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (num > max_valid_value) + return -1; + + return num; +} + +/** + * Display usage + */ +static void +tep_termination_usage(const char *prgname) +{ + RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n" + " --udp-port: UDP destination port for VXLAN packet\n" + " --nb-devices[1-64]: The number of virtIO device\n" + " --tx-checksum [0|1]: inner Tx checksum offload\n" + " --tso-segsz [0-N]: TCP segment size\n" + " --decap [0|1]: tunneling packet decapsulation\n" + " --encap [0|1]: tunneling packet encapsulation\n" + " --filter-type[1-3]: filter type for tunneling packet\n" + " 1: Inner MAC and tenent ID\n" + " 2: Inner MAC and VLAN, and tenent ID\n" + " 3: Outer MAC, Inner MAC and tenent ID\n" + " -p PORTMASK: Set mask for ports to be used by application\n" + " --rx-retry [0|1]: 
disable/enable(default) retries on rx." + " Enable retry if destintation queue is full\n" + " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX." + " This makes effect only if retries on rx enabled\n" + " --rx-retry-num [0-N]: the number of retries on rx." + " This makes effect only if retries on rx enabled\n" + " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n" + " --dev-basename: The basename to be used for the character device.\n", + prgname); +} + +/** + * Parse the arguments given in the command line of the application. + */ +static int +tep_termination_parse_args(int argc, char **argv) +{ + int opt, ret; + int option_index; + unsigned i; + const char *prgname = argv[0]; + static struct option long_option[] = { + {CMD_LINE_OPT_NB_DEVICES, required_argument, NULL, 0}, + {CMD_LINE_OPT_UDP_PORT, required_argument, NULL, 0}, + {CMD_LINE_OPT_TX_CHECKSUM, required_argument, NULL, 0}, + {CMD_LINE_OPT_TSO_SEGSZ, required_argument, NULL, 0}, + {CMD_LINE_OPT_DECAP, required_argument, NULL, 0}, + {CMD_LINE_OPT_ENCAP, required_argument, NULL, 0}, + {CMD_LINE_OPT_FILTER_TYPE, required_argument, NULL, 0}, + {CMD_LINE_OPT_RX_RETRY, required_argument, NULL, 0}, + {CMD_LINE_OPT_RX_RETRY_DELAY, required_argument, NULL, 0}, + {CMD_LINE_OPT_RX_RETRY_NUM, required_argument, NULL, 0}, + {CMD_LINE_OPT_STATS, required_argument, NULL, 0}, + {CMD_LINE_OPT_DEV_BASENAME, required_argument, NULL, 0}, + {NULL, 0, 0, 0}, + }; + + /* Parse command line */ + while ((opt = getopt_long(argc, argv, "p:", + long_option, &option_index)) != EOF) { + switch (opt) { + /* Portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid portmask\n"); + tep_termination_usage(prgname); + return -1; + } + break; + case 0: + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_NB_DEVICES, + sizeof(CMD_LINE_OPT_NB_DEVICES))) { + ret = parse_num_opt(optarg, MAX_DEVICES); + if (ret == -1) 
{ + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for nb-devices [0-%d]\n", + MAX_DEVICES); + tep_termination_usage(prgname); + return -1; + } else + nb_devices = ret; + } + + /* Enable/disable retries on RX. */ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_RX_RETRY, + sizeof(CMD_LINE_OPT_RX_RETRY))) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for rx-retry [0|1]\n"); + tep_termination_usage(prgname); + return -1; + } else + enable_retry = ret; + } + + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_TSO_SEGSZ, + sizeof(CMD_LINE_OPT_TSO_SEGSZ))) { + ret = parse_num_opt(optarg, INT16_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for TCP segment size [0-N]\n"); + tep_termination_usage(prgname); + return -1; + } else + tso_segsz = ret; + } + + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_UDP_PORT, + sizeof(CMD_LINE_OPT_UDP_PORT))) { + ret = parse_num_opt(optarg, INT16_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for UDP port [0-N]\n"); + tep_termination_usage(prgname); + return -1; + } else + udp_port = ret; + } + + /* Specify the retries delay time (in useconds) on RX.*/ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_RX_RETRY_DELAY, + sizeof(CMD_LINE_OPT_RX_RETRY_DELAY))) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for rx-retry-delay [0-N]\n"); + tep_termination_usage(prgname); + return -1; + } else + burst_rx_delay_time = ret; + } + + /* Specify the retries number on RX. 
*/ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_RX_RETRY_NUM, + sizeof(CMD_LINE_OPT_RX_RETRY_NUM))) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for rx-retry-num [0-N]\n"); + tep_termination_usage(prgname); + return -1; + } else + burst_rx_retry_num = ret; + } + + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_TX_CHECKSUM, + sizeof(CMD_LINE_OPT_TX_CHECKSUM))) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for tx-checksum [0|1]\n"); + tep_termination_usage(prgname); + return -1; + } else + tx_checksum = ret; + } + + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_FILTER_TYPE, + sizeof(CMD_LINE_OPT_FILTER_TYPE))) { + ret = parse_num_opt(optarg, 3); + if ((ret == -1) || (ret == 0)) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for filter type [1-3]\n"); + tep_termination_usage(prgname); + return -1; + } else + filter_idx = ret - 1; + } + + /* Enable/disable encapsulation on RX. */ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_DECAP, + sizeof(CMD_LINE_OPT_DECAP))) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for decap [0|1]\n"); + tep_termination_usage(prgname); + return -1; + } else + rx_decap = ret; + } + + /* Enable/disable encapsulation on TX. */ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_ENCAP, + sizeof(CMD_LINE_OPT_ENCAP))) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for encap [0|1]\n"); + tep_termination_usage(prgname); + return -1; + } else + tx_encap = ret; + } + + /* Enable/disable stats. 
*/ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_STATS, + sizeof(CMD_LINE_OPT_STATS))) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for stats [0..N]\n"); + tep_termination_usage(prgname); + return -1; + } else + enable_stats = ret; + } + + /* Set character device basename. */ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_DEV_BASENAME, + sizeof(CMD_LINE_OPT_DEV_BASENAME))) { + if (us_vhost_parse_basename(optarg) == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for character " + "device basename (Max %d characters)\n", + MAX_BASENAME_SZ); + tep_termination_usage(prgname); + return -1; + } + } + + break; + + /* Invalid option - print options. */ + default: + tep_termination_usage(prgname); + return -1; + } + } + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (enabled_port_mask & (1 << i)) + ports[nb_ports++] = (uint8_t)i; + } + + if ((nb_ports == 0) || (nb_ports > MAX_SUP_PORTS)) { + RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u," + "but only %u port can be enabled\n", nb_ports, + MAX_SUP_PORTS); + return -1; + } + + return 0; +} + +/** + * Update the global var NB_PORTS and array PORTS + * according to system ports number and return valid ports number + */ +static unsigned +check_ports_num(unsigned max_nb_ports) +{ + unsigned valid_nb_ports = nb_ports; + unsigned portid; + + if (nb_ports > max_nb_ports) { + RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) " + " exceeds total system port number(%u)\n", + nb_ports, max_nb_ports); + nb_ports = max_nb_ports; + } + + for (portid = 0; portid < nb_ports; portid++) { + if (ports[portid] >= max_nb_ports) { + RTE_LOG(INFO, VHOST_PORT, + "\nSpecified port ID(%u) exceeds max " + " system port ID(%u)\n", + ports[portid], (max_nb_ports - 1)); + ports[portid] = INVALID_PORT_ID; + valid_nb_ports--; + } + } + return valid_nb_ports; +} + +/** + * This function routes the TX packet to the correct 
interface. This may be a local device
+ * or the physical port.
+ */
+static inline void __attribute__((always_inline))
+virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m)
+{
+	/*
+	 * The mbuf is appended to the calling lcore's TX queue. Once the
+	 * queue holds MAX_PKT_BURST packets the whole burst is passed to the
+	 * overlay tx_handle hook on ports[0]; mbufs the hook did not take
+	 * are freed and the queue is emptied.
+	 */
+	struct mbuf_table *tx_q;
+	struct rte_mbuf **m_table;
+	unsigned len, ret = 0;
+	const uint16_t lcore_id = rte_lcore_id();
+	struct virtio_net *dev = vdev->dev;
+
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n",
+		dev->device_fh);
+
+	/* Add packet to the port tx queue */
+	tx_q = &lcore_tx_queue[lcore_id];
+	len = tx_q->len;
+
+	tx_q->m_table[len] = m;
+	len++;
+	if (enable_stats) {
+		dev_statistics[dev->device_fh].tx_total++;
+		dev_statistics[dev->device_fh].tx++;
+	}
+
+	if (unlikely(len == MAX_PKT_BURST)) {
+		m_table = (struct rte_mbuf **)tx_q->m_table;
+		/*
+		 * Send the local count. tx_q->len is only written back at
+		 * the end of this function, so it is still one short here
+		 * and would leave out the packet that was just queued,
+		 * which the loop below would then free unsent.
+		 */
+		ret = overlay_options.tx_handle(ports[0],
+			(uint16_t)tx_q->txq_id, m_table,
+			(uint16_t)len);
+
+		/* Free any buffers not handled by TX and update
+		 * the port stats.
+		 */
+		if (unlikely(ret < len)) {
+			do {
+				rte_pktmbuf_free(m_table[ret]);
+			} while (++ret < len);
+		}
+
+		len = 0;
+	}
+
+	tx_q->len = len;
+	return;
+}
+
+/**
+ * This function is called by each data core. It handles all
+ * RX/TX registered with the core. For TX the specific lcore
+ * linked list is used. For RX, MAC addresses are compared
+ * with all devices in the main linked list.
+ */
+static int
+switch_worker(__rte_unused void *arg)
+{	/*
+	 * arg is used despite the __rte_unused marker: it is the mempool
+	 * handed to rte_vhost_dequeue_burst() below.
+	 *
+	 * Each pass of the endless loop
+	 *  1. flushes this core's TX queue once more than drain_tsc cycles
+	 *     have passed since the previous flush check,
+	 *  2. acknowledges a pending device removal request,
+	 *  3. walks this core's used list, moving NIC RX bursts to the guest
+	 *     and guest TX bursts to virtio_tx_route().
+	 * The return statement at the bottom is never reached.
+	 */
+	struct rte_mempool *mbuf_pool = arg;
+	struct virtio_net *dev = NULL;
+	struct vhost_dev *vdev = NULL;
+	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	struct virtio_net_data_ll *dev_ll;
+	struct mbuf_table *tx_q;
+	volatile struct lcore_ll_info *lcore_ll;
+	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
+		/ US_PER_S * BURST_TX_DRAIN_US; /* TSC cycles per microsecond, rounded up, times the drain period */
+	uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
+	unsigned i, ret = 0;
+	const uint16_t lcore_id = rte_lcore_id();
+	const uint16_t num_cores = (uint16_t)rte_lcore_count();
+	uint16_t rx_count = 0;
+	uint16_t tx_count;
+	uint32_t retry = 0;
+
+	RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id);
+	lcore_ll = lcore_info[lcore_id].lcore_ll;
+	prev_tsc = 0;
+
+	tx_q = &lcore_tx_queue[lcore_id];
+	for (i = 0; i < num_cores; i++) { /* the TX queue id is this lcore's index in lcore_ids[] */
+		if (lcore_ids[i] == lcore_id) {
+			tx_q->txq_id = i;
+			break;
+		}
+	}
+
+	while (1) {
+		cur_tsc = rte_rdtsc();
+		/*
+		 * TX burst queue drain: send whatever is queued, free the
+		 * mbufs the tx_handle hook did not take, and empty the queue.
+		 */
+		diff_tsc = cur_tsc - prev_tsc;
+		if (unlikely(diff_tsc > drain_tsc)) {
+
+			if (tx_q->len) {
+				LOG_DEBUG(VHOST_DATA, "TX queue drained after "
+					"timeout with burst size %u\n",
+					tx_q->len);
+				ret = overlay_options.tx_handle(ports[0],
+					(uint16_t)tx_q->txq_id,
+					(struct rte_mbuf **)tx_q->m_table,
+					(uint16_t)tx_q->len);
+				if (unlikely(ret < tx_q->len)) {
+					do {
+						rte_pktmbuf_free(tx_q->m_table[ret]);
+					} while (++ret < tx_q->len);
+				}
+
+				tx_q->len = 0;
+			}
+
+			prev_tsc = cur_tsc;
+
+		}
+
+		rte_prefetch0(lcore_ll->ll_root_used);
+
+		/**
+		 * Inform the configuration core that we have exited
+		 * the linked list and that no devices are
+		 * in use if requested.
+		 * The acknowledgement is written before the list head is
+		 * read again below, i.e. between two walks of the list.
+		 */
+		if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
+			lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
+
+		/*
+		 * Process devices
+		 */
+		dev_ll = lcore_ll->ll_root_used;
+
+		while (dev_ll != NULL) {
+			vdev = dev_ll->vdev;
+			dev = vdev->dev;
+
+			if (unlikely(vdev->remove)) {
+				dev_ll = dev_ll->next; /* advance first: continue skips the advance at the bottom of the loop */
+				overlay_options.tunnel_destroy(vdev);
+				vdev->ready = DEVICE_SAFE_REMOVE; /* destroy_device() spins until it sees this */
+				continue;
+			}
+			if (likely(vdev->ready == DEVICE_RX)) {
+				/* Handle guest RX: NIC queue vdev->rx_q of ports[0] feeds this device */
+				rx_count = rte_eth_rx_burst(ports[0],
+					vdev->rx_q, pkts_burst, MAX_PKT_BURST);
+
+				if (rx_count) {
+					/*
+					 * Retry is enabled and the queue is
+					 * full then we wait and retry to
+					 * avoid packet loss. Here MAX_PKT_BURST
+					 * must be less than virtio queue size.
+					 * After burst_rx_retry_num attempts the
+					 * burst is handed over regardless.
+					 */
+					if (enable_retry && unlikely(rx_count >
+						rte_vring_available_entries(dev, VIRTIO_RXQ))) {
+						for (retry = 0; retry < burst_rx_retry_num;
+							retry++) {
+							rte_delay_us(burst_rx_delay_time);
+							if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
+								break;
+						}
+					}
+
+					ret_count = overlay_options.rx_handle(dev, pkts_burst, rx_count);
+					if (enable_stats) {
+						rte_atomic64_add(
+						&dev_statistics[dev->device_fh].rx_total_atomic,
+						rx_count);
+						rte_atomic64_add(
+						&dev_statistics[dev->device_fh].rx_atomic, ret_count);
+					}
+					while (likely(rx_count)) { /* every received mbuf is freed here, whatever ret_count was */
+						rx_count--;
+						rte_pktmbuf_free(pkts_burst[rx_count]);
+					}
+
+				}
+			}
+
+			if (likely(!vdev->remove)) {
+				/* Handle guest TX: dequeue a burst from the guest using mbufs from mbuf_pool */
+				tx_count = rte_vhost_dequeue_burst(dev,
+						VIRTIO_TXQ, mbuf_pool,
+						pkts_burst, MAX_PKT_BURST);
+				/*
+				 * If this is the first received packet we need to learn the MAC.
+				 * When the device is being removed or tunnel_setup fails the
+				 * whole burst is freed, which leaves tx_count at 0 so that the
+				 * routing loop below does nothing.
+				 */
+				if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
+					if (vdev->remove ||
+						(overlay_options.tunnel_setup(vdev, pkts_burst[0]) == -1)) {
+						while (tx_count)
+							rte_pktmbuf_free(pkts_burst[--tx_count]);
+					}
+				}
+				while (tx_count) /* packets are routed last-to-first */
+					virtio_tx_route(vdev, pkts_burst[--tx_count]);
+			}
+
+			/* move to the next device in the list */
+			dev_ll = dev_ll->next;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Add an entry to a
used linked list. A free entry must first be found + * in the free linked list using get_data_ll_free_entry(); + */ +static void +add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev) +{ + struct virtio_net_data_ll *ll = *ll_root_addr; + + /* Set next as NULL and use a compiler barrier to avoid reordering. */ + ll_dev->next = NULL; + rte_compiler_barrier(); + + /* If ll == NULL then this is the first device. */ + if (ll) { + /* Increment to the tail of the linked list. */ + while (ll->next != NULL) + ll = ll->next; + + ll->next = ll_dev; + } else { + *ll_root_addr = ll_dev; + } +} + +/** + * Remove an entry from a used linked list. The entry must then be added to + * the free linked list using put_data_ll_free_entry(). + */ +static void +rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev, + struct virtio_net_data_ll *ll_dev_last) +{ + struct virtio_net_data_ll *ll = *ll_root_addr; + + if (unlikely((ll == NULL) || (ll_dev == NULL))) + return; + + if (ll_dev == ll) + *ll_root_addr = ll_dev->next; + else + if (likely(ll_dev_last != NULL)) + ll_dev_last->next = ll_dev->next; + else + RTE_LOG(ERR, VHOST_CONFIG, + "Remove entry form ll failed.\n"); +} + +/** + * Find and return an entry from the free linked list. + */ +static struct virtio_net_data_ll * +get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr) +{ + struct virtio_net_data_ll *ll_free = *ll_root_addr; + struct virtio_net_data_ll *ll_dev; + + if (ll_free == NULL) + return NULL; + + ll_dev = ll_free; + *ll_root_addr = ll_free->next; + + return ll_dev; +} + +/** + * Place an entry back on to the free linked list. 
+ */ +static void +put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev) +{ + struct virtio_net_data_ll *ll_free = *ll_root_addr; + + if (ll_dev == NULL) + return; + + ll_dev->next = ll_free; + *ll_root_addr = ll_dev; +} + +/** + * Creates a linked list of a given size. + */ +static struct virtio_net_data_ll * +alloc_data_ll(uint32_t size) +{ + struct virtio_net_data_ll *ll_new; + uint32_t i; + + /* Malloc and then chain the linked list. */ + ll_new = malloc(size * sizeof(struct virtio_net_data_ll)); + if (ll_new == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for ll_new.\n"); + return NULL; + } + + for (i = 0; i < size - 1; i++) { + ll_new[i].vdev = NULL; + ll_new[i].next = &ll_new[i+1]; + } + ll_new[i].next = NULL; + + return ll_new; +} + +/** + * Create the main linked list along with each individual cores + * linked list. A used and a free list are created to manage entries. + */ +static int +init_data_ll(void) +{ + int lcore; + + RTE_LCORE_FOREACH_SLAVE(lcore) { + lcore_info[lcore].lcore_ll = + malloc(sizeof(struct lcore_ll_info)); + if (lcore_info[lcore].lcore_ll == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for lcore_ll.\n"); + return -1; + } + + lcore_info[lcore].lcore_ll->device_num = 0; + lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; + lcore_info[lcore].lcore_ll->ll_root_used = NULL; + if (nb_devices % nb_switching_cores) + lcore_info[lcore].lcore_ll->ll_root_free = + alloc_data_ll((nb_devices / nb_switching_cores) + + 1); + else + lcore_info[lcore].lcore_ll->ll_root_free = + alloc_data_ll(nb_devices / nb_switching_cores); + } + + /* Allocate devices up to a maximum of MAX_DEVICES. */ + ll_root_free = alloc_data_ll(MIN((nb_devices), MAX_DEVICES)); + + return 0; +} + +/** + * Remove a device from the specific data core linked list and + * from the main linked list. Synchonization occurs through the use + * of the lcore dev_removal_flag. 
Device is made volatile here
+ * to avoid re-ordering of dev->remove=1 which can cause an infinite
+ * loop in the rte_pause loop.
+ */
+static void
+destroy_device(volatile struct virtio_net *dev)
+{	/*
+	 * Sequence: clear the RUNNING flag, raise vdev->remove and wait for
+	 * the state DEVICE_SAFE_REMOVE, unlink the device from its core's
+	 * used list and from the main used list, run the
+	 * request/acknowledge handshake with every slave lcore, return both
+	 * list entries to their free lists and finally free the vhost_dev.
+	 */
+	struct virtio_net_data_ll *ll_lcore_dev_cur;
+	struct virtio_net_data_ll *ll_main_dev_cur;
+	struct virtio_net_data_ll *ll_lcore_dev_last = NULL;
+	struct virtio_net_data_ll *ll_main_dev_last = NULL;
+	struct vhost_dev *vdev;
+	int lcore;
+
+	dev->flags &= ~VIRTIO_DEV_RUNNING;
+
+	vdev = (struct vhost_dev *)dev->priv;
+
+	/*
+	 * Set the remove flag, then spin until vdev->ready becomes
+	 * DEVICE_SAFE_REMOVE. In this file that value is written by
+	 * switch_worker() after it has called the tunnel_destroy hook for
+	 * the device.
+	 */
+	vdev->remove = 1;
+	while (vdev->ready != DEVICE_SAFE_REMOVE)
+		rte_pause();
+
+	/* Search for entry to be removed from lcore ll, remembering its predecessor for the unlink */
+	ll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used;
+	while (ll_lcore_dev_cur != NULL) {
+		if (ll_lcore_dev_cur->vdev == vdev) {
+			break;
+		} else {
+			ll_lcore_dev_last = ll_lcore_dev_cur;
+			ll_lcore_dev_cur = ll_lcore_dev_cur->next;
+		}
+	}
+
+	if (ll_lcore_dev_cur == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to find the dev to be destroy.\n",
+			dev->device_fh);
+		return; /* NOTE(review): on this path vdev is not freed and the main list is left untouched */
+	}
+
+	/* Search for entry to be removed from main ll */
+	ll_main_dev_cur = ll_root_used;
+	ll_main_dev_last = NULL;
+	while (ll_main_dev_cur != NULL) {
+		if (ll_main_dev_cur->vdev == vdev) {
+			break;
+		} else {
+			ll_main_dev_last = ll_main_dev_cur;
+			ll_main_dev_cur = ll_main_dev_cur->next;
+		}
+	}
+
+	/* Remove entries from the lcore and main ll. */
+	rm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used,
+			ll_lcore_dev_cur, ll_lcore_dev_last);
+	rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
+
+	/* Set the dev_removal_flag on each lcore. */
+	RTE_LCORE_FOREACH_SLAVE(lcore) {
+		lcore_info[lcore].lcore_ll->dev_removal_flag =
+			REQUEST_DEV_REMOVAL;
+	}
+
+	/*
+	 * Once each core has set the dev_removal_flag to
+	 * ACK_DEV_REMOVAL we can be sure that they can no longer access
+	 * the device removed from the linked lists and that the devices
+	 * are no longer in use.
+	 * Only after that are the entries recycled and vdev freed.
+	 */
+	RTE_LCORE_FOREACH_SLAVE(lcore) {
+		while (lcore_info[lcore].lcore_ll->dev_removal_flag
+			!= ACK_DEV_REMOVAL)
+			rte_pause();
+	}
+
+	/* Add the entries back to the lcore and main free ll.*/
+	put_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free,
+				ll_lcore_dev_cur);
+	put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
+
+	/* Decrement number of device on the lcore. */
+	lcore_info[vdev->coreid].lcore_ll->device_num--;
+
+	RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed "
+		"from data core\n", dev->device_fh);
+
+	rte_free(vdev); /* dev->priv still holds this address afterwards */
+
+}
+
+/**
+ * A new device is added to a data core. First the device is added
+ * to the main linked list and the allocated to a specific data core.
+ */
+static int
+new_device(struct virtio_net *dev)
+{
+	/*
+	 * Returns 0 on success and -1 when no memory or no free list entry
+	 * is available. On failure nothing stays linked and dev->priv is
+	 * reset to NULL.
+	 */
+	struct virtio_net_data_ll *ll_dev;
+	struct virtio_net_data_ll *ll_main_dev;
+	struct virtio_net_data_ll *ll_cur, *ll_last;
+	int lcore, core_add = 0;
+	uint32_t device_num_min = nb_devices;
+	struct vhost_dev *vdev;
+
+	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
+	if (vdev == NULL) {
+		RTE_LOG(INFO, VHOST_DATA,
+			"(%"PRIu64") Couldn't allocate memory for vhost dev\n",
+			dev->device_fh);
+		return -1;
+	}
+	vdev->dev = dev;
+	dev->priv = vdev;
+	/* Add device to main ll */
+	ll_main_dev = get_data_ll_free_entry(&ll_root_free);
+	if (ll_main_dev == NULL) {
+		RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in"
+			" linked list Device limit of %d devices per core"
+			" has been reached\n", dev->device_fh, nb_devices);
+		dev->priv = NULL;
+		rte_free(vdev->regions_hpa);
+		rte_free(vdev);
+		return -1;
+	}
+	ll_main_dev->vdev = vdev;
+	add_data_ll_entry(&ll_root_used, ll_main_dev);
+	vdev->rx_q = dev->device_fh;
+
+	/* reset ready flag */
+	vdev->ready = DEVICE_MAC_LEARNING;
+	vdev->remove = 0;
+
+	/* Find a suitable lcore to add the device: the least loaded one. */
+	RTE_LCORE_FOREACH_SLAVE(lcore) {
+		if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
+			device_num_min = lcore_info[lcore].lcore_ll->device_num;
+			core_add = lcore;
+		}
+	}
+	/* Add device to lcore ll */
+	ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);
+	if (ll_dev == NULL) {
+		RTE_LOG(INFO, VHOST_DATA,
+			"(%"PRIu64") Failed to add device to data core\n",
+			dev->device_fh);
+		/*
+		 * Undo the main list insertion by hand. The device was never
+		 * put on a per-core list, so no data core can be using it
+		 * and the removal handshake is not needed. destroy_device()
+		 * must not be used here: it looks the device up through
+		 * vdev->coreid, which has not been assigned yet, and it
+		 * either frees vdev itself (a second free would follow) or
+		 * returns early and leaves the main list entry pointing at
+		 * memory that is freed below.
+		 */
+		ll_last = NULL;
+		for (ll_cur = ll_root_used;
+				(ll_cur != NULL) && (ll_cur != ll_main_dev);
+				ll_cur = ll_cur->next)
+			ll_last = ll_cur;
+		rm_data_ll_entry(&ll_root_used, ll_cur, ll_last);
+		put_data_ll_free_entry(&ll_root_free, ll_cur);
+
+		vdev->ready = DEVICE_SAFE_REMOVE;
+		dev->priv = NULL;
+		rte_free(vdev->regions_hpa);
+		rte_free(vdev);
+		return -1;
+	}
+	ll_dev->vdev = vdev;
+	vdev->coreid = core_add;
+
+	add_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used,
+			ll_dev);
+
+	/* Initialize device stats */
+	memset(&dev_statistics[dev->device_fh], 0,
+		sizeof(struct device_statistics));
+
+	/* Disable notifications. */
+	rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
+	rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
+	lcore_info[vdev->coreid].lcore_ll->device_num++;
+	dev->flags |= VIRTIO_DEV_RUNNING;
+
+	RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n",
+		dev->device_fh, vdev->coreid);
+
+	return 0;
+}
+
+/**
+ * These callback allow devices to be added to the data core when configuration
+ * has been fully complete.
+ */
+static const struct virtio_net_device_ops virtio_net_device_ops = {
+	.new_device = new_device,
+	.destroy_device = destroy_device,
+};
+
+/**
+ * This is a thread will wake up after a period to print stats if the user has
+ * enabled them.
+ */
+static void
+print_stats(void)
+{
+	struct virtio_net_data_ll *dev_ll;
+	uint64_t tx_dropped, rx_dropped;
+	uint64_t tx, tx_total, rx, rx_total, rx_ip_csum, rx_l4_csum;
+	uint32_t device_fh;
+	const char clr[] = { 27, '[', '2', 'J', '\0' };
+	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
+
+	while (1) {
+		sleep(enable_stats);
+
+		/* Clear screen and move to top left */
+		printf("%s%s", clr, top_left);
+
+		printf("\nDevice statistics ================================");
+
+		dev_ll = ll_root_used;
+		while (dev_ll != NULL) {
+			device_fh = (uint32_t)dev_ll->vdev->dev->device_fh;
+			tx_total = dev_statistics[device_fh].tx_total;
+			tx = dev_statistics[device_fh].tx;
+			tx_dropped = tx_total - tx;
+
+			rx_total = rte_atomic64_read(
+				&dev_statistics[device_fh].rx_total_atomic);
+			rx = rte_atomic64_read(
+				&dev_statistics[device_fh].rx_atomic);
+			rx_dropped = rx_total - rx;
+			rx_ip_csum = rte_atomic64_read(
+				&dev_statistics[device_fh].rx_bad_ip_csum);
+			rx_l4_csum = rte_atomic64_read(
+				&dev_statistics[device_fh].rx_bad_l4_csum);
+
+			printf("\nStatistics for device %"PRIu32" ----------"
+				"\nTX total: %"PRIu64""
+				"\nTX dropped: %"PRIu64""
+				"\nTX successful: %"PRIu64""
+				"\nRX total: %"PRIu64""
+				"\nRX bad IP csum: %"PRIu64""
+				"\nRX bad L4 csum: %"PRIu64""
+
"\nRX dropped: %"PRIu64"" + "\nRX successful: %"PRIu64"", + device_fh, + tx_total, + tx_dropped, + tx, + rx_total, + rx_ip_csum, + rx_l4_csum, + rx_dropped, + rx); + + dev_ll = dev_ll->next; + } + printf("\n================================================\n"); + } +} + +/** + * Main function, does initialisation and calls the per-lcore functions. The CUSE + * device is also registered here to handle the IOCTLs. + */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool = NULL; + unsigned lcore_id, core_id = 0; + unsigned nb_ports, valid_nb_ports; + int ret; + uint8_t portid; + uint16_t queue_id; + static pthread_t tid; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + /* parse app arguments */ + ret = tep_termination_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid argument\n"); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) + if (rte_lcore_is_enabled(lcore_id)) + lcore_ids[core_id++] = lcore_id; + + /* set the number of swithcing cores available */ + nb_switching_cores = rte_lcore_count()-1; + + /* Get the number of physical ports. */ + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* + * Update the global var NB_PORTS and global array PORTS + * and get value of var VALID_NB_PORTS according to system ports number + */ + valid_nb_ports = check_ports_num(nb_ports); + + if ((valid_nb_ports == 0) || (valid_nb_ports > MAX_SUP_PORTS)) { + rte_exit(EXIT_FAILURE, "Current enabled port number is %u," + "but only %u port can be enabled\n", nb_ports, + MAX_SUP_PORTS); + } + /* Create the mbuf pool. 
*/ + mbuf_pool = rte_mempool_create( + "MBUF_POOL", + NUM_MBUFS_PER_PORT + * valid_nb_ports, + MBUF_SIZE, MBUF_CACHE_SIZE, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + rte_socket_id(), 0); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + for (queue_id = 0; queue_id < MAX_QUEUES + 1; queue_id++) + vpool_array[queue_id].pool = mbuf_pool; + + /* Set log level. */ + rte_set_log_level(LOG_LEVEL); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + RTE_LOG(INFO, VHOST_PORT, + "Skipping disabled port %d\n", portid); + continue; + } + if (overlay_options.port_configure(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, + "Cannot initialize network ports\n"); + } + + /* Initialise all linked lists. */ + if (init_data_ll() == -1) + rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n"); + + /* Initialize device stats */ + memset(&dev_statistics, 0, sizeof(dev_statistics)); + + /* Enable stats if the user option is set. */ + if (enable_stats) { + ret = pthread_create(&tid, NULL, (void *)print_stats, NULL); + if (ret != 0) + rte_exit(EXIT_FAILURE, "Cannot create print-stats thread\n"); + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats"); + ret = rte_thread_setname(tid, thread_name); + if (ret != 0) + RTE_LOG(ERR, VHOST_CONFIG, "Cannot set print-stats name\n"); + } + + /* Launch all data cores. */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(switch_worker, + mbuf_pool, lcore_id); + } + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF); + + /* Register CUSE device to handle IOCTLs. */ + ret = rte_vhost_driver_register((char *)&dev_basename); + if (ret != 0) + rte_exit(EXIT_FAILURE, "CUSE device setup failure.\n"); + + rte_vhost_driver_callback_register(&virtio_net_device_ops); + + /* Start CUSE session. 
*/ + rte_vhost_driver_session_start(); + + return 0; +} diff --git a/examples/tep_termination/main.h b/examples/tep_termination/main.h new file mode 100644 index 00000000..a34301ad --- /dev/null +++ b/examples/tep_termination/main.h @@ -0,0 +1,129 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +#include <rte_ether.h> + +#ifdef DEBUG +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...) do {} while (0) +#endif + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER2 +#define RTE_LOGTYPE_VHOST_PORT RTE_LOGTYPE_USER3 + +/* State of virtio device. */ +#define DEVICE_MAC_LEARNING 0 +#define DEVICE_RX 1 +#define DEVICE_SAFE_REMOVE 2 + +#define MAX_QUEUES 512 + +/* Max burst size for RX/TX */ +#define MAX_PKT_BURST 32 + +/* Max number of devices. Limited by the application. */ +#define MAX_DEVICES 64 + +/* Per-device statistics struct */ +struct device_statistics { + uint64_t tx_total; + rte_atomic64_t rx_total_atomic; + uint64_t rx_total; + uint64_t tx; + rte_atomic64_t rx_atomic; + /** Bad inner IP csum for tunneling pkt */ + rte_atomic64_t rx_bad_ip_csum; + /** Bad inner L4 csum for tunneling pkt */ + rte_atomic64_t rx_bad_l4_csum; +} __rte_cache_aligned; + +/** + * Per-device state used on the data path; list entries of type + * struct virtio_net_data_ll point at it. + */ +struct vhost_dev { + /** Pointer to device created by vhost lib. */ + struct virtio_net *dev; + /** Number of memory regions for gpa to hpa translation. */ + uint32_t nregions_hpa; + /** Memory region information for gpa to hpa translation. */ + struct virtio_memory_regions_hpa *regions_hpa; + /** Device MAC address (Obtained on first TX packet). */ + struct ether_addr mac_address; + /** RX queue number. */ + uint16_t rx_q; + /** Data core that the device is added to. */ + uint16_t coreid; + /** Device state, one of the DEVICE_* values; DEVICE_RX once the MAC address has been set. */ + volatile uint8_t ready; + /** Device is marked for removal from the data core. */ + volatile uint8_t remove; +} __rte_cache_aligned; + +/** + * Structure containing data core specific information.
+ */ +struct lcore_ll_info { + /** Pointer to head in free linked list. */ + struct virtio_net_data_ll *ll_root_free; + /** Pointer to head of used linked list. */ + struct virtio_net_data_ll *ll_root_used; + /** Number of devices on lcore. */ + uint32_t device_num; + /** Flag to synchronize device removal. */ + volatile uint8_t dev_removal_flag; +}; + +struct lcore_info { + /** Pointer to data core specific lcore_ll_info struct */ + struct lcore_ll_info *lcore_ll; +}; + +struct virtio_net_data_ll { + /** Pointer to device created by configuration core. */ + struct vhost_dev *vdev; + /** Pointer to next device in linked list. */ + struct virtio_net_data_ll *next; +}; + +uint32_t +virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count); + +#endif /* _MAIN_H_ */ diff --git a/examples/tep_termination/vxlan.c b/examples/tep_termination/vxlan.c new file mode 100644 index 00000000..5ee1f956 --- /dev/null +++ b/examples/tep_termination/vxlan.c @@ -0,0 +1,259 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> +#include <rte_mbuf.h> +#include <rte_hash_crc.h> +#include <rte_byteorder.h> +#include <rte_udp.h> +#include <rte_tcp.h> +#include <rte_sctp.h> + +#include "main.h" +#include "vxlan.h" + +static uint16_t +get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) +{ + if (ethertype == ETHER_TYPE_IPv4) + return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); + else /* assume ethertype == ETHER_TYPE_IPv6 */ + return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); +} + +/** + * Parse an ethernet header to fill the ethertype, outer_l2_len, outer_l3_len and + * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan + * header. 
+ */ +static void +parse_ethernet(struct ether_hdr *eth_hdr, union tunnel_offload_info *info, + uint8_t *l4_proto) +{ + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + uint16_t ethertype; + + info->outer_l2_len = sizeof(struct ether_hdr); + ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); + + if (ethertype == ETHER_TYPE_VLAN) { + struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); + info->outer_l2_len += sizeof(struct vlan_hdr); + ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); + } + + switch (ethertype) { + case ETHER_TYPE_IPv4: + ipv4_hdr = (struct ipv4_hdr *) + ((char *)eth_hdr + info->outer_l2_len); + info->outer_l3_len = sizeof(struct ipv4_hdr); + *l4_proto = ipv4_hdr->next_proto_id; + break; + case ETHER_TYPE_IPv6: + ipv6_hdr = (struct ipv6_hdr *) + ((char *)eth_hdr + info->outer_l2_len); + info->outer_l3_len = sizeof(struct ipv6_hdr); + *l4_proto = ipv6_hdr->proto; + break; + default: + info->outer_l3_len = 0; + *l4_proto = 0; + break; + } +} + +/** + * Calculate the checksum of a packet in hardware + */ +static uint64_t +process_inner_cksums(struct ether_hdr *eth_hdr, union tunnel_offload_info *info) +{ + void *l3_hdr = NULL; + uint8_t l4_proto; + uint16_t ethertype; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + struct udp_hdr *udp_hdr; + struct tcp_hdr *tcp_hdr; + struct sctp_hdr *sctp_hdr; + uint64_t ol_flags = 0; + + info->l2_len = sizeof(struct ether_hdr); + ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); + + if (ethertype == ETHER_TYPE_VLAN) { + struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); + info->l2_len += sizeof(struct vlan_hdr); + ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); + } + + l3_hdr = (char *)eth_hdr + info->l2_len; + + if (ethertype == ETHER_TYPE_IPv4) { + ipv4_hdr = (struct ipv4_hdr *)l3_hdr; + ipv4_hdr->hdr_checksum = 0; + ol_flags |= PKT_TX_IPV4; + ol_flags |= PKT_TX_IP_CKSUM; + info->l3_len = sizeof(struct ipv4_hdr); + l4_proto = ipv4_hdr->next_proto_id; + } else if 
(ethertype == ETHER_TYPE_IPv6) { + ipv6_hdr = (struct ipv6_hdr *)l3_hdr; + info->l3_len = sizeof(struct ipv6_hdr); + l4_proto = ipv6_hdr->proto; + ol_flags |= PKT_TX_IPV6; + } else + return 0; /* packet type not supported, nothing to do */ + + if (l4_proto == IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *)((char *)l3_hdr + info->l3_len); + ol_flags |= PKT_TX_UDP_CKSUM; + udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, + ethertype, ol_flags); + } else if (l4_proto == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); + ol_flags |= PKT_TX_TCP_CKSUM; + tcp_hdr->cksum = get_psd_sum(l3_hdr, ethertype, + ol_flags); + if (tso_segsz != 0) { + ol_flags |= PKT_TX_TCP_SEG; + info->tso_segsz = tso_segsz; + info->l4_len = sizeof(struct tcp_hdr); + } + + } else if (l4_proto == IPPROTO_SCTP) { + sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len); + sctp_hdr->cksum = 0; + ol_flags |= PKT_TX_SCTP_CKSUM; + } + + return ol_flags; +} + +int +decapsulation(struct rte_mbuf *pkt) +{ + uint8_t l4_proto = 0; + uint16_t outer_header_len; + struct udp_hdr *udp_hdr; + union tunnel_offload_info info = { .data = 0 }; + struct ether_hdr *phdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + + parse_ethernet(phdr, &info, &l4_proto); + + if (l4_proto != IPPROTO_UDP) + return -1; + + udp_hdr = (struct udp_hdr *)((char *)phdr + + info.outer_l2_len + info.outer_l3_len); + + /** check udp destination port, 4789 is the default vxlan port + * (rfc7348) or that the rx offload flag is set (i40e only + * currently)*/ + if (udp_hdr->dst_port != rte_cpu_to_be_16(DEFAULT_VXLAN_PORT) && + (pkt->packet_type & RTE_PTYPE_TUNNEL_MASK) == 0) + return -1; + outer_header_len = info.outer_l2_len + info.outer_l3_len + + sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr); + + rte_pktmbuf_adj(pkt, outer_header_len); + + return 0; +} + +void +encapsulation(struct rte_mbuf *m, uint8_t queue_id) +{ + uint vport_id; + uint64_t ol_flags = 0; + uint32_t old_len = m->pkt_len, hash; + union 
tunnel_offload_info tx_offload = { .data = 0 }; + struct ether_hdr *phdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /*Allocate space for new ethernet, IPv4, UDP and VXLAN headers*/ + struct ether_hdr *pneth = (struct ether_hdr *) rte_pktmbuf_prepend(m, + sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)); + + struct ipv4_hdr *ip = (struct ipv4_hdr *) &pneth[1]; + struct udp_hdr *udp = (struct udp_hdr *) &ip[1]; + struct vxlan_hdr *vxlan = (struct vxlan_hdr *) &udp[1]; + + /* convert TX queue ID to vport ID */ + vport_id = queue_id - 1; + + /* replace original Ethernet header with ours */ + pneth = rte_memcpy(pneth, &app_l2_hdr[vport_id], + sizeof(struct ether_hdr)); + + /* copy in IP header */ + ip = rte_memcpy(ip, &app_ip_hdr[vport_id], + sizeof(struct ipv4_hdr)); + ip->total_length = rte_cpu_to_be_16(m->data_len + - sizeof(struct ether_hdr)); + + /* outer IP checksum */ + ol_flags |= PKT_TX_OUTER_IP_CKSUM; + ip->hdr_checksum = 0; + + /* inner IP checksum offload */ + if (tx_checksum) { + ol_flags |= process_inner_cksums(phdr, &tx_offload); + m->l2_len = tx_offload.l2_len; + m->l3_len = tx_offload.l3_len; + m->l4_len = tx_offload.l4_len; + m->l2_len += ETHER_VXLAN_HLEN; + } + + m->outer_l2_len = sizeof(struct ether_hdr); + m->outer_l3_len = sizeof(struct ipv4_hdr); + + m->ol_flags |= ol_flags; + m->tso_segsz = tx_offload.tso_segsz; + + /*VXLAN HEADER*/ + vxlan->vx_flags = rte_cpu_to_be_32(VXLAN_HF_VNI); + vxlan->vx_vni = rte_cpu_to_be_32(vxdev.out_key << 8); + + /*UDP HEADER*/ + udp->dgram_cksum = 0; + udp->dgram_len = rte_cpu_to_be_16(old_len + + sizeof(struct udp_hdr) + + sizeof(struct vxlan_hdr)); + + udp->dst_port = rte_cpu_to_be_16(vxdev.dst_port); + hash = rte_hash_crc(phdr, 2 * ETHER_ADDR_LEN, phdr->ether_type); + udp->src_port = rte_cpu_to_be_16((((uint64_t) hash * PORT_RANGE) >> 32) + + PORT_MIN); + + return; +} diff --git a/examples/tep_termination/vxlan.h b/examples/tep_termination/vxlan.h new 
file mode 100644 index 00000000..4242e111 --- /dev/null +++ b/examples/tep_termination/vxlan.h @@ -0,0 +1,86 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VXLAN_H_ +#define _VXLAN_H_ + +#include <rte_ether.h> +#include <rte_ip.h> + +#define PORT_MIN 49152 +#define PORT_MAX 65535 +#define PORT_RANGE ((PORT_MAX - PORT_MIN) + 1) + +#define VXLAN_N_PORTS 2 +#define VXLAN_HF_VNI 0x08000000 +#define DEFAULT_VXLAN_PORT 4789 + +extern struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS]; +extern struct ether_hdr app_l2_hdr[VXLAN_N_PORTS]; +extern uint8_t tx_checksum; +extern uint16_t tso_segsz; + +struct vxlan_port { + uint32_t vport_id; /**< VirtIO port id */ + uint32_t peer_ip; /**< remote VTEP IP address */ + struct ether_addr peer_mac; /**< remote VTEP MAC address */ + struct ether_addr vport_mac; /**< VirtIO port MAC address */ +} __rte_cache_aligned; + +struct vxlan_conf { + uint16_t dst_port; /**< VXLAN UDP destination port */ + uint32_t port_ip; /**< local VTEP IP address */ + uint32_t in_key; /**< VLAN ID */ + uint32_t out_key; /**< VXLAN VNI */ + struct vxlan_port port[VXLAN_N_PORTS]; /**< per-vport configuration, VXLAN_N_PORTS entries */ +} __rte_cache_aligned; + +extern struct vxlan_conf vxdev; + +/* structure that caches offload info for the current packet; the bit-fields add up to 63 bits and share storage with data, so writing data = 0 clears every field at once */ +union tunnel_offload_info { + uint64_t data; + struct { + uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t l3_len:9; /**< L3 (IP) Header Length. */ + uint64_t l4_len:8; /**< L4 Header Length. */ + uint64_t tso_segsz:16; /**< TCP TSO segment size */ + uint64_t outer_l2_len:7; /**< outer L2 Header Length */ + uint64_t outer_l3_len:16; /**< outer L3 Header Length */ + }; +} __rte_cache_aligned; + +int decapsulation(struct rte_mbuf *pkt); +void encapsulation(struct rte_mbuf *m, uint8_t queue_id); + +#endif /* _VXLAN_H_ */ diff --git a/examples/tep_termination/vxlan_setup.c b/examples/tep_termination/vxlan_setup.c new file mode 100644 index 00000000..2a48e142 --- /dev/null +++ b/examples/tep_termination/vxlan_setup.c @@ -0,0 +1,457 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <getopt.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/virtio_net.h> +#include <linux/virtio_ring.h> +#include <sys/param.h> +#include <unistd.h> + +#include <rte_ethdev.h> +#include <rte_log.h> +#include <rte_string_fns.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_ip.h> +#include <rte_udp.h> +#include <rte_tcp.h> + +#include "main.h" +#include "rte_virtio_net.h" +#include "vxlan.h" +#include "vxlan_setup.h" + +#define IPV4_HEADER_LEN 20 +#define UDP_HEADER_LEN 8 +#define VXLAN_HEADER_LEN 8 + +#define IP_VERSION 0x40 +#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */ +#define IP_DEFTTL 64 /* from RFC 1340. */ +#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN) + +#define IP_DN_FRAGMENT_FLAG 0x0040 + +/* Used to compare MAC addresses. */ +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL + +/* Configurable number of RX/TX ring descriptors */ +#define RTE_TEST_RX_DESC_DEFAULT 1024 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +/* Default inner VLAN ID */ +#define INNER_VLAN_ID 100 + +/* VXLAN device */ +struct vxlan_conf vxdev; + +struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS]; +struct ether_hdr app_l2_hdr[VXLAN_N_PORTS]; + +/* local VTEP IP address */ +uint8_t vxlan_multicast_ips[2][4] = { {239, 1, 1, 1 }, {239, 1, 2, 1 } }; + +/* Remote VTEP IP address */ +uint8_t vxlan_overlay_ips[2][4] = { {192, 168, 10, 1}, {192, 168, 30, 1} }; + +/* Remote VTEP MAC address */ +uint8_t peer_mac[6] = {0x00, 0x11, 0x01, 0x00, 0x00, 0x01}; + +/* VXLAN RX filter type */ +uint8_t tep_filter_type[] = {RTE_TUNNEL_FILTER_IMAC_TENID, + RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID, + RTE_TUNNEL_FILTER_OMAC_TENID_IMAC,}; + +/* Options for configuring ethernet port */ +static const struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, 
/**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +/** + * The one or two device(s) that belongs to the same tenant ID can + * be assigned in a VM. + */ +const uint16_t tenant_id_conf[] = { + 1000, 1000, 1001, 1001, 1002, 1002, 1003, 1003, + 1004, 1004, 1005, 1005, 1006, 1006, 1007, 1007, + 1008, 1008, 1009, 1009, 1010, 1010, 1011, 1011, + 1012, 1012, 1013, 1013, 1014, 1014, 1015, 1015, + 1016, 1016, 1017, 1017, 1018, 1018, 1019, 1019, + 1020, 1020, 1021, 1021, 1022, 1022, 1023, 1023, + 1024, 1024, 1025, 1025, 1026, 1026, 1027, 1027, + 1028, 1028, 1029, 1029, 1030, 1030, 1031, 1031, +}; + +/** + * Initialises a given port using global settings and with the rx buffers + * coming from the mbuf_pool passed as parameter + */ +int +vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + int retval; + uint16_t q; + struct rte_eth_dev_info dev_info; + uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count(); + const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT; + const uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT; + struct rte_eth_udp_tunnel tunnel_udp; + struct rte_eth_rxconf *rxconf; + struct rte_eth_txconf *txconf; + struct vxlan_conf *pconf = &vxdev; + + pconf->dst_port = udp_port; + + rte_eth_dev_info_get(port, &dev_info); + + if (dev_info.max_rx_queues > MAX_QUEUES) { + rte_exit(EXIT_FAILURE, + "please define MAX_QUEUES no less than %u in %s\n", + dev_info.max_rx_queues, __FILE__); + } + + rxconf = &dev_info.default_rxconf; + txconf = &dev_info.default_txconf; + txconf->txq_flags = 0; + + if (port >= rte_eth_dev_count()) + return -1; + + rx_rings = nb_devices; + + /* Configure ethernet device. */ + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval != 0) + return retval; + + /* Setup the queues. 
*/ + for (q = 0; q < rx_rings; q++) { + retval = rte_eth_rx_queue_setup(port, q, rx_ring_size, + rte_eth_dev_socket_id(port), + rxconf, + mbuf_pool); + if (retval < 0) + return retval; + } + for (q = 0; q < tx_rings; q++) { + retval = rte_eth_tx_queue_setup(port, q, tx_ring_size, + rte_eth_dev_socket_id(port), + txconf); + if (retval < 0) + return retval; + } + + /* Start the device. */ + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + /* Configure UDP port for UDP tunneling */ + tunnel_udp.udp_port = udp_port; + tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN; + retval = rte_eth_dev_udp_tunnel_port_add(port, &tunnel_udp); + if (retval < 0) + return retval; + rte_eth_macaddr_get(port, &ports_eth_addr[port]); + RTE_LOG(INFO, PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port, + ports_eth_addr[port].addr_bytes[0], + ports_eth_addr[port].addr_bytes[1], + ports_eth_addr[port].addr_bytes[2], + ports_eth_addr[port].addr_bytes[3], + ports_eth_addr[port].addr_bytes[4], + ports_eth_addr[port].addr_bytes[5]); + + if (tso_segsz != 0) { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get(port, &dev_info); + if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) == 0) + RTE_LOG(WARNING, PORT, + "hardware TSO offload is not supported\n"); + } + return 0; +} + +static int +vxlan_rx_process(struct rte_mbuf *pkt) +{ + int ret = 0; + + if (rx_decap) + ret = decapsulation(pkt); + + return ret; +} + +static void +vxlan_tx_process(uint8_t queue_id, struct rte_mbuf *pkt) +{ + if (tx_encap) + encapsulation(pkt, queue_id); + + return; +} + +/* + * This function learns the MAC address of the device and set init + * L2 header and L3 header info. 
+ */ +int +vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m) +{ + int i, ret; + struct ether_hdr *pkt_hdr; + struct virtio_net *dev = vdev->dev; + uint64_t portid = dev->device_fh; + struct ipv4_hdr *ip; + + struct rte_eth_tunnel_filter_conf tunnel_filter_conf; + + if (unlikely(portid > VXLAN_N_PORTS)) { + RTE_LOG(INFO, VHOST_DATA, + "(%"PRIu64") WARNING: Not configuring device," + "as already have %d ports for VXLAN.", + dev->device_fh, VXLAN_N_PORTS); + return -1; + } + + /* Learn MAC address of guest device from packet */ + pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + if (is_same_ether_addr(&(pkt_hdr->s_addr), &vdev->mac_address)) { + RTE_LOG(INFO, VHOST_DATA, + "(%"PRIu64") WARNING: This device is using an existing" + " MAC address and has not been registered.\n", + dev->device_fh); + return -1; + } + + for (i = 0; i < ETHER_ADDR_LEN; i++) { + vdev->mac_address.addr_bytes[i] = + vxdev.port[portid].vport_mac.addr_bytes[i] = + pkt_hdr->s_addr.addr_bytes[i]; + vxdev.port[portid].peer_mac.addr_bytes[i] = peer_mac[i]; + } + + memset(&tunnel_filter_conf, 0, + sizeof(struct rte_eth_tunnel_filter_conf)); + + ether_addr_copy(&ports_eth_addr[0], &tunnel_filter_conf.outer_mac); + tunnel_filter_conf.filter_type = tep_filter_type[filter_idx]; + + /* inner MAC */ + ether_addr_copy(&vdev->mac_address, &tunnel_filter_conf.inner_mac); + + tunnel_filter_conf.queue_id = vdev->rx_q; + tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q]; + + if (tep_filter_type[filter_idx] == RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID) + tunnel_filter_conf.inner_vlan = INNER_VLAN_ID; + + tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN; + + ret = rte_eth_dev_filter_ctrl(ports[0], + RTE_ETH_FILTER_TUNNEL, + RTE_ETH_FILTER_ADD, + &tunnel_filter_conf); + if (ret) { + RTE_LOG(ERR, VHOST_DATA, + "%d Failed to add device MAC address to cloud filter\n", + vdev->rx_q); + return -1; + } + + /* Print out inner MAC and VNI info. 
*/ + RTE_LOG(INFO, VHOST_DATA, + "(%d) MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VNI %d registered\n", + vdev->rx_q, + vdev->mac_address.addr_bytes[0], + vdev->mac_address.addr_bytes[1], + vdev->mac_address.addr_bytes[2], + vdev->mac_address.addr_bytes[3], + vdev->mac_address.addr_bytes[4], + vdev->mac_address.addr_bytes[5], + tenant_id_conf[vdev->rx_q]); + + vxdev.port[portid].vport_id = portid; + + for (i = 0; i < 4; i++) { + /* Local VTEP IP */ + vxdev.port_ip |= vxlan_multicast_ips[portid][i] << (8 * i); + /* Remote VTEP IP */ + vxdev.port[portid].peer_ip |= + vxlan_overlay_ips[portid][i] << (8 * i); + } + + vxdev.out_key = tenant_id_conf[vdev->rx_q]; + ether_addr_copy(&vxdev.port[portid].peer_mac, + &app_l2_hdr[portid].d_addr); + ether_addr_copy(&ports_eth_addr[0], + &app_l2_hdr[portid].s_addr); + app_l2_hdr[portid].ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + + ip = &app_ip_hdr[portid]; + ip->version_ihl = IP_VHL_DEF; + ip->type_of_service = 0; + ip->total_length = 0; + ip->packet_id = 0; + ip->fragment_offset = IP_DN_FRAGMENT_FLAG; + ip->time_to_live = IP_DEFTTL; + ip->next_proto_id = IPPROTO_UDP; + ip->hdr_checksum = 0; + ip->src_addr = vxdev.port_ip; + ip->dst_addr = vxdev.port[portid].peer_ip; + + /* Set device as ready for RX. */ + vdev->ready = DEVICE_RX; + + return 0; +} + +/** + * Removes cloud filter. Ensures that nothing is adding buffers to the RX + * queue before disabling RX on the device. 
+ */ +void +vxlan_unlink(struct vhost_dev *vdev) +{ + unsigned i = 0, rx_count; + int ret; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct rte_eth_tunnel_filter_conf tunnel_filter_conf; + + if (vdev->ready == DEVICE_RX) { + memset(&tunnel_filter_conf, 0, + sizeof(struct rte_eth_tunnel_filter_conf)); + + ether_addr_copy(&ports_eth_addr[0], &tunnel_filter_conf.outer_mac); + ether_addr_copy(&vdev->mac_address, &tunnel_filter_conf.inner_mac); + tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q]; + tunnel_filter_conf.filter_type = tep_filter_type[filter_idx]; + + if (tep_filter_type[filter_idx] == + RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID) + tunnel_filter_conf.inner_vlan = INNER_VLAN_ID; + + tunnel_filter_conf.queue_id = vdev->rx_q; + tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN; + + ret = rte_eth_dev_filter_ctrl(ports[0], + RTE_ETH_FILTER_TUNNEL, + RTE_ETH_FILTER_DELETE, + &tunnel_filter_conf); + if (ret) { + RTE_LOG(ERR, VHOST_DATA, + "%d Failed to add device MAC address to cloud filter\n", + vdev->rx_q); + return; + } + for (i = 0; i < ETHER_ADDR_LEN; i++) + vdev->mac_address.addr_bytes[i] = 0; + + /* Clear out the receive buffers */ + rx_count = rte_eth_rx_burst(ports[0], + (uint16_t)vdev->rx_q, + pkts_burst, MAX_PKT_BURST); + + while (rx_count) { + for (i = 0; i < rx_count; i++) + rte_pktmbuf_free(pkts_burst[i]); + + rx_count = rte_eth_rx_burst(ports[0], + (uint16_t)vdev->rx_q, + pkts_burst, MAX_PKT_BURST); + } + vdev->ready = DEVICE_MAC_LEARNING; + } +} + +/* Transmit packets after encapsulating */ +int +vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { + int ret = 0; + uint16_t i; + + for (i = 0; i < nb_pkts; i++) + vxlan_tx_process(queue_id, tx_pkts[i]); + + ret = rte_eth_tx_burst(port_id, queue_id, tx_pkts, nb_pkts); + + return ret; +} + +/* Check for decapsulation and pass packets directly to VIRTIO device */ +int +vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts_burst, + uint32_t 
rx_count) +{ + uint32_t i = 0; + uint32_t count = 0; + int ret; + struct rte_mbuf *pkts_valid[rx_count]; + + for (i = 0; i < rx_count; i++) { + if (enable_stats) { + rte_atomic64_add( + &dev_statistics[dev->device_fh].rx_bad_ip_csum, + (pkts_burst[i]->ol_flags & PKT_RX_IP_CKSUM_BAD) + != 0); + rte_atomic64_add( + &dev_statistics[dev->device_fh].rx_bad_ip_csum, + (pkts_burst[i]->ol_flags & PKT_RX_L4_CKSUM_BAD) + != 0); + } + ret = vxlan_rx_process(pkts_burst[i]); + if (unlikely(ret < 0)) + continue; + + pkts_valid[count] = pkts_burst[i]; + count++; + } + + ret = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_valid, count); + return ret; +} diff --git a/examples/tep_termination/vxlan_setup.h b/examples/tep_termination/vxlan_setup.h new file mode 100644 index 00000000..1846540f --- /dev/null +++ b/examples/tep_termination/vxlan_setup.h @@ -0,0 +1,87 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef VXLAN_SETUP_H_ +#define VXLAN_SETUP_H_ + +extern uint16_t nb_devices; +extern uint16_t udp_port; +extern uint8_t filter_idx; +extern uint8_t ports[RTE_MAX_ETHPORTS]; +extern struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +extern uint32_t enable_stats; +extern struct device_statistics dev_statistics[MAX_DEVICES]; +extern uint8_t rx_decap; +extern uint8_t tx_encap; + +typedef int (*ol_port_configure_t)(uint8_t port, + struct rte_mempool *mbuf_pool); + +typedef int (*ol_tunnel_setup_t)(struct vhost_dev *vdev, + struct rte_mbuf *m); + +typedef void (*ol_tunnel_destroy_t)(struct vhost_dev *vdev); + +typedef int (*ol_tx_handle_t)(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts); + +typedef int (*ol_rx_handle_t)(struct virtio_net *dev, struct rte_mbuf **pkts, + uint32_t count); + +typedef int (*ol_param_handle)(struct virtio_net *dev); + +struct ol_switch_ops { + ol_port_configure_t port_configure; + ol_tunnel_setup_t tunnel_setup; + ol_tunnel_destroy_t tunnel_destroy; + ol_tx_handle_t tx_handle; + ol_rx_handle_t rx_handle; + ol_param_handle param_handle; +}; + +int +vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool); + +int +vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m); + +void +vxlan_unlink(struct vhost_dev *vdev); + +int +vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +int +vxlan_rx_pkts(struct virtio_net *dev, 
struct rte_mbuf **pkts, uint32_t count); + +#endif /* VXLAN_SETUP_H_ */ diff --git a/examples/timer/Makefile b/examples/timer/Makefile new file mode 100644 index 00000000..af12b7ba --- /dev/null +++ b/examples/timer/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = timer + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/timer/main.c b/examples/timer/main.c new file mode 100644 index 00000000..37ad559e --- /dev/null +++ b/examples/timer/main.c @@ -0,0 +1,151 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <sys/queue.h> + +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_cycles.h> +#include <rte_timer.h> +#include <rte_debug.h> + +#define TIMER_RESOLUTION_CYCLES 20000000ULL /* around 10ms at 2 Ghz */ + +static struct rte_timer timer0; +static struct rte_timer timer1; + +/* timer0 callback */ +static void +timer0_cb(__attribute__((unused)) struct rte_timer *tim, + __attribute__((unused)) void *arg) +{ + static unsigned counter = 0; + unsigned lcore_id = rte_lcore_id(); + + printf("%s() on lcore %u\n", __func__, lcore_id); + + /* this timer is automatically reloaded until we decide to + * stop it, when counter reaches 20. 
*/ + if ((counter ++) == 20) + rte_timer_stop(tim); +} + +/* timer1 callback */ +static void +timer1_cb(__attribute__((unused)) struct rte_timer *tim, + __attribute__((unused)) void *arg) +{ + unsigned lcore_id = rte_lcore_id(); + uint64_t hz; + + printf("%s() on lcore %u\n", __func__, lcore_id); + + /* reload it on another lcore */ + hz = rte_get_timer_hz(); + lcore_id = rte_get_next_lcore(lcore_id, 0, 1); + rte_timer_reset(tim, hz/3, SINGLE, lcore_id, timer1_cb, NULL); +} + +static __attribute__((noreturn)) int +lcore_mainloop(__attribute__((unused)) void *arg) +{ + uint64_t prev_tsc = 0, cur_tsc, diff_tsc; + unsigned lcore_id; + + lcore_id = rte_lcore_id(); + printf("Starting mainloop on core %u\n", lcore_id); + + while (1) { + /* + * Call the timer handler on each core: as we don't + * need a very precise timer, so only call + * rte_timer_manage() every ~10ms (at 2Ghz). In a real + * application, this will enhance performances as + * reading the HPET timer is not efficient. + */ + cur_tsc = rte_rdtsc(); + diff_tsc = cur_tsc - prev_tsc; + if (diff_tsc > TIMER_RESOLUTION_CYCLES) { + rte_timer_manage(); + prev_tsc = cur_tsc; + } + } +} + +int +main(int argc, char **argv) +{ + int ret; + uint64_t hz; + unsigned lcore_id; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_panic("Cannot init EAL\n"); + + /* init RTE timer library */ + rte_timer_subsystem_init(); + + /* init timer structures */ + rte_timer_init(&timer0); + rte_timer_init(&timer1); + + /* load timer0, every second, on master lcore, reloaded automatically */ + hz = rte_get_timer_hz(); + lcore_id = rte_lcore_id(); + rte_timer_reset(&timer0, hz, PERIODICAL, lcore_id, timer0_cb, NULL); + + /* load timer1, every second/3, on next lcore, reloaded manually */ + lcore_id = rte_get_next_lcore(lcore_id, 0, 1); + rte_timer_reset(&timer1, hz/3, SINGLE, lcore_id, timer1_cb, NULL); + + /* call lcore_mainloop() on every slave lcore */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + 
rte_eal_remote_launch(lcore_mainloop, NULL, lcore_id); + } + + /* call it on master lcore too */ + (void) lcore_mainloop(NULL); + + return 0; +} diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile new file mode 100644 index 00000000..e95c68ae --- /dev/null +++ b/examples/vhost/Makefile @@ -0,0 +1,59 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(info This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +all: +else + +# binary name +APP = vhost-switch + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O2 -D_FILE_OFFSET_BITS=64 +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -D_GNU_SOURCE + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif diff --git a/examples/vhost/main.c b/examples/vhost/main.c new file mode 100644 index 00000000..28c17afd --- /dev/null +++ b/examples/vhost/main.c @@ -0,0 +1,3157 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <arpa/inet.h> +#include <getopt.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/virtio_net.h> +#include <linux/virtio_ring.h> +#include <signal.h> +#include <stdint.h> +#include <sys/eventfd.h> +#include <sys/param.h> +#include <unistd.h> + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_log.h> +#include <rte_string_fns.h> +#include <rte_malloc.h> +#include <rte_virtio_net.h> +#include <rte_ip.h> +#include <rte_tcp.h> + +#include "main.h" + +#ifndef MAX_QUEUES +#define MAX_QUEUES 128 +#endif + +/* the maximum number of external ports supported */ +#define MAX_SUP_PORTS 1 + +/* + * Calculate the number of buffers needed per port + */ +#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ + (num_switching_cores*MAX_PKT_BURST) + \ + (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\ + ((num_switching_cores+1)*MBUF_CACHE_SIZE)) + +#define MBUF_CACHE_SIZE 128 +#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE + +/* + * No frame data buffer allocated from host are required for zero copy + * implementation, guest will allocate the frame data buffer, and vhost + * directly use it. 
+ */ +#define VIRTIO_DESCRIPTOR_LEN_ZCP RTE_MBUF_DEFAULT_DATAROOM +#define MBUF_DATA_SIZE_ZCP RTE_MBUF_DEFAULT_BUF_SIZE +#define MBUF_CACHE_SIZE_ZCP 0 + +#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ +#define BURST_RX_RETRIES 4 /* Number of retries on RX. */ + +#define JUMBO_FRAME_MAX_SIZE 0x2600 + +/* State of virtio device. */ +#define DEVICE_MAC_LEARNING 0 +#define DEVICE_RX 1 +#define DEVICE_SAFE_REMOVE 2 + +/* Config_core_flag status definitions. */ +#define REQUEST_DEV_REMOVAL 1 +#define ACK_DEV_REMOVAL 0 + +/* Configurable number of RX/TX ring descriptors */ +#define RTE_TEST_RX_DESC_DEFAULT 1024 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +/* + * Need refine these 2 macros for legacy and DPDK based front end: + * Max vring avail descriptor/entries from guest - MAX_PKT_BURST + * And then adjust power 2. + */ +/* + * For legacy front end, 128 descriptors, + * half for virtio header, another half for mbuf. + */ +#define RTE_TEST_RX_DESC_DEFAULT_ZCP 32 /* legacy: 32, DPDK virt FE: 128. */ +#define RTE_TEST_TX_DESC_DEFAULT_ZCP 64 /* legacy: 64, DPDK virt FE: 64. */ + +/* Get first 4 bytes in mbuf headroom. */ +#define MBUF_HEADROOM_UINT32(mbuf) (*(uint32_t *)((uint8_t *)(mbuf) \ + + sizeof(struct rte_mbuf))) + +/* true if x is a power of 2 */ +#define POWEROF2(x) ((((x)-1) & (x)) == 0) + +#define INVALID_PORT_ID 0xFF + +/* Max number of devices. Limited by vmdq. */ +#define MAX_DEVICES 64 + +/* Size of buffers used for snprintfs. */ +#define MAX_PRINT_BUFF 6072 + +/* Maximum character device basename size. */ +#define MAX_BASENAME_SZ 10 + +/* Maximum long option length for option parsing. */ +#define MAX_LONG_OPT_SZ 64 + +/* Used to compare MAC addresses. */ +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL + +/* Number of descriptors per cacheline. 
*/ +#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) + +#define MBUF_EXT_MEM(mb) (rte_mbuf_from_indirect(mb) != (mb)) + +/* mask of enabled ports */ +static uint32_t enabled_port_mask = 0; + +/* Promiscuous mode */ +static uint32_t promiscuous; + +/*Number of switching cores enabled*/ +static uint32_t num_switching_cores = 0; + +/* number of devices/queues to support*/ +static uint32_t num_queues = 0; +static uint32_t num_devices; + +/* + * Enable zero copy, pkts buffer will directly dma to hw descriptor, + * disabled on default. + */ +static uint32_t zero_copy; +static int mergeable; + +/* Do vlan strip on host, enabled on default */ +static uint32_t vlan_strip = 1; + +/* number of descriptors to apply*/ +static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP; +static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP; + +/* max ring descriptor, ixgbe, i40e, e1000 all are 4096. */ +#define MAX_RING_DESC 4096 + +struct vpool { + struct rte_mempool *pool; + struct rte_ring *ring; + uint32_t buf_size; +} vpool_array[MAX_QUEUES+MAX_QUEUES]; + +/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */ +typedef enum { + VM2VM_DISABLED = 0, + VM2VM_SOFTWARE = 1, + VM2VM_HARDWARE = 2, + VM2VM_LAST +} vm2vm_type; +static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE; + +/* The type of host physical address translated from guest physical address. */ +typedef enum { + PHYS_ADDR_CONTINUOUS = 0, + PHYS_ADDR_CROSS_SUBREG = 1, + PHYS_ADDR_INVALID = 2, + PHYS_ADDR_LAST +} hpa_type; + +/* Enable stats. */ +static uint32_t enable_stats = 0; +/* Enable retries on RX. */ +static uint32_t enable_retry = 1; + +/* Disable TX checksum offload */ +static uint32_t enable_tx_csum; + +/* Disable TSO offload */ +static uint32_t enable_tso; + +/* Specify timeout (in useconds) between retries on RX. */ +static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US; +/* Specify the number of retries on RX. 
*/ +static uint32_t burst_rx_retry_num = BURST_RX_RETRIES; + +/* Character device basename. Can be set by user. */ +static char dev_basename[MAX_BASENAME_SZ] = "vhost-net"; + +/* empty vmdq configuration structure. Filled in programatically */ +static struct rte_eth_conf vmdq_conf_default = { + .rxmode = { + .mq_mode = ETH_MQ_RX_VMDQ_ONLY, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + /* + * It is necessary for 1G NIC such as I350, + * this fixes bug of ipv4 forwarding in guest can't + * forward pakets from one virtio dev to another virtio dev. + */ + .hw_vlan_strip = 1, /**< VLAN strip enabled. */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, + .rx_adv_conf = { + /* + * should be overridden separately in code with + * appropriate values + */ + .vmdq_rx_conf = { + .nb_queue_pools = ETH_8_POOLS, + .enable_default_pool = 0, + .default_pool = 0, + .nb_pool_maps = 0, + .pool_map = {{0, 0},}, + }, + }, +}; + +static unsigned lcore_ids[RTE_MAX_LCORE]; +static uint8_t ports[RTE_MAX_ETHPORTS]; +static unsigned num_ports = 0; /**< The number of ports specified in command line */ +static uint16_t num_pf_queues, num_vmdq_queues; +static uint16_t vmdq_pool_base, vmdq_queue_base; +static uint16_t queues_per_pool; + +static const uint16_t external_pkt_default_vlan_tag = 2000; +const uint16_t vlan_tags[] = { + 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, + 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, + 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, + 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, + 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, + 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, + 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, + 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, +}; + +/* ethernet 
addresses of ports */ +static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* heads for the main used and free linked lists for the data path. */ +static struct virtio_net_data_ll *ll_root_used = NULL; +static struct virtio_net_data_ll *ll_root_free = NULL; + +/* Array of data core structures containing information on individual core linked lists. */ +static struct lcore_info lcore_info[RTE_MAX_LCORE]; + +/* Used for queueing bursts of TX packets. */ +struct mbuf_table { + unsigned len; + unsigned txq_id; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +/* TX queue for each data core. */ +struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE]; + +/* TX queue fori each virtio device for zero copy. */ +struct mbuf_table tx_queue_zcp[MAX_QUEUES]; + +/* Vlan header struct used to insert vlan tags on TX. */ +struct vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +/* Header lengths. */ +#define VLAN_HLEN 4 +#define VLAN_ETH_HLEN 18 + +/* Per-device statistics struct */ +struct device_statistics { + uint64_t tx_total; + rte_atomic64_t rx_total_atomic; + uint64_t rx_total; + uint64_t tx; + rte_atomic64_t rx_atomic; + uint64_t rx; +} __rte_cache_aligned; +struct device_statistics dev_statistics[MAX_DEVICES]; + +/* + * Builds up the correct configuration for VMDQ VLAN pool map + * according to the pool & queue limits. 
+ */ +static inline int +get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices) +{ + struct rte_eth_vmdq_rx_conf conf; + struct rte_eth_vmdq_rx_conf *def_conf = + &vmdq_conf_default.rx_adv_conf.vmdq_rx_conf; + unsigned i; + + memset(&conf, 0, sizeof(conf)); + conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices; + conf.nb_pool_maps = num_devices; + conf.enable_loop_back = def_conf->enable_loop_back; + conf.rx_mode = def_conf->rx_mode; + + for (i = 0; i < conf.nb_pool_maps; i++) { + conf.pool_map[i].vlan_id = vlan_tags[ i ]; + conf.pool_map[i].pools = (1UL << i); + } + + (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf))); + (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf, + sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf))); + return 0; +} + +/* + * Validate the device number according to the max pool number gotten form + * dev_info. If the device number is invalid, give the error message and + * return -1. Each device must have its own pool. + */ +static inline int +validate_num_devices(uint32_t max_nb_devices) +{ + if (num_devices > max_nb_devices) { + RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n"); + return -1; + } + return 0; +} + +/* + * Initialises a given port using global settings and with the rx buffers + * coming from the mbuf_pool passed as parameter + */ +static inline int +port_init(uint8_t port) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_conf port_conf; + struct rte_eth_rxconf *rxconf; + struct rte_eth_txconf *txconf; + int16_t rx_rings, tx_rings; + uint16_t rx_ring_size, tx_ring_size; + int retval; + uint16_t q; + + /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */ + rte_eth_dev_info_get (port, &dev_info); + + if (dev_info.max_rx_queues > MAX_QUEUES) { + rte_exit(EXIT_FAILURE, + "please define MAX_QUEUES no less than %u in %s\n", + dev_info.max_rx_queues, __FILE__); + } + + rxconf = &dev_info.default_rxconf; + txconf = 
&dev_info.default_txconf; + rxconf->rx_drop_en = 1; + + /* Enable vlan offload */ + txconf->txq_flags &= ~ETH_TXQ_FLAGS_NOVLANOFFL; + + /* + * Zero copy defers queue RX/TX start to the time when guest + * finishes its startup and packet buffers from that guest are + * available. + */ + if (zero_copy) { + rxconf->rx_deferred_start = 1; + rxconf->rx_drop_en = 0; + txconf->tx_deferred_start = 1; + } + + /*configure the number of supported virtio devices based on VMDQ limits */ + num_devices = dev_info.max_vmdq_pools; + + if (zero_copy) { + rx_ring_size = num_rx_descriptor; + tx_ring_size = num_tx_descriptor; + tx_rings = dev_info.max_tx_queues; + } else { + rx_ring_size = RTE_TEST_RX_DESC_DEFAULT; + tx_ring_size = RTE_TEST_TX_DESC_DEFAULT; + tx_rings = (uint16_t)rte_lcore_count(); + } + + retval = validate_num_devices(MAX_DEVICES); + if (retval < 0) + return retval; + + /* Get port configuration. */ + retval = get_eth_conf(&port_conf, num_devices); + if (retval < 0) + return retval; + /* NIC queues are divided into pf queues and vmdq queues. */ + num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num; + queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools; + num_vmdq_queues = num_devices * queues_per_pool; + num_queues = num_pf_queues + num_vmdq_queues; + vmdq_queue_base = dev_info.vmdq_queue_base; + vmdq_pool_base = dev_info.vmdq_pool_base; + printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n", + num_pf_queues, num_devices, queues_per_pool); + + if (port >= rte_eth_dev_count()) return -1; + + if (enable_tx_csum == 0) + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM); + + if (enable_tso == 0) { + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6); + } + + rx_rings = (uint16_t)dev_info.max_rx_queues; + /* Configure ethernet device. 
*/ + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval != 0) + return retval; + + /* Setup the queues. */ + for (q = 0; q < rx_rings; q ++) { + retval = rte_eth_rx_queue_setup(port, q, rx_ring_size, + rte_eth_dev_socket_id(port), + rxconf, + vpool_array[q].pool); + if (retval < 0) + return retval; + } + for (q = 0; q < tx_rings; q ++) { + retval = rte_eth_tx_queue_setup(port, q, tx_ring_size, + rte_eth_dev_socket_id(port), + txconf); + if (retval < 0) + return retval; + } + + /* Start the device. */ + retval = rte_eth_dev_start(port); + if (retval < 0) { + RTE_LOG(ERR, VHOST_DATA, "Failed to start the device.\n"); + return retval; + } + + if (promiscuous) + rte_eth_promiscuous_enable(port); + + rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]); + RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices); + RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port, + vmdq_ports_eth_addr[port].addr_bytes[0], + vmdq_ports_eth_addr[port].addr_bytes[1], + vmdq_ports_eth_addr[port].addr_bytes[2], + vmdq_ports_eth_addr[port].addr_bytes[3], + vmdq_ports_eth_addr[port].addr_bytes[4], + vmdq_ports_eth_addr[port].addr_bytes[5]); + + return 0; +} + +/* + * Set character device basename. + */ +static int +us_vhost_parse_basename(const char *q_arg) +{ + /* parse number string */ + + if (strnlen(q_arg, MAX_BASENAME_SZ) > MAX_BASENAME_SZ) + return -1; + else + snprintf((char*)&dev_basename, MAX_BASENAME_SZ, "%s", q_arg); + + return 0; +} + +/* + * Parse the portmask provided at run time. + */ +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + errno = 0; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0)) + return -1; + + if (pm == 0) + return -1; + + return pm; + +} + +/* + * Parse num options at run time. 
+ */ +static int +parse_num_opt(const char *q_arg, uint32_t max_valid_value) +{ + char *end = NULL; + unsigned long num; + + errno = 0; + + /* parse unsigned int string */ + num = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0)) + return -1; + + if (num > max_valid_value) + return -1; + + return num; + +} + +/* + * Display usage + */ +static void +us_vhost_usage(const char *prgname) +{ + RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n" + " --vm2vm [0|1|2]\n" + " --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n" + " --dev-basename <name>\n" + " --nb-devices ND\n" + " -p PORTMASK: Set mask for ports to be used by application\n" + " --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n" + " --rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destintation queue is full\n" + " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n" + " --rx-retry-num [0-N]: the number of retries on rx. This makes effect only if retries on rx enabled\n" + " --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n" + " --vlan-strip [0|1]: disable/enable(default) RX VLAN strip on host\n" + " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n" + " --dev-basename: The basename to be used for the character device.\n" + " --zero-copy [0|1]: disable(default)/enable rx/tx " + "zero copy\n" + " --rx-desc-num [0-N]: the number of descriptors on rx, " + "used only when zero copy is enabled.\n" + " --tx-desc-num [0-N]: the number of descriptors on tx, " + "used only when zero copy is enabled.\n" + " --tx-csum [0|1] disable/enable TX checksum offload.\n" + " --tso [0|1] disable/enable TCP segment offload.\n", + prgname); +} + +/* + * Parse the arguments given in the command line of the application. 
+ */ +static int +us_vhost_parse_args(int argc, char **argv) +{ + int opt, ret; + int option_index; + unsigned i; + const char *prgname = argv[0]; + static struct option long_option[] = { + {"vm2vm", required_argument, NULL, 0}, + {"rx-retry", required_argument, NULL, 0}, + {"rx-retry-delay", required_argument, NULL, 0}, + {"rx-retry-num", required_argument, NULL, 0}, + {"mergeable", required_argument, NULL, 0}, + {"vlan-strip", required_argument, NULL, 0}, + {"stats", required_argument, NULL, 0}, + {"dev-basename", required_argument, NULL, 0}, + {"zero-copy", required_argument, NULL, 0}, + {"rx-desc-num", required_argument, NULL, 0}, + {"tx-desc-num", required_argument, NULL, 0}, + {"tx-csum", required_argument, NULL, 0}, + {"tso", required_argument, NULL, 0}, + {NULL, 0, 0, 0}, + }; + + /* Parse command line */ + while ((opt = getopt_long(argc, argv, "p:P", + long_option, &option_index)) != EOF) { + switch (opt) { + /* Portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n"); + us_vhost_usage(prgname); + return -1; + } + break; + + case 'P': + promiscuous = 1; + vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode = + ETH_VMDQ_ACCEPT_BROADCAST | + ETH_VMDQ_ACCEPT_MULTICAST; + rte_vhost_feature_enable(1ULL << VIRTIO_NET_F_CTRL_RX); + + break; + + case 0: + /* Enable/disable vm2vm comms. */ + if (!strncmp(long_option[option_index].name, "vm2vm", + MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, (VM2VM_LAST - 1)); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for " + "vm2vm [0|1|2]\n"); + us_vhost_usage(prgname); + return -1; + } else { + vm2vm_mode = (vm2vm_type)ret; + } + } + + /* Enable/disable retries on RX. 
*/ + if (!strncmp(long_option[option_index].name, "rx-retry", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else { + enable_retry = ret; + } + } + + /* Enable/disable TX checksum offload. */ + if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + enable_tx_csum = ret; + } + + /* Enable/disable TSO offload. */ + if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + enable_tso = ret; + } + + /* Specify the retries delay time (in useconds) on RX. */ + if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n"); + us_vhost_usage(prgname); + return -1; + } else { + burst_rx_delay_time = ret; + } + } + + /* Specify the retries number on RX. */ + if (!strncmp(long_option[option_index].name, "rx-retry-num", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n"); + us_vhost_usage(prgname); + return -1; + } else { + burst_rx_retry_num = ret; + } + } + + /* Enable/disable RX mergeable buffers. 
*/ + if (!strncmp(long_option[option_index].name, "mergeable", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else { + mergeable = !!ret; + if (ret) { + vmdq_conf_default.rxmode.jumbo_frame = 1; + vmdq_conf_default.rxmode.max_rx_pkt_len + = JUMBO_FRAME_MAX_SIZE; + } + } + } + + /* Enable/disable RX VLAN strip on host. */ + if (!strncmp(long_option[option_index].name, + "vlan-strip", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for VLAN strip [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else { + vlan_strip = !!ret; + vmdq_conf_default.rxmode.hw_vlan_strip = + vlan_strip; + } + } + + /* Enable/disable stats. */ + if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for stats [0..N]\n"); + us_vhost_usage(prgname); + return -1; + } else { + enable_stats = ret; + } + } + + /* Set character device basename. */ + if (!strncmp(long_option[option_index].name, "dev-basename", MAX_LONG_OPT_SZ)) { + if (us_vhost_parse_basename(optarg) == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for character device basename (Max %d characters)\n", MAX_BASENAME_SZ); + us_vhost_usage(prgname); + return -1; + } + } + + /* Enable/disable rx/tx zero copy. */ + if (!strncmp(long_option[option_index].name, + "zero-copy", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument" + " for zero-copy [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + zero_copy = ret; + } + + /* Specify the descriptor number on RX. 
*/ + if (!strncmp(long_option[option_index].name, + "rx-desc-num", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, MAX_RING_DESC); + if ((ret == -1) || (!POWEROF2(ret))) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for rx-desc-num[0-N]," + "power of 2 required.\n"); + us_vhost_usage(prgname); + return -1; + } else { + num_rx_descriptor = ret; + } + } + + /* Specify the descriptor number on TX. */ + if (!strncmp(long_option[option_index].name, + "tx-desc-num", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, MAX_RING_DESC); + if ((ret == -1) || (!POWEROF2(ret))) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for tx-desc-num [0-N]," + "power of 2 required.\n"); + us_vhost_usage(prgname); + return -1; + } else { + num_tx_descriptor = ret; + } + } + + break; + + /* Invalid option - print options. */ + default: + us_vhost_usage(prgname); + return -1; + } + } + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (enabled_port_mask & (1 << i)) + ports[num_ports++] = (uint8_t)i; + } + + if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) { + RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u," + "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS); + return -1; + } + + if ((zero_copy == 1) && (vm2vm_mode == VM2VM_SOFTWARE)) { + RTE_LOG(INFO, VHOST_PORT, + "Vhost zero copy doesn't support software vm2vm," + "please specify 'vm2vm 2' to use hardware vm2vm.\n"); + return -1; + } + + if ((zero_copy == 1) && (vmdq_conf_default.rxmode.jumbo_frame == 1)) { + RTE_LOG(INFO, VHOST_PORT, + "Vhost zero copy doesn't support jumbo frame," + "please specify '--mergeable 0' to disable the " + "mergeable feature.\n"); + return -1; + } + + return 0; +} + +/* + * Update the global var NUM_PORTS and array PORTS according to system ports number + * and return valid ports number + */ +static unsigned check_ports_num(unsigned nb_ports) +{ + unsigned valid_num_ports = num_ports; + unsigned portid; + + if (num_ports > nb_ports) { + RTE_LOG(INFO, VHOST_PORT, 
"\nSpecified port number(%u) exceeds total system port number(%u)\n", + num_ports, nb_ports); + num_ports = nb_ports; + } + + for (portid = 0; portid < num_ports; portid ++) { + if (ports[portid] >= nb_ports) { + RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n", + ports[portid], (nb_ports - 1)); + ports[portid] = INVALID_PORT_ID; + valid_num_ports--; + } + } + return valid_num_ports; +} + +/* + * Macro to print out packet contents. Wrapped in debug define so that the + * data path is not effected when debug is disabled. + */ +#ifdef DEBUG +#define PRINT_PACKET(device, addr, size, header) do { \ + char *pkt_addr = (char*)(addr); \ + unsigned int index; \ + char packet[MAX_PRINT_BUFF]; \ + \ + if ((header)) \ + snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \ + else \ + snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \ + for (index = 0; index < (size); index++) { \ + snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), \ + "%02hhx ", pkt_addr[index]); \ + } \ + snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), "\n"); \ + \ + LOG_DEBUG(VHOST_DATA, "%s", packet); \ +} while(0) +#else +#define PRINT_PACKET(device, addr, size, header) do{} while(0) +#endif + +/* + * Function to convert guest physical addresses to vhost physical addresses. + * This is used to convert virtio buffer addresses. 
+ */ +static inline uint64_t __attribute__((always_inline)) +gpa_to_hpa(struct vhost_dev *vdev, uint64_t guest_pa, + uint32_t buf_len, hpa_type *addr_type) +{ + struct virtio_memory_regions_hpa *region; + uint32_t regionidx; + uint64_t vhost_pa = 0; + + *addr_type = PHYS_ADDR_INVALID; + + for (regionidx = 0; regionidx < vdev->nregions_hpa; regionidx++) { + region = &vdev->regions_hpa[regionidx]; + if ((guest_pa >= region->guest_phys_address) && + (guest_pa <= region->guest_phys_address_end)) { + vhost_pa = region->host_phys_addr_offset + guest_pa; + if (likely((guest_pa + buf_len - 1) + <= region->guest_phys_address_end)) + *addr_type = PHYS_ADDR_CONTINUOUS; + else + *addr_type = PHYS_ADDR_CROSS_SUBREG; + break; + } + } + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| HPA %p\n", + vdev->dev->device_fh, (void *)(uintptr_t)guest_pa, + (void *)(uintptr_t)vhost_pa); + + return vhost_pa; +} + +/* + * Compares a packet destination MAC address to a device MAC address. + */ +static inline int __attribute__((always_inline)) +ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb) +{ + return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0; +} + +/* + * This function learns the MAC address of the device and registers this along with a + * vlan tag to a VMDQ. 
+ */ +static int +link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m) +{ + struct ether_hdr *pkt_hdr; + struct virtio_net_data_ll *dev_ll; + struct virtio_net *dev = vdev->dev; + int i, ret; + + /* Learn MAC address of guest device from packet */ + pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + dev_ll = ll_root_used; + + while (dev_ll != NULL) { + if (ether_addr_cmp(&(pkt_hdr->s_addr), &dev_ll->vdev->mac_address)) { + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh); + return -1; + } + dev_ll = dev_ll->next; + } + + for (i = 0; i < ETHER_ADDR_LEN; i++) + vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i]; + + /* vlan_tag currently uses the device_id. */ + vdev->vlan_tag = vlan_tags[dev->device_fh]; + + /* Print out VMDQ registration info. */ + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n", + dev->device_fh, + vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1], + vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3], + vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5], + vdev->vlan_tag); + + /* Register the MAC address. */ + ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address, + (uint32_t)dev->device_fh + vmdq_pool_base); + if (ret) + RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n", + dev->device_fh); + + /* Enable stripping of the vlan tag as we handle routing. */ + if (vlan_strip) + rte_eth_dev_set_vlan_strip_on_queue(ports[0], + (uint16_t)vdev->vmdq_rx_q, 1); + + /* Set device as ready for RX. */ + vdev->ready = DEVICE_RX; + + return 0; +} + +/* + * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX + * queue before disabling RX on the device. 
+ */ +static inline void +unlink_vmdq(struct vhost_dev *vdev) +{ + unsigned i = 0; + unsigned rx_count; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + + if (vdev->ready == DEVICE_RX) { + /*clear MAC and VLAN settings*/ + rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address); + for (i = 0; i < 6; i++) + vdev->mac_address.addr_bytes[i] = 0; + + vdev->vlan_tag = 0; + + /*Clear out the receive buffers*/ + rx_count = rte_eth_rx_burst(ports[0], + (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); + + while (rx_count) { + for (i = 0; i < rx_count; i++) + rte_pktmbuf_free(pkts_burst[i]); + + rx_count = rte_eth_rx_burst(ports[0], + (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); + } + + vdev->ready = DEVICE_MAC_LEARNING; + } +} + +/* + * Check if the packet destination MAC address is for a local device. If so then put + * the packet on that devices RX queue. If not then return. + */ +static inline int __attribute__((always_inline)) +virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) +{ + struct virtio_net_data_ll *dev_ll; + struct ether_hdr *pkt_hdr; + uint64_t ret = 0; + struct virtio_net *dev = vdev->dev; + struct virtio_net *tdev; /* destination virito device */ + + pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /*get the used devices list*/ + dev_ll = ll_root_used; + + while (dev_ll != NULL) { + if ((dev_ll->vdev->ready == DEVICE_RX) && ether_addr_cmp(&(pkt_hdr->d_addr), + &dev_ll->vdev->mac_address)) { + + /* Drop the packet if the TX packet is destined for the TX device. */ + if (dev_ll->vdev->dev->device_fh == dev->device_fh) { + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: Source and destination MAC addresses are the same. 
Dropping packet.\n", + dev->device_fh); + return 0; + } + tdev = dev_ll->vdev->dev; + + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is local\n", tdev->device_fh); + + if (unlikely(dev_ll->vdev->remove)) { + /*drop the packet if the device is marked for removal*/ + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Device is marked for removal\n", tdev->device_fh); + } else { + /*send the packet to the local virtio device*/ + ret = rte_vhost_enqueue_burst(tdev, VIRTIO_RXQ, &m, 1); + if (enable_stats) { + rte_atomic64_add( + &dev_statistics[tdev->device_fh].rx_total_atomic, + 1); + rte_atomic64_add( + &dev_statistics[tdev->device_fh].rx_atomic, + ret); + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx += ret; + } + } + + return 0; + } + dev_ll = dev_ll->next; + } + + return -1; +} + +/* + * Check if the destination MAC of a packet is one local VM, + * and get its vlan tag, and offset if it is. + */ +static inline int __attribute__((always_inline)) +find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, + uint32_t *offset, uint16_t *vlan_tag) +{ + struct virtio_net_data_ll *dev_ll = ll_root_used; + struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + while (dev_ll != NULL) { + if ((dev_ll->vdev->ready == DEVICE_RX) + && ether_addr_cmp(&(pkt_hdr->d_addr), + &dev_ll->vdev->mac_address)) { + /* + * Drop the packet if the TX packet is + * destined for the TX device. + */ + if (dev_ll->vdev->dev->device_fh == dev->device_fh) { + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") TX: Source and destination" + " MAC addresses are the same. Dropping " + "packet.\n", + dev_ll->vdev->dev->device_fh); + return -1; + } + + /* + * HW vlan strip will reduce the packet length + * by minus length of vlan tag, so need restore + * the packet length by plus it. 
+ */ + *offset = VLAN_HLEN; + *vlan_tag = + (uint16_t) + vlan_tags[(uint16_t)dev_ll->vdev->dev->device_fh]; + + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") TX: pkt to local VM device id:" + "(%"PRIu64") vlan tag: %d.\n", + dev->device_fh, dev_ll->vdev->dev->device_fh, + (int)*vlan_tag); + + break; + } + dev_ll = dev_ll->next; + } + return 0; +} + +static uint16_t +get_psd_sum(void *l3_hdr, uint64_t ol_flags) +{ + if (ol_flags & PKT_TX_IPV4) + return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); + else /* assume ethertype == ETHER_TYPE_IPv6 */ + return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); +} + +static void virtio_tx_offload(struct rte_mbuf *m) +{ + void *l3_hdr; + struct ipv4_hdr *ipv4_hdr = NULL; + struct tcp_hdr *tcp_hdr = NULL; + struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + l3_hdr = (char *)eth_hdr + m->l2_len; + + if (m->ol_flags & PKT_TX_IPV4) { + ipv4_hdr = l3_hdr; + ipv4_hdr->hdr_checksum = 0; + m->ol_flags |= PKT_TX_IP_CKSUM; + } + + tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len); + tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags); +} + +/* + * This function routes the TX packet to the correct interface. This may be a local device + * or the physical port. 
 *
 * vdev is the sending device, m the packet and vlan_tag the tag to apply.
 * The packet is either consumed locally (and freed here) or appended to the
 * calling lcore's TX table, which is flushed to ports[0] once it holds
 * MAX_PKT_BURST packets.
 */
static inline void __attribute__((always_inline))
virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
{
	struct mbuf_table *tx_q;
	struct rte_mbuf **m_table;
	unsigned len, ret, offset = 0;
	const uint16_t lcore_id = rte_lcore_id();
	struct virtio_net *dev = vdev->dev;
	struct ether_hdr *nh;

	/*check if destination is local VM*/
	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
		rte_pktmbuf_free(m);
		return;
	}

	/*
	 * In hardware vm2vm mode a non-zero result means the packet targets
	 * the sending device itself; on a local match offset and vlan_tag are
	 * updated for the destination device.
	 */
	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
		if (unlikely(find_local_dest(dev, m, &offset, &vlan_tag) != 0)) {
			rte_pktmbuf_free(m);
			return;
		}
	}

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n", dev->device_fh);

	/*Add packet to the port tx queue*/
	tx_q = &lcore_tx_queue[lcore_id];
	len = tx_q->len;

	nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
	if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
		/* Guest has inserted the vlan tag. */
		struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1);
		uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
		/* Rewrite the guest's tag only in hardware vm2vm mode. */
		if ((vm2vm_mode == VM2VM_HARDWARE) &&
			(vh->vlan_tci != vlan_tag_be))
			vh->vlan_tci = vlan_tag_be;
	} else {
		m->ol_flags |= PKT_TX_VLAN_PKT;

		/*
		 * Find the right seg to adjust the data len when offset is
		 * bigger than tail room size.
		 */
		if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
			if (likely(offset <= rte_pktmbuf_tailroom(m)))
				m->data_len += offset;
			else {
				struct rte_mbuf *seg = m;

				while ((seg->next != NULL) &&
					(offset > rte_pktmbuf_tailroom(seg)))
					seg = seg->next;

				seg->data_len += offset;
			}
			m->pkt_len += offset;
		}

		m->vlan_tci = vlan_tag;
	}

	if (m->ol_flags & PKT_TX_TCP_SEG)
		virtio_tx_offload(m);

	tx_q->m_table[len] = m;
	len++;
	if (enable_stats) {
		dev_statistics[dev->device_fh].tx_total++;
		dev_statistics[dev->device_fh].tx++;
	}

	/* Flush the table to the port once a full burst has accumulated. */
	if (unlikely(len == MAX_PKT_BURST)) {
		m_table = (struct rte_mbuf **)tx_q->m_table;
		ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
		/* Free any buffers not handled by TX and update the port stats. */
		if (unlikely(ret < len)) {
			do {
				rte_pktmbuf_free(m_table[ret]);
			} while (++ret < len);
		}

		len = 0;
	}

	tx_q->len = len;
	return;
}
/*
 * This function is called by each data core. It handles all RX/TX registered with the
 * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
 * with all devices in the main linked list.
 *
 * arg is the mempool that guest TX packets are dequeued into. The loop
 * never terminates, so the trailing return is not reached.
 * NOTE(review): arg carries an "unused" attribute although it is read below.
 */
static int
switch_worker(__attribute__((unused)) void *arg)
{
	struct rte_mempool *mbuf_pool = arg;
	struct virtio_net *dev = NULL;
	struct vhost_dev *vdev = NULL;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct virtio_net_data_ll *dev_ll;
	struct mbuf_table *tx_q;
	volatile struct lcore_ll_info *lcore_ll;
	/* TSC ticks per microsecond (rounded up) times the drain interval. */
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
	uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
	unsigned ret, i;
	const uint16_t lcore_id = rte_lcore_id();
	const uint16_t num_cores = (uint16_t)rte_lcore_count();
	uint16_t rx_count = 0;
	uint16_t tx_count;
	uint32_t retry = 0;

	RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id);
	lcore_ll = lcore_info[lcore_id].lcore_ll;
	prev_tsc = 0;

	/* The TX queue ID is this lcore's position in lcore_ids[]. */
	tx_q = &lcore_tx_queue[lcore_id];
	for (i = 0; i < num_cores; i ++) {
		if (lcore_ids[i] == lcore_id) {
			tx_q->txq_id = i;
			break;
		}
	}

	while(1) {
		cur_tsc = rte_rdtsc();
		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {

			if (tx_q->len) {
				LOG_DEBUG(VHOST_DATA, "TX queue drained after timeout with burst size %u \n", tx_q->len);

				/*Tx any packets in the queue*/
				ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
									   (struct rte_mbuf **)tx_q->m_table,
									   (uint16_t)tx_q->len);
				/* Free whatever the port did not accept. */
				if (unlikely(ret < tx_q->len)) {
					do {
						rte_pktmbuf_free(tx_q->m_table[ret]);
					} while (++ret < tx_q->len);
				}

				tx_q->len = 0;
			}

			prev_tsc = cur_tsc;

		}

		rte_prefetch0(lcore_ll->ll_root_used);
		/*
		 * Inform the configuration core that we have exited the linked list and that no devices are
		 * in use if requested.
		 */
		if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
			lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;

		/*
		 * Process devices
		 */
		dev_ll = lcore_ll->ll_root_used;

		while (dev_ll != NULL) {
			/*get virtio device ID*/
			vdev = dev_ll->vdev;
			dev = vdev->dev;

			/* Advance first: the device is skipped for the rest of this pass. */
			if (unlikely(vdev->remove)) {
				dev_ll = dev_ll->next;
				unlink_vmdq(vdev);
				vdev->ready = DEVICE_SAFE_REMOVE;
				continue;
			}
			if (likely(vdev->ready == DEVICE_RX)) {
				/*Handle guest RX*/
				rx_count = rte_eth_rx_burst(ports[0],
					vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);

				if (rx_count) {
					/*
					* Retry is enabled and the queue is full then we wait and retry to avoid packet loss
					* Here MAX_PKT_BURST must be less than virtio queue size
					*/
					if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev, VIRTIO_RXQ))) {
						for (retry = 0; retry < burst_rx_retry_num; retry++) {
							rte_delay_us(burst_rx_delay_time);
							if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
								break;
						}
					}
					ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_burst, rx_count);
					if (enable_stats) {
						rte_atomic64_add(
						&dev_statistics[dev_ll->vdev->dev->device_fh].rx_total_atomic,
						rx_count);
						rte_atomic64_add(
						&dev_statistics[dev_ll->vdev->dev->device_fh].rx_atomic, ret_count);
					}
					/* Every received mbuf is freed, whatever the enqueue count. */
					while (likely(rx_count)) {
						rx_count--;
						rte_pktmbuf_free(pkts_burst[rx_count]);
					}

				}
			}

			if (likely(!vdev->remove)) {
				/* Handle guest TX*/
				tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool, pkts_burst, MAX_PKT_BURST);
				/* If this is the first received packet we need to learn the MAC and setup VMDQ */
				if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
					/* On failure free the burst; tx_count reaches 0 so nothing is routed. */
					if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
						while (tx_count)
							rte_pktmbuf_free(pkts_burst[--tx_count]);
					}
				}
				for (i = 0; i < tx_count; ++i) {
					virtio_tx_route(vdev, pkts_burst[i],
						vlan_tags[(uint16_t)dev->device_fh]);
				}
			}

			/*move to the next device in the list*/
			dev_ll = dev_ll->next;
		}
	}

	return 0;
}

/*
 * This function gets available ring number for zero copy rx.
 * Only one thread will call this function for a particular virtio device,
 * so, it is designed as non-thread-safe function.
 *
 * Returns the guest's avail index minus the reserved used index
 * (last_used_idx_res) of the RX virtqueue.
 */
static inline uint32_t __attribute__((always_inline))
get_available_ring_num_zcp(struct virtio_net *dev)
{
	struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];
	uint16_t avail_idx;

	/* Volatile read so the guest-updated index is fetched from memory. */
	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
	return (uint32_t)(avail_idx - vq->last_used_idx_res);
}

/*
 * This function gets available ring index for zero copy rx,
 * it will retry 'burst_rx_retry_num' times till it get enough ring index.
 * Only one thread will call this function for a particular virtio device,
 * so, it is designed as non-thread-safe function.
 *
 * *res_base_idx receives the first reserved index. Returns the number of
 * entries reserved, which may be fewer than count, or 0 when none are free.
 */
static inline uint32_t __attribute__((always_inline))
get_available_ring_index_zcp(struct virtio_net *dev,
	uint16_t *res_base_idx, uint32_t count)
{
	struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];
	uint16_t avail_idx;
	uint32_t retry = 0;
	uint16_t free_entries;

	*res_base_idx = vq->last_used_idx_res;
	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
	free_entries = (avail_idx - *res_base_idx);

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") in get_available_ring_index_zcp: "
			"avail idx: %d, "
			"res base idx:%d, free entries:%d\n",
			dev->device_fh, avail_idx, *res_base_idx,
			free_entries);

	/*
	 * If retry is enabled and the queue is full then we wait
	 * and retry to avoid packet loss.
	 */
	if (enable_retry && unlikely(count > free_entries)) {
		for (retry = 0; retry < burst_rx_retry_num; retry++) {
			rte_delay_us(burst_rx_delay_time);
			avail_idx = *((volatile uint16_t *)&vq->avail->idx);
			free_entries = (avail_idx - *res_base_idx);
			if (count <= free_entries)
				break;
		}
	}

	/*check that we have enough buffers*/
	if (unlikely(count > free_entries))
		count = free_entries;

	if (unlikely(count == 0)) {
		LOG_DEBUG(VHOST_DATA,
			"(%"PRIu64") Fail in get_available_ring_index_zcp: "
			"avail idx: %d, res base idx:%d, free entries:%d\n",
			dev->device_fh, avail_idx,
			*res_base_idx, free_entries);
		return 0;
	}

	/* Reserve the entries by advancing the reserved index. */
	vq->last_used_idx_res = *res_base_idx + count;

	return count;
}

/*
 * This function put descriptor back to used list.
 *
 * The used-ring entry is written with length 0, the used index is bumped
 * after a compiler barrier, and the guest is signalled unless it has set
 * VRING_AVAIL_F_NO_INTERRUPT.
 */
static inline void __attribute__((always_inline))
put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx)
{
	uint16_t res_cur_idx = vq->last_used_idx;
	vq->used->ring[res_cur_idx & (vq->size - 1)].id = (uint32_t)desc_idx;
	vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
	/* Keep the ring entry stores ahead of the index update. */
	rte_compiler_barrier();
	*(volatile uint16_t *)&vq->used->idx += 1;
	vq->last_used_idx += 1;

	/* Kick the guest if necessary. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write(vq->callfd, (eventfd_t)1);
}

/*
 * This function get available descriptor from virtio vring and un-attached mbuf
 * from vpool->ring, and then attach them together. It needs adjust the offset
 * for buff_addr and phys_addr according to PMD implementation, otherwise the
 * frame data may be put to wrong location in mbuf.
+ */ +static inline void __attribute__((always_inline)) +attach_rxmbuf_zcp(struct virtio_net *dev) +{ + uint16_t res_base_idx, desc_idx; + uint64_t buff_addr, phys_addr; + struct vhost_virtqueue *vq; + struct vring_desc *desc; + void *obj = NULL; + struct rte_mbuf *mbuf; + struct vpool *vpool; + hpa_type addr_type; + struct vhost_dev *vdev = (struct vhost_dev *)dev->priv; + + vpool = &vpool_array[vdev->vmdq_rx_q]; + vq = dev->virtqueue[VIRTIO_RXQ]; + + do { + if (unlikely(get_available_ring_index_zcp(vdev->dev, &res_base_idx, + 1) != 1)) + return; + desc_idx = vq->avail->ring[(res_base_idx) & (vq->size - 1)]; + + desc = &vq->desc[desc_idx]; + if (desc->flags & VRING_DESC_F_NEXT) { + desc = &vq->desc[desc->next]; + buff_addr = gpa_to_vva(dev, desc->addr); + phys_addr = gpa_to_hpa(vdev, desc->addr, desc->len, + &addr_type); + } else { + buff_addr = gpa_to_vva(dev, + desc->addr + vq->vhost_hlen); + phys_addr = gpa_to_hpa(vdev, + desc->addr + vq->vhost_hlen, + desc->len, &addr_type); + } + + if (unlikely(addr_type == PHYS_ADDR_INVALID)) { + RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Invalid frame buffer" + " address found when attaching RX frame buffer" + " address!\n", dev->device_fh); + put_desc_to_used_list_zcp(vq, desc_idx); + continue; + } + + /* + * Check if the frame buffer address from guest crosses + * sub-region or not. 
+ */ + if (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) { + RTE_LOG(ERR, VHOST_DATA, + "(%"PRIu64") Frame buffer address cross " + "sub-regioin found when attaching RX frame " + "buffer address!\n", + dev->device_fh); + put_desc_to_used_list_zcp(vq, desc_idx); + continue; + } + } while (unlikely(phys_addr == 0)); + + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; + if (unlikely(mbuf == NULL)) { + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in attach_rxmbuf_zcp: " + "ring_sc_dequeue fail.\n", + dev->device_fh); + put_desc_to_used_list_zcp(vq, desc_idx); + return; + } + + if (unlikely(vpool->buf_size > desc->len)) { + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in attach_rxmbuf_zcp: frame buffer " + "length(%d) of descriptor idx: %d less than room " + "size required: %d\n", + dev->device_fh, desc->len, desc_idx, vpool->buf_size); + put_desc_to_used_list_zcp(vq, desc_idx); + rte_ring_sp_enqueue(vpool->ring, obj); + return; + } + + mbuf->buf_addr = (void *)(uintptr_t)(buff_addr - RTE_PKTMBUF_HEADROOM); + mbuf->data_off = RTE_PKTMBUF_HEADROOM; + mbuf->buf_physaddr = phys_addr - RTE_PKTMBUF_HEADROOM; + mbuf->data_len = desc->len; + MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx; + + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in attach_rxmbuf_zcp: res base idx:%d, " + "descriptor idx:%d\n", + dev->device_fh, res_base_idx, desc_idx); + + __rte_mbuf_raw_free(mbuf); + + return; +} + +/* + * Detach an attched packet mbuf - + * - restore original mbuf address and length values. + * - reset pktmbuf data and data_len to their default values. + * All other fields of the given packet mbuf will be left intact. + * + * @param m + * The attached packet mbuf. 
+ */ +static inline void pktmbuf_detach_zcp(struct rte_mbuf *m) +{ + const struct rte_mempool *mp = m->pool; + void *buf = rte_mbuf_to_baddr(m); + uint32_t buf_ofs; + uint32_t buf_len = mp->elt_size - sizeof(*m); + m->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof(*m); + + m->buf_addr = buf; + m->buf_len = (uint16_t)buf_len; + + buf_ofs = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ? + RTE_PKTMBUF_HEADROOM : m->buf_len; + m->data_off = buf_ofs; + + m->data_len = 0; +} + +/* + * This function is called after packets have been transimited. It fetchs mbuf + * from vpool->pool, detached it and put into vpool->ring. It also update the + * used index and kick the guest if necessary. + */ +static inline uint32_t __attribute__((always_inline)) +txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) +{ + struct rte_mbuf *mbuf; + struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ]; + uint32_t used_idx = vq->last_used_idx & (vq->size - 1); + uint32_t index = 0; + uint32_t mbuf_count = rte_mempool_count(vpool->pool); + + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in mempool before " + "clean is: %d\n", + dev->device_fh, mbuf_count); + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in ring before " + "clean is : %d\n", + dev->device_fh, rte_ring_count(vpool->ring)); + + for (index = 0; index < mbuf_count; index++) { + mbuf = __rte_mbuf_raw_alloc(vpool->pool); + if (likely(MBUF_EXT_MEM(mbuf))) + pktmbuf_detach_zcp(mbuf); + rte_ring_sp_enqueue(vpool->ring, mbuf); + + /* Update used index buffer information. 
*/ + vq->used->ring[used_idx].id = MBUF_HEADROOM_UINT32(mbuf); + vq->used->ring[used_idx].len = 0; + + used_idx = (used_idx + 1) & (vq->size - 1); + } + + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in mempool after " + "clean is: %d\n", + dev->device_fh, rte_mempool_count(vpool->pool)); + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in ring after " + "clean is : %d\n", + dev->device_fh, rte_ring_count(vpool->ring)); + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in txmbuf_clean_zcp: before updated " + "vq->last_used_idx:%d\n", + dev->device_fh, vq->last_used_idx); + + vq->last_used_idx += mbuf_count; + + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in txmbuf_clean_zcp: after updated " + "vq->last_used_idx:%d\n", + dev->device_fh, vq->last_used_idx); + + rte_compiler_barrier(); + + *(volatile uint16_t *)&vq->used->idx += mbuf_count; + + /* Kick guest if required. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) + eventfd_write(vq->callfd, (eventfd_t)1); + + return 0; +} + +/* + * This function is called when a virtio device is destroy. + * It fetchs mbuf from vpool->pool, and detached it, and put into vpool->ring. 
 */
static void mbuf_destroy_zcp(struct vpool *vpool)
{
	struct rte_mbuf *mbuf = NULL;
	uint32_t index, mbuf_count = rte_mempool_count(vpool->pool);

	LOG_DEBUG(VHOST_CONFIG,
		"in mbuf_destroy_zcp: mbuf count in mempool before "
		"mbuf_destroy_zcp is: %d\n",
		mbuf_count);
	LOG_DEBUG(VHOST_CONFIG,
		"in mbuf_destroy_zcp: mbuf count in ring before "
		"mbuf_destroy_zcp is : %d\n",
		rte_ring_count(vpool->ring));

	/* Move each mbuf from the pool to the ring, detaching those flagged by MBUF_EXT_MEM. */
	for (index = 0; index < mbuf_count; index++) {
		mbuf = __rte_mbuf_raw_alloc(vpool->pool);
		if (likely(mbuf != NULL)) {
			if (likely(MBUF_EXT_MEM(mbuf)))
				pktmbuf_detach_zcp(mbuf);
			rte_ring_sp_enqueue(vpool->ring, (void *)mbuf);
		}
	}

	LOG_DEBUG(VHOST_CONFIG,
		"in mbuf_destroy_zcp: mbuf count in mempool after "
		"mbuf_destroy_zcp is: %d\n",
		rte_mempool_count(vpool->pool));
	LOG_DEBUG(VHOST_CONFIG,
		"in mbuf_destroy_zcp: mbuf count in ring after "
		"mbuf_destroy_zcp is : %d\n",
		rte_ring_count(vpool->ring));
}

/*
 * This function update the use flag and counter.
 *
 * For up to MAX_PKT_BURST packets it writes a zeroed virtio header into
 * each packet's descriptor buffer, fills in the descriptor lengths and the
 * used-ring entries, then publishes the new used index and signals the
 * guest. Returns the number of packets processed.
 */
static inline uint32_t __attribute__((always_inline))
virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts,
	uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr
		= {{0, 0, 0, 0, 0, 0}, 0};
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST], packet_len = 0;
	uint32_t head_idx, packet_success = 0;
	uint16_t res_cur_idx;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);

	if (count == 0)
		return 0;

	vq = dev->virtqueue[VIRTIO_RXQ];
	/* Clamp the burst so head[] cannot overflow. */
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

	res_cur_idx = vq->last_used_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
		dev->device_fh, res_cur_idx, res_cur_idx + count);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = MBUF_HEADROOM_UINT32(pkts[head_idx]);

	/*Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (packet_success != count) {
		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];

		buff = pkts[packet_success];
		LOG_DEBUG(VHOST_DATA,
			"(%"PRIu64") in dev_rx_zcp: update the used idx for "
			"pkt[%d] descriptor idx: %d\n",
			dev->device_fh, packet_success,
			MBUF_HEADROOM_UINT32(buff));

		PRINT_PACKET(dev,
			(uintptr_t)(((uint64_t)(uintptr_t)buff->buf_addr)
			+ RTE_PKTMBUF_HEADROOM),
			rte_pktmbuf_data_len(buff), 0);

		/* Buffer address translation for virtio header. */
		buff_hdr_addr = gpa_to_vva(dev, desc->addr);
		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;

		/*
		 * If the descriptors are chained the header and data are
		 * placed in separate buffers.
		 */
		if (desc->flags & VRING_DESC_F_NEXT) {
			desc->len = vq->vhost_hlen;
			desc = &vq->desc[desc->next];
			desc->len = rte_pktmbuf_data_len(buff);
		} else {
			desc->len = packet_len;
		}

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id
			= head[packet_success];
		vq->used->ring[res_cur_idx & (vq->size - 1)].len
			= packet_len;
		res_cur_idx++;
		packet_success++;

		/* A header is required per buffer. */
		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
			(const void *)&virtio_hdr, vq->vhost_hlen);

		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

		if (likely(packet_success < count)) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	/* Keep the ring entry stores ahead of the index update. */
	rte_compiler_barrier();

	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in dev_rx_zcp: before update used idx: "
		"vq.last_used_idx: %d, vq->used->idx: %d\n",
		dev->device_fh, vq->last_used_idx, vq->used->idx);

	*(volatile uint16_t *)&vq->used->idx += count;
	vq->last_used_idx += count;

	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in dev_rx_zcp: after update used idx: "
		"vq.last_used_idx: %d, vq->used->idx: %d\n",
		dev->device_fh, vq->last_used_idx, vq->used->idx);

	/* Kick the guest if necessary. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write(vq->callfd, (eventfd_t)1);

	return count;
}

/*
 * This function routes the TX packet to the correct interface.
 * This may be a local device or the physical port.
 */
static inline void __attribute__((always_inline))
virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,
	uint32_t desc_idx, uint8_t need_copy)
{
	struct mbuf_table *tx_q;
	struct rte_mbuf **m_table;
	void *obj = NULL;
	struct rte_mbuf *mbuf;
	unsigned len, ret, offset = 0;
	struct vpool *vpool;
	uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh];
	uint16_t vmdq_rx_q = ((struct vhost_dev *)dev->priv)->vmdq_rx_q;

	/*Add packet to the port tx queue*/
	tx_q = &tx_queue_zcp[vmdq_rx_q];
	len = tx_q->len;

	/* Allocate an mbuf and populate the structure.
*/ + vpool = &vpool_array[MAX_QUEUES + vmdq_rx_q]; + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; + if (unlikely(mbuf == NULL)) { + struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ]; + RTE_LOG(ERR, VHOST_DATA, + "(%"PRIu64") Failed to allocate memory for mbuf.\n", + dev->device_fh); + put_desc_to_used_list_zcp(vq, desc_idx); + return; + } + + if (vm2vm_mode == VM2VM_HARDWARE) { + /* Avoid using a vlan tag from any vm for external pkt, such as + * vlan_tags[dev->device_fh], oterwise, it conflicts when pool + * selection, MAC address determines it as an external pkt + * which should go to network, while vlan tag determine it as + * a vm2vm pkt should forward to another vm. Hardware confuse + * such a ambiguous situation, so pkt will lost. + */ + vlan_tag = external_pkt_default_vlan_tag; + if (find_local_dest(dev, m, &offset, &vlan_tag) != 0) { + MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx; + __rte_mbuf_raw_free(mbuf); + return; + } + } + + mbuf->nb_segs = m->nb_segs; + mbuf->next = m->next; + mbuf->data_len = m->data_len + offset; + mbuf->pkt_len = mbuf->data_len; + if (unlikely(need_copy)) { + /* Copy the packet contents to the mbuf. */ + rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), + rte_pktmbuf_mtod(m, void *), + m->data_len); + } else { + mbuf->data_off = m->data_off; + mbuf->buf_physaddr = m->buf_physaddr; + mbuf->buf_addr = m->buf_addr; + } + mbuf->ol_flags |= PKT_TX_VLAN_PKT; + mbuf->vlan_tci = vlan_tag; + mbuf->l2_len = sizeof(struct ether_hdr); + mbuf->l3_len = sizeof(struct ipv4_hdr); + MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx; + + tx_q->m_table[len] = mbuf; + len++; + + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") in tx_route_zcp: pkt: nb_seg: %d, next:%s\n", + dev->device_fh, + mbuf->nb_segs, + (mbuf->next == NULL) ? 
"null" : "non-null"); + + if (enable_stats) { + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx++; + } + + if (unlikely(len == MAX_PKT_BURST)) { + m_table = (struct rte_mbuf **)tx_q->m_table; + ret = rte_eth_tx_burst(ports[0], + (uint16_t)tx_q->txq_id, m_table, (uint16_t) len); + + /* + * Free any buffers not handled by TX and update + * the port stats. + */ + if (unlikely(ret < len)) { + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < len); + } + + len = 0; + txmbuf_clean_zcp(dev, vpool); + } + + tx_q->len = len; + + return; +} + +/* + * This function TX all available packets in virtio TX queue for one + * virtio-net device. If it is first packet, it learns MAC address and + * setup VMDQ. + */ +static inline void __attribute__((always_inline)) +virtio_dev_tx_zcp(struct virtio_net *dev) +{ + struct rte_mbuf m; + struct vhost_virtqueue *vq; + struct vring_desc *desc; + uint64_t buff_addr = 0, phys_addr; + uint32_t head[MAX_PKT_BURST]; + uint32_t i; + uint16_t free_entries, packet_success = 0; + uint16_t avail_idx; + uint8_t need_copy = 0; + hpa_type addr_type; + struct vhost_dev *vdev = (struct vhost_dev *)dev->priv; + + vq = dev->virtqueue[VIRTIO_TXQ]; + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + + /* If there are no available buffers then return. */ + if (vq->last_used_idx_res == avail_idx) + return; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh); + + /* Prefetch available ring to retrieve head indexes. */ + rte_prefetch0(&vq->avail->ring[vq->last_used_idx_res & (vq->size - 1)]); + + /* Get the number of free entries in the ring */ + free_entries = (avail_idx - vq->last_used_idx_res); + + /* Limit to MAX_PKT_BURST. */ + free_entries + = (free_entries > MAX_PKT_BURST) ? MAX_PKT_BURST : free_entries; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", + dev->device_fh, free_entries); + + /* Retrieve all of the head indexes first to avoid caching issues. 
*/ + for (i = 0; i < free_entries; i++) + head[i] + = vq->avail->ring[(vq->last_used_idx_res + i) + & (vq->size - 1)]; + + vq->last_used_idx_res += free_entries; + + /* Prefetch descriptor index. */ + rte_prefetch0(&vq->desc[head[packet_success]]); + rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]); + + while (packet_success < free_entries) { + desc = &vq->desc[head[packet_success]]; + + /* Discard first buffer as it is the virtio header */ + desc = &vq->desc[desc->next]; + + /* Buffer address translation. */ + buff_addr = gpa_to_vva(dev, desc->addr); + /* Need check extra VLAN_HLEN size for inserting VLAN tag */ + phys_addr = gpa_to_hpa(vdev, desc->addr, desc->len + VLAN_HLEN, + &addr_type); + + if (likely(packet_success < (free_entries - 1))) + /* Prefetch descriptor index. */ + rte_prefetch0(&vq->desc[head[packet_success + 1]]); + + if (unlikely(addr_type == PHYS_ADDR_INVALID)) { + RTE_LOG(ERR, VHOST_DATA, + "(%"PRIu64") Invalid frame buffer address found" + "when TX packets!\n", + dev->device_fh); + packet_success++; + continue; + } + + /* Prefetch buffer address. */ + rte_prefetch0((void *)(uintptr_t)buff_addr); + + /* + * Setup dummy mbuf. This is copied to a real mbuf if + * transmitted out the physical port. + */ + m.data_len = desc->len; + m.nb_segs = 1; + m.next = NULL; + m.data_off = 0; + m.buf_addr = (void *)(uintptr_t)buff_addr; + m.buf_physaddr = phys_addr; + + /* + * Check if the frame buffer address from guest crosses + * sub-region or not. 
+ */ + if (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) { + RTE_LOG(ERR, VHOST_DATA, + "(%"PRIu64") Frame buffer address cross " + "sub-regioin found when attaching TX frame " + "buffer address!\n", + dev->device_fh); + need_copy = 1; + } else + need_copy = 0; + + PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0); + + /* + * If this is the first received packet we need to learn + * the MAC and setup VMDQ + */ + if (unlikely(vdev->ready == DEVICE_MAC_LEARNING)) { + if (vdev->remove || (link_vmdq(vdev, &m) == -1)) { + /* + * Discard frame if device is scheduled for + * removal or a duplicate MAC address is found. + */ + packet_success += free_entries; + vq->last_used_idx += packet_success; + break; + } + } + + virtio_tx_route_zcp(dev, &m, head[packet_success], need_copy); + packet_success++; + } +} + +/* + * This function is called by each data core. It handles all RX/TX registered + * with the core. For TX the specific lcore linked list is used. For RX, MAC + * addresses are compared with all devices in the main linked list. + */ +static int +switch_worker_zcp(__attribute__((unused)) void *arg) +{ + struct virtio_net *dev = NULL; + struct vhost_dev *vdev = NULL; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct virtio_net_data_ll *dev_ll; + struct mbuf_table *tx_q; + volatile struct lcore_ll_info *lcore_ll; + const uint64_t drain_tsc + = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S + * BURST_TX_DRAIN_US; + uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0; + unsigned ret; + const uint16_t lcore_id = rte_lcore_id(); + uint16_t count_in_ring, rx_count = 0; + + RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id); + + lcore_ll = lcore_info[lcore_id].lcore_ll; + prev_tsc = 0; + + while (1) { + cur_tsc = rte_rdtsc(); + + /* TX burst queue drain */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + /* + * Get mbuf from vpool.pool and detach mbuf and + * put back into vpool.ring. 
+ */ + dev_ll = lcore_ll->ll_root_used; + while ((dev_ll != NULL) && (dev_ll->vdev != NULL)) { + /* Get virtio device ID */ + vdev = dev_ll->vdev; + dev = vdev->dev; + + if (likely(!vdev->remove)) { + tx_q = &tx_queue_zcp[(uint16_t)vdev->vmdq_rx_q]; + if (tx_q->len) { + LOG_DEBUG(VHOST_DATA, + "TX queue drained after timeout" + " with burst size %u\n", + tx_q->len); + + /* + * Tx any packets in the queue + */ + ret = rte_eth_tx_burst( + ports[0], + (uint16_t)tx_q->txq_id, + (struct rte_mbuf **) + tx_q->m_table, + (uint16_t)tx_q->len); + if (unlikely(ret < tx_q->len)) { + do { + rte_pktmbuf_free( + tx_q->m_table[ret]); + } while (++ret < tx_q->len); + } + tx_q->len = 0; + + txmbuf_clean_zcp(dev, + &vpool_array[MAX_QUEUES+vdev->vmdq_rx_q]); + } + } + dev_ll = dev_ll->next; + } + prev_tsc = cur_tsc; + } + + rte_prefetch0(lcore_ll->ll_root_used); + + /* + * Inform the configuration core that we have exited the linked + * list and that no devices are in use if requested. + */ + if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL) + lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; + + /* Process devices */ + dev_ll = lcore_ll->ll_root_used; + + while ((dev_ll != NULL) && (dev_ll->vdev != NULL)) { + vdev = dev_ll->vdev; + dev = vdev->dev; + if (unlikely(vdev->remove)) { + dev_ll = dev_ll->next; + unlink_vmdq(vdev); + vdev->ready = DEVICE_SAFE_REMOVE; + continue; + } + + if (likely(vdev->ready == DEVICE_RX)) { + uint32_t index = vdev->vmdq_rx_q; + uint16_t i; + count_in_ring + = rte_ring_count(vpool_array[index].ring); + uint16_t free_entries + = (uint16_t)get_available_ring_num_zcp(dev); + + /* + * Attach all mbufs in vpool.ring and put back + * into vpool.pool. 
+ */ + for (i = 0; + i < RTE_MIN(free_entries, + RTE_MIN(count_in_ring, MAX_PKT_BURST)); + i++) + attach_rxmbuf_zcp(dev); + + /* Handle guest RX */ + rx_count = rte_eth_rx_burst(ports[0], + vdev->vmdq_rx_q, pkts_burst, + MAX_PKT_BURST); + + if (rx_count) { + ret_count = virtio_dev_rx_zcp(dev, + pkts_burst, rx_count); + if (enable_stats) { + dev_statistics[dev->device_fh].rx_total + += rx_count; + dev_statistics[dev->device_fh].rx + += ret_count; + } + while (likely(rx_count)) { + rx_count--; + pktmbuf_detach_zcp( + pkts_burst[rx_count]); + rte_ring_sp_enqueue( + vpool_array[index].ring, + (void *)pkts_burst[rx_count]); + } + } + } + + if (likely(!vdev->remove)) + /* Handle guest TX */ + virtio_dev_tx_zcp(dev); + + /* Move to the next device in the list */ + dev_ll = dev_ll->next; + } + } + + return 0; +} + + +/* + * Add an entry to a used linked list. A free entry must first be found + * in the free linked list using get_data_ll_free_entry(); + */ +static void +add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev) +{ + struct virtio_net_data_ll *ll = *ll_root_addr; + + /* Set next as NULL and use a compiler barrier to avoid reordering. */ + ll_dev->next = NULL; + rte_compiler_barrier(); + + /* If ll == NULL then this is the first device. */ + if (ll) { + /* Increment to the tail of the linked list. */ + while ((ll->next != NULL) ) + ll = ll->next; + + ll->next = ll_dev; + } else { + *ll_root_addr = ll_dev; + } +} + +/* + * Remove an entry from a used linked list. The entry must then be added to + * the free linked list using put_data_ll_free_entry(). 
+ */ +static void +rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev, + struct virtio_net_data_ll *ll_dev_last) +{ + struct virtio_net_data_ll *ll = *ll_root_addr; + + if (unlikely((ll == NULL) || (ll_dev == NULL))) + return; + + if (ll_dev == ll) + *ll_root_addr = ll_dev->next; + else + if (likely(ll_dev_last != NULL)) + ll_dev_last->next = ll_dev->next; + else + RTE_LOG(ERR, VHOST_CONFIG, "Remove entry form ll failed.\n"); +} + +/* + * Find and return an entry from the free linked list. + */ +static struct virtio_net_data_ll * +get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr) +{ + struct virtio_net_data_ll *ll_free = *ll_root_addr; + struct virtio_net_data_ll *ll_dev; + + if (ll_free == NULL) + return NULL; + + ll_dev = ll_free; + *ll_root_addr = ll_free->next; + + return ll_dev; +} + +/* + * Place an entry back on to the free linked list. + */ +static void +put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev) +{ + struct virtio_net_data_ll *ll_free = *ll_root_addr; + + if (ll_dev == NULL) + return; + + ll_dev->next = ll_free; + *ll_root_addr = ll_dev; +} + +/* + * Creates a linked list of a given size. + */ +static struct virtio_net_data_ll * +alloc_data_ll(uint32_t size) +{ + struct virtio_net_data_ll *ll_new; + uint32_t i; + + /* Malloc and then chain the linked list. */ + ll_new = malloc(size * sizeof(struct virtio_net_data_ll)); + if (ll_new == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for ll_new.\n"); + return NULL; + } + + for (i = 0; i < size - 1; i++) { + ll_new[i].vdev = NULL; + ll_new[i].next = &ll_new[i+1]; + } + ll_new[i].next = NULL; + + return ll_new; +} + +/* + * Create the main linked list along with each individual cores linked list. A used and a free list + * are created to manage entries. 
+ */ +static int +init_data_ll (void) +{ + int lcore; + + RTE_LCORE_FOREACH_SLAVE(lcore) { + lcore_info[lcore].lcore_ll = malloc(sizeof(struct lcore_ll_info)); + if (lcore_info[lcore].lcore_ll == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for lcore_ll.\n"); + return -1; + } + + lcore_info[lcore].lcore_ll->device_num = 0; + lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; + lcore_info[lcore].lcore_ll->ll_root_used = NULL; + if (num_devices % num_switching_cores) + lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll((num_devices / num_switching_cores) + 1); + else + lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll(num_devices / num_switching_cores); + } + + /* Allocate devices up to a maximum of MAX_DEVICES. */ + ll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES)); + + return 0; +} + +/* + * Remove a device from the specific data core linked list and from the main linked list. Synchonization + * occurs through the use of the lcore dev_removal_flag. Device is made volatile here to avoid re-ordering + * of dev->remove=1 which can cause an infinite loop in the rte_pause loop. + */ +static void +destroy_device (volatile struct virtio_net *dev) +{ + struct virtio_net_data_ll *ll_lcore_dev_cur; + struct virtio_net_data_ll *ll_main_dev_cur; + struct virtio_net_data_ll *ll_lcore_dev_last = NULL; + struct virtio_net_data_ll *ll_main_dev_last = NULL; + struct vhost_dev *vdev; + int lcore; + + dev->flags &= ~VIRTIO_DEV_RUNNING; + + vdev = (struct vhost_dev *)dev->priv; + /*set the remove flag. 
*/ + vdev->remove = 1; + while(vdev->ready != DEVICE_SAFE_REMOVE) { + rte_pause(); + } + + /* Search for entry to be removed from lcore ll */ + ll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used; + while (ll_lcore_dev_cur != NULL) { + if (ll_lcore_dev_cur->vdev == vdev) { + break; + } else { + ll_lcore_dev_last = ll_lcore_dev_cur; + ll_lcore_dev_cur = ll_lcore_dev_cur->next; + } + } + + if (ll_lcore_dev_cur == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to find the dev to be destroy.\n", + dev->device_fh); + return; + } + + /* Search for entry to be removed from main ll */ + ll_main_dev_cur = ll_root_used; + ll_main_dev_last = NULL; + while (ll_main_dev_cur != NULL) { + if (ll_main_dev_cur->vdev == vdev) { + break; + } else { + ll_main_dev_last = ll_main_dev_cur; + ll_main_dev_cur = ll_main_dev_cur->next; + } + } + + /* Remove entries from the lcore and main ll. */ + rm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last); + rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last); + + /* Set the dev_removal_flag on each lcore. */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL; + } + + /* + * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that + * they can no longer access the device removed from the linked lists and that the devices + * are no longer in use. + */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) { + rte_pause(); + } + } + + /* Add the entries back to the lcore and main free ll.*/ + put_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur); + put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur); + + /* Decrement number of device on the lcore. 
*/ + lcore_info[vdev->coreid].lcore_ll->device_num--; + + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed from data core\n", dev->device_fh); + + if (zero_copy) { + struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q]; + + /* Stop the RX queue. */ + if (rte_eth_dev_rx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) { + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") In destroy_device: Failed to stop " + "rx queue:%d\n", + dev->device_fh, + vdev->vmdq_rx_q); + } + + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") in destroy_device: Start put mbuf in " + "mempool back to ring for RX queue: %d\n", + dev->device_fh, vdev->vmdq_rx_q); + + mbuf_destroy_zcp(vpool); + + /* Stop the TX queue. */ + if (rte_eth_dev_tx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) { + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") In destroy_device: Failed to " + "stop tx queue:%d\n", + dev->device_fh, vdev->vmdq_rx_q); + } + + vpool = &vpool_array[vdev->vmdq_rx_q + MAX_QUEUES]; + + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") destroy_device: Start put mbuf in mempool " + "back to ring for TX queue: %d, dev:(%"PRIu64")\n", + dev->device_fh, (vdev->vmdq_rx_q + MAX_QUEUES), + dev->device_fh); + + mbuf_destroy_zcp(vpool); + rte_free(vdev->regions_hpa); + } + rte_free(vdev); + +} + +/* + * Calculate the region count of physical continous regions for one particular + * region of whose vhost virtual address is continous. The particular region + * start from vva_start, with size of 'size' in argument. 
+ */ +static uint32_t +check_hpa_regions(uint64_t vva_start, uint64_t size) +{ + uint32_t i, nregions = 0, page_size = getpagesize(); + uint64_t cur_phys_addr = 0, next_phys_addr = 0; + if (vva_start % page_size) { + LOG_DEBUG(VHOST_CONFIG, + "in check_countinous: vva start(%p) mod page_size(%d) " + "has remainder\n", + (void *)(uintptr_t)vva_start, page_size); + return 0; + } + if (size % page_size) { + LOG_DEBUG(VHOST_CONFIG, + "in check_countinous: " + "size((%"PRIu64")) mod page_size(%d) has remainder\n", + size, page_size); + return 0; + } + for (i = 0; i < size - page_size; i = i + page_size) { + cur_phys_addr + = rte_mem_virt2phy((void *)(uintptr_t)(vva_start + i)); + next_phys_addr = rte_mem_virt2phy( + (void *)(uintptr_t)(vva_start + i + page_size)); + if ((cur_phys_addr + page_size) != next_phys_addr) { + ++nregions; + LOG_DEBUG(VHOST_CONFIG, + "in check_continuous: hva addr:(%p) is not " + "continuous with hva addr:(%p), diff:%d\n", + (void *)(uintptr_t)(vva_start + (uint64_t)i), + (void *)(uintptr_t)(vva_start + (uint64_t)i + + page_size), page_size); + LOG_DEBUG(VHOST_CONFIG, + "in check_continuous: hpa addr:(%p) is not " + "continuous with hpa addr:(%p), " + "diff:(%"PRIu64")\n", + (void *)(uintptr_t)cur_phys_addr, + (void *)(uintptr_t)next_phys_addr, + (next_phys_addr-cur_phys_addr)); + } + } + return nregions; +} + +/* + * Divide each region whose vhost virtual address is continous into a few + * sub-regions, make sure the physical address within each sub-region are + * continous. And fill offset(to GPA) and size etc. information of each + * sub-region into regions_hpa. 
+ */ +static uint32_t +fill_hpa_memory_regions(struct virtio_memory_regions_hpa *mem_region_hpa, struct virtio_memory *virtio_memory) +{ + uint32_t regionidx, regionidx_hpa = 0, i, k, page_size = getpagesize(); + uint64_t cur_phys_addr = 0, next_phys_addr = 0, vva_start; + + if (mem_region_hpa == NULL) + return 0; + + for (regionidx = 0; regionidx < virtio_memory->nregions; regionidx++) { + vva_start = virtio_memory->regions[regionidx].guest_phys_address + + virtio_memory->regions[regionidx].address_offset; + mem_region_hpa[regionidx_hpa].guest_phys_address + = virtio_memory->regions[regionidx].guest_phys_address; + mem_region_hpa[regionidx_hpa].host_phys_addr_offset = + rte_mem_virt2phy((void *)(uintptr_t)(vva_start)) - + mem_region_hpa[regionidx_hpa].guest_phys_address; + LOG_DEBUG(VHOST_CONFIG, + "in fill_hpa_regions: guest phys addr start[%d]:(%p)\n", + regionidx_hpa, + (void *)(uintptr_t) + (mem_region_hpa[regionidx_hpa].guest_phys_address)); + LOG_DEBUG(VHOST_CONFIG, + "in fill_hpa_regions: host phys addr start[%d]:(%p)\n", + regionidx_hpa, + (void *)(uintptr_t) + (mem_region_hpa[regionidx_hpa].host_phys_addr_offset)); + for (i = 0, k = 0; + i < virtio_memory->regions[regionidx].memory_size - + page_size; + i += page_size) { + cur_phys_addr = rte_mem_virt2phy( + (void *)(uintptr_t)(vva_start + i)); + next_phys_addr = rte_mem_virt2phy( + (void *)(uintptr_t)(vva_start + + i + page_size)); + if ((cur_phys_addr + page_size) != next_phys_addr) { + mem_region_hpa[regionidx_hpa].guest_phys_address_end = + mem_region_hpa[regionidx_hpa].guest_phys_address + + k + page_size; + mem_region_hpa[regionidx_hpa].memory_size + = k + page_size; + LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest " + "phys addr end [%d]:(%p)\n", + regionidx_hpa, + (void *)(uintptr_t) + (mem_region_hpa[regionidx_hpa].guest_phys_address_end)); + LOG_DEBUG(VHOST_CONFIG, + "in fill_hpa_regions: guest phys addr " + "size [%d]:(%p)\n", + regionidx_hpa, + (void *)(uintptr_t) + 
(mem_region_hpa[regionidx_hpa].memory_size)); + mem_region_hpa[regionidx_hpa + 1].guest_phys_address + = mem_region_hpa[regionidx_hpa].guest_phys_address_end; + ++regionidx_hpa; + mem_region_hpa[regionidx_hpa].host_phys_addr_offset = + next_phys_addr - + mem_region_hpa[regionidx_hpa].guest_phys_address; + LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest" + " phys addr start[%d]:(%p)\n", + regionidx_hpa, + (void *)(uintptr_t) + (mem_region_hpa[regionidx_hpa].guest_phys_address)); + LOG_DEBUG(VHOST_CONFIG, + "in fill_hpa_regions: host phys addr " + "start[%d]:(%p)\n", + regionidx_hpa, + (void *)(uintptr_t) + (mem_region_hpa[regionidx_hpa].host_phys_addr_offset)); + k = 0; + } else { + k += page_size; + } + } + mem_region_hpa[regionidx_hpa].guest_phys_address_end + = mem_region_hpa[regionidx_hpa].guest_phys_address + + k + page_size; + mem_region_hpa[regionidx_hpa].memory_size = k + page_size; + LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest phys addr end " + "[%d]:(%p)\n", regionidx_hpa, + (void *)(uintptr_t) + (mem_region_hpa[regionidx_hpa].guest_phys_address_end)); + LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest phys addr size " + "[%d]:(%p)\n", regionidx_hpa, + (void *)(uintptr_t) + (mem_region_hpa[regionidx_hpa].memory_size)); + ++regionidx_hpa; + } + return regionidx_hpa; +} + +/* + * A new device is added to a data core. First the device is added to the main linked list + * and the allocated to a specific data core. 
+ */ +static int +new_device (struct virtio_net *dev) +{ + struct virtio_net_data_ll *ll_dev; + int lcore, core_add = 0; + uint32_t device_num_min = num_devices; + struct vhost_dev *vdev; + uint32_t regionidx; + + vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE); + if (vdev == NULL) { + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Couldn't allocate memory for vhost dev\n", + dev->device_fh); + return -1; + } + vdev->dev = dev; + dev->priv = vdev; + + if (zero_copy) { + vdev->nregions_hpa = dev->mem->nregions; + for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { + vdev->nregions_hpa + += check_hpa_regions( + dev->mem->regions[regionidx].guest_phys_address + + dev->mem->regions[regionidx].address_offset, + dev->mem->regions[regionidx].memory_size); + + } + + vdev->regions_hpa = rte_calloc("vhost hpa region", + vdev->nregions_hpa, + sizeof(struct virtio_memory_regions_hpa), + RTE_CACHE_LINE_SIZE); + if (vdev->regions_hpa == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "Cannot allocate memory for hpa region\n"); + rte_free(vdev); + return -1; + } + + + if (fill_hpa_memory_regions( + vdev->regions_hpa, dev->mem + ) != vdev->nregions_hpa) { + + RTE_LOG(ERR, VHOST_CONFIG, + "hpa memory regions number mismatch: " + "[%d]\n", vdev->nregions_hpa); + rte_free(vdev->regions_hpa); + rte_free(vdev); + return -1; + } + } + + + /* Add device to main ll */ + ll_dev = get_data_ll_free_entry(&ll_root_free); + if (ll_dev == NULL) { + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in linked list. 
Device limit " + "of %d devices per core has been reached\n", + dev->device_fh, num_devices); + if (vdev->regions_hpa) + rte_free(vdev->regions_hpa); + rte_free(vdev); + return -1; + } + ll_dev->vdev = vdev; + add_data_ll_entry(&ll_root_used, ll_dev); + vdev->vmdq_rx_q + = dev->device_fh * queues_per_pool + vmdq_queue_base; + + if (zero_copy) { + uint32_t index = vdev->vmdq_rx_q; + uint32_t count_in_ring, i; + struct mbuf_table *tx_q; + + count_in_ring = rte_ring_count(vpool_array[index].ring); + + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") in new_device: mbuf count in mempool " + "before attach is: %d\n", + dev->device_fh, + rte_mempool_count(vpool_array[index].pool)); + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") in new_device: mbuf count in ring " + "before attach is : %d\n", + dev->device_fh, count_in_ring); + + /* + * Attach all mbufs in vpool.ring and put back intovpool.pool. + */ + for (i = 0; i < count_in_ring; i++) + attach_rxmbuf_zcp(dev); + + LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in " + "mempool after attach is: %d\n", + dev->device_fh, + rte_mempool_count(vpool_array[index].pool)); + LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in " + "ring after attach is : %d\n", + dev->device_fh, + rte_ring_count(vpool_array[index].ring)); + + tx_q = &tx_queue_zcp[(uint16_t)vdev->vmdq_rx_q]; + tx_q->txq_id = vdev->vmdq_rx_q; + + if (rte_eth_dev_tx_queue_start(ports[0], vdev->vmdq_rx_q) != 0) { + struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q]; + + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") In new_device: Failed to start " + "tx queue:%d\n", + dev->device_fh, vdev->vmdq_rx_q); + + mbuf_destroy_zcp(vpool); + rte_free(vdev->regions_hpa); + rte_free(vdev); + return -1; + } + + if (rte_eth_dev_rx_queue_start(ports[0], vdev->vmdq_rx_q) != 0) { + struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q]; + + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") In new_device: Failed to start " + "rx queue:%d\n", + dev->device_fh, vdev->vmdq_rx_q); + + /* Stop 
the TX queue. */ + if (rte_eth_dev_tx_queue_stop(ports[0], + vdev->vmdq_rx_q) != 0) { + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") In new_device: Failed to " + "stop tx queue:%d\n", + dev->device_fh, vdev->vmdq_rx_q); + } + + mbuf_destroy_zcp(vpool); + rte_free(vdev->regions_hpa); + rte_free(vdev); + return -1; + } + + } + + /*reset ready flag*/ + vdev->ready = DEVICE_MAC_LEARNING; + vdev->remove = 0; + + /* Find a suitable lcore to add the device. */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + if (lcore_info[lcore].lcore_ll->device_num < device_num_min) { + device_num_min = lcore_info[lcore].lcore_ll->device_num; + core_add = lcore; + } + } + /* Add device to lcore ll */ + ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free); + if (ll_dev == NULL) { + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh); + vdev->ready = DEVICE_SAFE_REMOVE; + destroy_device(dev); + rte_free(vdev->regions_hpa); + rte_free(vdev); + return -1; + } + ll_dev->vdev = vdev; + vdev->coreid = core_add; + + add_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_dev); + + /* Initialize device stats */ + memset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics)); + + /* Disable notifications. */ + rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0); + rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0); + lcore_info[vdev->coreid].lcore_ll->device_num++; + dev->flags |= VIRTIO_DEV_RUNNING; + + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, vdev->coreid); + + return 0; +} + +/* + * These callback allow devices to be added to the data core when configuration + * has been fully complete. + */ +static const struct virtio_net_device_ops virtio_net_device_ops = +{ + .new_device = new_device, + .destroy_device = destroy_device, +}; + +/* + * This is a thread will wake up after a period to print stats if the user has + * enabled them. 
+ */ +static void +print_stats(void) +{ + struct virtio_net_data_ll *dev_ll; + uint64_t tx_dropped, rx_dropped; + uint64_t tx, tx_total, rx, rx_total; + uint32_t device_fh; + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' }; + + while(1) { + sleep(enable_stats); + + /* Clear screen and move to top left */ + printf("%s%s", clr, top_left); + + printf("\nDevice statistics ===================================="); + + dev_ll = ll_root_used; + while (dev_ll != NULL) { + device_fh = (uint32_t)dev_ll->vdev->dev->device_fh; + tx_total = dev_statistics[device_fh].tx_total; + tx = dev_statistics[device_fh].tx; + tx_dropped = tx_total - tx; + if (zero_copy == 0) { + rx_total = rte_atomic64_read( + &dev_statistics[device_fh].rx_total_atomic); + rx = rte_atomic64_read( + &dev_statistics[device_fh].rx_atomic); + } else { + rx_total = dev_statistics[device_fh].rx_total; + rx = dev_statistics[device_fh].rx; + } + rx_dropped = rx_total - rx; + + printf("\nStatistics for device %"PRIu32" ------------------------------" + "\nTX total: %"PRIu64"" + "\nTX dropped: %"PRIu64"" + "\nTX successful: %"PRIu64"" + "\nRX total: %"PRIu64"" + "\nRX dropped: %"PRIu64"" + "\nRX successful: %"PRIu64"", + device_fh, + tx_total, + tx_dropped, + tx, + rx_total, + rx_dropped, + rx); + + dev_ll = dev_ll->next; + } + printf("\n======================================================\n"); + } +} + +static void +setup_mempool_tbl(int socket, uint32_t index, char *pool_name, + char *ring_name, uint32_t nb_mbuf) +{ + vpool_array[index].pool = rte_pktmbuf_pool_create(pool_name, nb_mbuf, + MBUF_CACHE_SIZE_ZCP, 0, MBUF_DATA_SIZE_ZCP, socket); + if (vpool_array[index].pool != NULL) { + vpool_array[index].ring + = rte_ring_create(ring_name, + rte_align32pow2(nb_mbuf + 1), + socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + if (likely(vpool_array[index].ring != NULL)) { + LOG_DEBUG(VHOST_CONFIG, + "in setup_mempool_tbl: mbuf count in " + "mempool is: %d\n", + 
rte_mempool_count(vpool_array[index].pool)); + LOG_DEBUG(VHOST_CONFIG, + "in setup_mempool_tbl: mbuf count in " + "ring is: %d\n", + rte_ring_count(vpool_array[index].ring)); + } else { + rte_exit(EXIT_FAILURE, "ring_create(%s) failed", + ring_name); + } + + /* Need consider head room. */ + vpool_array[index].buf_size = VIRTIO_DESCRIPTOR_LEN_ZCP; + } else { + rte_exit(EXIT_FAILURE, "mempool_create(%s) failed", pool_name); + } +} + +/* When we receive a INT signal, unregister vhost driver */ +static void +sigint_handler(__rte_unused int signum) +{ + /* Unregister vhost driver. */ + int ret = rte_vhost_driver_unregister((char *)&dev_basename); + if (ret != 0) + rte_exit(EXIT_FAILURE, "vhost driver unregister failure.\n"); + exit(0); +} + +/* + * Main function, does initialisation and calls the per-lcore functions. The CUSE + * device is also registered here to handle the IOCTLs. + */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool = NULL; + unsigned lcore_id, core_id = 0; + unsigned nb_ports, valid_num_ports; + int ret; + uint8_t portid; + uint16_t queue_id; + static pthread_t tid; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + signal(SIGINT, sigint_handler); + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + /* parse app arguments */ + ret = us_vhost_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid argument\n"); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++) + if (rte_lcore_is_enabled(lcore_id)) + lcore_ids[core_id ++] = lcore_id; + + if (rte_lcore_count() > RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE,"Not enough cores\n"); + + /*set the number of swithcing cores available*/ + num_switching_cores = rte_lcore_count()-1; + + /* Get the number of physical ports. 
*/ + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* + * Update the global var NUM_PORTS and global array PORTS + * and get value of var VALID_NUM_PORTS according to system ports number + */ + valid_num_ports = check_ports_num(nb_ports); + + if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) { + RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u," + "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS); + return -1; + } + + if (zero_copy == 0) { + /* Create the mbuf pool. */ + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", + NUM_MBUFS_PER_PORT * valid_num_ports, MBUF_CACHE_SIZE, + 0, MBUF_DATA_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + for (queue_id = 0; queue_id < MAX_QUEUES + 1; queue_id++) + vpool_array[queue_id].pool = mbuf_pool; + + if (vm2vm_mode == VM2VM_HARDWARE) { + /* Enable VT loop back to let L2 switch to do it. */ + vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1; + LOG_DEBUG(VHOST_CONFIG, + "Enable loop back for L2 switch in vmdq.\n"); + } + } else { + uint32_t nb_mbuf; + char pool_name[RTE_MEMPOOL_NAMESIZE]; + char ring_name[RTE_MEMPOOL_NAMESIZE]; + + nb_mbuf = num_rx_descriptor + + num_switching_cores * MBUF_CACHE_SIZE_ZCP + + num_switching_cores * MAX_PKT_BURST; + + for (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) { + snprintf(pool_name, sizeof(pool_name), + "rxmbuf_pool_%u", queue_id); + snprintf(ring_name, sizeof(ring_name), + "rxmbuf_ring_%u", queue_id); + setup_mempool_tbl(rte_socket_id(), queue_id, + pool_name, ring_name, nb_mbuf); + } + + nb_mbuf = num_tx_descriptor + + num_switching_cores * MBUF_CACHE_SIZE_ZCP + + num_switching_cores * MAX_PKT_BURST; + + for (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) { + snprintf(pool_name, sizeof(pool_name), + "txmbuf_pool_%u", queue_id); + snprintf(ring_name, sizeof(ring_name), + "txmbuf_ring_%u", queue_id); + 
setup_mempool_tbl(rte_socket_id(), + (queue_id + MAX_QUEUES), + pool_name, ring_name, nb_mbuf); + } + + if (vm2vm_mode == VM2VM_HARDWARE) { + /* Enable VT loop back to let L2 switch to do it. */ + vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1; + LOG_DEBUG(VHOST_CONFIG, + "Enable loop back for L2 switch in vmdq.\n"); + } + } + /* Set log level. */ + rte_set_log_level(LOG_LEVEL); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + RTE_LOG(INFO, VHOST_PORT, + "Skipping disabled port %d\n", portid); + continue; + } + if (port_init(portid) != 0) + rte_exit(EXIT_FAILURE, + "Cannot initialize network ports\n"); + } + + /* Initialise all linked lists. */ + if (init_data_ll() == -1) + rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n"); + + /* Initialize device stats */ + memset(&dev_statistics, 0, sizeof(dev_statistics)); + + /* Enable stats if the user option is set. */ + if (enable_stats) { + ret = pthread_create(&tid, NULL, (void *)print_stats, NULL); + if (ret != 0) + rte_exit(EXIT_FAILURE, + "Cannot create print-stats thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats"); + ret = rte_thread_setname(tid, thread_name); + if (ret != 0) + RTE_LOG(ERR, VHOST_CONFIG, + "Cannot set print-stats name\n"); + } + + /* Launch all data cores. */ + if (zero_copy == 0) { + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(switch_worker, + mbuf_pool, lcore_id); + } + } else { + uint32_t count_in_mempool, index, i; + for (index = 0; index < 2*MAX_QUEUES; index++) { + /* For all RX and TX queues. */ + count_in_mempool + = rte_mempool_count(vpool_array[index].pool); + + /* + * Transfer all un-attached mbufs from vpool.pool + * to vpoo.ring. 
+ */ + for (i = 0; i < count_in_mempool; i++) { + struct rte_mbuf *mbuf + = __rte_mbuf_raw_alloc( + vpool_array[index].pool); + rte_ring_sp_enqueue(vpool_array[index].ring, + (void *)mbuf); + } + + LOG_DEBUG(VHOST_CONFIG, + "in main: mbuf count in mempool at initial " + "is: %d\n", count_in_mempool); + LOG_DEBUG(VHOST_CONFIG, + "in main: mbuf count in ring at initial is :" + " %d\n", + rte_ring_count(vpool_array[index].ring)); + } + + RTE_LCORE_FOREACH_SLAVE(lcore_id) + rte_eal_remote_launch(switch_worker_zcp, NULL, + lcore_id); + } + + if (mergeable == 0) + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF); + + /* Register vhost(cuse or user) driver to handle vhost messages. */ + ret = rte_vhost_driver_register((char *)&dev_basename); + if (ret != 0) + rte_exit(EXIT_FAILURE, "vhost driver register failure.\n"); + + rte_vhost_driver_callback_register(&virtio_net_device_ops); + + /* Start CUSE session. */ + rte_vhost_driver_session_start(); + return 0; + +} diff --git a/examples/vhost/main.h b/examples/vhost/main.h new file mode 100644 index 00000000..d04e2be2 --- /dev/null +++ b/examples/vhost/main.h @@ -0,0 +1,115 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +//#define DEBUG + +#ifdef DEBUG +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) do { \ + RTE_LOG(DEBUG, log_type, fmt, ##args); \ +} while (0) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...) do{} while(0) +#endif + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER2 +#define RTE_LOGTYPE_VHOST_PORT RTE_LOGTYPE_USER3 + +/** + * Information relating to memory regions including offsets to + * addresses in host physical space. + */ +struct virtio_memory_regions_hpa { + /**< Base guest physical address of region. */ + uint64_t guest_phys_address; + /**< End guest physical address of region. */ + uint64_t guest_phys_address_end; + /**< Size of region. */ + uint64_t memory_size; + /**< Offset of region for gpa to hpa translation. */ + uint64_t host_phys_addr_offset; +}; + +/* + * Device linked list structure for data path. + */ +struct vhost_dev { + /**< Pointer to device created by vhost lib. */ + struct virtio_net *dev; + /**< Number of memory regions for gpa to hpa translation. 
*/ + uint32_t nregions_hpa; + /**< Memory region information for gpa to hpa translation. */ + struct virtio_memory_regions_hpa *regions_hpa; + /**< Device MAC address (Obtained on first TX packet). */ + struct ether_addr mac_address; + /**< RX VMDQ queue number. */ + uint16_t vmdq_rx_q; + /**< Vlan tag assigned to the pool */ + uint32_t vlan_tag; + /**< Data core that the device is added to. */ + uint16_t coreid; + /**< A device is set as ready if the MAC address has been set. */ + volatile uint8_t ready; + /**< Device is marked for removal from the data core. */ + volatile uint8_t remove; +} __rte_cache_aligned; + +struct virtio_net_data_ll +{ + struct vhost_dev *vdev; /* Pointer to device created by configuration core. */ + struct virtio_net_data_ll *next; /* Pointer to next device in linked list. */ +}; + +/* + * Structure containing data core specific information. + */ +struct lcore_ll_info +{ + struct virtio_net_data_ll *ll_root_free; /* Pointer to head in free linked list. */ + struct virtio_net_data_ll *ll_root_used; /* Pointer to head of used linked list. */ + uint32_t device_num; /* Number of devices on lcore. */ + volatile uint8_t dev_removal_flag; /* Flag to synchronize device removal. */ +}; + +struct lcore_info +{ + struct lcore_ll_info *lcore_ll; /* Pointer to data core specific lcore_ll_info struct */ +}; + +#endif /* _MAIN_H_ */ diff --git a/examples/vhost_xen/Makefile b/examples/vhost_xen/Makefile new file mode 100644 index 00000000..47e14898 --- /dev/null +++ b/examples/vhost_xen/Makefile @@ -0,0 +1,52 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = vhost-switch + +# all source are stored in SRCS-y +SRCS-y := main.c vhost_monitor.c xenstore_parse.c + +CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -D_GNU_SOURCE +LDFLAGS += -lxenstore + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/vhost_xen/main.c b/examples/vhost_xen/main.c new file mode 100644 index 00000000..d83138d6 --- /dev/null +++ b/examples/vhost_xen/main.c @@ -0,0 +1,1530 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <arpa/inet.h> +#include <getopt.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/virtio_net.h> +#include <linux/virtio_ring.h> +#include <signal.h> +#include <stdint.h> +#include <sys/eventfd.h> +#include <sys/param.h> +#include <unistd.h> + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_log.h> +#include <rte_string_fns.h> + +#include "main.h" +#include "virtio-net.h" +#include "xen_vhost.h" + +#define MAX_QUEUES 128 + +/* the maximum number of external ports supported */ +#define MAX_SUP_PORTS 1 + +/* + * Calculate the number of buffers needed per port + */ +#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ + (num_switching_cores*MAX_PKT_BURST) + \ + (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\ + (num_switching_cores*MBUF_CACHE_SIZE)) + +#define MBUF_CACHE_SIZE 64 + +/* + * RX and TX Prefetch, Host, and Write-back threshold values should be + * carefully set for optimal performance. Consult the network + * controller's datasheet and supporting DPDK documentation for guidance + * on how these parameters should be set. + */ +#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */ +#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */ +#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. 
*/ + +/* + * These default values are optimized for use with the Intel(R) 82599 10 GbE + * Controller and the DPDK ixgbe PMD. Consider using other values for other + * network controllers and/or network drivers. + */ +#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */ +#define TX_HTHRESH 0 /* Default values of TX host threshold reg. */ +#define TX_WTHRESH 0 /* Default values of TX write-back threshold reg. */ + +#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ +#define MAX_MRG_PKT_BURST 16 /* Max burst for merge buffers. Set to 1 due to performance issue. */ +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* State of virtio device. */ +#define DEVICE_NOT_READY 0 +#define DEVICE_READY 1 +#define DEVICE_SAFE_REMOVE 2 + +/* Config_core_flag status definitions. */ +#define REQUEST_DEV_REMOVAL 1 +#define ACK_DEV_REMOVAL 0 + +/* Configurable number of RX/TX ring descriptors */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +#define INVALID_PORT_ID 0xFF + +/* Max number of devices. Limited by vmdq. */ +#define MAX_DEVICES 64 + +/* Size of buffers used for snprintfs. */ +#define MAX_PRINT_BUFF 6072 + + +/* Maximum long option length for option parsing. */ +#define MAX_LONG_OPT_SZ 64 + +/* Used to compare MAC addresses. */ +#define MAC_ADDR_CMP 0xFFFFFFFFFFFF + +/* mask of enabled ports */ +static uint32_t enabled_port_mask = 0; + +/*Number of switching cores enabled*/ +static uint32_t num_switching_cores = 0; + +/* number of devices/queues to support*/ +static uint32_t num_queues = 0; +uint32_t num_devices = 0; + +/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */ +static uint32_t enable_vm2vm = 1; +/* Enable stats. */ +static uint32_t enable_stats = 0; + +/* empty vmdq configuration structure. 
Filled in programatically */ +static const struct rte_eth_conf vmdq_conf_default = { + .rxmode = { + .mq_mode = ETH_MQ_RX_VMDQ_ONLY, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + /* + * It is necessary for 1G NIC such as I350, + * this fixes bug of ipv4 forwarding in guest can't + * forward pakets from one virtio dev to another virtio dev. + */ + .hw_vlan_strip = 1, /**< VLAN strip enabled. */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, + .rx_adv_conf = { + /* + * should be overridden separately in code with + * appropriate values + */ + .vmdq_rx_conf = { + .nb_queue_pools = ETH_8_POOLS, + .enable_default_pool = 0, + .default_pool = 0, + .nb_pool_maps = 0, + .pool_map = {{0, 0},}, + }, + }, +}; + +static unsigned lcore_ids[RTE_MAX_LCORE]; +static uint8_t ports[RTE_MAX_ETHPORTS]; +static unsigned num_ports = 0; /**< The number of ports specified in command line */ + +const uint16_t vlan_tags[] = { + 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, + 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, + 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, + 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, + 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, + 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, + 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, + 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, +}; + +/* ethernet addresses of ports */ +static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* heads for the main used and free linked lists for the data path. */ +static struct virtio_net_data_ll *ll_root_used = NULL; +static struct virtio_net_data_ll *ll_root_free = NULL; + +/* Array of data core structures containing information on individual core linked lists. 
*/ +static struct lcore_info lcore_info[RTE_MAX_LCORE]; + +/* Used for queueing bursts of TX packets. */ +struct mbuf_table { + unsigned len; + unsigned txq_id; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +/* TX queue for each data core. */ +struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE]; + +/* Vlan header struct used to insert vlan tags on TX. */ +struct vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +/* Header lengths. */ +#define VLAN_HLEN 4 +#define VLAN_ETH_HLEN 18 + +/* Per-device statistics struct */ +struct device_statistics { + uint64_t tx_total; + rte_atomic64_t rx_total; + uint64_t tx; + rte_atomic64_t rx; +} __rte_cache_aligned; +struct device_statistics dev_statistics[MAX_DEVICES]; + +/* + * Builds up the correct configuration for VMDQ VLAN pool map + * according to the pool & queue limits. + */ +static inline int +get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices) +{ + struct rte_eth_vmdq_rx_conf conf; + unsigned i; + + memset(&conf, 0, sizeof(conf)); + conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices; + conf.nb_pool_maps = num_devices; + + for (i = 0; i < conf.nb_pool_maps; i++) { + conf.pool_map[i].vlan_id = vlan_tags[ i ]; + conf.pool_map[i].pools = (1UL << i); + } + + (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf))); + (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf, + sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf))); + return 0; +} + +/* + * Validate the device number according to the max pool number gotten form dev_info + * If the device number is invalid, give the error message and return -1. + * Each device must have its own pool. 
+ */ +static inline int +validate_num_devices(uint32_t max_nb_devices) +{ + if (num_devices > max_nb_devices) { + RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n"); + return -1; + } + return 0; +} + +/* + * Initialises a given port using global settings and with the rx buffers + * coming from the mbuf_pool passed as parameter + */ +static inline int +port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_rxconf *rxconf; + struct rte_eth_conf port_conf; + uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count(); + const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT, tx_ring_size = RTE_TEST_TX_DESC_DEFAULT; + int retval; + uint16_t q; + + /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */ + rte_eth_dev_info_get (port, &dev_info); + + /*configure the number of supported virtio devices based on VMDQ limits */ + num_devices = dev_info.max_vmdq_pools; + num_queues = dev_info.max_rx_queues; + + retval = validate_num_devices(MAX_DEVICES); + if (retval < 0) + return retval; + + /* Get port configuration. */ + retval = get_eth_conf(&port_conf, num_devices); + if (retval < 0) + return retval; + + if (port >= rte_eth_dev_count()) return -1; + + rx_rings = (uint16_t)num_queues, + /* Configure ethernet device. */ + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval != 0) + return retval; + + rte_eth_dev_info_get(port, &dev_info); + rxconf = &dev_info.default_rxconf; + rxconf->rx_drop_en = 1; + /* Setup the queues. */ + for (q = 0; q < rx_rings; q ++) { + retval = rte_eth_rx_queue_setup(port, q, rx_ring_size, + rte_eth_dev_socket_id(port), rxconf, + mbuf_pool); + if (retval < 0) + return retval; + } + for (q = 0; q < tx_rings; q ++) { + retval = rte_eth_tx_queue_setup(port, q, tx_ring_size, + rte_eth_dev_socket_id(port), + NULL); + if (retval < 0) + return retval; + } + + /* Start the device. 
*/ + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]); + RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices); + RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port, + vmdq_ports_eth_addr[port].addr_bytes[0], + vmdq_ports_eth_addr[port].addr_bytes[1], + vmdq_ports_eth_addr[port].addr_bytes[2], + vmdq_ports_eth_addr[port].addr_bytes[3], + vmdq_ports_eth_addr[port].addr_bytes[4], + vmdq_ports_eth_addr[port].addr_bytes[5]); + + return 0; +} + +/* + * Parse the portmask provided at run time. + */ +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + errno = 0; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0)) + return -1; + + if (pm == 0) + return -1; + + return pm; + +} + +/* + * Parse num options at run time. + */ +static int +parse_num_opt(const char *q_arg, uint32_t max_valid_value) +{ + char *end = NULL; + unsigned long num; + + errno = 0; + + /* parse unsigned int string */ + num = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0)) + return -1; + + if (num > max_valid_value) + return -1; + + return num; + +} + +/* + * Display usage + */ +static void +us_vhost_usage(const char *prgname) +{ + RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK --vm2vm [0|1] --stats [0-N] --nb-devices ND\n" + " -p PORTMASK: Set mask for ports to be used by application\n" + " --vm2vm [0|1]: disable/enable(default) vm2vm comms\n" + " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n", + prgname); +} + +/* + * Parse the arguments given in the command line of the application. 
+ */ +static int +us_vhost_parse_args(int argc, char **argv) +{ + int opt, ret; + int option_index; + unsigned i; + const char *prgname = argv[0]; + static struct option long_option[] = { + {"vm2vm", required_argument, NULL, 0}, + {"stats", required_argument, NULL, 0}, + {NULL, 0, 0, 0} + }; + + /* Parse command line */ + while ((opt = getopt_long(argc, argv, "p:",long_option, &option_index)) != EOF) { + switch (opt) { + /* Portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n"); + us_vhost_usage(prgname); + return -1; + } + break; + + case 0: + /* Enable/disable vm2vm comms. */ + if (!strncmp(long_option[option_index].name, "vm2vm", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for vm2vm [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else { + enable_vm2vm = ret; + } + } + + /* Enable/disable stats. */ + if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for stats [0..N]\n"); + us_vhost_usage(prgname); + return -1; + } else { + enable_stats = ret; + } + } + break; + + /* Invalid option - print options. 
*/ + default: + us_vhost_usage(prgname); + return -1; + } + } + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (enabled_port_mask & (1 << i)) + ports[num_ports++] = (uint8_t)i; + } + + if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) { + RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u," + "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS); + return -1; + } + + return 0; +} + +/* + * Update the global var NUM_PORTS and array PORTS according to system ports number + * and return valid ports number + */ +static unsigned check_ports_num(unsigned nb_ports) +{ + unsigned valid_num_ports = num_ports; + unsigned portid; + + if (num_ports > nb_ports) { + RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n", + num_ports, nb_ports); + num_ports = nb_ports; + } + + for (portid = 0; portid < num_ports; portid ++) { + if (ports[portid] >= nb_ports) { + RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n", + ports[portid], (nb_ports - 1)); + ports[portid] = INVALID_PORT_ID; + valid_num_ports--; + } + } + return valid_num_ports; +} + +/* + * Macro to print out packet contents. Wrapped in debug define so that the + * data path is not effected when debug is disabled. 
+ */ +#ifdef DEBUG +#define PRINT_PACKET(device, addr, size, header) do { \ + char *pkt_addr = (char*)(addr); \ + unsigned int index; \ + char packet[MAX_PRINT_BUFF]; \ + \ + if ((header)) \ + snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \ + else \ + snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \ + for (index = 0; index < (size); index++) { \ + snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), \ + "%02hhx ", pkt_addr[index]); \ + } \ + snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), "\n"); \ + \ + LOG_DEBUG(VHOST_DATA, "%s", packet); \ +} while(0) +#else +#define PRINT_PACKET(device, addr, size, header) do{} while(0) +#endif + +/* + * Function to convert guest physical addresses to vhost virtual addresses. This + * is used to convert virtio buffer addresses. + */ +static inline uint64_t __attribute__((always_inline)) +gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa) +{ + struct virtio_memory_regions *region; + uint32_t regionidx; + uint64_t vhost_va = 0; + + for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { + region = &dev->mem->regions[regionidx]; + if ((guest_pa >= region->guest_phys_address) && + (guest_pa <= region->guest_phys_address_end)) { + vhost_va = region->address_offset + guest_pa; + break; + } + } + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| VVA %p\n", + dev->device_fh, (void*)(uintptr_t)guest_pa, (void*)(uintptr_t)vhost_va); + + return vhost_va; +} + +/* + * This function adds buffers to the virtio devices RX virtqueue. Buffers can + * be received from the physical port or from another virtio device. A packet + * count is returned to indicate the number of packets that were succesfully + * added to the RX queue. 
+ */ +static inline uint32_t __attribute__((always_inline)) +virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count) +{ + struct vhost_virtqueue *vq; + struct vring_desc *desc; + struct rte_mbuf *buff; + /* The virtio_hdr is initialised to 0. */ + struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0}; + uint64_t buff_addr = 0; + uint64_t buff_hdr_addr = 0; + uint32_t head[MAX_PKT_BURST], packet_len = 0; + uint32_t head_idx, packet_success = 0; + uint16_t avail_idx, res_cur_idx; + uint16_t res_base_idx, res_end_idx; + uint16_t free_entries; + uint8_t success = 0; + void *userdata; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh); + vq = dev->virtqueue_rx; + count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count; + /* As many data cores may want access to available buffers, they need to be reserved. */ + do { + + res_base_idx = vq->last_used_idx_res; + + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + + free_entries = (avail_idx - res_base_idx); + + /*check that we have enough buffers*/ + if (unlikely(count > free_entries)) + count = free_entries; + + if (count == 0) + return 0; + + res_end_idx = res_base_idx + count; + /* vq->last_used_idx_res is atomically updated. */ + success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx, + res_end_idx); + } while (unlikely(success == 0)); + res_cur_idx = res_base_idx; + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx); + + /* Prefetch available ring to retrieve indexes. */ + rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]); + + /* Retrieve all of the head indexes first to avoid caching issues. */ + for (head_idx = 0; head_idx < count; head_idx++) + head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)]; + + /*Prefetch descriptor index. 
*/ + rte_prefetch0(&vq->desc[head[packet_success]]); + + while (res_cur_idx != res_end_idx) { + /* Get descriptor from available ring */ + desc = &vq->desc[head[packet_success]]; + /* Prefetch descriptor address. */ + rte_prefetch0(desc); + + buff = pkts[packet_success]; + + /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */ + buff_addr = gpa_to_vva(dev, desc->addr); + /* Prefetch buffer address. */ + rte_prefetch0((void*)(uintptr_t)buff_addr); + + { + /* Copy virtio_hdr to packet and increment buffer address */ + buff_hdr_addr = buff_addr; + packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen; + + /* + * If the descriptors are chained the header and data are placed in + * separate buffers. + */ + if (desc->flags & VRING_DESC_F_NEXT) { + desc->len = vq->vhost_hlen; + desc = &vq->desc[desc->next]; + /* Buffer address translation. */ + buff_addr = gpa_to_vva(dev, desc->addr); + desc->len = rte_pktmbuf_data_len(buff); + } else { + buff_addr += vq->vhost_hlen; + desc->len = packet_len; + } + } + + /* Update used ring with desc information */ + vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success]; + vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len; + + /* Copy mbuf data to buffer */ + userdata = rte_pktmbuf_mtod(buff, void *); + rte_memcpy((void *)(uintptr_t)buff_addr, userdata, rte_pktmbuf_data_len(buff)); + + res_cur_idx++; + packet_success++; + + /* mergeable is disabled then a header is required per buffer. */ + rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen); + if (res_cur_idx < res_end_idx) { + /* Prefetch descriptor index. */ + rte_prefetch0(&vq->desc[head[packet_success]]); + } + } + + rte_compiler_barrier(); + + /* Wait until it's our turn to add our buffer to the used ring. 
*/ + while (unlikely(vq->last_used_idx != res_base_idx)) + rte_pause(); + + *(volatile uint16_t *)&vq->used->idx += count; + + vq->last_used_idx = res_end_idx; + + return count; +} + +/* + * Compares a packet destination MAC address to a device MAC address. + */ +static inline int __attribute__((always_inline)) +ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb) +{ + return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0; +} + +/* + * This function registers mac along with a + * vlan tag to a VMDQ. + */ +static int +link_vmdq(struct virtio_net *dev) +{ + int ret; + struct virtio_net_data_ll *dev_ll; + + dev_ll = ll_root_used; + + while (dev_ll != NULL) { + if ((dev != dev_ll->dev) && ether_addr_cmp(&dev->mac_address, &dev_ll->dev->mac_address)) { + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh); + return -1; + } + dev_ll = dev_ll->next; + } + + /* vlan_tag currently uses the device_id. */ + dev->vlan_tag = vlan_tags[dev->device_fh]; + dev->vmdq_rx_q = dev->device_fh * (num_queues/num_devices); + + /* Print out VMDQ registration info. */ + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n", + dev->device_fh, + dev->mac_address.addr_bytes[0], dev->mac_address.addr_bytes[1], + dev->mac_address.addr_bytes[2], dev->mac_address.addr_bytes[3], + dev->mac_address.addr_bytes[4], dev->mac_address.addr_bytes[5], + dev->vlan_tag); + + /* Register the MAC address. */ + ret = rte_eth_dev_mac_addr_add(ports[0], &dev->mac_address, (uint32_t)dev->device_fh); + if (ret) { + RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n", + dev->device_fh); + return -1; + } + + /* Enable stripping of the vlan tag as we handle routing. */ + rte_eth_dev_set_vlan_strip_on_queue(ports[0], dev->vmdq_rx_q, 1); + + rte_compiler_barrier(); + /* Set device as ready for RX. 
*/ + dev->ready = DEVICE_READY; + + return 0; +} + +/* + * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX + * queue before disabling RX on the device. + */ +static inline void +unlink_vmdq(struct virtio_net *dev) +{ + unsigned i = 0; + unsigned rx_count; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + + if (dev->ready == DEVICE_READY) { + /*clear MAC and VLAN settings*/ + rte_eth_dev_mac_addr_remove(ports[0], &dev->mac_address); + for (i = 0; i < 6; i++) + dev->mac_address.addr_bytes[i] = 0; + + dev->vlan_tag = 0; + + /*Clear out the receive buffers*/ + rx_count = rte_eth_rx_burst(ports[0], + (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); + + while (rx_count) { + for (i = 0; i < rx_count; i++) + rte_pktmbuf_free(pkts_burst[i]); + + rx_count = rte_eth_rx_burst(ports[0], + (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); + } + + dev->ready = DEVICE_NOT_READY; + } +} + +/* + * Check if the packet destination MAC address is for a local device. If so then put + * the packet on that devices RX queue. If not then return. + */ +static inline unsigned __attribute__((always_inline)) +virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m) +{ + struct virtio_net_data_ll *dev_ll; + struct ether_hdr *pkt_hdr; + uint64_t ret = 0; + + pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /*get the used devices list*/ + dev_ll = ll_root_used; + + while (dev_ll != NULL) { + if (likely(dev_ll->dev->ready == DEVICE_READY) && ether_addr_cmp(&(pkt_hdr->d_addr), + &dev_ll->dev->mac_address)) { + + /* Drop the packet if the TX packet is destined for the TX device. */ + if (dev_ll->dev->device_fh == dev->device_fh) { + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: Source and destination MAC addresses are the same. 
Dropping packet.\n", + dev_ll->dev->device_fh); + return 0; + } + + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is local\n", dev_ll->dev->device_fh); + + if (dev_ll->dev->remove) { + /*drop the packet if the device is marked for removal*/ + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Device is marked for removal\n", dev_ll->dev->device_fh); + } else { + /*send the packet to the local virtio device*/ + ret = virtio_dev_rx(dev_ll->dev, &m, 1); + if (enable_stats) { + rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx_total, 1); + rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx, ret); + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx += ret; + } + } + + return 0; + } + dev_ll = dev_ll->next; + } + + return -1; +} + +/* + * This function routes the TX packet to the correct interface. This may be a local device + * or the physical port. + */ +static inline void __attribute__((always_inline)) +virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, uint16_t vlan_tag) +{ + struct mbuf_table *tx_q; + struct vlan_ethhdr *vlan_hdr; + struct rte_mbuf **m_table; + struct rte_mbuf *mbuf; + unsigned len, ret; + const uint16_t lcore_id = rte_lcore_id(); + + /*check if destination is local VM*/ + if (enable_vm2vm && (virtio_tx_local(dev, m) == 0)) { + return; + } + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n", dev->device_fh); + + /*Add packet to the port tx queue*/ + tx_q = &lcore_tx_queue[lcore_id]; + len = tx_q->len; + + /* Allocate an mbuf and populate the structure. */ + mbuf = rte_pktmbuf_alloc(mbuf_pool); + if(!mbuf) + return; + + mbuf->data_len = m->data_len + VLAN_HLEN; + mbuf->pkt_len = mbuf->data_len; + + /* Copy ethernet header to mbuf. */ + rte_memcpy(rte_pktmbuf_mtod(mbuf, void*), + rte_pktmbuf_mtod(m, const void*), ETH_HLEN); + + + /* Setup vlan header. 
Bytes need to be re-ordered for network with htons()*/ + vlan_hdr = rte_pktmbuf_mtod(mbuf, struct vlan_ethhdr *); + vlan_hdr->h_vlan_encapsulated_proto = vlan_hdr->h_vlan_proto; + vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q); + vlan_hdr->h_vlan_TCI = htons(vlan_tag); + + /* Copy the remaining packet contents to the mbuf. */ + rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, VLAN_ETH_HLEN), + rte_pktmbuf_mtod_offset(m, const void *, ETH_HLEN), + (m->data_len - ETH_HLEN)); + tx_q->m_table[len] = mbuf; + len++; + if (enable_stats) { + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx++; + } + + if (unlikely(len == MAX_PKT_BURST)) { + m_table = (struct rte_mbuf **)tx_q->m_table; + ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len); + /* Free any buffers not handled by TX and update the port stats. */ + if (unlikely(ret < len)) { + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < len); + } + + len = 0; + } + + tx_q->len = len; + return; +} + +static inline void __attribute__((always_inline)) +virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool) +{ + struct rte_mbuf m; + struct vhost_virtqueue *vq; + struct vring_desc *desc; + uint64_t buff_addr = 0; + uint32_t head[MAX_PKT_BURST]; + uint32_t used_idx; + uint32_t i; + uint16_t free_entries, packet_success = 0; + uint16_t avail_idx; + + vq = dev->virtqueue_tx; + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + + /* If there are no available buffers then return. */ + if (vq->last_used_idx == avail_idx) + return; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh); + + /* Prefetch available ring to retrieve head indexes. */ + rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]); + + /*get the number of free entries in the ring*/ + free_entries = avail_idx - vq->last_used_idx; + free_entries = unlikely(free_entries < MAX_PKT_BURST) ? 
free_entries : MAX_PKT_BURST; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries); + /* Retrieve all of the head indexes first to avoid caching issues. */ + for (i = 0; i < free_entries; i++) + head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)]; + + /* Prefetch descriptor index. */ + rte_prefetch0(&vq->desc[head[packet_success]]); + + while (packet_success < free_entries) { + desc = &vq->desc[head[packet_success]]; + /* Prefetch descriptor address. */ + rte_prefetch0(desc); + + if (packet_success < (free_entries - 1)) { + /* Prefetch descriptor index. */ + rte_prefetch0(&vq->desc[head[packet_success+1]]); + } + + /* Update used index buffer information. */ + used_idx = vq->last_used_idx & (vq->size - 1); + vq->used->ring[used_idx].id = head[packet_success]; + vq->used->ring[used_idx].len = 0; + + /* Discard first buffer as it is the virtio header */ + desc = &vq->desc[desc->next]; + + /* Buffer address translation. */ + buff_addr = gpa_to_vva(dev, desc->addr); + /* Prefetch buffer address. */ + rte_prefetch0((void*)(uintptr_t)buff_addr); + + /* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */ + m.data_len = desc->len; + m.data_off = 0; + m.nb_segs = 1; + + virtio_tx_route(dev, &m, mbuf_pool, 0); + + vq->last_used_idx++; + packet_success++; + } + + rte_compiler_barrier(); + vq->used->idx += packet_success; + /* Kick guest if required. */ +} + +/* + * This function is called by each data core. It handles all RX/TX registered with the + * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared + * with all devices in the main linked list. 
+ */ +static int +switch_worker(__attribute__((unused)) void *arg) +{ + struct rte_mempool *mbuf_pool = arg; + struct virtio_net *dev = NULL; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct virtio_net_data_ll *dev_ll; + struct mbuf_table *tx_q; + volatile struct lcore_ll_info *lcore_ll; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0; + unsigned ret, i; + const uint16_t lcore_id = rte_lcore_id(); + const uint16_t num_cores = (uint16_t)rte_lcore_count(); + uint16_t rx_count = 0; + + RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started \n", lcore_id); + lcore_ll = lcore_info[lcore_id].lcore_ll; + prev_tsc = 0; + + tx_q = &lcore_tx_queue[lcore_id]; + for (i = 0; i < num_cores; i ++) { + if (lcore_ids[i] == lcore_id) { + tx_q->txq_id = i; + break; + } + } + + while(1) { + cur_tsc = rte_rdtsc(); + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + if (tx_q->len) { + LOG_DEBUG(VHOST_DATA, "TX queue drained after timeout with burst size %u \n", tx_q->len); + + /*Tx any packets in the queue*/ + ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, + (struct rte_mbuf **)tx_q->m_table, + (uint16_t)tx_q->len); + if (unlikely(ret < tx_q->len)) { + do { + rte_pktmbuf_free(tx_q->m_table[ret]); + } while (++ret < tx_q->len); + } + + tx_q->len = 0; + } + + prev_tsc = cur_tsc; + + } + + /* + * Inform the configuration core that we have exited the linked list and that no devices are + * in use if requested. 
+ */ + if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL) + lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; + + /* + * Process devices + */ + dev_ll = lcore_ll->ll_root_used; + + while (dev_ll != NULL) { + /*get virtio device ID*/ + dev = dev_ll->dev; + + if (unlikely(dev->remove)) { + dev_ll = dev_ll->next; + unlink_vmdq(dev); + dev->ready = DEVICE_SAFE_REMOVE; + continue; + } + if (likely(dev->ready == DEVICE_READY)) { + /*Handle guest RX*/ + rx_count = rte_eth_rx_burst(ports[0], + (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); + + if (rx_count) { + ret_count = virtio_dev_rx(dev, pkts_burst, rx_count); + if (enable_stats) { + rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx_total, rx_count); + rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx, ret_count); + } + while (likely(rx_count)) { + rx_count--; + rte_pktmbuf_free_seg(pkts_burst[rx_count]); + } + + } + } + + if (likely(!dev->remove)) + /*Handle guest TX*/ + virtio_dev_tx(dev, mbuf_pool); + + /*move to the next device in the list*/ + dev_ll = dev_ll->next; + } + } + + return 0; +} + +/* + * Add an entry to a used linked list. A free entry must first be found in the free linked list + * using get_data_ll_free_entry(); + */ +static void +add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev) +{ + struct virtio_net_data_ll *ll = *ll_root_addr; + + /* Set next as NULL and use a compiler barrier to avoid reordering. */ + ll_dev->next = NULL; + rte_compiler_barrier(); + + /* If ll == NULL then this is the first device. */ + if (ll) { + /* Increment to the tail of the linked list. */ + while ((ll->next != NULL) ) + ll = ll->next; + + ll->next = ll_dev; + } else { + *ll_root_addr = ll_dev; + } +} + +/* + * Remove an entry from a used linked list. The entry must then be added to the free linked list + * using put_data_ll_free_entry(). 
+ */ +static void +rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev, struct virtio_net_data_ll *ll_dev_last) +{ + struct virtio_net_data_ll *ll = *ll_root_addr; + + if (ll_dev == ll) + *ll_root_addr = ll_dev->next; + else + ll_dev_last->next = ll_dev->next; +} + +/* + * Find and return an entry from the free linked list. + */ +static struct virtio_net_data_ll * +get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr) +{ + struct virtio_net_data_ll *ll_free = *ll_root_addr; + struct virtio_net_data_ll *ll_dev; + + if (ll_free == NULL) + return NULL; + + ll_dev = ll_free; + *ll_root_addr = ll_free->next; + + return ll_dev; +} + +/* + * Place an entry back on to the free linked list. + */ +static void +put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev) +{ + struct virtio_net_data_ll *ll_free = *ll_root_addr; + + ll_dev->next = ll_free; + *ll_root_addr = ll_dev; +} + +/* + * Creates a linked list of a given size. + */ +static struct virtio_net_data_ll * +alloc_data_ll(uint32_t size) +{ + struct virtio_net_data_ll *ll_new; + uint32_t i; + + /* Malloc and then chain the linked list. */ + ll_new = malloc(size * sizeof(struct virtio_net_data_ll)); + if (ll_new == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for ll_new.\n"); + return NULL; + } + + for (i = 0; i < size - 1; i++) { + ll_new[i].dev = NULL; + ll_new[i].next = &ll_new[i+1]; + } + ll_new[i].next = NULL; + + return ll_new; +} + +/* + * Create the main linked list along with each individual cores linked list. A used and a free list + * are created to manage entries. 
+ */ +static int +init_data_ll (void) +{ + int lcore; + + RTE_LCORE_FOREACH_SLAVE(lcore) { + lcore_info[lcore].lcore_ll = malloc(sizeof(struct lcore_ll_info)); + if (lcore_info[lcore].lcore_ll == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for lcore_ll.\n"); + return -1; + } + + lcore_info[lcore].lcore_ll->device_num = 0; + lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; + lcore_info[lcore].lcore_ll->ll_root_used = NULL; + if (num_devices % num_switching_cores) + lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll((num_devices / num_switching_cores) + 1); + else + lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll(num_devices / num_switching_cores); + } + + /* Allocate devices up to a maximum of MAX_DEVICES. */ + ll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES)); + + return 0; +} +/* + * Remove a device from the specific data core linked list and from the main linked list. The + * rx/tx thread must be set the flag to indicate that it is safe to remove the device. + * used. + */ +static void +destroy_device (volatile struct virtio_net *dev) +{ + struct virtio_net_data_ll *ll_lcore_dev_cur; + struct virtio_net_data_ll *ll_main_dev_cur; + struct virtio_net_data_ll *ll_lcore_dev_last = NULL; + struct virtio_net_data_ll *ll_main_dev_last = NULL; + int lcore; + + dev->flags &= ~VIRTIO_DEV_RUNNING; + + /*set the remove flag. 
*/ + dev->remove = 1; + + while(dev->ready != DEVICE_SAFE_REMOVE) { + rte_pause(); + } + + /* Search for entry to be removed from lcore ll */ + ll_lcore_dev_cur = lcore_info[dev->coreid].lcore_ll->ll_root_used; + while (ll_lcore_dev_cur != NULL) { + if (ll_lcore_dev_cur->dev == dev) { + break; + } else { + ll_lcore_dev_last = ll_lcore_dev_cur; + ll_lcore_dev_cur = ll_lcore_dev_cur->next; + } + } + + /* Search for entry to be removed from main ll */ + ll_main_dev_cur = ll_root_used; + ll_main_dev_last = NULL; + while (ll_main_dev_cur != NULL) { + if (ll_main_dev_cur->dev == dev) { + break; + } else { + ll_main_dev_last = ll_main_dev_cur; + ll_main_dev_cur = ll_main_dev_cur->next; + } + } + + if (ll_lcore_dev_cur == NULL || ll_main_dev_cur == NULL) { + RTE_LOG(ERR, XENHOST, "%s: could find device in per_cpu list or main_list\n", __func__); + return; + } + + /* Remove entries from the lcore and main ll. */ + rm_data_ll_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last); + rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last); + + /* Set the dev_removal_flag on each lcore. */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL; + } + + /* + * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that + * they can no longer access the device removed from the linked lists and that the devices + * are no longer in use. + */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) { + rte_pause(); + } + } + + /* Add the entries back to the lcore and main free ll.*/ + put_data_ll_free_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur); + put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur); + + /* Decrement number of device on the lcore. 
*/ + lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->device_num--; + + RTE_LOG(INFO, VHOST_DATA, " #####(%"PRIu64") Device has been removed from data core\n", dev->device_fh); +} + +/* + * A new device is added to a data core. First the device is added to the main linked list + * and the allocated to a specific data core. + */ +static int +new_device (struct virtio_net *dev) +{ + struct virtio_net_data_ll *ll_dev; + int lcore, core_add = 0; + uint32_t device_num_min = num_devices; + + /* Add device to main ll */ + ll_dev = get_data_ll_free_entry(&ll_root_free); + if (ll_dev == NULL) { + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in linked list. Device limit " + "of %d devices per core has been reached\n", + dev->device_fh, num_devices); + return -1; + } + ll_dev->dev = dev; + add_data_ll_entry(&ll_root_used, ll_dev); + + /*reset ready flag*/ + dev->ready = DEVICE_NOT_READY; + dev->remove = 0; + + /* Find a suitable lcore to add the device. */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + if (lcore_info[lcore].lcore_ll->device_num < device_num_min) { + device_num_min = lcore_info[lcore].lcore_ll->device_num; + core_add = lcore; + } + } + /* Add device to lcore ll */ + ll_dev->dev->coreid = core_add; + ll_dev = get_data_ll_free_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_free); + if (ll_dev == NULL) { + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh); + destroy_device(dev); + return -1; + } + ll_dev->dev = dev; + add_data_ll_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_used, ll_dev); + + /* Initialize device stats */ + memset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics)); + + lcore_info[ll_dev->dev->coreid].lcore_ll->device_num++; + dev->flags |= VIRTIO_DEV_RUNNING; + + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, dev->coreid); + + link_vmdq(dev); + + return 0; +} + +/* + * These callback allow devices to be 
added to the data core when configuration + * has been fully complete. + */ +static const struct virtio_net_device_ops virtio_net_device_ops = +{ + .new_device = new_device, + .destroy_device = destroy_device, +}; + +/* + * This is a thread will wake up after a period to print stats if the user has + * enabled them. + */ +static void +print_stats(void) +{ + struct virtio_net_data_ll *dev_ll; + uint64_t tx_dropped, rx_dropped; + uint64_t tx, tx_total, rx, rx_total; + uint32_t device_fh; + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' }; + + while(1) { + sleep(enable_stats); + + /* Clear screen and move to top left */ + printf("%s%s", clr, top_left); + + printf("\nDevice statistics ===================================="); + + dev_ll = ll_root_used; + while (dev_ll != NULL) { + device_fh = (uint32_t)dev_ll->dev->device_fh; + tx_total = dev_statistics[device_fh].tx_total; + tx = dev_statistics[device_fh].tx; + tx_dropped = tx_total - tx; + rx_total = rte_atomic64_read(&dev_statistics[device_fh].rx_total); + rx = rte_atomic64_read(&dev_statistics[device_fh].rx); + rx_dropped = rx_total - rx; + + printf("\nStatistics for device %"PRIu32" ------------------------------" + "\nTX total: %"PRIu64"" + "\nTX dropped: %"PRIu64"" + "\nTX successful: %"PRIu64"" + "\nRX total: %"PRIu64"" + "\nRX dropped: %"PRIu64"" + "\nRX successful: %"PRIu64"", + device_fh, + tx_total, + tx_dropped, + tx, + rx_total, + rx_dropped, + rx); + + dev_ll = dev_ll->next; + } + printf("\n======================================================\n"); + } +} + + +int init_virtio_net(struct virtio_net_device_ops const * const ops); + +/* + * Main function, does initialisation and calls the per-lcore functions. The CUSE + * device is also registered here to handle the IOCTLs. 
+ */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool; + unsigned lcore_id, core_id = 0; + unsigned nb_ports, valid_num_ports; + int ret; + uint8_t portid; + static pthread_t tid; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + /* parse app arguments */ + ret = us_vhost_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid argument\n"); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++) + if (rte_lcore_is_enabled(lcore_id)) + lcore_ids[core_id ++] = lcore_id; + + if (rte_lcore_count() > RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE,"Not enough cores\n"); + + /*set the number of swithcing cores available*/ + num_switching_cores = rte_lcore_count()-1; + + /* Get the number of physical ports. */ + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* + * Update the global var NUM_PORTS and global array PORTS + * and get value of var VALID_NUM_PORTS according to system ports number + */ + valid_num_ports = check_ports_num(nb_ports); + + if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) { + RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u," + "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS); + return -1; + } + + /* Create the mbuf pool. */ + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", + NUM_MBUFS_PER_PORT * valid_num_ports, MBUF_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* Set log level. 
*/ + rte_set_log_level(LOG_LEVEL); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + RTE_LOG(INFO, VHOST_PORT, "Skipping disabled port %d\n", portid); + continue; + } + if (port_init(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n"); + } + + /* Initialise all linked lists. */ + if (init_data_ll() == -1) + rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n"); + + /* Initialize device stats */ + memset(&dev_statistics, 0, sizeof(dev_statistics)); + + /* Enable stats if the user option is set. */ + if (enable_stats) { + ret = pthread_create(&tid, NULL, (void *)print_stats, NULL); + if (ret != 0) + rte_exit(EXIT_FAILURE, + "Cannot create print-stats thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-xen-stats"); + ret = rte_thread_setname(tid, thread_name); + if (ret != 0) + RTE_LOG(ERR, VHOST_CONFIG, + "Cannot set print-stats name\n"); + } + + /* Launch all data cores. */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(switch_worker, mbuf_pool, lcore_id); + } + + init_virtio_xen(&virtio_net_device_ops); + + virtio_monitor_loop(); + return 0; +} diff --git a/examples/vhost_xen/main.h b/examples/vhost_xen/main.h new file mode 100644 index 00000000..481572e6 --- /dev/null +++ b/examples/vhost_xen/main.h @@ -0,0 +1,77 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +//#define DEBUG + +#ifdef DEBUG +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) \ + RTE_LOG(DEBUG, log_type, fmt, ##args) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...) do{} while(0) +#endif + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER2 +#define RTE_LOGTYPE_VHOST_PORT RTE_LOGTYPE_USER3 + +/* + * Device linked list structure for data path. + */ +struct virtio_net_data_ll +{ + struct virtio_net *dev; /* Pointer to device created by configuration core. */ + struct virtio_net_data_ll *next; /* Pointer to next device in linked list. 
*/ +}; + +/* + * Structure containing data core specific information. + */ +struct lcore_ll_info +{ + struct virtio_net_data_ll *ll_root_free; /* Pointer to head in free linked list. */ + struct virtio_net_data_ll *ll_root_used; /* Pointer to head of used linked list. */ + uint32_t device_num; /* Number of devices on lcore. */ + volatile uint8_t dev_removal_flag; /* Flag to synchronize device removal. */ +}; + +struct lcore_info +{ + struct lcore_ll_info *lcore_ll; /* Pointer to data core specific lcore_ll_info struct */ +}; +#endif /* _MAIN_H_ */ diff --git a/examples/vhost_xen/vhost_monitor.c b/examples/vhost_xen/vhost_monitor.c new file mode 100644 index 00000000..fb9606bf --- /dev/null +++ b/examples/vhost_xen/vhost_monitor.c @@ -0,0 +1,595 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <dirent.h> +#include <unistd.h> +#include <sys/eventfd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <xen/xen-compat.h> +#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200 +#include <xs.h> +#else +#include <xenstore.h> +#endif +#include <linux/virtio_ring.h> +#include <linux/virtio_pci.h> +#include <linux/virtio_net.h> + +#include <rte_ethdev.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_string_fns.h> + +#include "virtio-net.h" +#include "xen_vhost.h" + +struct virtio_watch { + struct xs_handle *xs; + int watch_fd; +}; + + +/* device ops to add/remove device to/from data core. */ +static struct virtio_net_device_ops const *notify_ops; + +/* root address of the linked list in the configuration core. */ +static struct virtio_net_config_ll *ll_root = NULL; + +/* root address of VM. 
*/ +static struct xen_guestlist guest_root; + +static struct virtio_watch watch; + +static void +vq_vring_init(struct vhost_virtqueue *vq, unsigned int num, uint8_t *p, + unsigned long align) +{ + vq->size = num; + vq->desc = (struct vring_desc *) p; + vq->avail = (struct vring_avail *) (p + + num * sizeof(struct vring_desc)); + vq->used = (void *) + RTE_ALIGN_CEIL( (uintptr_t)(&vq->avail->ring[num]), align); + +} + +static int +init_watch(void) +{ + struct xs_handle *xs; + int ret; + int fd; + + /* get a connection to the daemon */ + xs = xs_daemon_open(); + if (xs == NULL) { + RTE_LOG(ERR, XENHOST, "xs_daemon_open failed\n"); + return -1; + } + + ret = xs_watch(xs, "/local/domain", "mytoken"); + if (ret == 0) { + RTE_LOG(ERR, XENHOST, "%s: xs_watch failed\n", __func__); + xs_daemon_close(xs); + return -1; + } + + /* We are notified of read availability on the watch via the file descriptor. */ + fd = xs_fileno(xs); + watch.xs = xs; + watch.watch_fd = fd; + + TAILQ_INIT(&guest_root); + return 0; +} + +static struct xen_guest * +get_xen_guest(int dom_id) +{ + struct xen_guest *guest = NULL; + + TAILQ_FOREACH(guest, &guest_root, next) { + if(guest->dom_id == dom_id) + return guest; + } + + return NULL; +} + + +static struct xen_guest * +add_xen_guest(int32_t dom_id) +{ + struct xen_guest *guest = NULL; + + if ((guest = get_xen_guest(dom_id)) != NULL) + return guest; + + guest = calloc(1, sizeof(struct xen_guest)); + if (guest) { + RTE_LOG(ERR, XENHOST, " %s: return newly created guest with %d rings\n", __func__, guest->vring_num); + TAILQ_INSERT_TAIL(&guest_root, guest, next); + guest->dom_id = dom_id; + } + + return guest; +} + +static void +cleanup_device(struct virtio_net_config_ll *ll_dev) +{ + if (ll_dev == NULL) + return; + if (ll_dev->dev.virtqueue_rx) { + rte_free(ll_dev->dev.virtqueue_rx); + ll_dev->dev.virtqueue_rx = NULL; + } + if (ll_dev->dev.virtqueue_tx) { + rte_free(ll_dev->dev.virtqueue_tx); + ll_dev->dev.virtqueue_tx = NULL; + } + free(ll_dev); +} + 
+/* + * Add entry containing a device to the device configuration linked list. + */ +static void +add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev) +{ + struct virtio_net_config_ll *ll_dev = ll_root; + + /* If ll_dev == NULL then this is the first device so go to else */ + if (ll_dev) { + /* If the 1st device_id != 0 then we insert our device here. */ + if (ll_dev->dev.device_fh != 0) { + new_ll_dev->dev.device_fh = 0; + new_ll_dev->next = ll_dev; + ll_root = new_ll_dev; + } else { + /* increment through the ll until we find un unused device_id, + * insert the device at that entry + */ + while ((ll_dev->next != NULL) && (ll_dev->dev.device_fh == (ll_dev->next->dev.device_fh - 1))) + ll_dev = ll_dev->next; + + new_ll_dev->dev.device_fh = ll_dev->dev.device_fh + 1; + new_ll_dev->next = ll_dev->next; + ll_dev->next = new_ll_dev; + } + } else { + ll_root = new_ll_dev; + ll_root->dev.device_fh = 0; + } +} + + +/* + * Remove an entry from the device configuration linked list. + */ +static struct virtio_net_config_ll * +rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct virtio_net_config_ll *ll_dev_last) +{ + /* First remove the device and then clean it up. */ + if (ll_dev == ll_root) { + ll_root = ll_dev->next; + cleanup_device(ll_dev); + return ll_root; + } else { + ll_dev_last->next = ll_dev->next; + cleanup_device(ll_dev); + return ll_dev_last->next; + } +} + +/* + * Retrieves an entry from the devices configuration linked list. + */ +static struct virtio_net_config_ll * +get_config_ll_entry(unsigned int virtio_idx, unsigned int dom_id) +{ + struct virtio_net_config_ll *ll_dev = ll_root; + + /* Loop through linked list until the dom_id is found. */ + while (ll_dev != NULL) { + if (ll_dev->dev.dom_id == dom_id && ll_dev->dev.virtio_idx == virtio_idx) + return ll_dev; + ll_dev = ll_dev->next; + } + + return NULL; +} + +/* + * Initialise all variables in device structure. 
+ */ +static void +init_dev(struct virtio_net *dev) +{ + RTE_SET_USED(dev); +} + + +static struct +virtio_net_config_ll *new_device(unsigned int virtio_idx, struct xen_guest *guest) +{ + struct virtio_net_config_ll *new_ll_dev; + struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx; + size_t size, vq_ring_size, vq_size = VQ_DESC_NUM; + void *vq_ring_virt_mem; + uint64_t gpa; + uint32_t i; + + /* Setup device and virtqueues. */ + new_ll_dev = calloc(1, sizeof(struct virtio_net_config_ll)); + virtqueue_rx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE); + virtqueue_tx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE); + if (new_ll_dev == NULL || virtqueue_rx == NULL || virtqueue_tx == NULL) + goto err; + + new_ll_dev->dev.virtqueue_rx = virtqueue_rx; + new_ll_dev->dev.virtqueue_tx = virtqueue_tx; + new_ll_dev->dev.dom_id = guest->dom_id; + new_ll_dev->dev.virtio_idx = virtio_idx; + /* Initialise device and virtqueues. */ + init_dev(&new_ll_dev->dev); + + size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); + vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); + (void)vq_ring_size; + + vq_ring_virt_mem = guest->vring[virtio_idx].rxvring_addr; + vq_vring_init(virtqueue_rx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN); + virtqueue_rx->size = vq_size; + virtqueue_rx->vhost_hlen = sizeof(struct virtio_net_hdr); + + vq_ring_virt_mem = guest->vring[virtio_idx].txvring_addr; + vq_vring_init(virtqueue_tx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN); + virtqueue_tx->size = vq_size; + memcpy(&new_ll_dev->dev.mac_address, &guest->vring[virtio_idx].addr, sizeof(struct ether_addr)); + + /* virtio_memory has to be one per domid */ + new_ll_dev->dev.mem = malloc(sizeof(struct virtio_memory) + sizeof(struct virtio_memory_regions) * MAX_XENVIRT_MEMPOOL); + new_ll_dev->dev.mem->nregions = guest->pool_num; + for (i = 0; i < guest->pool_num; i++) { + gpa = new_ll_dev->dev.mem->regions[i].guest_phys_address = + 
(uint64_t)((uintptr_t)guest->mempool[i].gva); + new_ll_dev->dev.mem->regions[i].guest_phys_address_end = + gpa + guest->mempool[i].mempfn_num * getpagesize(); + new_ll_dev->dev.mem->regions[i].address_offset = + (uint64_t)((uintptr_t)guest->mempool[i].hva - + (uintptr_t)gpa); + } + + new_ll_dev->next = NULL; + + /* Add entry to device configuration linked list. */ + add_config_ll_entry(new_ll_dev); + return new_ll_dev; +err: + free(new_ll_dev); + rte_free(virtqueue_rx); + rte_free(virtqueue_tx); + + return NULL; +} + +static void +destroy_guest(struct xen_guest *guest) +{ + uint32_t i; + + for (i = 0; i < guest->vring_num; i++) + cleanup_vring(&guest->vring[i]); + /* clean mempool */ + for (i = 0; i < guest->pool_num; i++) + cleanup_mempool(&guest->mempool[i]); + free(guest); + + return; +} + +/* + * This function will cleanup the device and remove it from device configuration linked list. + */ +static void +destroy_device(unsigned int virtio_idx, unsigned int dom_id) +{ + struct virtio_net_config_ll *ll_dev_cur_ctx, *ll_dev_last = NULL; + struct virtio_net_config_ll *ll_dev_cur = ll_root; + + /* clean virtio device */ + struct xen_guest *guest = NULL; + guest = get_xen_guest(dom_id); + if (guest == NULL) + return; + + /* Find the linked list entry for the device to be removed. */ + ll_dev_cur_ctx = get_config_ll_entry(virtio_idx, dom_id); + while (ll_dev_cur != NULL) { + /* If the device is found or a device that doesn't exist is found then it is removed. 
*/ + if (ll_dev_cur == ll_dev_cur_ctx) { + if ((ll_dev_cur->dev.flags & VIRTIO_DEV_RUNNING)) + notify_ops->destroy_device(&(ll_dev_cur->dev)); + ll_dev_cur = rm_config_ll_entry(ll_dev_cur, ll_dev_last); + } else { + ll_dev_last = ll_dev_cur; + ll_dev_cur = ll_dev_cur->next; + } + } + RTE_LOG(INFO, XENHOST, " %s guest:%p vring:%p rxvring:%p txvring:%p flag:%p\n", + __func__, guest, &guest->vring[virtio_idx], guest->vring[virtio_idx].rxvring_addr, guest->vring[virtio_idx].txvring_addr, guest->vring[virtio_idx].flag); + cleanup_vring(&guest->vring[virtio_idx]); + guest->vring[virtio_idx].removed = 1; + guest->vring_num -= 1; +} + + + + +static void +watch_unmap_event(void) +{ + int i; + struct xen_guest *guest = NULL; + bool remove_request; + + TAILQ_FOREACH(guest, &guest_root, next) { + for (i = 0; i < MAX_VIRTIO; i++) { + if (guest->vring[i].dom_id && guest->vring[i].removed == 0 && *guest->vring[i].flag == 0) { + RTE_LOG(INFO, XENHOST, "\n\n"); + RTE_LOG(INFO, XENHOST, " #####%s: (%d, %d) to be removed\n", + __func__, + guest->vring[i].dom_id, + i); + destroy_device(i, guest->dom_id); + RTE_LOG(INFO, XENHOST, " %s: DOM %u, vring num: %d\n", + __func__, + guest->dom_id, + guest->vring_num); + } + } + } + +_find_next_remove: + guest = NULL; + remove_request = false; + TAILQ_FOREACH(guest, &guest_root, next) { + if (guest->vring_num == 0) { + remove_request = true; + break; + } + } + if (remove_request == true) { + TAILQ_REMOVE(&guest_root, guest, next); + RTE_LOG(INFO, XENHOST, " #####%s: destroy guest (%d)\n", __func__, guest->dom_id); + destroy_guest(guest); + goto _find_next_remove; + } + return; +} + +/* + * OK, if the guest starts first, it is ok. + * if host starts first, it is ok. + * if guest starts, and has run for sometime, and host stops and restarts, + * then last_used_idx 0? how to solve this. 
*/ + +static void virtio_init(void) +{ + uint32_t len, e_num; + uint32_t i,j; + char **dom; + char *status; + int dom_id; + char path[PATH_MAX]; + char node[PATH_MAX]; + xs_transaction_t th; + struct xen_guest *guest; + struct virtio_net_config_ll *net_config; + char *end; + int val; + + /* init env for watch the node */ + if (init_watch() < 0) + return; + + dom = xs_directory(watch.xs, XBT_NULL, "/local/domain", &e_num); + + for (i = 0; i < e_num; i++) { + errno = 0; + dom_id = strtol(dom[i], &end, 0); + if (errno != 0 || end == NULL || dom_id == 0) + continue; + + for (j = 0; j < RTE_MAX_ETHPORTS; j++) { + snprintf(node, PATH_MAX, "%s%d", VIRTIO_START, j); + snprintf(path, PATH_MAX, XEN_VM_NODE_FMT, + dom_id, node); + + th = xs_transaction_start(watch.xs); + status = xs_read(watch.xs, th, path, &len); + xs_transaction_end(watch.xs, th, false); + + if (status == NULL) + break; + + /* if there's any valid virtio device */ + errno = 0; + val = strtol(status, &end, 0); + if (errno != 0 || end == NULL || dom_id == 0) + val = 0; + if (val == 1) { + guest = add_xen_guest(dom_id); + if (guest == NULL) + continue; + RTE_LOG(INFO, XENHOST, " there's a new virtio existed, new a virtio device\n\n"); + + RTE_LOG(INFO, XENHOST, " parse_vringnode dom_id %d virtioidx %d\n",dom_id,j); + if (parse_vringnode(guest, j)) { + RTE_LOG(ERR, XENHOST, " there is invalid information in xenstore\n"); + TAILQ_REMOVE(&guest_root, guest, next); + destroy_guest(guest); + + continue; + } + + /*if pool_num > 0, then mempool has already been parsed*/ + if (guest->pool_num == 0 && parse_mempoolnode(guest)) { + RTE_LOG(ERR, XENHOST, " there is error information in xenstore\n"); + TAILQ_REMOVE(&guest_root, guest, next); + destroy_guest(guest); + continue; + } + + net_config = new_device(j, guest); + /* every thing is ready now, added into data core */ + notify_ops->new_device(&net_config->dev); + } + } + } + + free(dom); + return; +} + +void +virtio_monitor_loop(void) +{ + char **vec; + 
xs_transaction_t th; + char *buf; + unsigned int len; + unsigned int dom_id; + uint32_t virtio_idx; + struct xen_guest *guest; + struct virtio_net_config_ll *net_config; + enum fieldnames { + FLD_NULL = 0, + FLD_LOCAL, + FLD_DOMAIN, + FLD_ID, + FLD_CONTROL, + FLD_DPDK, + FLD_NODE, + _NUM_FLD + }; + char *str_fld[_NUM_FLD]; + char *str; + char *end; + + virtio_init(); + while (1) { + watch_unmap_event(); + + usleep(50); + vec = xs_check_watch(watch.xs); + + if (vec == NULL) + continue; + + th = xs_transaction_start(watch.xs); + + buf = xs_read(watch.xs, th, vec[XS_WATCH_PATH],&len); + xs_transaction_end(watch.xs, th, false); + + if (buf) { + /* theres' some node for vhost existed */ + if (rte_strsplit(vec[XS_WATCH_PATH], strnlen(vec[XS_WATCH_PATH], PATH_MAX), + str_fld, _NUM_FLD, '/') == _NUM_FLD) { + if (strstr(str_fld[FLD_NODE], VIRTIO_START)) { + errno = 0; + str = str_fld[FLD_ID]; + dom_id = strtoul(str, &end, 0); + if (errno != 0 || end == NULL || end == str ) { + RTE_LOG(INFO, XENHOST, "invalid domain id\n"); + continue; + } + + errno = 0; + str = str_fld[FLD_NODE] + sizeof(VIRTIO_START) - 1; + virtio_idx = strtoul(str, &end, 0); + if (errno != 0 || end == NULL || end == str + || virtio_idx > MAX_VIRTIO) { + RTE_LOG(INFO, XENHOST, "invalid virtio idx\n"); + continue; + } + RTE_LOG(INFO, XENHOST, " #####virtio dev (%d, %d) is started\n", dom_id, virtio_idx); + + guest = add_xen_guest(dom_id); + if (guest == NULL) + continue; + guest->dom_id = dom_id; + if (parse_vringnode(guest, virtio_idx)) { + RTE_LOG(ERR, XENHOST, " there is invalid information in xenstore\n"); + /*guest newly created? 
guest existed ?*/ + TAILQ_REMOVE(&guest_root, guest, next); + destroy_guest(guest); + continue; + } + /*if pool_num > 0, then mempool has already been parsed*/ + if (guest->pool_num == 0 && parse_mempoolnode(guest)) { + RTE_LOG(ERR, XENHOST, " there is error information in xenstore\n"); + TAILQ_REMOVE(&guest_root, guest, next); + destroy_guest(guest); + continue; + } + + + net_config = new_device(virtio_idx, guest); + RTE_LOG(INFO, XENHOST, " Add to dataplane core\n"); + notify_ops->new_device(&net_config->dev); + + } + } + } + + free(vec); + } + return; +} + +/* + * Register ops so that we can add/remove device to data core. + */ +int +init_virtio_xen(struct virtio_net_device_ops const *const ops) +{ + notify_ops = ops; + if (xenhost_init()) + return -1; + return 0; +} diff --git a/examples/vhost_xen/virtio-net.h b/examples/vhost_xen/virtio-net.h new file mode 100644 index 00000000..ab697260 --- /dev/null +++ b/examples/vhost_xen/virtio-net.h @@ -0,0 +1,113 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_NET_H_ +#define _VIRTIO_NET_H_ + +#include <stdint.h> + +#define VQ_DESC_NUM 256 +/* Used to indicate that the device is running on a data core */ +#define VIRTIO_DEV_RUNNING 1 + +/* + * Structure contains variables relevant to TX/RX virtqueues. + */ +struct vhost_virtqueue +{ + struct vring_desc *desc; /* Virtqueue descriptor ring. */ + struct vring_avail *avail; /* Virtqueue available ring. */ + struct vring_used *used; /* Virtqueue used ring. */ + uint32_t size; /* Size of descriptor ring. */ + uint32_t vhost_hlen; /* Vhost header length (varies depending on RX merge buffers. */ + volatile uint16_t last_used_idx; /* Last index used on the available ring */ + volatile uint16_t last_used_idx_res; /* Used for multiple devices reserving buffers. */ +} __rte_cache_aligned; + +/* + * Device structure contains all configuration information relating to the device. + */ +struct virtio_net +{ + struct vhost_virtqueue *virtqueue_tx; /* Contains all TX virtqueue information. */ + struct vhost_virtqueue *virtqueue_rx; /* Contains all RX virtqueue information. */ + struct virtio_memory *mem; /* QEMU memory and memory region information. 
*/ + struct ether_addr mac_address; /* Device MAC address (Obtained on first TX packet). */ + uint32_t flags; /* Device flags. Only used to check if device is running on data core. */ + uint32_t vlan_tag; /* Vlan tag for device. Currently set to device_id (0-63). */ + uint32_t vmdq_rx_q; + uint64_t device_fh; /* device identifier. */ + uint16_t coreid; + volatile uint8_t ready; /* A device is set as ready if the MAC address has been set. */ + volatile uint8_t remove; /* Device is marked for removal from the data core. */ + uint32_t virtio_idx; /* Index of virtio device */ + uint32_t dom_id; /* Domain id of xen guest */ +} ___rte_cache_aligned; + +/* + * Device linked list structure for configuration. + */ +struct virtio_net_config_ll +{ + struct virtio_net dev; /* Virtio device. */ + struct virtio_net_config_ll *next; /* Next entry on linked list. */ +}; + +/* + * Information relating to memory regions including offsets to addresses in QEMUs memory file. + */ +struct virtio_memory_regions { + uint64_t guest_phys_address; /* Base guest physical address of region. */ + uint64_t guest_phys_address_end; /* End guest physical address of region. */ + uint64_t memory_size; /* Size of region. */ + uint64_t userspace_address; /* Base userspace address of region. */ + uint64_t address_offset; /* Offset of region for address translation. */ +}; + +/* + * Memory structure includes region and mapping information. + */ +struct virtio_memory { + uint32_t nregions; /* Number of memory regions. */ + struct virtio_memory_regions regions[0]; /* Memory region information. */ +}; + +/* + * Device operations to add/remove device. + */ +struct virtio_net_device_ops { + int (* new_device)(struct virtio_net *); /* Add device. */ + void (* destroy_device) (volatile struct virtio_net *); /* Remove device. 
*/ +}; + +#endif diff --git a/examples/vhost_xen/xen_vhost.h b/examples/vhost_xen/xen_vhost.h new file mode 100644 index 00000000..2fc304c7 --- /dev/null +++ b/examples/vhost_xen/xen_vhost.h @@ -0,0 +1,148 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _XEN_VHOST_H_ +#define _XEN_VHOST_H_ + +#include <stdint.h> + +#include <rte_ether.h> + +#include "virtio-net.h" + +#define RTE_LOGTYPE_XENHOST RTE_LOGTYPE_USER1 + +#define XEN_VM_ROOTNODE_FMT "/local/domain/%d/control/dpdk" +#define XEN_VM_NODE_FMT "/local/domain/%d/control/dpdk/%s" +#define XEN_MEMPOOL_SUFFIX "mempool_gref" +#define XEN_RXVRING_SUFFIX "rx_vring_gref" +#define XEN_TXVRING_SUFFIX "tx_vring_gref" +#define XEN_GVA_SUFFIX "mempool_va" +#define XEN_VRINGFLAG_SUFFIX "vring_flag" +#define XEN_ADDR_SUFFIX "ether_addr" +#define VIRTIO_START "event_type_start_" + +#define XEN_GREF_SPLITTOKEN ',' + +#define MAX_XENVIRT_MEMPOOL 16 +#define MAX_VIRTIO 32 +#define MAX_GREF_PER_NODE 64 /* 128 MB memory */ + +#define PAGE_SIZE 4096 +#define PAGE_PFNNUM (PAGE_SIZE / sizeof(uint32_t)) + +#define XEN_GNTDEV_FNAME "/dev/xen/gntdev" + +/* xen grant reference info in one grant node */ +struct xen_gnt { + uint32_t gref; /* grant reference for this node */ + union { + int gref; /* grant reference */ + uint32_t pfn_num; /* guest pfn number of grant reference */ + } gref_pfn[PAGE_PFNNUM]; +}__attribute__((__packed__)); + + +/* structure for mempool or vring node list */ +struct xen_gntnode { + uint32_t gnt_num; /* grant reference number */ + struct xen_gnt *gnt_info; /* grant reference info */ +}; + + +struct xen_vring { + uint32_t dom_id; + uint32_t virtio_idx; /* index of virtio device */ + void *rxvring_addr; /* mapped virtual address of rxvring */ + void *txvring_addr; /* mapped virtual address of txvring */ + uint32_t rxpfn_num; /* number of gpfn for rxvring */ + uint32_t txpfn_num; /* number of gpfn for txvring */ + uint32_t *rxpfn_tbl; /* array of rxvring gpfn */ + uint32_t *txpfn_tbl; /* array of txvring gpfn */ + uint64_t *rx_pindex; /* index used to release rx grefs */ + uint64_t *tx_pindex; /* index used to release tx grefs */ + uint64_t flag_index; + uint8_t *flag; /* cleared to zero on guest unmap */ + struct ether_addr addr; /* ethernet 
address of virtio device */ + uint8_t removed; + +}; + +struct xen_mempool { + uint32_t dom_id; /* guest domain id */ + uint32_t pool_idx; /* index of memory pool */ + void *gva; /* guest virtual address of mbuf pool */ + void *hva; /* host virtual address of mbuf pool */ + uint32_t mempfn_num; /* number of gpfn for mbuf pool */ + uint32_t *mempfn_tbl; /* array of mbuf pool gpfn */ + uint64_t *pindex; /* index used to release grefs */ +}; + +struct xen_guest { + TAILQ_ENTRY(xen_guest) next; + int32_t dom_id; /* guest domain id */ + uint32_t pool_num; /* number of mbuf pool of the guest */ + uint32_t vring_num; /* number of virtio ports of the guest */ + /* array contain the guest mbuf pool info */ + struct xen_mempool mempool[MAX_XENVIRT_MEMPOOL]; + /* array contain the guest rx/tx vring info */ + struct xen_vring vring[MAX_VIRTIO]; +}; + +TAILQ_HEAD(xen_guestlist, xen_guest); + +int +parse_mempoolnode(struct xen_guest *guest); + +int +xenhost_init(void); + +int +parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx); + +int +parse_mempoolnode(struct xen_guest *guest); + +void +cleanup_mempool(struct xen_mempool *mempool); + +void +cleanup_vring(struct xen_vring *vring); + +void +virtio_monitor_loop(void); + +int +init_virtio_xen(struct virtio_net_device_ops const * const); + +#endif diff --git a/examples/vhost_xen/xenstore_parse.c b/examples/vhost_xen/xenstore_parse.c new file mode 100644 index 00000000..26d24320 --- /dev/null +++ b/examples/vhost_xen/xenstore_parse.c @@ -0,0 +1,775 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <unistd.h> +#include <inttypes.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <xen/sys/gntalloc.h> +#include <xen/sys/gntdev.h> +#include <xen/xen-compat.h> +#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200 +#include <xs.h> +#else +#include <xenstore.h> +#endif + +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_malloc.h> +#include <rte_string_fns.h> +#include <rte_log.h> +#include <rte_debug.h> + +#include "xen_vhost.h" + +/* xenstore handle */ +static struct xs_handle *xs = NULL; + +/* gntdev file descriptor to map grant pages */ +static int d_fd = -1; + +/* + * The grant node format in xenstore for vring/mpool is like: + * idx#_rx_vring_gref = "gref1#, gref2#, gref3#" + * idx#_mempool_gref = "gref1#, gref2#, gref3#" + * each gref# is the grant reference for a shared page. + * In each shared page, we store the grant_node_item items. + */ +struct grant_node_item { + uint32_t gref; + uint32_t pfn; +} __attribute__((packed)); + +int cmdline_parse_etheraddr(void *tk, const char *srcbuf, + void *res, unsigned ressize); + +/* Map grant ref refid at addr_ori*/ +static void * +xen_grant_mmap(void *addr_ori, int domid, int refid, uint64_t *pindex) +{ + struct ioctl_gntdev_map_grant_ref arg; + void *addr = NULL; + int pg_sz = getpagesize(); + + arg.count = 1; + arg.refs[0].domid = domid; + arg.refs[0].ref = refid; + + int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg); + if (rv) { + RTE_LOG(ERR, XENHOST, " %s: (%d,%d) %s (ioctl failed)\n", __func__, + domid, refid, strerror(errno)); + return NULL; + } + + if (addr_ori == NULL) + addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED, + d_fd, arg.index); + else + addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED, + d_fd, arg.index); + + if (addr == MAP_FAILED) { + RTE_LOG(ERR, XENHOST, " %s: (%d, %d) %s (map failed)\n", __func__, + domid, refid, strerror(errno)); 
+ return NULL; + } + + if (pindex) + *pindex = arg.index; + + return addr; +} + +/* Unmap one grant ref, and munmap must be called before this */ +static int +xen_unmap_grant_ref(uint64_t index) +{ + struct ioctl_gntdev_unmap_grant_ref arg; + int rv; + + arg.count = 1; + arg.index = index; + rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg); + if (rv) { + RTE_LOG(ERR, XENHOST, " %s: index 0x%" PRIx64 "unmap failed\n", __func__, index); + return -1; + } + return 0; +} + +/* + * Reserve a virtual address space. + * On success, returns the pointer. On failure, returns NULL. + */ +static void * +get_xen_virtual(size_t size, size_t page_sz) +{ + void *addr; + uintptr_t aligned_addr; + + addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, XENHOST, "failed get a virtual area\n"); + return NULL; + } + + aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz); + munmap(addr, aligned_addr - (uintptr_t)addr); + munmap((void *)(aligned_addr + size), page_sz + (uintptr_t)addr - aligned_addr); + addr = (void *)(aligned_addr); + + return addr; +} + +static void +free_xen_virtual(void *addr, size_t size, size_t page_sz __rte_unused) +{ + if (addr) + munmap(addr, size); +} + +/* + * Returns val str in xenstore. + * @param path + * Full path string for key + * @return + * Pointer to Val str, NULL on failure + */ +static char * +xen_read_node(char *path, uint32_t *len) +{ + char *buf; + + buf = xs_read(xs, XBT_NULL, path, len); + return buf; +} + +static int +cal_pagenum(struct xen_gnt *gnt) +{ + unsigned int i; + /* + * the items in the page are in the format of + * gref#,pfn#,...,gref#,pfn# + * FIXME, 0 is reserved by system, use it as terminator. 
+ */ + for (i = 0; i < (PAGE_PFNNUM) / 2; i++) { + if (gnt->gref_pfn[i * 2].gref <= 0) + break; + } + + return i; +} + +/* Frees memory allocated to a grant node */ +static void +xen_free_gntnode(struct xen_gntnode *gntnode) +{ + if (gntnode == NULL) + return; + free(gntnode->gnt_info); + free(gntnode); +} + +/* + * Parse a grant node. + * @param domid + * Guest domain id. + * @param path + * Full path string for a grant node, like for the following (key, val) pair + * idx#_mempool_gref = "gref#, gref#, gref#" + * path = 'local/domain/domid/control/dpdk/idx#_mempool_gref' + * gref# is a shared page contain packed (gref,pfn) entries + * @return + * Returns the pointer to xen_gntnode + */ +static struct xen_gntnode * +parse_gntnode(int dom_id, char *path) +{ + char **gref_list = NULL; + uint32_t i, len, gref_num; + void *addr = NULL; + char *buf = NULL; + struct xen_gntnode *gntnode = NULL; + struct xen_gnt *gnt = NULL; + int pg_sz = getpagesize(); + char *end; + uint64_t index; + + if ((buf = xen_read_node(path, &len)) == NULL) + goto err; + + gref_list = malloc(MAX_GREF_PER_NODE * sizeof(char *)); + if (gref_list == NULL) + goto err; + + gref_num = rte_strsplit(buf, len, gref_list, MAX_GREF_PER_NODE, + XEN_GREF_SPLITTOKEN); + if (gref_num == 0) { + RTE_LOG(ERR, XENHOST, " %s: invalid grant node format\n", __func__); + goto err; + } + + gntnode = calloc(1, sizeof(struct xen_gntnode)); + gnt = calloc(gref_num, sizeof(struct xen_gnt)); + if (gnt == NULL || gntnode == NULL) + goto err; + + for (i = 0; i < gref_num; i++) { + errno = 0; + gnt[i].gref = strtol(gref_list[i], &end, 0); + if (errno != 0 || end == NULL || end == gref_list[i] || + (*end != '\0' && *end != XEN_GREF_SPLITTOKEN)) { + RTE_LOG(ERR, XENHOST, " %s: parse grant node item failed\n", __func__); + goto err; + } + addr = xen_grant_mmap(NULL, dom_id, gnt[i].gref, &index); + if (addr == NULL) { + RTE_LOG(ERR, XENHOST, " %s: map gref %u failed\n", __func__, gnt[i].gref); + goto err; + } + RTE_LOG(INFO, 
XENHOST, " %s: map gref %u to %p\n", __func__, gnt[i].gref, addr); + memcpy(gnt[i].gref_pfn, addr, pg_sz); + if (munmap(addr, pg_sz)) { + RTE_LOG(INFO, XENHOST, " %s: unmap gref %u failed\n", __func__, gnt[i].gref); + goto err; + } + if (xen_unmap_grant_ref(index)) { + RTE_LOG(INFO, XENHOST, " %s: release gref %u failed\n", __func__, gnt[i].gref); + goto err; + } + + } + + gntnode->gnt_num = gref_num; + gntnode->gnt_info = gnt; + + free(buf); + free(gref_list); + return gntnode; + +err: + free(gnt); + free(gntnode); + free(gref_list); + free(buf); + return NULL; +} + +/* + * This function maps grant node of vring or mbuf pool to a continous virtual address space, + * and returns mapped address, pfn array, index array + * @param gntnode + * Pointer to grant node + * @param domid + * Guest domain id + * @param ppfn + * Pointer to pfn array, caller should free this array + * @param pgs + * Pointer to number of pages + * @param ppindex + * Pointer to index array, used to release grefs when to free this node + * @return + * Pointer to mapped virtual address, NULL on failure + */ +static void * +map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex) +{ + struct xen_gnt *gnt; + uint32_t i, j; + size_t total_pages = 0; + void *addr; + uint32_t *pfn; + uint64_t *pindex; + uint32_t pfn_num = 0; + int pg_sz; + + if (gntnode == NULL) + return NULL; + + pg_sz = getpagesize(); + for (i = 0; i < gntnode->gnt_num; i++) { + gnt = gntnode->gnt_info + i; + total_pages += cal_pagenum(gnt); + } + if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) { + RTE_LOG(ERR, XENHOST, " %s: failed get_xen_virtual\n", __func__); + return NULL; + } + pfn = calloc(total_pages, (size_t)sizeof(uint32_t)); + pindex = calloc(total_pages, (size_t)sizeof(uint64_t)); + if (pfn == NULL || pindex == NULL) { + free_xen_virtual(addr, total_pages * pg_sz, pg_sz); + free(pfn); + free(pindex); + return NULL; + } + + RTE_LOG(INFO, XENHOST, " %s: total 
pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1)); + for (i = 0; i < gntnode->gnt_num; i++) { + gnt = gntnode->gnt_info + i; + for (j = 0; j < (PAGE_PFNNUM) / 2; j++) { + if ((gnt->gref_pfn[j * 2].gref) <= 0) + goto _end; + /*alternative: batch map, or through libxc*/ + if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz), + domid, + gnt->gref_pfn[j * 2].gref, + &pindex[pfn_num]) == NULL) { + goto mmap_failed; + } + pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num; + pfn_num++; + } + } + +mmap_failed: + if (pfn_num) + munmap(addr, pfn_num * pg_sz); + for (i = 0; i < pfn_num; i++) { + xen_unmap_grant_ref(pindex[i]); + } + free(pindex); + free(pfn); + return NULL; + +_end: + if (ppindex) + *ppindex = pindex; + else + free(pindex); + if (ppfn) + *ppfn = pfn; + else + free(pfn); + if (pgs) + *pgs = total_pages; + + return addr; +} + +static int +parse_mpool_va(struct xen_mempool *mempool) +{ + char path[PATH_MAX] = {0}; + char *buf; + uint32_t len; + char *end; + int ret = -1; + + errno = 0; + snprintf(path, sizeof(path), + XEN_VM_ROOTNODE_FMT"/%d_"XEN_GVA_SUFFIX, + mempool->dom_id, mempool->pool_idx); + + if((buf = xen_read_node(path, &len)) == NULL) + goto out; + mempool->gva = (void *)strtoul(buf, &end, 16); + if (errno != 0 || end == NULL || end == buf || *end != '\0') { + mempool->gva = NULL; + goto out; + } + ret = 0; +out: + free(buf); + return ret; +} + +/* + * map mbuf pool + */ +static int +map_mempoolnode(struct xen_gntnode *gntnode, + struct xen_mempool *mempool) +{ + if (gntnode == NULL || mempool == NULL) + return -1; + + mempool->hva = + map_gntnode(gntnode, mempool->dom_id, &mempool->mempfn_tbl, &mempool->mempfn_num, &mempool->pindex); + + RTE_LOG(INFO, XENHOST, " %s: map mempool at %p\n", __func__, (void *)mempool->hva); + if (mempool->hva) + return 0; + else { + return -1; + } +} + +void +cleanup_mempool(struct xen_mempool *mempool) +{ + int pg_sz = getpagesize(); + uint32_t i; + + if 
(mempool->hva) + munmap(mempool->hva, mempool->mempfn_num * pg_sz); + mempool->hva = NULL; + + if (mempool->pindex) { + RTE_LOG(INFO, XENHOST, " %s: unmap dom %02u mempool%02u %u grefs\n", + __func__, + mempool->dom_id, + mempool->pool_idx, + mempool->mempfn_num); + for (i = 0; i < mempool->mempfn_num; i ++) { + xen_unmap_grant_ref(mempool->pindex[i]); + } + } + mempool->pindex = NULL; + + free(mempool->mempfn_tbl); + mempool->mempfn_tbl = NULL; +} + +/* + * process mempool node idx#_mempool_gref, idx = 0, 1, 2... + * untill we encounter a node that doesn't exist. + */ +int +parse_mempoolnode(struct xen_guest *guest) +{ + uint32_t i, len; + char path[PATH_MAX] = {0}; + struct xen_gntnode *gntnode = NULL; + struct xen_mempool *mempool = NULL; + char *buf; + + bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0])); + guest->pool_num = 0; + + while (1) { + /* check if null terminated */ + snprintf(path, sizeof(path), + XEN_VM_ROOTNODE_FMT"/%d_"XEN_MEMPOOL_SUFFIX, + guest->dom_id, + guest->pool_num); + + if ((buf = xen_read_node(path, &len)) != NULL) { + /* this node exists */ + free(buf); + } else { + if (guest->pool_num == 0) { + RTE_LOG(ERR, PMD, "no mempool found\n"); + return -1; + } + break; + } + + mempool = &guest->mempool[guest->pool_num]; + mempool->dom_id = guest->dom_id; + mempool->pool_idx = guest->pool_num; + + RTE_LOG(INFO, XENHOST, " %s: mempool %u parse gntnode %s\n", __func__, guest->pool_num, path); + gntnode = parse_gntnode(guest->dom_id, path); + if (gntnode == NULL) + goto err; + + if (parse_mpool_va(mempool)) + goto err; + + RTE_LOG(INFO, XENHOST, " %s: mempool %u map gntnode %s\n", __func__, guest->pool_num, path); + if (map_mempoolnode(gntnode, mempool)) + goto err; + + xen_free_gntnode(gntnode); + guest->pool_num++; + } + + return 0; +err: + if (gntnode) + xen_free_gntnode(gntnode); + for (i = 0; i < MAX_XENVIRT_MEMPOOL ; i++) { + cleanup_mempool(&guest->mempool[i]); + } + /* reinitialise mempool */ + bzero(&guest->mempool, 
MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0])); + return -1; +} + +static int +xen_map_vringflag(struct xen_vring *vring) +{ + char path[PATH_MAX] = {0}; + char *buf; + uint32_t len,gref; + int pg_sz = getpagesize(); + char *end; + + snprintf(path, sizeof(path), + XEN_VM_ROOTNODE_FMT"/%d_"XEN_VRINGFLAG_SUFFIX, + vring->dom_id, vring->virtio_idx); + + if((buf = xen_read_node(path, &len)) == NULL) + goto err; + + errno = 0; + gref = strtol(buf, &end, 0); + if (errno != 0 || end == NULL || end == buf) { + goto err; + } + vring->flag = xen_grant_mmap(0, vring->dom_id, gref, &vring->flag_index); + if (vring->flag == NULL || *vring->flag == 0) + goto err; + + free(buf); + return 0; +err: + free(buf); + if (vring->flag) { + munmap(vring->flag, pg_sz); + vring->flag = NULL; + xen_unmap_grant_ref(vring->flag_index); + } + return -1; +} + + +static int +xen_map_rxvringnode(struct xen_gntnode *gntnode, + struct xen_vring *vring) +{ + vring->rxvring_addr = + map_gntnode(gntnode, vring->dom_id, &vring->rxpfn_tbl, &vring->rxpfn_num, &vring->rx_pindex); + RTE_LOG(INFO, XENHOST, " %s: map rx vring at %p\n", __func__, (void *)vring->rxvring_addr); + if (vring->rxvring_addr) + return 0; + else + return -1; +} + +static int +xen_map_txvringnode(struct xen_gntnode *gntnode, + struct xen_vring *vring) +{ + vring->txvring_addr = + map_gntnode(gntnode, vring->dom_id, &vring->txpfn_tbl, &vring->txpfn_num, &vring->tx_pindex); + RTE_LOG(INFO, XENHOST, " %s: map tx vring at %p\n", __func__, (void *)vring->txvring_addr); + if (vring->txvring_addr) + return 0; + else + return -1; +} + +void +cleanup_vring(struct xen_vring *vring) +{ + int pg_sz = getpagesize(); + uint32_t i; + + RTE_LOG(INFO, XENHOST, " %s: cleanup dom %u vring %u\n", __func__, vring->dom_id, vring->virtio_idx); + if (vring->rxvring_addr) { + munmap(vring->rxvring_addr, vring->rxpfn_num * pg_sz); + RTE_LOG(INFO, XENHOST, " %s: unmap rx vring [%p, %p]\n", + __func__, + vring->rxvring_addr, + RTE_PTR_ADD(vring->rxvring_addr, + 
vring->rxpfn_num * pg_sz - 1)); + } + vring->rxvring_addr = NULL; + + + if (vring->rx_pindex) { + RTE_LOG(INFO, XENHOST, " %s: unmap rx vring %u grefs\n", __func__, vring->rxpfn_num); + for (i = 0; i < vring->rxpfn_num; i++) { + xen_unmap_grant_ref(vring->rx_pindex[i]); + } + } + vring->rx_pindex = NULL; + + free(vring->rxpfn_tbl); + vring->rxpfn_tbl = NULL; + + if (vring->txvring_addr) { + munmap(vring->txvring_addr, vring->txpfn_num * pg_sz); + RTE_LOG(INFO, XENHOST, " %s: unmap tx vring [%p, %p]\n", + __func__, + vring->txvring_addr, + RTE_PTR_ADD(vring->txvring_addr, + vring->txpfn_num * pg_sz - 1)); + } + vring->txvring_addr = NULL; + + if (vring->tx_pindex) { + RTE_LOG(INFO, XENHOST, " %s: unmap tx vring %u grefs\n", __func__, vring->txpfn_num); + for (i = 0; i < vring->txpfn_num; i++) { + xen_unmap_grant_ref(vring->tx_pindex[i]); + } + } + vring->tx_pindex = NULL; + + free(vring->txpfn_tbl); + vring->txpfn_tbl = NULL; + + if (vring->flag) { + if (!munmap((void *)vring->flag, pg_sz)) + RTE_LOG(INFO, XENHOST, " %s: unmap flag page at %p\n", __func__, vring->flag); + if (!xen_unmap_grant_ref(vring->flag_index)) + RTE_LOG(INFO, XENHOST, " %s: release flag ref index 0x%" PRIx64 "\n", __func__, vring->flag_index); + } + vring->flag = NULL; + return; +} + + + +static int +xen_parse_etheraddr(struct xen_vring *vring) +{ + char path[PATH_MAX] = {0}; + char *buf; + uint32_t len; + int ret = -1; + + snprintf(path, sizeof(path), + XEN_VM_ROOTNODE_FMT"/%d_"XEN_ADDR_SUFFIX, + vring->dom_id, vring->virtio_idx); + + if ((buf = xen_read_node(path, &len)) == NULL) + goto out; + + if (cmdline_parse_etheraddr(NULL, buf, &vring->addr, + sizeof(vring->addr)) < 0) + goto out; + ret = 0; +out: + free(buf); + return ret; +} + + +int +parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx) +{ + char path[PATH_MAX] = {0}; + struct xen_gntnode *rx_gntnode = NULL; + struct xen_gntnode *tx_gntnode = NULL; + struct xen_vring *vring = NULL; + + /*check if null terminated */ + 
snprintf(path, sizeof(path), + XEN_VM_ROOTNODE_FMT"/%d_"XEN_RXVRING_SUFFIX, + guest->dom_id, + virtio_idx); + + RTE_LOG(INFO, XENHOST, " %s: virtio %u parse rx gntnode %s\n", __func__, virtio_idx, path); + rx_gntnode = parse_gntnode(guest->dom_id, path); + if (rx_gntnode == NULL) + goto err; + + /*check if null terminated */ + snprintf(path, sizeof(path), + XEN_VM_ROOTNODE_FMT"/%d_"XEN_TXVRING_SUFFIX, + guest->dom_id, + virtio_idx); + + RTE_LOG(INFO, XENHOST, " %s: virtio %u parse tx gntnode %s\n", __func__, virtio_idx, path); + tx_gntnode = parse_gntnode(guest->dom_id, path); + if (tx_gntnode == NULL) + goto err; + + vring = &guest->vring[virtio_idx]; + bzero(vring, sizeof(*vring)); + vring->dom_id = guest->dom_id; + vring->virtio_idx = virtio_idx; + + if (xen_parse_etheraddr(vring) != 0) + goto err; + + RTE_LOG(INFO, XENHOST, " %s: virtio %u map rx gntnode %s\n", __func__, virtio_idx, path); + if (xen_map_rxvringnode(rx_gntnode, vring) != 0) + goto err; + + RTE_LOG(INFO, XENHOST, " %s: virtio %u map tx gntnode %s\n", __func__, virtio_idx, path); + if (xen_map_txvringnode(tx_gntnode, vring) != 0) + goto err; + + if (xen_map_vringflag(vring) != 0) + goto err; + + guest->vring_num++; + + xen_free_gntnode(rx_gntnode); + xen_free_gntnode(tx_gntnode); + + return 0; + +err: + if (rx_gntnode) + xen_free_gntnode(rx_gntnode); + if (tx_gntnode) + xen_free_gntnode(tx_gntnode); + if (vring) { + cleanup_vring(vring); + bzero(vring, sizeof(*vring)); + } + return -1; +} + +/* + * Open xen grant dev driver + * @return + * 0 on success, -1 on failure. + */ +static int +xen_grant_init(void) +{ + d_fd = open(XEN_GNTDEV_FNAME, O_RDWR); + + return d_fd == -1? (-1): (0); +} + +/* + * Initialise xenstore handle and open grant dev driver. + * @return + * 0 on success, -1 on failure. 
+ */ +int +xenhost_init(void) +{ + xs = xs_daemon_open(); + if (xs == NULL) { + rte_panic("failed initialize xen daemon handler"); + return -1; + } + if (xen_grant_init()) + return -1; + return 0; +} diff --git a/examples/vm_power_manager/Makefile b/examples/vm_power_manager/Makefile new file mode 100644 index 00000000..59a96417 --- /dev/null +++ b/examples/vm_power_manager/Makefile @@ -0,0 +1,65 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifneq ($(shell pkg-config --atleast-version=0.9.3 libvirt; echo $$?), 0) +$(error vm_power_manager requires libvirt >= 0.9.3) +else + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = vm_power_mgr + +# all source are stored in SRCS-y +SRCS-y := main.c vm_power_cli.c power_manager.c channel_manager.c +SRCS-y += channel_monitor.c + +CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/ +CFLAGS += $(WERROR_FLAGS) + +LDLIBS += -lvirt + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif # libvirt check diff --git a/examples/vm_power_manager/channel_manager.c b/examples/vm_power_manager/channel_manager.c new file mode 100644 index 00000000..22c2ddd5 --- /dev/null +++ b/examples/vm_power_manager/channel_manager.c @@ -0,0 +1,805 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/un.h> +#include <fcntl.h> +#include <unistd.h> +#include <inttypes.h> +#include <dirent.h> +#include <errno.h> + +#include <sys/queue.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/select.h> + +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_mempool.h> +#include <rte_log.h> +#include <rte_atomic.h> +#include <rte_spinlock.h> + +#include <libvirt/libvirt.h> + +#include "channel_manager.h" +#include "channel_commands.h" +#include "channel_monitor.h" + + +#define RTE_LOGTYPE_CHANNEL_MANAGER RTE_LOGTYPE_USER1 + +#define ITERATIVE_BITMASK_CHECK_64(mask_u64b, i) \ + for (i = 0; mask_u64b; mask_u64b &= ~(1ULL << i++)) \ + if ((mask_u64b >> i) & 1) \ + +/* Global pointer to libvirt connection */ +static virConnectPtr global_vir_conn_ptr; + +static unsigned char *global_cpumaps; +static virVcpuInfo *global_vircpuinfo; +static size_t global_maplen; + +static unsigned global_n_host_cpus; + +/* + * Represents a single Virtual Machine + */ +struct virtual_machine_info { + char name[CHANNEL_MGR_MAX_NAME_LEN]; + rte_atomic64_t pcpu_mask[CHANNEL_CMDS_MAX_CPUS]; + struct channel_info *channels[CHANNEL_CMDS_MAX_VM_CHANNELS]; + uint64_t channel_mask; + uint8_t num_channels; + enum vm_status status; + virDomainPtr domainPtr; + virDomainInfo info; + rte_spinlock_t config_spinlock; + LIST_ENTRY(virtual_machine_info) vms_info; +}; + +LIST_HEAD(, virtual_machine_info) vm_list_head; + +static struct virtual_machine_info * +find_domain_by_name(const char *name) +{ + struct virtual_machine_info *info; + LIST_FOREACH(info, &vm_list_head, vms_info) { + if (!strncmp(info->name, name, CHANNEL_MGR_MAX_NAME_LEN-1)) + return info; + } + return NULL; +} + +static int +update_pcpus_mask(struct virtual_machine_info *vm_info) +{ + virVcpuInfoPtr cpuinfo; + unsigned i, j; + int n_vcpus; + uint64_t mask; + + memset(global_cpumaps, 0, CHANNEL_CMDS_MAX_CPUS*global_maplen); + + if 
(!virDomainIsActive(vm_info->domainPtr)) { + n_vcpus = virDomainGetVcpuPinInfo(vm_info->domainPtr, + vm_info->info.nrVirtCpu, global_cpumaps, global_maplen, + VIR_DOMAIN_AFFECT_CONFIG); + if (n_vcpus < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting vCPU info for " + "in-active VM '%s'\n", vm_info->name); + return -1; + } + goto update_pcpus; + } + + memset(global_vircpuinfo, 0, sizeof(*global_vircpuinfo)* + CHANNEL_CMDS_MAX_CPUS); + + cpuinfo = global_vircpuinfo; + + n_vcpus = virDomainGetVcpus(vm_info->domainPtr, cpuinfo, + CHANNEL_CMDS_MAX_CPUS, global_cpumaps, global_maplen); + if (n_vcpus < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting vCPU info for " + "active VM '%s'\n", vm_info->name); + return -1; + } +update_pcpus: + if (n_vcpus >= CHANNEL_CMDS_MAX_CPUS) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Number of vCPUS(%u) is out of range " + "0...%d\n", n_vcpus, CHANNEL_CMDS_MAX_CPUS-1); + return -1; + } + if (n_vcpus != vm_info->info.nrVirtCpu) { + RTE_LOG(INFO, CHANNEL_MANAGER, "Updating the number of vCPUs for VM '%s" + " from %d -> %d\n", vm_info->name, vm_info->info.nrVirtCpu, + n_vcpus); + vm_info->info.nrVirtCpu = n_vcpus; + } + for (i = 0; i < vm_info->info.nrVirtCpu; i++) { + mask = 0; + for (j = 0; j < global_n_host_cpus; j++) { + if (VIR_CPU_USABLE(global_cpumaps, global_maplen, i, j) > 0) { + mask |= 1ULL << j; + } + } + rte_atomic64_set(&vm_info->pcpu_mask[i], mask); + } + return 0; +} + +int +set_pcpus_mask(char *vm_name, unsigned vcpu, uint64_t core_mask) +{ + unsigned i = 0; + int flags = VIR_DOMAIN_AFFECT_LIVE|VIR_DOMAIN_AFFECT_CONFIG; + struct virtual_machine_info *vm_info; + uint64_t mask = core_mask; + + if (vcpu >= CHANNEL_CMDS_MAX_CPUS) { + RTE_LOG(ERR, CHANNEL_MANAGER, "vCPU(%u) exceeds max allowable(%d)\n", + vcpu, CHANNEL_CMDS_MAX_CPUS-1); + return -1; + } + + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM '%s' not found\n", vm_name); + return -1; + } + + if 
(!virDomainIsActive(vm_info->domainPtr)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to set vCPU(%u) to pCPU " + "mask(0x%"PRIx64") for VM '%s', VM is not active\n", + vcpu, core_mask, vm_info->name); + return -1; + } + + if (vcpu >= vm_info->info.nrVirtCpu) { + RTE_LOG(ERR, CHANNEL_MANAGER, "vCPU(%u) exceeds the assigned number of " + "vCPUs(%u)\n", vcpu, vm_info->info.nrVirtCpu); + return -1; + } + memset(global_cpumaps, 0 , CHANNEL_CMDS_MAX_CPUS * global_maplen); + ITERATIVE_BITMASK_CHECK_64(mask, i) { + VIR_USE_CPU(global_cpumaps, i); + if (i >= global_n_host_cpus) { + RTE_LOG(ERR, CHANNEL_MANAGER, "CPU(%u) exceeds the available " + "number of CPUs(%u)\n", i, global_n_host_cpus); + return -1; + } + } + if (virDomainPinVcpuFlags(vm_info->domainPtr, vcpu, global_cpumaps, + global_maplen, flags) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to set vCPU(%u) to pCPU " + "mask(0x%"PRIx64") for VM '%s'\n", vcpu, core_mask, + vm_info->name); + return -1; + } + rte_atomic64_set(&vm_info->pcpu_mask[vcpu], core_mask); + return 0; + +} + +int +set_pcpu(char *vm_name, unsigned vcpu, unsigned core_num) +{ + uint64_t mask = 1ULL << core_num; + + return set_pcpus_mask(vm_name, vcpu, mask); +} + +uint64_t +get_pcpus_mask(struct channel_info *chan_info, unsigned vcpu) +{ + struct virtual_machine_info *vm_info = + (struct virtual_machine_info *)chan_info->priv_info; + return rte_atomic64_read(&vm_info->pcpu_mask[vcpu]); +} + +static inline int +channel_exists(struct virtual_machine_info *vm_info, unsigned channel_num) +{ + rte_spinlock_lock(&(vm_info->config_spinlock)); + if (vm_info->channel_mask & (1ULL << channel_num)) { + rte_spinlock_unlock(&(vm_info->config_spinlock)); + return 1; + } + rte_spinlock_unlock(&(vm_info->config_spinlock)); + return 0; +} + + + +static int +open_non_blocking_channel(struct channel_info *info) +{ + int ret, flags; + struct sockaddr_un sock_addr; + fd_set soc_fd_set; + struct timeval tv; + + info->fd = socket(AF_UNIX, SOCK_STREAM, 0); + if 
(info->fd == -1) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error(%s) creating socket for '%s'\n", + strerror(errno), + info->channel_path); + return -1; + } + sock_addr.sun_family = AF_UNIX; + memcpy(&sock_addr.sun_path, info->channel_path, + strlen(info->channel_path)+1); + + /* Get current flags */ + flags = fcntl(info->fd, F_GETFL, 0); + if (flags < 0) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) fcntl get flags socket for" + "'%s'\n", strerror(errno), info->channel_path); + return 1; + } + /* Set to Non Blocking */ + flags |= O_NONBLOCK; + if (fcntl(info->fd, F_SETFL, flags) < 0) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) setting non-blocking " + "socket for '%s'\n", strerror(errno), info->channel_path); + return -1; + } + ret = connect(info->fd, (struct sockaddr *)&sock_addr, + sizeof(sock_addr)); + if (ret < 0) { + /* ECONNREFUSED error is given when VM is not active */ + if (errno == ECONNREFUSED) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "VM is not active or has not " + "activated its endpoint to channel %s\n", + info->channel_path); + return -1; + } + /* Wait for tv_sec if in progress */ + else if (errno == EINPROGRESS) { + tv.tv_sec = 2; + tv.tv_usec = 0; + FD_ZERO(&soc_fd_set); + FD_SET(info->fd, &soc_fd_set); + if (select(info->fd+1, NULL, &soc_fd_set, NULL, &tv) > 0) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Timeout or error on channel " + "'%s'\n", info->channel_path); + return -1; + } + } else { + /* Any other error */ + RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) connecting socket" + " for '%s'\n", strerror(errno), info->channel_path); + return -1; + } + } + return 0; +} + +static int +setup_channel_info(struct virtual_machine_info **vm_info_dptr, + struct channel_info **chan_info_dptr, unsigned channel_num) +{ + struct channel_info *chan_info = *chan_info_dptr; + struct virtual_machine_info *vm_info = *vm_info_dptr; + + chan_info->channel_num = channel_num; + chan_info->priv_info = (void *)vm_info; + chan_info->status = 
CHANNEL_MGR_CHANNEL_DISCONNECTED; + if (open_non_blocking_channel(chan_info) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Could not open channel: " + "'%s' for VM '%s'\n", + chan_info->channel_path, vm_info->name); + return -1; + } + if (add_channel_to_monitor(&chan_info) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Could add channel: " + "'%s' to epoll ctl for VM '%s'\n", + chan_info->channel_path, vm_info->name); + return -1; + + } + rte_spinlock_lock(&(vm_info->config_spinlock)); + vm_info->num_channels++; + vm_info->channel_mask |= 1ULL << channel_num; + vm_info->channels[channel_num] = chan_info; + chan_info->status = CHANNEL_MGR_CHANNEL_CONNECTED; + rte_spinlock_unlock(&(vm_info->config_spinlock)); + return 0; +} + +int +add_all_channels(const char *vm_name) +{ + DIR *d; + struct dirent *dir; + struct virtual_machine_info *vm_info; + struct channel_info *chan_info; + char *token, *remaining, *tail_ptr; + char socket_name[PATH_MAX]; + unsigned channel_num; + int num_channels_enabled = 0; + + /* verify VM exists */ + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' not found" + " during channel discovery\n", vm_name); + return 0; + } + if (!virDomainIsActive(vm_info->domainPtr)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' is not active\n", vm_name); + vm_info->status = CHANNEL_MGR_VM_INACTIVE; + return 0; + } + d = opendir(CHANNEL_MGR_SOCKET_PATH); + if (d == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error opening directory '%s': %s\n", + CHANNEL_MGR_SOCKET_PATH, strerror(errno)); + return -1; + } + while ((dir = readdir(d)) != NULL) { + if (!strncmp(dir->d_name, ".", 1) || + !strncmp(dir->d_name, "..", 2)) + continue; + + snprintf(socket_name, sizeof(socket_name), "%s", dir->d_name); + remaining = socket_name; + /* Extract vm_name from "<vm_name>.<channel_num>" */ + token = strsep(&remaining, "."); + if (remaining == NULL) + continue; + if (strncmp(vm_name, token, CHANNEL_MGR_MAX_NAME_LEN)) + continue; + + /* 
remaining should contain only <channel_num> */ + errno = 0; + channel_num = (unsigned)strtol(remaining, &tail_ptr, 0); + if ((errno != 0) || (remaining[0] == '\0') || + tail_ptr == NULL || (*tail_ptr != '\0')) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Malformed channel name" + "'%s' found it should be in the form of " + "'<guest_name>.<channel_num>(decimal)'\n", + dir->d_name); + continue; + } + if (channel_num >= CHANNEL_CMDS_MAX_VM_CHANNELS) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Channel number(%u) is " + "greater than max allowable: %d, skipping '%s%s'\n", + channel_num, CHANNEL_CMDS_MAX_VM_CHANNELS-1, + CHANNEL_MGR_SOCKET_PATH, dir->d_name); + continue; + } + /* if channel has not been added previously */ + if (channel_exists(vm_info, channel_num)) + continue; + + chan_info = rte_malloc(NULL, sizeof(*chan_info), + RTE_CACHE_LINE_SIZE); + if (chan_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for " + "channel '%s%s'\n", CHANNEL_MGR_SOCKET_PATH, dir->d_name); + continue; + } + + snprintf(chan_info->channel_path, + sizeof(chan_info->channel_path), "%s%s", + CHANNEL_MGR_SOCKET_PATH, dir->d_name); + + if (setup_channel_info(&vm_info, &chan_info, channel_num) < 0) { + rte_free(chan_info); + continue; + } + + num_channels_enabled++; + } + closedir(d); + return num_channels_enabled; +} + +int +add_channels(const char *vm_name, unsigned *channel_list, + unsigned len_channel_list) +{ + struct virtual_machine_info *vm_info; + struct channel_info *chan_info; + char socket_path[PATH_MAX]; + unsigned i; + int num_channels_enabled = 0; + + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add channels: VM '%s' " + "not found\n", vm_name); + return 0; + } + + if (!virDomainIsActive(vm_info->domainPtr)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' is not active\n", vm_name); + vm_info->status = CHANNEL_MGR_VM_INACTIVE; + return 0; + } + + for (i = 0; i < len_channel_list; i++) { + + if 
(channel_list[i] >= CHANNEL_CMDS_MAX_VM_CHANNELS) { + RTE_LOG(INFO, CHANNEL_MANAGER, "Channel(%u) is out of range " + "0...%d\n", channel_list[i], + CHANNEL_CMDS_MAX_VM_CHANNELS-1); + continue; + } + if (channel_exists(vm_info, channel_list[i])) { + RTE_LOG(INFO, CHANNEL_MANAGER, "Channel already exists, skipping " + "'%s.%u'\n", vm_name, i); + continue; + } + + snprintf(socket_path, sizeof(socket_path), "%s%s.%u", + CHANNEL_MGR_SOCKET_PATH, vm_name, channel_list[i]); + errno = 0; + if (access(socket_path, F_OK) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Channel path '%s' error: " + "%s\n", socket_path, strerror(errno)); + continue; + } + chan_info = rte_malloc(NULL, sizeof(*chan_info), + RTE_CACHE_LINE_SIZE); + if (chan_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for " + "channel '%s'\n", socket_path); + continue; + } + snprintf(chan_info->channel_path, + sizeof(chan_info->channel_path), "%s%s.%u", + CHANNEL_MGR_SOCKET_PATH, vm_name, channel_list[i]); + if (setup_channel_info(&vm_info, &chan_info, channel_list[i]) < 0) { + rte_free(chan_info); + continue; + } + num_channels_enabled++; + + } + return num_channels_enabled; +} + +int +remove_channel(struct channel_info **chan_info_dptr) +{ + struct virtual_machine_info *vm_info; + struct channel_info *chan_info = *chan_info_dptr; + + close(chan_info->fd); + + vm_info = (struct virtual_machine_info *)chan_info->priv_info; + + rte_spinlock_lock(&(vm_info->config_spinlock)); + vm_info->channel_mask &= ~(1ULL << chan_info->channel_num); + vm_info->num_channels--; + rte_spinlock_unlock(&(vm_info->config_spinlock)); + + rte_free(chan_info); + return 0; +} + +int +set_channel_status_all(const char *vm_name, enum channel_status status) +{ + struct virtual_machine_info *vm_info; + unsigned i; + uint64_t mask; + int num_channels_changed = 0; + + if (!(status == CHANNEL_MGR_CHANNEL_CONNECTED || + status == CHANNEL_MGR_CHANNEL_DISABLED)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Channels can only be enabled or 
" + "disabled: Unable to change status for VM '%s'\n", vm_name); + } + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to disable channels: VM '%s' " + "not found\n", vm_name); + return 0; + } + + rte_spinlock_lock(&(vm_info->config_spinlock)); + mask = vm_info->channel_mask; + ITERATIVE_BITMASK_CHECK_64(mask, i) { + vm_info->channels[i]->status = status; + num_channels_changed++; + } + rte_spinlock_unlock(&(vm_info->config_spinlock)); + return num_channels_changed; + +} + +int +set_channel_status(const char *vm_name, unsigned *channel_list, + unsigned len_channel_list, enum channel_status status) +{ + struct virtual_machine_info *vm_info; + unsigned i; + int num_channels_changed = 0; + + if (!(status == CHANNEL_MGR_CHANNEL_CONNECTED || + status == CHANNEL_MGR_CHANNEL_DISABLED)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Channels can only be enabled or " + "disabled: Unable to change status for VM '%s'\n", vm_name); + } + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add channels: VM '%s' " + "not found\n", vm_name); + return 0; + } + for (i = 0; i < len_channel_list; i++) { + if (channel_exists(vm_info, channel_list[i])) { + rte_spinlock_lock(&(vm_info->config_spinlock)); + vm_info->channels[channel_list[i]]->status = status; + rte_spinlock_unlock(&(vm_info->config_spinlock)); + num_channels_changed++; + } + } + return num_channels_changed; +} + +int +get_info_vm(const char *vm_name, struct vm_info *info) +{ + struct virtual_machine_info *vm_info; + unsigned i, channel_num = 0; + uint64_t mask; + + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM '%s' not found\n", vm_name); + return -1; + } + info->status = CHANNEL_MGR_VM_ACTIVE; + if (!virDomainIsActive(vm_info->domainPtr)) + info->status = CHANNEL_MGR_VM_INACTIVE; + + rte_spinlock_lock(&(vm_info->config_spinlock)); + + mask = vm_info->channel_mask; + 
ITERATIVE_BITMASK_CHECK_64(mask, i) { + info->channels[channel_num].channel_num = i; + memcpy(info->channels[channel_num].channel_path, + vm_info->channels[i]->channel_path, UNIX_PATH_MAX); + info->channels[channel_num].status = vm_info->channels[i]->status; + info->channels[channel_num].fd = vm_info->channels[i]->fd; + channel_num++; + } + + info->num_channels = channel_num; + info->num_vcpus = vm_info->info.nrVirtCpu; + rte_spinlock_unlock(&(vm_info->config_spinlock)); + + memcpy(info->name, vm_info->name, sizeof(vm_info->name)); + for (i = 0; i < info->num_vcpus; i++) { + info->pcpu_mask[i] = rte_atomic64_read(&vm_info->pcpu_mask[i]); + } + return 0; +} + +int +add_vm(const char *vm_name) +{ + struct virtual_machine_info *new_domain; + virDomainPtr dom_ptr; + int i; + + if (find_domain_by_name(vm_name) != NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add VM: VM '%s' " + "already exists\n", vm_name); + return -1; + } + + if (global_vir_conn_ptr == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "No connection to hypervisor exists\n"); + return -1; + } + dom_ptr = virDomainLookupByName(global_vir_conn_ptr, vm_name); + if (dom_ptr == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error on VM lookup with libvirt: " + "VM '%s' not found\n", vm_name); + return -1; + } + + new_domain = rte_malloc("virtual_machine_info", sizeof(*new_domain), + RTE_CACHE_LINE_SIZE); + if (new_domain == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to allocate memory for VM " + "info\n"); + return -1; + } + new_domain->domainPtr = dom_ptr; + if (virDomainGetInfo(new_domain->domainPtr, &new_domain->info) != 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to get libvirt VM info\n"); + rte_free(new_domain); + return -1; + } + if (new_domain->info.nrVirtCpu > CHANNEL_CMDS_MAX_CPUS) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error the number of virtual CPUs(%u) is " + "greater than allowable(%d)\n", new_domain->info.nrVirtCpu, + CHANNEL_CMDS_MAX_CPUS); + rte_free(new_domain); + return -1; + } + + for (i = 0; i 
< CHANNEL_CMDS_MAX_CPUS; i++) { + rte_atomic64_init(&new_domain->pcpu_mask[i]); + } + if (update_pcpus_mask(new_domain) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting physical CPU pinning\n"); + rte_free(new_domain); + return -1; + } + strncpy(new_domain->name, vm_name, sizeof(new_domain->name)); + new_domain->channel_mask = 0; + new_domain->num_channels = 0; + + if (!virDomainIsActive(dom_ptr)) + new_domain->status = CHANNEL_MGR_VM_INACTIVE; + else + new_domain->status = CHANNEL_MGR_VM_ACTIVE; + + rte_spinlock_init(&(new_domain->config_spinlock)); + LIST_INSERT_HEAD(&vm_list_head, new_domain, vms_info); + return 0; +} + +int +remove_vm(const char *vm_name) +{ + struct virtual_machine_info *vm_info = find_domain_by_name(vm_name); + + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to remove VM: VM '%s' " + "not found\n", vm_name); + return -1; + } + rte_spinlock_lock(&vm_info->config_spinlock); + if (vm_info->num_channels != 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to remove VM '%s', there are " + "%"PRId8" channels still active\n", + vm_name, vm_info->num_channels); + rte_spinlock_unlock(&vm_info->config_spinlock); + return -1; + } + LIST_REMOVE(vm_info, vms_info); + rte_spinlock_unlock(&vm_info->config_spinlock); + rte_free(vm_info); + return 0; +} + +static void +disconnect_hypervisor(void) +{ + if (global_vir_conn_ptr != NULL) { + virConnectClose(global_vir_conn_ptr); + global_vir_conn_ptr = NULL; + } +} + +static int +connect_hypervisor(const char *path) +{ + if (global_vir_conn_ptr != NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error connecting to %s, connection " + "already established\n", path); + return -1; + } + global_vir_conn_ptr = virConnectOpen(path); + if (global_vir_conn_ptr == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error failed to open connection to " + "Hypervisor '%s'\n", path); + return -1; + } + return 0; +} + +int +channel_manager_init(const char *path) +{ + virNodeInfo info; + + LIST_INIT(&vm_list_head); + if 
(connect_hypervisor(path) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to initialize channel manager\n"); + return -1; + } + + global_maplen = VIR_CPU_MAPLEN(CHANNEL_CMDS_MAX_CPUS); + + global_vircpuinfo = rte_zmalloc(NULL, sizeof(*global_vircpuinfo) * + CHANNEL_CMDS_MAX_CPUS, RTE_CACHE_LINE_SIZE); + if (global_vircpuinfo == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for CPU Info\n"); + goto error; + } + global_cpumaps = rte_zmalloc(NULL, CHANNEL_CMDS_MAX_CPUS * global_maplen, + RTE_CACHE_LINE_SIZE); + if (global_cpumaps == NULL) { + goto error; + } + + if (virNodeGetInfo(global_vir_conn_ptr, &info)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to retrieve node Info\n"); + goto error; + } + + global_n_host_cpus = (unsigned)info.cpus; + + if (global_n_host_cpus > CHANNEL_CMDS_MAX_CPUS) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "The number of host CPUs(%u) exceeds the " + "maximum of %u. No cores over %u should be used.\n", + global_n_host_cpus, CHANNEL_CMDS_MAX_CPUS, + CHANNEL_CMDS_MAX_CPUS - 1); + global_n_host_cpus = CHANNEL_CMDS_MAX_CPUS; + } + + return 0; +error: + disconnect_hypervisor(); + return -1; +} + +void +channel_manager_exit(void) +{ + unsigned i; + uint64_t mask; + struct virtual_machine_info *vm_info; + + LIST_FOREACH(vm_info, &vm_list_head, vms_info) { + + rte_spinlock_lock(&(vm_info->config_spinlock)); + + mask = vm_info->channel_mask; + ITERATIVE_BITMASK_CHECK_64(mask, i) { + remove_channel_from_monitor(vm_info->channels[i]); + close(vm_info->channels[i]->fd); + rte_free(vm_info->channels[i]); + } + rte_spinlock_unlock(&(vm_info->config_spinlock)); + + LIST_REMOVE(vm_info, vms_info); + rte_free(vm_info); + } + + rte_free(global_cpumaps); + rte_free(global_vircpuinfo); + disconnect_hypervisor(); +} diff --git a/examples/vm_power_manager/channel_manager.h b/examples/vm_power_manager/channel_manager.h new file mode 100644 index 00000000..67e26ecb --- /dev/null +++ b/examples/vm_power_manager/channel_manager.h @@ -0,0 +1,320 @@ +/*- + 
* BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CHANNEL_MANAGER_H_ +#define CHANNEL_MANAGER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <linux/limits.h> +#include <sys/un.h> +#include <rte_atomic.h> +#include "channel_commands.h" + +/* Maximum name length including '\0' terminator */ +#define CHANNEL_MGR_MAX_NAME_LEN 64 + +/* Maximum number of channels to each Virtual Machine */ +#define CHANNEL_MGR_MAX_CHANNELS 64 + +/* Hypervisor Path for libvirt(qemu/KVM) */ +#define CHANNEL_MGR_DEFAULT_HV_PATH "qemu:///system" + +/* File socket directory */ +#define CHANNEL_MGR_SOCKET_PATH "/tmp/powermonitor/" + +#ifndef UNIX_PATH_MAX +struct sockaddr_un _sockaddr_un; +#define UNIX_PATH_MAX sizeof(_sockaddr_un.sun_path) +#endif + +/* Communication Channel Status */ +enum channel_status { CHANNEL_MGR_CHANNEL_DISCONNECTED = 0, + CHANNEL_MGR_CHANNEL_CONNECTED, + CHANNEL_MGR_CHANNEL_DISABLED, + CHANNEL_MGR_CHANNEL_PROCESSING}; + +/* VM libvirt(qemu/KVM) connection status */ +enum vm_status { CHANNEL_MGR_VM_INACTIVE = 0, CHANNEL_MGR_VM_ACTIVE}; + +/* + * Represents a single and exclusive VM channel that exists between a guest and + * the host. 
+ */ +struct channel_info { + char channel_path[UNIX_PATH_MAX]; /**< Path to host socket */ + volatile uint32_t status; /**< Connection status(enum channel_status) */ + int fd; /**< AF_UNIX socket fd */ + unsigned channel_num; /**< CHANNEL_MGR_SOCKET_PATH/<vm_name>.channel_num */ + void *priv_info; /**< Pointer to private info, do not modify */ +}; + +/* Represents a single VM instance used to return internal information about + * a VM */ +struct vm_info { + char name[CHANNEL_MGR_MAX_NAME_LEN]; /**< VM name */ + enum vm_status status; /**< libvirt status */ + uint64_t pcpu_mask[CHANNEL_CMDS_MAX_CPUS]; /**< pCPU mask for each vCPU */ + unsigned num_vcpus; /**< number of vCPUS */ + struct channel_info channels[CHANNEL_MGR_MAX_CHANNELS]; /**< Array of channel_info */ + unsigned num_channels; /**< Number of channels */ +}; + +/** + * Initialize the Channel Manager resources and connect to the Hypervisor + * specified in path. + * This must be successfully called first before calling any other functions. + * It must only be called once. + * + * @param path + * Must be a local path, e.g. qemu:///system. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int channel_manager_init(const char *path); + +/** + * Free resources associated with the Channel Manager. + * + * @param path + * Must be a local path, e.g. qemu:///system. + * + * @return + * None + */ +void channel_manager_exit(void); + +/** + * Get the Physical CPU mask for VM lcore channel(vcpu), result is assigned to + * core_mask. + * It is not thread-safe. + * + * @param chan_info + * Pointer to struct channel_info + * + * @param vcpu + * The virtual CPU to query. + * + * + * @return + * - 0 on error. + * - >0 on success. + */ +uint64_t get_pcpus_mask(struct channel_info *chan_info, unsigned vcpu); + +/** + * Set the Physical CPU mask for the specified vCPU. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup + * + * @param vcpu + * The virtual CPU to set. 
+ * + * @param core_mask + * The core mask of the physical CPU(s) to bind the vCPU + * + * @return + * - 0 on success. + * - Negative on error. + */ +int set_pcpus_mask(char *vm_name, unsigned vcpu, uint64_t core_mask); + +/** + * Set the Physical CPU for the specified vCPU. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup + * + * @param vcpu + * The virtual CPU to set. + * + * @param core_num + * The core number of the physical CPU(s) to bind the vCPU + * + * @return + * - 0 on success. + * - Negative on error. + */ +int set_pcpu(char *vm_name, unsigned vcpu, unsigned core_num); +/** + * Add a VM as specified by name to the Channel Manager. The name must + * correspond to a valid libvirt domain name. + * This is required prior to adding channels. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int add_vm(const char *name); + +/** + * Remove a previously added Virtual Machine from the Channel Manager + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int remove_vm(const char *name); + +/** + * Add all available channels to the VM as specified by name. + * Channels in the form of paths + * (CHANNEL_MGR_SOCKET_PATH/<vm_name>.<channel_number>) will only be parsed. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup. + * + * @return + * - N the number of channels added for the VM + */ +int add_all_channels(const char *vm_name); + +/** + * Add the channel numbers in channel_list to the domain specified by name. + * Channels in the form of paths + * (CHANNEL_MGR_SOCKET_PATH/<vm_name>.<channel_number>) will only be parsed. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to add channels. 
+ * + * @param channel_list + * Pointer to list of unsigned integers, representing the channel number to add + * It must be allocated outside of this function. + * + * @param num_channels + * The amount of channel numbers in channel_list + * + * @return + * - N the number of channels added for the VM + * - 0 for error + */ +int add_channels(const char *vm_name, unsigned *channel_list, + unsigned num_channels); + +/** + * Remove a channel definition from the channel manager. This must only be + * called from the channel monitor thread. + * + * @param chan_info + * Pointer to a valid struct channel_info. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int remove_channel(struct channel_info **chan_info_dptr); + +/** + * For all channels associated with a Virtual Machine name, update the + * connection status. Valid states are CHANNEL_MGR_CHANNEL_CONNECTED or + * CHANNEL_MGR_CHANNEL_DISABLED only. + * + * + * @param name + * Virtual Machine name to modify all channels. + * + * @param status + * The status to set each channel + * + * @param num_channels + * The amount of channel numbers in channel_list + * + * @return + * - N the number of channels added for the VM + * - 0 for error + */ +int set_channel_status_all(const char *name, enum channel_status status); + +/** + * For all channels in channel_list associated with a Virtual Machine name + * update the connection status of each. + * Valid states are CHANNEL_MGR_CHANNEL_CONNECTED or + * CHANNEL_MGR_CHANNEL_DISABLED only. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to add channels. + * + * @param channel_list + * Pointer to list of unsigned integers, representing the channel numbers to + * modify. + * It must be allocated outside of this function. 
+ * + * @param num_channels + * The amount of channel numbers in channel_list + * + * @return + * - N the number of channels modified for the VM + * - 0 for error + */ +int set_channel_status(const char *vm_name, unsigned *channel_list, + unsigned len_channel_list, enum channel_status status); + +/** + * Populates a pointer to struct vm_info associated with vm_name. + * + * @param vm_name + * The name of the virtual machine to lookup. + * + * @param vm_info + * Pointer to a struct vm_info, this must be allocated prior to calling this + * function. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int get_info_vm(const char *vm_name, struct vm_info *info); + +#ifdef __cplusplus +} +#endif + +#endif /* CHANNEL_MANAGER_H_ */ diff --git a/examples/vm_power_manager/channel_monitor.c b/examples/vm_power_manager/channel_monitor.c new file mode 100644 index 00000000..e7f5cc4a --- /dev/null +++ b/examples/vm_power_manager/channel_monitor.c @@ -0,0 +1,233 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <signal.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <sys/epoll.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_atomic.h> + + +#include "channel_monitor.h" +#include "channel_commands.h" +#include "channel_manager.h" +#include "power_manager.h" + +#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1 + +#define MAX_EVENTS 256 + + +static volatile unsigned run_loop = 1; +static int global_event_fd; +static struct epoll_event *global_events_list; + +void channel_monitor_exit(void) +{ + run_loop = 0; + rte_free(global_events_list); +} + +static int +process_request(struct channel_packet *pkt, struct channel_info *chan_info) +{ + uint64_t core_mask; + + if (chan_info == NULL) + return -1; + + if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED, + CHANNEL_MGR_CHANNEL_PROCESSING) == 0) + return -1; + + if (pkt->command == CPU_POWER) { + core_mask = get_pcpus_mask(chan_info, pkt->resource_id); + if (core_mask == 0) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Error get 
physical CPU mask for " + "channel '%s' using vCPU(%u)\n", chan_info->channel_path, + (unsigned)pkt->unit); + return -1; + } + if (__builtin_popcountll(core_mask) == 1) { + + unsigned core_num = __builtin_ffsll(core_mask) - 1; + + switch (pkt->unit) { + case(CPU_POWER_SCALE_MIN): + power_manager_scale_core_min(core_num); + break; + case(CPU_POWER_SCALE_MAX): + power_manager_scale_core_max(core_num); + break; + case(CPU_POWER_SCALE_DOWN): + power_manager_scale_core_down(core_num); + break; + case(CPU_POWER_SCALE_UP): + power_manager_scale_core_up(core_num); + break; + default: + break; + } + } else { + switch (pkt->unit) { + case(CPU_POWER_SCALE_MIN): + power_manager_scale_mask_min(core_mask); + break; + case(CPU_POWER_SCALE_MAX): + power_manager_scale_mask_max(core_mask); + break; + case(CPU_POWER_SCALE_DOWN): + power_manager_scale_mask_down(core_mask); + break; + case(CPU_POWER_SCALE_UP): + power_manager_scale_mask_up(core_mask); + break; + default: + break; + } + + } + } + /* Return is not checked as channel status may have been set to DISABLED + * from management thread + */ + rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING, + CHANNEL_MGR_CHANNEL_CONNECTED); + return 0; + +} + +int +add_channel_to_monitor(struct channel_info **chan_info) +{ + struct channel_info *info = *chan_info; + struct epoll_event event; + + event.events = EPOLLIN; + event.data.ptr = info; + if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' " + "to epoll\n", info->channel_path); + return -1; + } + return 0; +} + +int +remove_channel_from_monitor(struct channel_info *chan_info) +{ + if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' " + "from epoll\n", chan_info->channel_path); + return -1; + } + return 0; +} + +int +channel_monitor_init(void) +{ + global_event_fd = epoll_create1(0); + if 
(global_event_fd == 0) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with " + "error %s\n", strerror(errno)); + return -1; + } + global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list) + * MAX_EVENTS, RTE_CACHE_LINE_SIZE); + if (global_events_list == NULL) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for " + "epoll events\n"); + return -1; + } + return 0; +} + +void +run_channel_monitor(void) +{ + while (run_loop) { + int n_events, i; + + n_events = epoll_wait(global_event_fd, global_events_list, + MAX_EVENTS, 1); + if (!run_loop) + break; + for (i = 0; i < n_events; i++) { + struct channel_info *chan_info = (struct channel_info *) + global_events_list[i].data.ptr; + if ((global_events_list[i].events & EPOLLERR) || + (global_events_list[i].events & EPOLLHUP)) { + RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for " + "channel '%s'\n", chan_info->channel_path); + remove_channel(&chan_info); + continue; + } + if (global_events_list[i].events & EPOLLIN) { + + int n_bytes, err = 0; + struct channel_packet pkt; + void *buffer = &pkt; + int buffer_len = sizeof(pkt); + + while (buffer_len > 0) { + n_bytes = read(chan_info->fd, buffer, buffer_len); + if (n_bytes == buffer_len) + break; + if (n_bytes == -1) { + err = errno; + RTE_LOG(DEBUG, CHANNEL_MONITOR, "Received error on " + "channel '%s' read: %s\n", + chan_info->channel_path, strerror(err)); + remove_channel(&chan_info); + break; + } + buffer = (char *)buffer + n_bytes; + buffer_len -= n_bytes; + } + if (!err) + process_request(&pkt, chan_info); + } + } + } +} diff --git a/examples/vm_power_manager/channel_monitor.h b/examples/vm_power_manager/channel_monitor.h new file mode 100644 index 00000000..c1386079 --- /dev/null +++ b/examples/vm_power_manager/channel_monitor.h @@ -0,0 +1,102 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CHANNEL_MONITOR_H_ +#define CHANNEL_MONITOR_H_ + +#include "channel_manager.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Setup the Channel Monitor resources required to initialize epoll. + * Must be called first before calling other functions. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int channel_monitor_init(void); + +/** + * Run the channel monitor, loops forever on epoll_wait. 
+ * + * + * @return + * None + */ +void run_channel_monitor(void); + +/** + * Exit the Channel Monitor, exiting the epoll_wait loop and events processing. + * + * @return + * None. The function is declared void, so no success or + * error status is reported. + */ +void channel_monitor_exit(void); + +/** + * Add an open channel to monitor via epoll. A pointer to struct channel_info + * will be registered with epoll for event processing. + * It is thread-safe. + * + * @param chan_info + * Pointer to struct channel_info pointer. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int add_channel_to_monitor(struct channel_info **chan_info); + +/** + * Remove a previously added channel from epoll control. + * + * @param chan_info + * Pointer to struct channel_info. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int remove_channel_from_monitor(struct channel_info *chan_info); + +#ifdef __cplusplus +} +#endif + + +#endif /* CHANNEL_MONITOR_H_ */ diff --git a/examples/vm_power_manager/guest_cli/Makefile b/examples/vm_power_manager/guest_cli/Makefile new file mode 100644 index 00000000..55072708 --- /dev/null +++ b/examples/vm_power_manager/guest_cli/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = guest_vm_power_mgr + +# all source are stored in SRCS-y +SRCS-y := main.c vm_power_cli_guest.c + +CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/ +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/vm_power_manager/guest_cli/main.c b/examples/vm_power_manager/guest_cli/main.c new file mode 100644 index 00000000..5ac98ed3 --- /dev/null +++ b/examples/vm_power_manager/guest_cli/main.c @@ -0,0 +1,86 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <sys/epoll.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +*/ +#include <signal.h> + +#include <rte_lcore.h> +#include <rte_power.h> +#include <rte_debug.h> + +#include "vm_power_cli_guest.h" + +static void +sig_handler(int signo) +{ + printf("Received signal %d, exiting...\n", signo); + unsigned lcore_id; + + RTE_LCORE_FOREACH(lcore_id) { + rte_power_exit(lcore_id); + } + +} + +int +main(int argc, char **argv) +{ + int ret; + unsigned lcore_id; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_panic("Cannot init EAL\n"); + + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + rte_power_set_env(PM_ENV_KVM_VM); + RTE_LCORE_FOREACH(lcore_id) { + rte_power_init(lcore_id); + } + run_cli(NULL); + + return 0; +} diff --git a/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c new file mode 100644 index 00000000..7931135e --- /dev/null +++ b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c @@ -0,0 +1,155 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <termios.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_num.h> +#include <cmdline_socket.h> +#include <cmdline.h> +#include <rte_log.h> +#include <rte_lcore.h> + +#include <rte_power.h> + +#include "vm_power_cli_guest.h" + + +#define CHANNEL_PATH "/dev/virtio-ports/virtio.serial.port.poweragent" + + +#define RTE_LOGTYPE_GUEST_CHANNEL RTE_LOGTYPE_USER1 + +struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + unsigned lcore_id; + + RTE_LCORE_FOREACH(lcore_id) { + rte_power_exit(lcore_id); + } + cmdline_quit(cl); +} + +cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + +cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "close the application", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_quit_quit, + NULL, + }, +}; + +/* *** VM operations *** */ + +struct cmd_set_cpu_freq_result { + cmdline_fixed_string_t set_cpu_freq; + uint8_t lcore_id; + cmdline_fixed_string_t cmd; +}; + +static void +cmd_set_cpu_freq_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + int ret = -1; + struct cmd_set_cpu_freq_result *res = parsed_result; + + if (!strcmp(res->cmd , "up")) + ret = rte_power_freq_up(res->lcore_id); + else if (!strcmp(res->cmd , "down")) + ret = rte_power_freq_down(res->lcore_id); + else if (!strcmp(res->cmd , "min")) + ret = rte_power_freq_min(res->lcore_id); + else if (!strcmp(res->cmd , "max")) + ret = rte_power_freq_max(res->lcore_id); + if (ret != 1) + cmdline_printf(cl, "Error sending message: %s\n", strerror(ret)); +} + 
+cmdline_parse_token_string_t cmd_set_cpu_freq = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result, + set_cpu_freq, "set_cpu_freq"); +cmdline_parse_token_string_t cmd_set_cpu_freq_core_num = + TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_result, + lcore_id, UINT8); +cmdline_parse_token_string_t cmd_set_cpu_freq_cmd_cmd = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result, + cmd, "up#down#min#max"); + +cmdline_parse_inst_t cmd_set_cpu_freq_set = { + .f = cmd_set_cpu_freq_parsed, + .data = NULL, + .help_str = "set_cpu_freq <core_num> <up|down|min|max>, Set the current " + "frequency for the specified core by scaling up/down/min/max", + .tokens = { + (void *)&cmd_set_cpu_freq, + (void *)&cmd_set_cpu_freq_core_num, + (void *)&cmd_set_cpu_freq_cmd_cmd, + NULL, + }, +}; + +cmdline_parse_ctx_t main_ctx[] = { + (cmdline_parse_inst_t *)&cmd_quit, + (cmdline_parse_inst_t *)&cmd_set_cpu_freq_set, + NULL, +}; + +void +run_cli(__attribute__((unused)) void *arg) +{ + struct cmdline *cl; + + cl = cmdline_stdin_new(main_ctx, "vmpower(guest)> "); + if (cl == NULL) + return; + + cmdline_interact(cl); + cmdline_stdin_exit(cl); +} diff --git a/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h new file mode 100644 index 00000000..0c4bdd5b --- /dev/null +++ b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h @@ -0,0 +1,55 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef VM_POWER_CLI_H_ +#define VM_POWER_CLI_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "channel_commands.h" + +int guest_channel_host_connect(unsigned lcore_id); + +int guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id); + +void guest_channel_host_disconnect(unsigned lcore_id); + +void run_cli(__attribute__((unused)) void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* VM_POWER_CLI_H_ */ diff --git a/examples/vm_power_manager/main.c b/examples/vm_power_manager/main.c new file mode 100644 index 00000000..97178d14 --- /dev/null +++ b/examples/vm_power_manager/main.c @@ -0,0 +1,115 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <sys/epoll.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <errno.h> + +#include <sys/queue.h> + +#include <rte_common.h> +#include <rte_eal.h> +#include <rte_launch.h> +#include <rte_log.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_debug.h> + +#include "channel_manager.h" +#include "channel_monitor.h" +#include "power_manager.h" +#include "vm_power_cli.h" + +static int +run_monitor(__attribute__((unused)) void *arg) +{ + if (channel_monitor_init() < 0) { + printf("Unable to initialize channel monitor\n"); + return -1; + } + run_channel_monitor(); + return 0; +} + +static void +sig_handler(int signo) +{ + printf("Received signal %d, exiting...\n", signo); + channel_monitor_exit(); + channel_manager_exit(); + power_manager_exit(); + +} + +int +main(int argc, char **argv) +{ + int ret; + unsigned lcore_id; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_panic("Cannot init EAL\n"); + + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + lcore_id = rte_get_next_lcore(-1, 1, 0); + if (lcore_id == RTE_MAX_LCORE) { + RTE_LOG(ERR, EAL, "A minimum of two cores are required to run " + "application\n"); + return 0; + } + rte_eal_remote_launch(run_monitor, NULL, lcore_id); + + if (power_manager_init() < 0) { + printf("Unable to initialize power manager\n"); + return -1; + } + if (channel_manager_init(CHANNEL_MGR_DEFAULT_HV_PATH) < 0) { + printf("Unable to initialize channel manager\n"); + return -1; + } + run_cli(NULL); + + rte_eal_mp_wait_lcore(); + return 0; +} diff --git a/examples/vm_power_manager/power_manager.c b/examples/vm_power_manager/power_manager.c new file mode 100644 index 00000000..2644fce6 --- /dev/null +++ b/examples/vm_power_manager/power_manager.c @@ -0,0 +1,252 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/un.h> +#include <fcntl.h> +#include <unistd.h> +#include <dirent.h> +#include <errno.h> + +#include <sys/types.h> + +#include <rte_log.h> +#include <rte_power.h> +#include <rte_spinlock.h> + +#include "power_manager.h" + +#define RTE_LOGTYPE_POWER_MANAGER RTE_LOGTYPE_USER1 + +#define POWER_SCALE_CORE(DIRECTION, core_num , ret) do { \ + if (core_num >= POWER_MGR_MAX_CPUS) \ + return -1; \ + if (!(global_enabled_cpus & (1ULL << core_num))) \ + return -1; \ + rte_spinlock_lock(&global_core_freq_info[core_num].power_sl); \ + ret = rte_power_freq_##DIRECTION(core_num); \ + rte_spinlock_unlock(&global_core_freq_info[core_num].power_sl); \ +} while (0) + +#define POWER_SCALE_MASK(DIRECTION, core_mask, ret) do { \ + int i; \ + for (i = 0; core_mask; core_mask &= ~(1 << i++)) { \ + if ((core_mask >> i) & 1) { \ + if (!(global_enabled_cpus & (1ULL << i))) \ + continue; \ + rte_spinlock_lock(&global_core_freq_info[i].power_sl); \ + if (rte_power_freq_##DIRECTION(i) != 1) \ + ret = -1; \ + rte_spinlock_unlock(&global_core_freq_info[i].power_sl); \ + } \ + } \ +} while (0) + +struct freq_info { + rte_spinlock_t power_sl; + uint32_t freqs[RTE_MAX_LCORE_FREQS]; + unsigned num_freqs; +} __rte_cache_aligned; + +static struct freq_info global_core_freq_info[POWER_MGR_MAX_CPUS]; + +static uint64_t global_enabled_cpus; + +#define SYSFS_CPU_PATH "/sys/devices/system/cpu/cpu%u/topology/core_id" + +static unsigned +set_host_cpus_mask(void) +{ + char path[PATH_MAX]; + unsigned i; + unsigned num_cpus = 0; + + for (i = 0; i < POWER_MGR_MAX_CPUS; i++) { + snprintf(path, sizeof(path), SYSFS_CPU_PATH, i); + if (access(path, F_OK) == 0) { + global_enabled_cpus |= 1ULL << i; + num_cpus++; + } else + return num_cpus; + } + return num_cpus; +} + +int +power_manager_init(void) +{ + unsigned i, num_cpus; + uint64_t cpu_mask; + int ret = 0; + + num_cpus = set_host_cpus_mask(); + if (num_cpus == 
0) { + RTE_LOG(ERR, POWER_MANAGER, "Unable to detected host CPUs, please " + "ensure that sufficient privileges exist to inspect sysfs\n"); + return -1; + } + rte_power_set_env(PM_ENV_ACPI_CPUFREQ); + cpu_mask = global_enabled_cpus; + for (i = 0; cpu_mask; cpu_mask &= ~(1 << i++)) { + if (rte_power_init(i) < 0 || rte_power_freqs(i, + global_core_freq_info[i].freqs, + RTE_MAX_LCORE_FREQS) == 0) { + RTE_LOG(ERR, POWER_MANAGER, "Unable to initialize power manager " + "for core %u\n", i); + global_enabled_cpus &= ~(1 << i); + num_cpus--; + ret = -1; + } + rte_spinlock_init(&global_core_freq_info[i].power_sl); + } + RTE_LOG(INFO, POWER_MANAGER, "Detected %u host CPUs , enabled core mask:" + " 0x%"PRIx64"\n", num_cpus, global_enabled_cpus); + return ret; + +} + +uint32_t +power_manager_get_current_frequency(unsigned core_num) +{ + uint32_t freq, index; + + if (core_num >= POWER_MGR_MAX_CPUS) { + RTE_LOG(ERR, POWER_MANAGER, "Core(%u) is out of range 0...%d\n", + core_num, POWER_MGR_MAX_CPUS-1); + return -1; + } + if (!(global_enabled_cpus & (1ULL << core_num))) + return 0; + + rte_spinlock_lock(&global_core_freq_info[core_num].power_sl); + index = rte_power_get_freq(core_num); + rte_spinlock_unlock(&global_core_freq_info[core_num].power_sl); + if (index >= POWER_MGR_MAX_CPUS) + freq = 0; + else + freq = global_core_freq_info[core_num].freqs[index]; + + return freq; +} + +int +power_manager_exit(void) +{ + unsigned int i; + int ret = 0; + + for (i = 0; global_enabled_cpus; global_enabled_cpus &= ~(1 << i++)) { + if (rte_power_exit(i) < 0) { + RTE_LOG(ERR, POWER_MANAGER, "Unable to shutdown power manager " + "for core %u\n", i); + ret = -1; + } + } + global_enabled_cpus = 0; + return ret; +} + +int +power_manager_scale_mask_up(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(up, core_mask, ret); + return ret; +} + +int +power_manager_scale_mask_down(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(down, core_mask, ret); + return ret; +} + +int 
+power_manager_scale_mask_min(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(min, core_mask, ret); + return ret; +} + +int +power_manager_scale_mask_max(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(max, core_mask, ret); + return ret; +} + +int +power_manager_scale_core_up(unsigned core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(up, core_num, ret); + return ret; +} + +int +power_manager_scale_core_down(unsigned core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(down, core_num, ret); + return ret; +} + +int +power_manager_scale_core_min(unsigned core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(min, core_num, ret); + return ret; +} + +int +power_manager_scale_core_max(unsigned core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(max, core_num, ret); + return ret; +} diff --git a/examples/vm_power_manager/power_manager.h b/examples/vm_power_manager/power_manager.h new file mode 100644 index 00000000..1b45babf --- /dev/null +++ b/examples/vm_power_manager/power_manager.h @@ -0,0 +1,188 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef POWER_MANAGER_H_ +#define POWER_MANAGER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Maximum number of CPUS to manage */ +#define POWER_MGR_MAX_CPUS 64 +/** + * Initialize power management. + * Initializes resources and verifies the number of CPUs on the system. + * Wraps librte_power int rte_power_init(unsigned lcore_id); + * + * @return + * - 0 on success. + * - Negative on error. + */ +int power_manager_init(void); + +/** + * Exit power management. Must be called prior to exiting the application. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int power_manager_exit(void); + +/** + * Scale up the frequency of the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_mask_up(uint64_t core_mask); + +/** + * Scale down the frequency of the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. 
+ */ +int power_manager_scale_mask_down(uint64_t core_mask); + +/** + * Scale to the minimum frequency of the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_mask_min(uint64_t core_mask); + +/** + * Scale to the maximum frequency of the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_mask_max(uint64_t core_mask); + +/** + * Scale up frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_core_up(unsigned core_num); + +/** + * Scale down frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - 0 if frequency not changed. + * - Negative on error. + */ +int power_manager_scale_core_down(unsigned core_num); + +/** + * Scale to minimum frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - 0 if frequency not changed. + * - Negative on error. + */ +int power_manager_scale_core_min(unsigned core_num); + +/** + * Scale to maximum frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - 0 if frequency not changed. + * - Negative on error. 
+ */ +int power_manager_scale_core_max(unsigned core_num); + +/** + * Get the current freuency of the core specified by core_num + * + * @param core_num + * The core number to get the current frequency + * + * @return + * - 0 on error + * - >0 for current frequency. + */ +uint32_t power_manager_get_current_frequency(unsigned core_num); + + +#ifdef __cplusplus +} +#endif + + +#endif /* POWER_MANAGER_H_ */ diff --git a/examples/vm_power_manager/vm_power_cli.c b/examples/vm_power_manager/vm_power_cli.c new file mode 100644 index 00000000..c5e8d934 --- /dev/null +++ b/examples/vm_power_manager/vm_power_cli.c @@ -0,0 +1,672 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <string.h> +#include <termios.h> +#include <errno.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_num.h> +#include <cmdline_socket.h> +#include <cmdline.h> + +#include "vm_power_cli.h" +#include "channel_manager.h" +#include "channel_monitor.h" +#include "power_manager.h" +#include "channel_commands.h" + +struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + channel_monitor_exit(); + channel_manager_exit(); + power_manager_exit(); + cmdline_quit(cl); +} + +cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + +cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "close the application", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_quit_quit, + NULL, + }, +}; + +/* *** VM operations *** */ +struct cmd_show_vm_result { + cmdline_fixed_string_t show_vm; + cmdline_fixed_string_t vm_name; +}; + +static void +cmd_show_vm_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_show_vm_result *res = parsed_result; + struct 
vm_info info; + unsigned i; + + if (get_info_vm(res->vm_name, &info) != 0) + return; + cmdline_printf(cl, "VM: '%s', status = ", info.name); + if (info.status == CHANNEL_MGR_VM_ACTIVE) + cmdline_printf(cl, "ACTIVE\n"); + else + cmdline_printf(cl, "INACTIVE\n"); + cmdline_printf(cl, "Channels %u\n", info.num_channels); + for (i = 0; i < info.num_channels; i++) { + cmdline_printf(cl, " [%u]: %s, status = ", i, + info.channels[i].channel_path); + switch (info.channels[i].status) { + case CHANNEL_MGR_CHANNEL_CONNECTED: + cmdline_printf(cl, "CONNECTED\n"); + break; + case CHANNEL_MGR_CHANNEL_DISCONNECTED: + cmdline_printf(cl, "DISCONNECTED\n"); + break; + case CHANNEL_MGR_CHANNEL_DISABLED: + cmdline_printf(cl, "DISABLED\n"); + break; + case CHANNEL_MGR_CHANNEL_PROCESSING: + cmdline_printf(cl, "PROCESSING\n"); + break; + default: + cmdline_printf(cl, "UNKNOWN\n"); + break; + } + } + cmdline_printf(cl, "Virtual CPU(s): %u\n", info.num_vcpus); + for (i = 0; i < info.num_vcpus; i++) { + cmdline_printf(cl, " [%u]: Physical CPU Mask 0x%"PRIx64"\n", i, + info.pcpu_mask[i]); + } +} + + + +cmdline_parse_token_string_t cmd_vm_show = + TOKEN_STRING_INITIALIZER(struct cmd_show_vm_result, + show_vm, "show_vm"); +cmdline_parse_token_string_t cmd_show_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_show_vm_result, + vm_name, NULL); + +cmdline_parse_inst_t cmd_show_vm_set = { + .f = cmd_show_vm_parsed, + .data = NULL, + .help_str = "show_vm <vm_name>, prints the information on the " + "specified VM(s), the information lists the number of vCPUS, the " + "pinning to pCPU(s) as a bit mask, along with any communication " + "channels associated with each VM", + .tokens = { + (void *)&cmd_vm_show, + (void *)&cmd_show_vm_name, + NULL, + }, +}; + +/* *** vCPU to pCPU mapping operations *** */ +struct cmd_set_pcpu_mask_result { + cmdline_fixed_string_t set_pcpu_mask; + cmdline_fixed_string_t vm_name; + uint8_t vcpu; + uint64_t core_mask; +}; + +static void +cmd_set_pcpu_mask_parsed(void 
*parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_set_pcpu_mask_result *res = parsed_result; + + if (set_pcpus_mask(res->vm_name, res->vcpu, res->core_mask) == 0) + cmdline_printf(cl, "Pinned vCPU(%"PRId8") to pCPU core " + "mask(0x%"PRIx64")\n", res->vcpu, res->core_mask); + else + cmdline_printf(cl, "Unable to pin vCPU(%"PRId8") to pCPU core " + "mask(0x%"PRIx64")\n", res->vcpu, res->core_mask); +} + +cmdline_parse_token_string_t cmd_set_pcpu_mask = + TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_mask_result, + set_pcpu_mask, "set_pcpu_mask"); +cmdline_parse_token_string_t cmd_set_pcpu_mask_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_mask_result, + vm_name, NULL); +cmdline_parse_token_num_t set_pcpu_mask_vcpu = + TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_mask_result, + vcpu, UINT8); +cmdline_parse_token_num_t set_pcpu_mask_core_mask = + TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_mask_result, + core_mask, UINT64); + + +cmdline_parse_inst_t cmd_set_pcpu_mask_set = { + .f = cmd_set_pcpu_mask_parsed, + .data = NULL, + .help_str = "set_pcpu_mask <vm_name> <vcpu> <pcpu>, Set the binding " + "of Virtual CPU on VM to the Physical CPU mask.", + .tokens = { + (void *)&cmd_set_pcpu_mask, + (void *)&cmd_set_pcpu_mask_vm_name, + (void *)&set_pcpu_mask_vcpu, + (void *)&set_pcpu_mask_core_mask, + NULL, + }, +}; + +struct cmd_set_pcpu_result { + cmdline_fixed_string_t set_pcpu; + cmdline_fixed_string_t vm_name; + uint8_t vcpu; + uint8_t core; +}; + +static void +cmd_set_pcpu_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_set_pcpu_result *res = parsed_result; + + if (set_pcpu(res->vm_name, res->vcpu, res->core) == 0) + cmdline_printf(cl, "Pinned vCPU(%"PRId8") to pCPU core " + "%"PRId8")\n", res->vcpu, res->core); + else + cmdline_printf(cl, "Unable to pin vCPU(%"PRId8") to pCPU core " + "%"PRId8")\n", res->vcpu, res->core); +} + +cmdline_parse_token_string_t cmd_set_pcpu = + 
TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_result, + set_pcpu, "set_pcpu"); +cmdline_parse_token_string_t cmd_set_pcpu_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_result, + vm_name, NULL); +cmdline_parse_token_num_t set_pcpu_vcpu = + TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_result, + vcpu, UINT8); +cmdline_parse_token_num_t set_pcpu_core = + TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_result, + core, UINT64); + + +cmdline_parse_inst_t cmd_set_pcpu_set = { + .f = cmd_set_pcpu_parsed, + .data = NULL, + .help_str = "set_pcpu <vm_name> <vcpu> <pcpu>, Set the binding " + "of Virtual CPU on VM to the Physical CPU.", + .tokens = { + (void *)&cmd_set_pcpu, + (void *)&cmd_set_pcpu_vm_name, + (void *)&set_pcpu_vcpu, + (void *)&set_pcpu_core, + NULL, + }, +}; + +struct cmd_vm_op_result { + cmdline_fixed_string_t op_vm; + cmdline_fixed_string_t vm_name; +}; + +static void +cmd_vm_op_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_vm_op_result *res = parsed_result; + + if (!strcmp(res->op_vm, "add_vm")) { + if (add_vm(res->vm_name) < 0) + cmdline_printf(cl, "Unable to add VM '%s'\n", res->vm_name); + } else if (remove_vm(res->vm_name) < 0) + cmdline_printf(cl, "Unable to remove VM '%s'\n", res->vm_name); +} + +cmdline_parse_token_string_t cmd_vm_op = + TOKEN_STRING_INITIALIZER(struct cmd_vm_op_result, + op_vm, "add_vm#rm_vm"); +cmdline_parse_token_string_t cmd_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_vm_op_result, + vm_name, NULL); + +cmdline_parse_inst_t cmd_vm_op_set = { + .f = cmd_vm_op_parsed, + .data = NULL, + .help_str = "add_vm|rm_vm <name>, add a VM for " + "subsequent operations with the CLI or remove a previously added " + "VM from the VM Power Manager", + .tokens = { + (void *)&cmd_vm_op, + (void *)&cmd_vm_name, + NULL, + }, +}; + +/* *** VM channel operations *** */ +struct cmd_channels_op_result { + cmdline_fixed_string_t op; + cmdline_fixed_string_t vm_name; + cmdline_fixed_string_t 
channel_list; +}; +static void +cmd_channels_op_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + unsigned num_channels = 0, channel_num, i; + int channels_added; + unsigned channel_list[CHANNEL_CMDS_MAX_VM_CHANNELS]; + char *token, *remaining, *tail_ptr; + struct cmd_channels_op_result *res = parsed_result; + + if (!strcmp(res->channel_list, "all")) { + channels_added = add_all_channels(res->vm_name); + cmdline_printf(cl, "Added %d channels for VM '%s'\n", + channels_added, res->vm_name); + return; + } + + remaining = res->channel_list; + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + + token = strsep(&remaining, ","); + if (token == NULL) + break; + errno = 0; + channel_num = (unsigned)strtol(token, &tail_ptr, 10); + if ((errno != 0) || tail_ptr == NULL || (*tail_ptr != '\0')) + break; + + if (channel_num == CHANNEL_CMDS_MAX_VM_CHANNELS) { + cmdline_printf(cl, "Channel number '%u' exceeds the maximum number " + "of allowable channels(%u) for VM '%s'\n", channel_num, + CHANNEL_CMDS_MAX_VM_CHANNELS, res->vm_name); + return; + } + channel_list[num_channels++] = channel_num; + } + for (i = 0; i < num_channels; i++) + cmdline_printf(cl, "[%u]: Adding channel %u\n", i, channel_list[i]); + + channels_added = add_channels(res->vm_name, channel_list, + num_channels); + cmdline_printf(cl, "Enabled %d channels for '%s'\n", channels_added, + res->vm_name); +} + +cmdline_parse_token_string_t cmd_channels_op = + TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result, + op, "add_channels"); +cmdline_parse_token_string_t cmd_channels_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result, + vm_name, NULL); +cmdline_parse_token_string_t cmd_channels_list = + TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result, + channel_list, NULL); + +cmdline_parse_inst_t cmd_channels_op_set = { + .f = cmd_channels_op_parsed, + .data = NULL, + .help_str = "add_channels <vm_name> <list>|all, add " + "communication 
channels for the specified VM, the " + "virtio channels must be enabled in the VM " + "configuration(qemu/libvirt) and the associated VM must be active. " + "<list> is a comma-separated list of channel numbers to add, using " + "the keyword 'all' will attempt to add all channels for the VM", + .tokens = { + (void *)&cmd_channels_op, + (void *)&cmd_channels_vm_name, + (void *)&cmd_channels_list, + NULL, + }, +}; + +struct cmd_channels_status_op_result { + cmdline_fixed_string_t op; + cmdline_fixed_string_t vm_name; + cmdline_fixed_string_t channel_list; + cmdline_fixed_string_t status; +}; + +static void +cmd_channels_status_op_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + unsigned num_channels = 0, channel_num; + int changed; + unsigned channel_list[CHANNEL_CMDS_MAX_VM_CHANNELS]; + char *token, *remaining, *tail_ptr; + struct cmd_channels_status_op_result *res = parsed_result; + enum channel_status status; + + if (!strcmp(res->status, "enabled")) + status = CHANNEL_MGR_CHANNEL_CONNECTED; + else + status = CHANNEL_MGR_CHANNEL_DISABLED; + + if (!strcmp(res->channel_list, "all")) { + changed = set_channel_status_all(res->vm_name, status); + cmdline_printf(cl, "Updated status of %d channels " + "for VM '%s'\n", changed, res->vm_name); + return; + } + remaining = res->channel_list; + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + token = strsep(&remaining, ","); + if (token == NULL) + break; + errno = 0; + channel_num = (unsigned)strtol(token, &tail_ptr, 10); + if ((errno != 0) || tail_ptr == NULL || (*tail_ptr != '\0')) + break; + + if (channel_num == CHANNEL_CMDS_MAX_VM_CHANNELS) { + cmdline_printf(cl, "%u exceeds the maximum number of allowable " + "channels(%u) for VM '%s'\n", channel_num, + CHANNEL_CMDS_MAX_VM_CHANNELS, res->vm_name); + return; + } + channel_list[num_channels++] = channel_num; + } + changed = set_channel_status(res->vm_name, channel_list, num_channels, + status); + 
cmdline_printf(cl, "Updated status of %d channels " + "for VM '%s'\n", changed, res->vm_name); +} + +cmdline_parse_token_string_t cmd_channels_status_op = + TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result, + op, "set_channel_status"); +cmdline_parse_token_string_t cmd_channels_status_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result, + vm_name, NULL); +cmdline_parse_token_string_t cmd_channels_status_list = + TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result, + channel_list, NULL); +cmdline_parse_token_string_t cmd_channels_status = + TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result, + status, "enabled#disabled"); + +cmdline_parse_inst_t cmd_channels_status_op_set = { + .f = cmd_channels_status_op_parsed, + .data = NULL, + .help_str = "set_channel_status <vm_name> <list>|all enabled|disabled, " + " enable or disable the communication channels in " + "list(comma-separated) for the specified VM, alternatively " + "list can be replaced with keyword 'all'. " + "Disabled channels will still receive packets on the host, " + "however the commands they specify will be ignored. 
" + "Set status to 'enabled' to begin processing requests again.", + .tokens = { + (void *)&cmd_channels_status_op, + (void *)&cmd_channels_status_vm_name, + (void *)&cmd_channels_status_list, + (void *)&cmd_channels_status, + NULL, + }, +}; + +/* *** CPU Frequency operations *** */ +struct cmd_show_cpu_freq_mask_result { + cmdline_fixed_string_t show_cpu_freq_mask; + uint64_t core_mask; +}; + +static void +cmd_show_cpu_freq_mask_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_show_cpu_freq_mask_result *res = parsed_result; + unsigned i; + uint64_t mask = res->core_mask; + uint32_t freq; + + for (i = 0; mask; mask &= ~(1ULL << i++)) { + if ((mask >> i) & 1) { + freq = power_manager_get_current_frequency(i); + if (freq > 0) + cmdline_printf(cl, "Core %u: %"PRId32"\n", i, freq); + } + } +} + +cmdline_parse_token_string_t cmd_show_cpu_freq_mask = + TOKEN_STRING_INITIALIZER(struct cmd_show_cpu_freq_mask_result, + show_cpu_freq_mask, "show_cpu_freq_mask"); +cmdline_parse_token_num_t cmd_show_cpu_freq_mask_core_mask = + TOKEN_NUM_INITIALIZER(struct cmd_show_cpu_freq_mask_result, + core_mask, UINT64); + +cmdline_parse_inst_t cmd_show_cpu_freq_mask_set = { + .f = cmd_show_cpu_freq_mask_parsed, + .data = NULL, + .help_str = "show_cpu_freq_mask <mask>, Get the current frequency for each " + "core specified in the mask", + .tokens = { + (void *)&cmd_show_cpu_freq_mask, + (void *)&cmd_show_cpu_freq_mask_core_mask, + NULL, + }, +}; + +struct cmd_set_cpu_freq_mask_result { + cmdline_fixed_string_t set_cpu_freq_mask; + uint64_t core_mask; + cmdline_fixed_string_t cmd; +}; + +static void +cmd_set_cpu_freq_mask_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_set_cpu_freq_mask_result *res = parsed_result; + int ret = -1; + + if (!strcmp(res->cmd , "up")) + ret = power_manager_scale_mask_up(res->core_mask); + else if (!strcmp(res->cmd , "down")) + ret = 
power_manager_scale_mask_down(res->core_mask); + else if (!strcmp(res->cmd , "min")) + ret = power_manager_scale_mask_min(res->core_mask); + else if (!strcmp(res->cmd , "max")) + ret = power_manager_scale_mask_max(res->core_mask); + if (ret < 0) { + cmdline_printf(cl, "Error scaling core_mask(0x%"PRIx64") '%s' , not " + "all cores specified have been scaled\n", + res->core_mask, res->cmd); + }; +} + +cmdline_parse_token_string_t cmd_set_cpu_freq_mask = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_mask_result, + set_cpu_freq_mask, "set_cpu_freq_mask"); +cmdline_parse_token_num_t cmd_set_cpu_freq_mask_core_mask = + TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_mask_result, + core_mask, UINT64); +cmdline_parse_token_string_t cmd_set_cpu_freq_mask_result = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_mask_result, + cmd, "up#down#min#max"); + +cmdline_parse_inst_t cmd_set_cpu_freq_mask_set = { + .f = cmd_set_cpu_freq_mask_parsed, + .data = NULL, + .help_str = "set_cpu_freq <core_mask> <up|down|min|max>, Set the current " + "frequency for the cores specified in <core_mask> by scaling " + "each up/down/min/max.", + .tokens = { + (void *)&cmd_set_cpu_freq_mask, + (void *)&cmd_set_cpu_freq_mask_core_mask, + (void *)&cmd_set_cpu_freq_mask_result, + NULL, + }, +}; + + + +struct cmd_show_cpu_freq_result { + cmdline_fixed_string_t show_cpu_freq; + uint8_t core_num; +}; + +static void +cmd_show_cpu_freq_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_show_cpu_freq_result *res = parsed_result; + uint32_t curr_freq = power_manager_get_current_frequency(res->core_num); + + if (curr_freq == 0) { + cmdline_printf(cl, "Unable to get frequency for core %u\n", + res->core_num); + return; + } + cmdline_printf(cl, "Core %u frequency: %"PRId32"\n", res->core_num, + curr_freq); +} + +cmdline_parse_token_string_t cmd_show_cpu_freq = + TOKEN_STRING_INITIALIZER(struct cmd_show_cpu_freq_result, + show_cpu_freq, "show_cpu_freq"); 
+ +cmdline_parse_token_num_t cmd_show_cpu_freq_core_num = + TOKEN_NUM_INITIALIZER(struct cmd_show_cpu_freq_result, + core_num, UINT8); + +cmdline_parse_inst_t cmd_show_cpu_freq_set = { + .f = cmd_show_cpu_freq_parsed, + .data = NULL, + .help_str = "Get the current frequency for the specified core", + .tokens = { + (void *)&cmd_show_cpu_freq, + (void *)&cmd_show_cpu_freq_core_num, + NULL, + }, +}; + +struct cmd_set_cpu_freq_result { + cmdline_fixed_string_t set_cpu_freq; + uint8_t core_num; + cmdline_fixed_string_t cmd; +}; + +static void +cmd_set_cpu_freq_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + int ret = -1; + struct cmd_set_cpu_freq_result *res = parsed_result; + + if (!strcmp(res->cmd , "up")) + ret = power_manager_scale_core_up(res->core_num); + else if (!strcmp(res->cmd , "down")) + ret = power_manager_scale_core_down(res->core_num); + else if (!strcmp(res->cmd , "min")) + ret = power_manager_scale_core_min(res->core_num); + else if (!strcmp(res->cmd , "max")) + ret = power_manager_scale_core_max(res->core_num); + if (ret < 0) { + cmdline_printf(cl, "Error scaling core(%u) '%s'\n", res->core_num, + res->cmd); + } +} + +cmdline_parse_token_string_t cmd_set_cpu_freq = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result, + set_cpu_freq, "set_cpu_freq"); +cmdline_parse_token_num_t cmd_set_cpu_freq_core_num = + TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_result, + core_num, UINT8); +cmdline_parse_token_string_t cmd_set_cpu_freq_cmd_cmd = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result, + cmd, "up#down#min#max"); + +cmdline_parse_inst_t cmd_set_cpu_freq_set = { + .f = cmd_set_cpu_freq_parsed, + .data = NULL, + .help_str = "set_cpu_freq <core_num> <up|down|min|max>, Set the current " + "frequency for the specified core by scaling up/down/min/max", + .tokens = { + (void *)&cmd_set_cpu_freq, + (void *)&cmd_set_cpu_freq_core_num, + (void *)&cmd_set_cpu_freq_cmd_cmd, + NULL, + }, +}; + 
+cmdline_parse_ctx_t main_ctx[] = { + (cmdline_parse_inst_t *)&cmd_quit, + (cmdline_parse_inst_t *)&cmd_vm_op_set, + (cmdline_parse_inst_t *)&cmd_channels_op_set, + (cmdline_parse_inst_t *)&cmd_channels_status_op_set, + (cmdline_parse_inst_t *)&cmd_show_vm_set, + (cmdline_parse_inst_t *)&cmd_show_cpu_freq_mask_set, + (cmdline_parse_inst_t *)&cmd_set_cpu_freq_mask_set, + (cmdline_parse_inst_t *)&cmd_show_cpu_freq_set, + (cmdline_parse_inst_t *)&cmd_set_cpu_freq_set, + (cmdline_parse_inst_t *)&cmd_set_pcpu_mask_set, + (cmdline_parse_inst_t *)&cmd_set_pcpu_set, + NULL, +}; + +void +run_cli(__attribute__((unused)) void *arg) +{ + struct cmdline *cl; + + cl = cmdline_stdin_new(main_ctx, "vmpower> "); + if (cl == NULL) + return; + + cmdline_interact(cl); + cmdline_stdin_exit(cl); +} diff --git a/examples/vm_power_manager/vm_power_cli.h b/examples/vm_power_manager/vm_power_cli.h new file mode 100644 index 00000000..deccd513 --- /dev/null +++ b/examples/vm_power_manager/vm_power_cli.h @@ -0,0 +1,47 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef VM_POWER_CLI_H_ +#define VM_POWER_CLI_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +void run_cli(__attribute__((unused)) void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* VM_POWER_CLI_H_ */ diff --git a/examples/vmdq/Makefile b/examples/vmdq/Makefile new file mode 100644 index 00000000..198e3bfe --- /dev/null +++ b/examples/vmdq/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = vmdq_app + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += $(WERROR_FLAGS) + +EXTRA_CFLAGS += -O3 + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/vmdq/main.c b/examples/vmdq/main.c new file mode 100644 index 00000000..178af2f5 --- /dev/null +++ b/examples/vmdq/main.c @@ -0,0 +1,641 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <sys/queue.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdarg.h> +#include <inttypes.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_log.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_memcpy.h> + +#define MAX_QUEUES 1024 +/* + * 1024 queues require to meet the needs of a large number of vmdq_pools. 
+ * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port. + */ +#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \ + RTE_TEST_TX_DESC_DEFAULT)) +#define MBUF_CACHE_SIZE 64 + +#define MAX_PKT_BURST 32 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +#define INVALID_PORT_ID 0xFF + +/* mask of enabled ports */ +static uint32_t enabled_port_mask; + +/* number of pools (if user does not specify any, 8 by default */ +static uint32_t num_queues = 8; +static uint32_t num_pools = 8; + +/* empty vmdq configuration structure. Filled in programatically */ +static const struct rte_eth_conf vmdq_conf_default = { + .rxmode = { + .mq_mode = ETH_MQ_RX_VMDQ_ONLY, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + }, + + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, + .rx_adv_conf = { + /* + * should be overridden separately in code with + * appropriate values + */ + .vmdq_rx_conf = { + .nb_queue_pools = ETH_8_POOLS, + .enable_default_pool = 0, + .default_pool = 0, + .nb_pool_maps = 0, + .pool_map = {{0, 0},}, + }, + }, +}; + +static unsigned lcore_ids[RTE_MAX_LCORE]; +static uint8_t ports[RTE_MAX_ETHPORTS]; +static unsigned num_ports; /**< The number of ports specified in command line */ + +/* array used for printing out statistics */ +volatile unsigned long rxPackets[MAX_QUEUES] = {0}; + +const uint16_t vlan_tags[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, +}; +const uint16_t num_vlans = RTE_DIM(vlan_tags); +static uint16_t num_pf_queues, 
num_vmdq_queues; +static uint16_t vmdq_pool_base, vmdq_queue_base; +/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */ +static struct ether_addr pool_addr_template = { + .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00} +}; + +/* ethernet addresses of ports */ +static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS]; + +#define MAX_QUEUE_NUM_10G 128 +#define MAX_QUEUE_NUM_1G 8 +#define MAX_POOL_MAP_NUM_10G 64 +#define MAX_POOL_MAP_NUM_1G 32 +#define MAX_POOL_NUM_10G 64 +#define MAX_POOL_NUM_1G 8 +/* + * Builds up the correct configuration for vmdq based on the vlan tags array + * given above, and determine the queue number and pool map number according to + * valid pool number + */ +static inline int +get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools) +{ + struct rte_eth_vmdq_rx_conf conf; + unsigned i; + + conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools; + conf.nb_pool_maps = num_pools; + conf.enable_default_pool = 0; + conf.default_pool = 0; /* set explicit value, even if not used */ + + for (i = 0; i < conf.nb_pool_maps; i++) { + conf.pool_map[i].vlan_id = vlan_tags[i]; + conf.pool_map[i].pools = (1UL << (i % num_pools)); + } + + (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf))); + (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf, + sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf))); + return 0; +} + +/* + * Initialises a given port using global settings and with the rx buffers + * coming from the mbuf_pool passed as parameter + */ +static inline int +port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_rxconf *rxconf; + struct rte_eth_conf port_conf; + uint16_t rxRings, txRings; + const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT, txRingSize = RTE_TEST_TX_DESC_DEFAULT; + int retval; + uint16_t q; + uint16_t queues_per_pool; + uint32_t max_nb_pools; + + /* + * The max pool number from dev_info will be used to validate the pool + 
* number specified in cmd line + */ + rte_eth_dev_info_get(port, &dev_info); + max_nb_pools = (uint32_t)dev_info.max_vmdq_pools; + /* + * We allow to process part of VMDQ pools specified by num_pools in + * command line. + */ + if (num_pools > max_nb_pools) { + printf("num_pools %d >max_nb_pools %d\n", + num_pools, max_nb_pools); + return -1; + } + retval = get_eth_conf(&port_conf, max_nb_pools); + if (retval < 0) + return retval; + + /* + * NIC queues are divided into pf queues and vmdq queues. + */ + /* There is assumption here all ports have the same configuration! */ + num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num; + queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools; + num_vmdq_queues = num_pools * queues_per_pool; + num_queues = num_pf_queues + num_vmdq_queues; + vmdq_queue_base = dev_info.vmdq_queue_base; + vmdq_pool_base = dev_info.vmdq_pool_base; + + printf("pf queue num: %u, configured vmdq pool num: %u," + " each vmdq pool has %u queues\n", + num_pf_queues, num_pools, queues_per_pool); + printf("vmdq queue base: %d pool base %d\n", + vmdq_queue_base, vmdq_pool_base); + if (port >= rte_eth_dev_count()) + return -1; + + /* + * Though in this example, we only receive packets from the first queue + * of each pool and send packets through first rte_lcore_count() tx + * queues of vmdq queues, all queues including pf queues are setup. + * This is because VMDQ queues doesn't always start from zero, and the + * PMD layer doesn't support selectively initialising part of rx/tx + * queues. 
+ */ + rxRings = (uint16_t)dev_info.max_rx_queues; + txRings = (uint16_t)dev_info.max_tx_queues; + retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf); + if (retval != 0) + return retval; + + rte_eth_dev_info_get(port, &dev_info); + rxconf = &dev_info.default_rxconf; + rxconf->rx_drop_en = 1; + for (q = 0; q < rxRings; q++) { + retval = rte_eth_rx_queue_setup(port, q, rxRingSize, + rte_eth_dev_socket_id(port), + rxconf, + mbuf_pool); + if (retval < 0) { + printf("initialise rx queue %d failed\n", q); + return retval; + } + } + + for (q = 0; q < txRings; q++) { + retval = rte_eth_tx_queue_setup(port, q, txRingSize, + rte_eth_dev_socket_id(port), + NULL); + if (retval < 0) { + printf("initialise tx queue %d failed\n", q); + return retval; + } + } + + retval = rte_eth_dev_start(port); + if (retval < 0) { + printf("port %d start failed\n", port); + return retval; + } + + rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]); + printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port, + vmdq_ports_eth_addr[port].addr_bytes[0], + vmdq_ports_eth_addr[port].addr_bytes[1], + vmdq_ports_eth_addr[port].addr_bytes[2], + vmdq_ports_eth_addr[port].addr_bytes[3], + vmdq_ports_eth_addr[port].addr_bytes[4], + vmdq_ports_eth_addr[port].addr_bytes[5]); + + /* + * Set mac for each pool. + * There is no default mac for the pools in i40. + * Removes this after i40e fixes this issue. 
+ */ + for (q = 0; q < num_pools; q++) { + struct ether_addr mac; + mac = pool_addr_template; + mac.addr_bytes[4] = port; + mac.addr_bytes[5] = q; + printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n", + port, q, + mac.addr_bytes[0], mac.addr_bytes[1], + mac.addr_bytes[2], mac.addr_bytes[3], + mac.addr_bytes[4], mac.addr_bytes[5]); + retval = rte_eth_dev_mac_addr_add(port, &mac, + q + vmdq_pool_base); + if (retval) { + printf("mac addr add failed at pool %d\n", q); + return retval; + } + } + + return 0; +} + +/* Check num_pools parameter and set it if OK*/ +static int +vmdq_parse_num_pools(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (num_pools > num_vlans) { + printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans); + return -1; + } + + num_pools = n; + + return 0; +} + + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +/* Display usage */ +static void +vmdq_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK]\n" + " --nb-pools NP: number of pools\n", + prgname); +} + +/* Parse the argument (num_pools) given in the command line of the application */ +static int +vmdq_parse_args(int argc, char **argv) +{ + int opt; + int option_index; + unsigned i; + const char *prgname = argv[0]; + static struct option long_option[] = { + {"nb-pools", required_argument, NULL, 0}, + {NULL, 0, 0, 0} + }; + + /* Parse command line */ + while ((opt = getopt_long(argc, argv, "p:", long_option, + &option_index)) != EOF) { + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + 
printf("invalid portmask\n"); + vmdq_usage(prgname); + return -1; + } + break; + case 0: + if (vmdq_parse_num_pools(optarg) == -1) { + printf("invalid number of pools\n"); + vmdq_usage(prgname); + return -1; + } + break; + + default: + vmdq_usage(prgname); + return -1; + } + } + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (enabled_port_mask & (1 << i)) + ports[num_ports++] = (uint8_t)i; + } + + if (num_ports < 2 || num_ports % 2) { + printf("Current enabled port number is %u," + "but it should be even and at least 2\n", num_ports); + return -1; + } + + return 0; +} + +static void +update_mac_address(struct rte_mbuf *m, unsigned dst_port) +{ + struct ether_hdr *eth; + void *tmp; + + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = ð->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr); +} + +/* When we receive a HUP signal, print out our stats */ +static void +sighup_handler(int signum) +{ + unsigned q; + for (q = 0; q < num_queues; q++) { + if (q % (num_queues/num_pools) == 0) + printf("\nPool %u: ", q/(num_queues/num_pools)); + printf("%lu ", rxPackets[q]); + } + printf("\nFinished handling signal %d\n", signum); +} + +/* + * Main thread that does the work, reading from INPUT_PORT + * and writing to OUTPUT_PORT + */ +static int +lcore_main(__attribute__((__unused__)) void *dummy) +{ + const uint16_t lcore_id = (uint16_t)rte_lcore_id(); + const uint16_t num_cores = (uint16_t)rte_lcore_count(); + uint16_t core_id = 0; + uint16_t startQueue, endQueue; + uint16_t q, i, p; + const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores); + + for (i = 0; i < num_cores; i++) + if (lcore_ids[i] == lcore_id) { + core_id = i; + break; + } + + if (remainder != 0) { + if (core_id < remainder) { + startQueue = (uint16_t)(core_id * + (num_vmdq_queues / num_cores + 1)); + endQueue = (uint16_t)(startQueue + + 
(num_vmdq_queues / num_cores) + 1); + } else { + startQueue = (uint16_t)(core_id * + (num_vmdq_queues / num_cores) + + remainder); + endQueue = (uint16_t)(startQueue + + (num_vmdq_queues / num_cores)); + } + } else { + startQueue = (uint16_t)(core_id * + (num_vmdq_queues / num_cores)); + endQueue = (uint16_t)(startQueue + + (num_vmdq_queues / num_cores)); + } + + /* vmdq queue idx doesn't always start from zero.*/ + startQueue += vmdq_queue_base; + endQueue += vmdq_queue_base; + printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id, + (unsigned)lcore_id, startQueue, endQueue - 1); + + if (startQueue == endQueue) { + printf("lcore %u has nothing to do\n", lcore_id); + return 0; + } + + for (;;) { + struct rte_mbuf *buf[MAX_PKT_BURST]; + const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]); + + for (p = 0; p < num_ports; p++) { + const uint8_t sport = ports[p]; + /* 0 <-> 1, 2 <-> 3 etc */ + const uint8_t dport = ports[p ^ 1]; + if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID)) + continue; + + for (q = startQueue; q < endQueue; q++) { + const uint16_t rxCount = rte_eth_rx_burst(sport, + q, buf, buf_size); + + if (unlikely(rxCount == 0)) + continue; + + rxPackets[q] += rxCount; + + for (i = 0; i < rxCount; i++) + update_mac_address(buf[i], dport); + + const uint16_t txCount = rte_eth_tx_burst(dport, + vmdq_queue_base + core_id, + buf, + rxCount); + + if (txCount != rxCount) { + for (i = txCount; i < rxCount; i++) + rte_pktmbuf_free(buf[i]); + } + } + } + } +} + +/* + * Update the global var NUM_PORTS and array PORTS according to system ports number + * and return valid ports number + */ +static unsigned check_ports_num(unsigned nb_ports) +{ + unsigned valid_num_ports = num_ports; + unsigned portid; + + if (num_ports > nb_ports) { + printf("\nSpecified port number(%u) exceeds total system port number(%u)\n", + num_ports, nb_ports); + num_ports = nb_ports; + } + + for (portid = 0; portid < num_ports; portid++) { + if (ports[portid] >= 
nb_ports) { + printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n", + ports[portid], (nb_ports - 1)); + ports[portid] = INVALID_PORT_ID; + valid_num_ports--; + } + } + return valid_num_ports; +} + +/* Main function, does initialisation and calls the per-lcore functions */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool; + unsigned lcore_id, core_id = 0; + int ret; + unsigned nb_ports, valid_num_ports; + uint8_t portid; + + signal(SIGHUP, sighup_handler); + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + /* parse app arguments */ + ret = vmdq_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n"); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) + if (rte_lcore_is_enabled(lcore_id)) + lcore_ids[core_id++] = lcore_id; + + if (rte_lcore_count() > RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* + * Update the global var NUM_PORTS and global array PORTS + * and get value of var VALID_NUM_PORTS according to system ports number + */ + valid_num_ports = check_ports_num(nb_ports); + + if (valid_num_ports < 2 || valid_num_ports % 2) { + printf("Current valid ports number is %u\n", valid_num_ports); + rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n"); + } + + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", + NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE, + 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + if 
(port_init(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n"); + } + + /* call lcore_main() on every lcore */ + rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (rte_eal_wait_lcore(lcore_id) < 0) + return -1; + } + + return 0; +} diff --git a/examples/vmdq_dcb/Makefile b/examples/vmdq_dcb/Makefile new file mode 100644 index 00000000..8c51131b --- /dev/null +++ b/examples/vmdq_dcb/Makefile @@ -0,0 +1,59 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = vmdq_dcb_app + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif +ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) +CFLAGS_main.o += -diag-disable=vec +endif +EXTRA_CFLAGS += -O3 -g + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/vmdq_dcb/main.c b/examples/vmdq_dcb/main.c new file mode 100644 index 00000000..62e1422a --- /dev/null +++ b/examples/vmdq_dcb/main.c @@ -0,0 +1,705 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <sys/queue.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdarg.h> +#include <inttypes.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_log.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_memcpy.h> + +/* basic constants used in application */ +#define MAX_QUEUES 1024 +/* + * 1024 queues require to meet the needs of a large number of vmdq_pools. + * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port. + */ +#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \ + RTE_TEST_TX_DESC_DEFAULT)) +#define MBUF_CACHE_SIZE 64 + +#define MAX_PKT_BURST 32 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define RTE_TEST_RX_DESC_DEFAULT 128 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +#define INVALID_PORT_ID 0xFF + +/* mask of enabled ports */ +static uint32_t enabled_port_mask; +static uint8_t ports[RTE_MAX_ETHPORTS]; +static unsigned num_ports; + +/* number of pools (if user does not specify any, 32 by default */ +static enum rte_eth_nb_pools num_pools = ETH_32_POOLS; +static enum rte_eth_nb_tcs num_tcs = ETH_4_TCS; +static uint16_t num_queues, num_vmdq_queues; +static uint16_t vmdq_pool_base, vmdq_queue_base; +static uint8_t rss_enable; + +/* empty vmdq+dcb configuration structure. 
Filled in programatically */ +static const struct rte_eth_conf vmdq_dcb_conf_default = { + .rxmode = { + .mq_mode = ETH_MQ_RX_VMDQ_DCB, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_VMDQ_DCB, + }, + /* + * should be overridden separately in code with + * appropriate values + */ + .rx_adv_conf = { + .vmdq_dcb_conf = { + .nb_queue_pools = ETH_32_POOLS, + .enable_default_pool = 0, + .default_pool = 0, + .nb_pool_maps = 0, + .pool_map = {{0, 0},}, + .dcb_tc = {0}, + }, + .dcb_rx_conf = { + .nb_tcs = ETH_4_TCS, + /** Traffic class each UP mapped to. */ + .dcb_tc = {0}, + }, + .vmdq_rx_conf = { + .nb_queue_pools = ETH_32_POOLS, + .enable_default_pool = 0, + .default_pool = 0, + .nb_pool_maps = 0, + .pool_map = {{0, 0},}, + }, + }, + .tx_adv_conf = { + .vmdq_dcb_tx_conf = { + .nb_queue_pools = ETH_32_POOLS, + .dcb_tc = {0}, + }, + }, +}; + +/* array used for printing out statistics */ +volatile unsigned long rxPackets[MAX_QUEUES] = {0}; + +const uint16_t vlan_tags[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +const uint16_t num_vlans = RTE_DIM(vlan_tags); +/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */ +static struct ether_addr pool_addr_template = { + .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00} +}; + +/* ethernet addresses of ports */ +static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* Builds up the correct configuration for vmdq+dcb based on the vlan tags array + * given above, and the number of traffic classes available for use. 
*/ +static inline int +get_eth_conf(struct rte_eth_conf *eth_conf) +{ + struct rte_eth_vmdq_dcb_conf conf; + struct rte_eth_vmdq_rx_conf vmdq_conf; + struct rte_eth_dcb_rx_conf dcb_conf; + struct rte_eth_vmdq_dcb_tx_conf tx_conf; + uint8_t i; + + conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools; + vmdq_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools; + tx_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools; + conf.nb_pool_maps = num_pools; + vmdq_conf.nb_pool_maps = num_pools; + conf.enable_default_pool = 0; + vmdq_conf.enable_default_pool = 0; + conf.default_pool = 0; /* set explicit value, even if not used */ + vmdq_conf.default_pool = 0; + + for (i = 0; i < conf.nb_pool_maps; i++) { + conf.pool_map[i].vlan_id = vlan_tags[i]; + vmdq_conf.pool_map[i].vlan_id = vlan_tags[i]; + conf.pool_map[i].pools = 1UL << i; + vmdq_conf.pool_map[i].pools = 1UL << i; + } + for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++){ + conf.dcb_tc[i] = i % num_tcs; + dcb_conf.dcb_tc[i] = i % num_tcs; + tx_conf.dcb_tc[i] = i % num_tcs; + } + dcb_conf.nb_tcs = (enum rte_eth_nb_tcs)num_tcs; + (void)(rte_memcpy(eth_conf, &vmdq_dcb_conf_default, sizeof(*eth_conf))); + (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_dcb_conf, &conf, + sizeof(conf))); + (void)(rte_memcpy(ð_conf->rx_adv_conf.dcb_rx_conf, &dcb_conf, + sizeof(dcb_conf))); + (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &vmdq_conf, + sizeof(vmdq_conf))); + (void)(rte_memcpy(ð_conf->tx_adv_conf.vmdq_dcb_tx_conf, &tx_conf, + sizeof(tx_conf))); + if (rss_enable) { + eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS; + eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | + ETH_RSS_UDP | + ETH_RSS_TCP | + ETH_RSS_SCTP; + } + return 0; +} + +/* + * Initialises a given port using global settings and with the rx buffers + * coming from the mbuf_pool passed as parameter + */ +static inline int +port_init(uint8_t port, struct rte_mempool *mbuf_pool) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_conf port_conf = {0}; + 
const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT; + const uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT; + int retval; + uint16_t q; + uint16_t queues_per_pool; + uint32_t max_nb_pools; + + /* + * The max pool number from dev_info will be used to validate the pool + * number specified in cmd line + */ + rte_eth_dev_info_get(port, &dev_info); + max_nb_pools = (uint32_t)dev_info.max_vmdq_pools; + /* + * We allow to process part of VMDQ pools specified by num_pools in + * command line. + */ + if (num_pools > max_nb_pools) { + printf("num_pools %d >max_nb_pools %d\n", + num_pools, max_nb_pools); + return -1; + } + + /* + * NIC queues are divided into pf queues and vmdq queues. + * There is assumption here all ports have the same configuration! + */ + vmdq_queue_base = dev_info.vmdq_queue_base; + vmdq_pool_base = dev_info.vmdq_pool_base; + printf("vmdq queue base: %d pool base %d\n", + vmdq_queue_base, vmdq_pool_base); + if (vmdq_pool_base == 0) { + num_vmdq_queues = dev_info.max_rx_queues; + num_queues = dev_info.max_rx_queues; + if (num_tcs != num_vmdq_queues / num_pools) { + printf("nb_tcs %d is invalid considering with" + " nb_pools %d, nb_tcs * nb_pools should = %d\n", + num_tcs, num_pools, num_vmdq_queues); + return -1; + } + } else { + queues_per_pool = dev_info.vmdq_queue_num / + dev_info.max_vmdq_pools; + if (num_tcs > queues_per_pool) { + printf("num_tcs %d > num of queues per pool %d\n", + num_tcs, queues_per_pool); + return -1; + } + num_vmdq_queues = num_pools * queues_per_pool; + num_queues = vmdq_queue_base + num_vmdq_queues; + printf("Configured vmdq pool num: %u," + " each vmdq pool has %u queues\n", + num_pools, queues_per_pool); + } + + if (port >= rte_eth_dev_count()) + return -1; + + retval = get_eth_conf(&port_conf); + if (retval < 0) + return retval; + + /* + * Though in this example, all queues including pf queues are setup. 
+ * This is because VMDQ queues doesn't always start from zero, and the + * PMD layer doesn't support selectively initialising part of rx/tx + * queues. + */ + retval = rte_eth_dev_configure(port, num_queues, num_queues, &port_conf); + if (retval != 0) + return retval; + + for (q = 0; q < num_queues; q++) { + retval = rte_eth_rx_queue_setup(port, q, rxRingSize, + rte_eth_dev_socket_id(port), + NULL, + mbuf_pool); + if (retval < 0) { + printf("initialize rx queue %d failed\n", q); + return retval; + } + } + + for (q = 0; q < num_queues; q++) { + retval = rte_eth_tx_queue_setup(port, q, txRingSize, + rte_eth_dev_socket_id(port), + NULL); + if (retval < 0) { + printf("initialize tx queue %d failed\n", q); + return retval; + } + } + + retval = rte_eth_dev_start(port); + if (retval < 0) { + printf("port %d start failed\n", port); + return retval; + } + + rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]); + printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port, + vmdq_ports_eth_addr[port].addr_bytes[0], + vmdq_ports_eth_addr[port].addr_bytes[1], + vmdq_ports_eth_addr[port].addr_bytes[2], + vmdq_ports_eth_addr[port].addr_bytes[3], + vmdq_ports_eth_addr[port].addr_bytes[4], + vmdq_ports_eth_addr[port].addr_bytes[5]); + + /* Set mac for each pool.*/ + for (q = 0; q < num_pools; q++) { + struct ether_addr mac; + + mac = pool_addr_template; + mac.addr_bytes[4] = port; + mac.addr_bytes[5] = q; + printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n", + port, q, + mac.addr_bytes[0], mac.addr_bytes[1], + mac.addr_bytes[2], mac.addr_bytes[3], + mac.addr_bytes[4], mac.addr_bytes[5]); + retval = rte_eth_dev_mac_addr_add(port, &mac, + q + vmdq_pool_base); + if (retval) { + printf("mac addr add failed at pool %d\n", q); + return retval; + } + } + + return 0; +} + +/* Check num_pools parameter and set it if OK*/ +static int +vmdq_parse_num_pools(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse 
number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n != 16 && n != 32) + return -1; + if (n == 16) + num_pools = ETH_16_POOLS; + else + num_pools = ETH_32_POOLS; + + return 0; +} + +/* Check num_tcs parameter and set it if OK*/ +static int +vmdq_parse_num_tcs(const char *q_arg) +{ + char *end = NULL; + int n; + + /* parse number string */ + n = strtol(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (n != 4 && n != 8) + return -1; + if (n == 4) + num_tcs = ETH_4_TCS; + else + num_tcs = ETH_8_TCS; + + return 0; +} + +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +/* Display usage */ +static void +vmdq_usage(const char *prgname) +{ + printf("%s [EAL options] -- -p PORTMASK]\n" + " --nb-pools NP: number of pools (32 default, 16)\n" + " --nb-tcs NP: number of TCs (4 default, 8)\n" + " --enable-rss: enable RSS (disabled by default)\n", + prgname); +} + +/* Parse the argument (num_pools) given in the command line of the application */ +static int +vmdq_parse_args(int argc, char **argv) +{ + int opt; + int option_index; + unsigned i; + const char *prgname = argv[0]; + static struct option long_option[] = { + {"nb-pools", required_argument, NULL, 0}, + {"nb-tcs", required_argument, NULL, 0}, + {"enable-rss", 0, NULL, 0}, + {NULL, 0, 0, 0} + }; + + /* Parse command line */ + while ((opt = getopt_long(argc, argv, "p:", long_option, + &option_index)) != EOF) { + switch (opt) { + /* portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + printf("invalid portmask\n"); + vmdq_usage(prgname); + return -1; + } + break; + case 0: + if 
(!strcmp(long_option[option_index].name, "nb-pools")) { + if (vmdq_parse_num_pools(optarg) == -1) { + printf("invalid number of pools\n"); + return -1; + } + } + + if (!strcmp(long_option[option_index].name, "nb-tcs")) { + if (vmdq_parse_num_tcs(optarg) == -1) { + printf("invalid number of tcs\n"); + return -1; + } + } + + if (!strcmp(long_option[option_index].name, "enable-rss")) + rss_enable = 1; + break; + + default: + vmdq_usage(prgname); + return -1; + } + } + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (enabled_port_mask & (1 << i)) + ports[num_ports++] = (uint8_t)i; + } + + if (num_ports < 2 || num_ports % 2) { + printf("Current enabled port number is %u," + " but it should be even and at least 2\n", num_ports); + return -1; + } + + return 0; +} + +static void +update_mac_address(struct rte_mbuf *m, unsigned dst_port) +{ + struct ether_hdr *eth; + void *tmp; + + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + /* 02:00:00:00:00:xx */ + tmp = ð->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); + + /* src addr */ + ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr); +} + +/* When we receive a HUP signal, print out our stats */ +static void +sighup_handler(int signum) +{ + unsigned q = vmdq_queue_base; + + for (; q < num_queues; q++) { + if (q % (num_vmdq_queues / num_pools) == 0) + printf("\nPool %u: ", (q - vmdq_queue_base) / + (num_vmdq_queues / num_pools)); + printf("%lu ", rxPackets[q]); + } + printf("\nFinished handling signal %d\n", signum); +} + +/* + * Main thread that does the work, reading from INPUT_PORT + * and writing to OUTPUT_PORT + */ +static int +lcore_main(void *arg) +{ + const uintptr_t core_num = (uintptr_t)arg; + const unsigned num_cores = rte_lcore_count(); + uint16_t startQueue, endQueue; + uint16_t q, i, p; + const uint16_t quot = (uint16_t)(num_vmdq_queues / num_cores); + const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores); + + + if (remainder) { + if (core_num < 
remainder) { + startQueue = (uint16_t)(core_num * (quot + 1)); + endQueue = (uint16_t)(startQueue + quot + 1); + } else { + startQueue = (uint16_t)(core_num * quot + remainder); + endQueue = (uint16_t)(startQueue + quot); + } + } else { + startQueue = (uint16_t)(core_num * quot); + endQueue = (uint16_t)(startQueue + quot); + } + + /* vmdq queue idx doesn't always start from zero.*/ + startQueue += vmdq_queue_base; + endQueue += vmdq_queue_base; + printf("Core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_num, + rte_lcore_id(), startQueue, endQueue - 1); + + if (startQueue == endQueue) { + printf("lcore %u has nothing to do\n", (unsigned)core_num); + return 0; + } + + for (;;) { + struct rte_mbuf *buf[MAX_PKT_BURST]; + const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]); + for (p = 0; p < num_ports; p++) { + const uint8_t src = ports[p]; + const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */ + + if ((src == INVALID_PORT_ID) || (dst == INVALID_PORT_ID)) + continue; + + for (q = startQueue; q < endQueue; q++) { + const uint16_t rxCount = rte_eth_rx_burst(src, + q, buf, buf_size); + + if (unlikely(rxCount == 0)) + continue; + + rxPackets[q] += rxCount; + + for (i = 0; i < rxCount; i++) + update_mac_address(buf[i], dst); + + const uint16_t txCount = rte_eth_tx_burst(dst, + q, buf, rxCount); + if (txCount != rxCount) { + for (i = txCount; i < rxCount; i++) + rte_pktmbuf_free(buf[i]); + } + } + } + } +} + +/* + * Update the global var NUM_PORTS and array PORTS according to system ports number + * and return valid ports number + */ +static unsigned check_ports_num(unsigned nb_ports) +{ + unsigned valid_num_ports = num_ports; + unsigned portid; + + if (num_ports > nb_ports) { + printf("\nSpecified port number(%u) exceeds total system port number(%u)\n", + num_ports, nb_ports); + num_ports = nb_ports; + } + + for (portid = 0; portid < num_ports; portid++) { + if (ports[portid] >= nb_ports) { + printf("\nSpecified port ID(%u) exceeds max system port 
ID(%u)\n", + ports[portid], (nb_ports - 1)); + ports[portid] = INVALID_PORT_ID; + valid_num_ports--; + } + } + return valid_num_ports; +} + + +/* Main function, does initialisation and calls the per-lcore functions */ +int +main(int argc, char *argv[]) +{ + unsigned cores; + struct rte_mempool *mbuf_pool; + unsigned lcore_id; + uintptr_t i; + int ret; + unsigned nb_ports, valid_num_ports; + uint8_t portid; + + signal(SIGHUP, sighup_handler); + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + /* parse app arguments */ + ret = vmdq_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n"); + + cores = rte_lcore_count(); + if ((cores & (cores - 1)) != 0 || cores > RTE_MAX_LCORE) { + rte_exit(EXIT_FAILURE,"This program can only run on an even" + " number of cores(1-%d)\n\n", RTE_MAX_LCORE); + } + + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* + * Update the global var NUM_PORTS and global array PORTS + * and get value of var VALID_NUM_PORTS according to system ports number + */ + valid_num_ports = check_ports_num(nb_ports); + + if (valid_num_ports < 2 || valid_num_ports % 2) { + printf("Current valid ports number is %u\n", valid_num_ports); + rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n"); + } + + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", + NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE, + 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + printf("\nSkipping disabled port %d\n", portid); + continue; + } + if (port_init(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, "Cannot 
initialize network ports\n"); + } + + /* call lcore_main() on every slave lcore */ + i = 0; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(lcore_main, (void*)i++, lcore_id); + } + /* call on master too */ + (void) lcore_main((void*)i); + + return 0; +} |