From 3395610ea65d66fb96ab98d6915a7ffbd584c34e Mon Sep 17 00:00:00 2001 From: Konstantin Ananyev Date: Fri, 3 Jun 2016 16:43:13 +0100 Subject: Initial commit of tldk code. Change-Id: Ib96fdd2c57bae0a51ed420137c35eb8e2ee58473 Signed-off-by: Konstantin Ananyev Signed-off-by: Ed Warnicke --- examples/udpfwd/Makefile | 44 ++ examples/udpfwd/README | 134 ++++ examples/udpfwd/be.cfg | 5 + examples/udpfwd/fe.cfg | 24 + examples/udpfwd/fwdtbl.h | 117 +++ examples/udpfwd/main.c | 1810 ++++++++++++++++++++++++++++++++++++++++++++++ examples/udpfwd/netbe.h | 251 +++++++ examples/udpfwd/parse.c | 586 +++++++++++++++ examples/udpfwd/parse.h | 75 ++ examples/udpfwd/pkt.c | 579 +++++++++++++++ 10 files changed, 3625 insertions(+) create mode 100644 examples/udpfwd/Makefile create mode 100644 examples/udpfwd/README create mode 100644 examples/udpfwd/be.cfg create mode 100644 examples/udpfwd/fe.cfg create mode 100644 examples/udpfwd/fwdtbl.h create mode 100644 examples/udpfwd/main.c create mode 100644 examples/udpfwd/netbe.h create mode 100644 examples/udpfwd/parse.c create mode 100644 examples/udpfwd/parse.h create mode 100644 examples/udpfwd/pkt.c (limited to 'examples') diff --git a/examples/udpfwd/Makefile b/examples/udpfwd/Makefile new file mode 100644 index 0000000..c23947a --- /dev/null +++ b/examples/udpfwd/Makefile @@ -0,0 +1,44 @@ +# Copyright (c) 2016 Intel Corporation. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +ifeq ($(RTE_TARGET),) +$(error "Please define RTE_TARGET environment variable") +endif + +ifeq ($(TLDK_ROOT),) +$(error "Please define TLDK_ROOT environment variable") +endif + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = udpfwd + +# all source are stored in SRCS-y +SRCS-y += parse.c +SRCS-y += pkt.c +SRCS-y += main.c + +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -I$(TLDK_ROOT)/$(RTE_TARGET)/include + +LDLIBS += -L$(TLDK_ROOT)/$(RTE_TARGET)/lib +LDLIBS += -ltle_udp + +EXTRA_CFLAGS += -O3 + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/udpfwd/README b/examples/udpfwd/README new file mode 100644 index 0000000..8ab7e98 --- /dev/null +++ b/examples/udpfwd/README @@ -0,0 +1,134 @@ +Introduction +============ + +udpfwd is a sample application to demonstrate and test libtle_udp. +Depending on configuration it can do simple send/recv or both over +opened udp streams. Also it implements ability to do UDP datagram +forwarding between different streams, so it is possible to use that +application as some sort of 'UDP proxy'. +The application can reassemble input fragmented IP packets, +and fragment outgoing IP packets (if destination MTU is less then packet size). +To build and run the application DPDK and TLDK libraries are required. + +Logically the application is divided into two parts: + +- Back End (BE) +BE is responsible for: + - RX over DPDK ports and feed them into UDP TLDK context(s) + (via tle_udp_rx_bulk). + - retrieve packets ready to be send out from UDP TLDK context(s) + and TX them over destined DPDK port. +Right now only one RX/TX queue per port is used. +Each BE lcore can serve multiple DPDK ports, TLDK UDP contexts. + +- Front End (FE) +FE responsibility is to open configured UDP streams and perform +send/recv over them. These streams can belong to different UDP contexts. + +Right now each lcore can act as BE or FE (but not both simultaneously). +Master lcore can act as FE only. + +Usage +===== + +udpfwd -- \ + -P | --promisc /* promiscuous mode enabled. */ \ + -R | --rbufs /* max recv buffers per stream. */ \ + -S | --sbufs /* max send buffers per stream. */ \ + -s | --streams /* streams to open per context. */ \ + -b | --becfg /* backend configuration file. */ \ + -f | --fecfg /* frontend configuration file. */ \ + ... + +port_params: port=,lcore=,\ +[rx_offload=,tx_offload=,mtu=,ipv4=,ipv6=] + +port_params are used to configure the particular DPDK device (rte_ethdev port), +and specify BE lcore that will do RX/TX from/to the device and manage +BE part of corresponding UDP context. + +port - DPDK port id (right now on each port is used just one RX, + one TX queue). +lcore - EAL lcore id to do IO over that port (rx_burst/tx_burst). + several ports can be managed by the same lcore, + but same port can't belong to more than one lcore. +rx_offload - RX HW offload capabilities to enable/use on this port. + (bitmask of DEV_RX_OFFLOAD_* values). +tx_offload - TX HW offload capabilities to enable/use on this port. + (bitmask of DEV_TX_OFFLOAD_* values). +mtu - MTU to be used on that port + ( = UDP data size + L2/L3/L4 headers sizes, default=1514). +ipv4 - ipv4 address to assign to that port. +ipv6 - ipv6 address to assign to that port. + +At least one of ipv4/ipv6 values have to be specified for each port. + +As an example: +udpfwd --lcores='3,6' -w 01:00.0 -- \ +--promisc --rbufs 0x1000 --sbufs 0x1000 --streams 0x100 \ +--fecfg ./fe.cfg --becfg ./be.cfg \ +port=0,lcore=6,rx_offload=0xf,tx_offload=0,\ +ipv4=192.168.1.233,ipv6=2001:4860:b002::28 + +Will create TLDK UDP context on lcore=6 (BE lcore) to manage DPDK port 0. +Will assign IPv4 address 192.168.1.233 and IPv6 address 2001:4860:b002::28 +to that port. +The following supported by DPDK RX HW offloads: + DEV_RX_OFFLOAD_VLAN_STRIP, + DEV_RX_OFFLOAD_IPV4_CKSUM, + DEV_RX_OFFLOAD_UDP_CKSUM, + DEV_RX_OFFLOAD_TCP_CKSUM +will be enabled on that port. +No HW TX offloads will be enabled. + +Fornt-End (FE) and Back-End (BE) configuration files format: +------------------------------------------------------------ + - each record on a separate line. + - lines started with '#' are treated as comments. + - empty lines (containing whitespace chars only) are ignored. + - kvargs style format for each record. + - each FE record correspond to at least one stream to be opened + (could be multiple streams in case of op="fwd"). + - each BE record define a ipv4/ipv6 destination. + +FE config record format: +------------------------ + +lcore=,op=<"rx|tx|echo|fwd">,\ +laddr=,lport=,raddr=,rport=,\ +[txlen=,fwladdr=,fwlport=,fwraddr=,fwrport= + +lcore - EAL lcore to manage that stream(s). +op - operation to perform on that stream: + "rx" - do receive only on that stream. + "tx" - do send only on that stream. + "echo" - mimic recvfrom(..., &addr);sendto(..., &addr); + on that stream. + "fwd" - forward packets between streams. +laddr - local address for the stream to open. +lport - local port for the stream to open. +raddr - remote address for the stream to open. +rport - remote port for the stream to open. +txlen - data length to send with each packet ("tx" mode only). +fwladdr - local address for the forwarding stream(s) to open + ("fwd mode only). +fwlport - local port for the forwarding stream(s) to open + ("fwd mode only). +fwraddr - remote address for the forwarding stream(s) to open + ("fwd mode only). +fwrport - remote port for the forwarding stream(s) to open + ("fwd mode only). + +Refer to fe.cfg for an example. + +BE config record format: +------------------------ + +port=,addr=,masklen=,mac= + +port - port number to be used to send packets to the destination. +addr - destionation network address. +masklen - desitantion network prefix length. +mac - destination ethernet address. + +Refer to fe.cfg for an example. diff --git a/examples/udpfwd/be.cfg b/examples/udpfwd/be.cfg new file mode 100644 index 0000000..5c1d173 --- /dev/null +++ b/examples/udpfwd/be.cfg @@ -0,0 +1,5 @@ +# +# udpfwd BE cconfig file exaple +# +port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01 +port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01 diff --git a/examples/udpfwd/fe.cfg b/examples/udpfwd/fe.cfg new file mode 100644 index 0000000..2706323 --- /dev/null +++ b/examples/udpfwd/fe.cfg @@ -0,0 +1,24 @@ +# +# udpfwd FE config file example +# + +# open IPv4 stream with local_addr=192.168.1.233:32768, +# and remote_addr as wildcard (any remote addressi/port allowed). +# use it echo mode - for any received packet - send it back to the source +lcore=3,op=echo,laddr=192.168.1.233,lport=0x8000,raddr=0.0.0.0,rport=0 + +# open IPv4 stream with specified local/remote address/port and +# do send only over that stream. +lcore=3,op=tx,laddr=192.168.1.233,lport=0x8001,raddr=192.168.1.56,rport=0x200,txlen=72 + +# open IPv6 stream with specified local port (512) probably over multiple +# eth ports, and do recv only over that stream. +lcore=3,op=rx,laddr=::,lport=0x200,raddr=::,rport=0,txlen=72 + +# fwd mode example. +# open IPv4 stream on local port 11211 (memcached) over all possible ports. +# for each new flow, sort of tunnel will be created, i.e: +# new stream will be opend to communcate with forwarding remote address, +# so all packets with will be forwarded to +# and visa-versa. +lcore=3,op=fwd,laddr=0.0.0.0,lport=11211,raddr=0.0.0.0,rport=0,fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::56,fwrport=11211 diff --git a/examples/udpfwd/fwdtbl.h b/examples/udpfwd/fwdtbl.h new file mode 100644 index 0000000..1c4265e --- /dev/null +++ b/examples/udpfwd/fwdtbl.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FWDTBL_H__ +#define __FWDTBL_H__ + +struct fwd4_key { + uint32_t port; + struct in_addr addr; +} __attribute__((__packed__)); + +struct fwd6_key { + uint32_t port; + struct in6_addr addr; +} __attribute__((__packed__)); + +union fwd_key { + struct fwd4_key k4; + struct fwd6_key k6; +}; + +static struct rte_hash * +fwd_tbl_key_prep(const struct netfe_lcore *fe, uint16_t family, + const struct sockaddr *sa, union fwd_key *key) +{ + struct rte_hash *h; + const struct sockaddr_in *sin4; + const struct sockaddr_in6 *sin6; + + if (family == AF_INET) { + h = fe->fw4h; + sin4 = (const struct sockaddr_in *)sa; + key->k4.port = sin4->sin_port; + key->k4.addr = sin4->sin_addr; + } else { + h = fe->fw6h; + sin6 = (const struct sockaddr_in6 *)sa; + key->k6.port = sin6->sin6_port; + key->k6.addr = sin6->sin6_addr; + } + + return h; +} + +static int +fwd_tbl_add(struct netfe_lcore *fe, uint16_t family, const struct sockaddr *sa, + struct netfe_stream *data) +{ + int32_t rc; + struct rte_hash *h; + union fwd_key key; + + h = fwd_tbl_key_prep(fe, family, sa, &key); + rc = rte_hash_add_key_data(h, &key, data); + return rc; +} + +static struct netfe_stream * +fwd_tbl_lkp(struct netfe_lcore *fe, uint16_t family, const struct sockaddr *sa) +{ + int rc; + void *d; + struct rte_hash *h; + union fwd_key key; + + h = fwd_tbl_key_prep(fe, family, sa, &key); + rc = rte_hash_lookup_data(h, &key, &d); + if (rc < 0) + d = NULL; + return d; +} + +static int +fwd_tbl_init(struct netfe_lcore *fe, uint16_t family, uint32_t lcore) +{ + int32_t rc; + struct rte_hash **h; + struct rte_hash_parameters hprm; + char buf[RTE_HASH_NAMESIZE]; + + if (family == AF_INET) { + snprintf(buf, sizeof(buf), "fwd4tbl@%u", lcore); + h = &fe->fw4h; + hprm.key_len = sizeof(struct fwd4_key); + } else { + snprintf(buf, sizeof(buf), "fwd6tbl@%u", lcore); + h = &fe->fw6h; + hprm.key_len = sizeof(struct fwd6_key); + } + + hprm.name = buf; + hprm.entries = RTE_MAX(2 * fe->snum, 0x10U); + hprm.socket_id = rte_lcore_to_socket_id(lcore); + hprm.hash_func = NULL; + hprm.hash_func_init_val = 0; + + *h = rte_hash_create(&hprm); + if (*h == NULL) + rc = (rte_errno != 0) ? -rte_errno : -ENOMEM; + else + rc = 0; + return rc; +} + +#endif /* __FWDTBL_H__ */ diff --git a/examples/udpfwd/main.c b/examples/udpfwd/main.c new file mode 100644 index 0000000..a907355 --- /dev/null +++ b/examples/udpfwd/main.c @@ -0,0 +1,1810 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "netbe.h" +#include "parse.h" + +#define MAX_RULES 0x100 +#define MAX_TBL8 0x800 + +#define RX_RING_SIZE 0x400 +#define TX_RING_SIZE 0x800 + +#define MPOOL_CACHE_SIZE 0x100 +#define MPOOL_NB_BUF 0x20000 + +#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_UDP_MAX_HDR) +#define FRAG_TTL MS_PER_S +#define FRAG_TBL_BUCKET_ENTRIES 16 + +#define FIRST_PORT 0x8000 + +#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM) +#define TX_CSUM_OFFLOAD (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM) + +#define OPT_SHORT_PROMISC 'P' +#define OPT_LONG_PROMISC "promisc" + +#define OPT_SHORT_RBUFS 'R' +#define OPT_LONG_RBUFS "rbufs" + +#define OPT_SHORT_SBUFS 'S' +#define OPT_LONG_SBUFS "sbufs" + +#define OPT_SHORT_STREAMS 's' +#define OPT_LONG_STREAMS "streams" + +#define OPT_SHORT_FECFG 'f' +#define OPT_LONG_FECFG "fecfg" + +#define OPT_SHORT_BECFG 'b' +#define OPT_LONG_BECFG "becfg" + +RTE_DEFINE_PER_LCORE(struct netfe_lcore *, _fe); + +#include "fwdtbl.h" + +static const struct option long_opt[] = { + {OPT_LONG_BECFG, 1, 0, OPT_SHORT_BECFG}, + {OPT_LONG_FECFG, 1, 0, OPT_SHORT_FECFG}, + {OPT_LONG_PROMISC, 0, 0, OPT_SHORT_PROMISC}, + {OPT_LONG_RBUFS, 1, 0, OPT_SHORT_RBUFS}, + {OPT_LONG_SBUFS, 1, 0, OPT_SHORT_SBUFS}, + {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS}, + {NULL, 0, 0, 0} +}; + +static volatile int force_quit; + +static struct netbe_cfg becfg; +static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES + 1]; +static struct rte_mempool *frag_mpool[RTE_MAX_NUMA_NODES + 1]; + +static const struct rte_eth_conf port_conf_default = { + .rxmode = { + .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN, + .hw_vlan_strip = 1, + .jumbo_frame = 1, + }, +}; + +#include "parse.h" + +static void +sig_handle(int signum) +{ + RTE_LOG(ERR, USER1, "%s(%d)\n", __func__, signum); + force_quit = 1; +} + +/* + * Initilise DPDK port. + * In current version, only one queue per port is used. + */ +static int +port_init(struct netbe_port *uprt, struct rte_mempool *mp) +{ + int32_t socket, rc; + uint16_t q; + struct rte_eth_conf port_conf; + struct rte_eth_dev_info dev_info; + + const uint16_t rx_rings = 1, tx_rings = 1; + + rte_eth_dev_info_get(uprt->id, &dev_info); + if ((dev_info.rx_offload_capa & uprt->rx_offload) != uprt->rx_offload) { + RTE_LOG(ERR, USER1, + "port#%u supported/requested RX offloads don't match, " + "supported: %#x, requested: %#x;\n", + uprt->id, dev_info.rx_offload_capa, uprt->rx_offload); + return -EINVAL; + } + if ((dev_info.tx_offload_capa & uprt->tx_offload) != uprt->tx_offload) { + RTE_LOG(ERR, USER1, + "port#%u supported/requested TX offloads don't match, " + "supported: %#x, requested: %#x;\n", + uprt->id, dev_info.tx_offload_capa, uprt->tx_offload); + return -EINVAL; + } + + port_conf = port_conf_default; + if ((uprt->rx_offload & RX_CSUM_OFFLOAD) != 0) { + RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n", + __func__, uprt->id); + port_conf.rxmode.hw_ip_checksum = 1; + } + + port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN; + + rc = rte_eth_dev_configure(uprt->id, rx_rings, tx_rings, &port_conf); + RTE_LOG(NOTICE, USER1, + "%s: rte_eth_dev_configure(%u) returns %d;\n", + __func__, uprt->id, rc); + if (rc != 0) + return rc; + + socket = rte_eth_dev_socket_id(uprt->id); + + dev_info.default_rxconf.rx_drop_en = 1; + + dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2; + if (uprt->tx_offload != 0) { + RTE_LOG(ERR, USER1, "%s(%u): enabling full featured TX;\n", + __func__, uprt->id); + dev_info.default_txconf.txq_flags = 0; + } + + for (q = 0; q < rx_rings; q++) { + rc = rte_eth_rx_queue_setup(uprt->id, q, RX_RING_SIZE, + socket, NULL, mp); + if (rc < 0) + return rc; + } + + for (q = 0; q < tx_rings; q++) { + rc = rte_eth_tx_queue_setup(uprt->id, q, TX_RING_SIZE, + socket, &dev_info.default_txconf); + if (rc < 0) + return rc; + } + + + return 0; +} + +/* + * Check that lcore is enabled, not master, and not in use already. + */ +static int +check_lcore(uint32_t lc) +{ + if (rte_lcore_is_enabled(lc) == 0) { + RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lc); + return -EINVAL; + } + if (rte_get_master_lcore() == lc) { + RTE_LOG(ERR, USER1, "lcore %u is not slave\n", lc); + return -EINVAL; + } + if (rte_eal_get_lcore_state(lc) == RUNNING) { + RTE_LOG(ERR, USER1, "lcore %u already running %p\n", + lc, lcore_config[lc].f); + return -EINVAL; + } + return 0; +} + +static void +log_netbe_prt(const struct netbe_port *uprt) +{ + RTE_LOG(NOTICE, USER1, + "uprt %p = ;\n", + uprt, uprt->id, uprt->lcore, + uprt->mtu, uprt->rx_offload, uprt->tx_offload, + uprt->ipv4, + uprt->ipv6.s6_addr16[0], uprt->ipv6.s6_addr16[1], + uprt->ipv6.s6_addr16[2], uprt->ipv6.s6_addr16[3], + uprt->ipv6.s6_addr16[4], uprt->ipv6.s6_addr16[5], + uprt->ipv6.s6_addr16[6], uprt->ipv6.s6_addr16[7], + uprt->mac.addr_bytes[0], uprt->mac.addr_bytes[1], + uprt->mac.addr_bytes[2], uprt->mac.addr_bytes[3], + uprt->mac.addr_bytes[4], uprt->mac.addr_bytes[5]); +} + +static void +log_netbe_cfg(const struct netbe_cfg *ucfg) +{ + uint32_t i; + + RTE_LOG(NOTICE, USER1, + "ucfg @ %p, prt_num = %u\n", ucfg, ucfg->prt_num); + + for (i = 0; i != ucfg->prt_num; i++) + log_netbe_prt(ucfg->prt + i); +} + +static int +pool_init(uint32_t sid) +{ + int32_t rc; + struct rte_mempool *mp; + char name[RTE_MEMPOOL_NAMESIZE]; + + snprintf(name, sizeof(name), "MP%u", sid); + mp = rte_pktmbuf_pool_create(name, MPOOL_NB_BUF, MPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, sid - 1); + if (mp == NULL) { + rc = -rte_errno; + RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", + __func__, sid - 1, rc); + return rc; + } + + mpool[sid] = mp; + return 0; +} + +static int +frag_pool_init(uint32_t sid) +{ + int32_t rc; + struct rte_mempool *frag_mp; + char frag_name[RTE_MEMPOOL_NAMESIZE]; + + snprintf(frag_name, sizeof(frag_name), "frag_MP%u", sid); + frag_mp = rte_pktmbuf_pool_create(frag_name, MPOOL_NB_BUF, + MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1); + if (frag_mp == NULL) { + rc = -rte_errno; + RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", + __func__, sid - 1, rc); + return rc; + } + + frag_mpool[sid] = frag_mp; + return 0; +} + + +/* + * Setup all enabled ports. + */ +static void +netbe_port_init(struct netbe_cfg *cfg, int argc, char *argv[]) +{ + int32_t rc; + uint32_t i, n, sid; + + n = RTE_MIN(RTE_DIM(cfg->prt), (uint32_t)argc); + + rc = 0; + for (i = 0; i != n; i++) { + rc = parse_netbe_arg(cfg->prt + i, argv[i]); + if (rc != 0) + break; + + rc = check_lcore(cfg->prt[i].lcore); + if (rc != 0) + break; + + sid = rte_lcore_to_socket_id(cfg->prt[i].lcore) + 1; + assert(sid < RTE_DIM(mpool)); + + if (mpool[sid] == NULL && (rc = pool_init(sid)) != 0) + break; + + if (frag_mpool[sid] == NULL && (rc = frag_pool_init(sid)) != 0) + break; + + rc = port_init(cfg->prt + i, mpool[sid]); + if (rc != 0) + break; + + rte_eth_macaddr_get(cfg->prt[i].id, &cfg->prt[i].mac); + if (cfg->promisc) + rte_eth_promiscuous_enable(cfg->prt[i].id); + } + + if (rc != 0) + rte_exit(EXIT_FAILURE, + "%s: processing of \"%s\" failed with error code: %d\n", + __func__, argv[i], rc); + + cfg->prt_num = i; + log_netbe_cfg(cfg); +} + +/* + * UDP IPv4 destination lookup callback. + */ +static int +lpm4_dst_lookup(void *data, const struct in_addr *addr, + struct tle_udp_dest *res) +{ + int32_t rc; + uint32_t idx; + struct netbe_lcore *lc; + struct tle_udp_dest *dst; + + lc = data; + + rc = rte_lpm_lookup(lc->lpm4, rte_be_to_cpu_32(addr->s_addr), &idx); + if (rc == 0) { + dst = &lc->dst4[idx]; + rte_memcpy(res, dst, dst->l2_len + dst->l3_len + + offsetof(struct tle_udp_dest, hdr)); + } + return rc; +} + +/* + * UDP IPv6 destination lookup callback. + */ +static int +lpm6_dst_lookup(void *data, const struct in6_addr *addr, + struct tle_udp_dest *res) +{ + int32_t rc; + uint8_t idx; + struct netbe_lcore *lc; + struct tle_udp_dest *dst; + uintptr_t p; + + lc = data; + p = (uintptr_t)addr->s6_addr; + + rc = rte_lpm6_lookup(lc->lpm6, (uint8_t *)p, &idx); + if (rc == 0) { + dst = &lc->dst6[idx]; + rte_memcpy(res, dst, dst->l2_len + dst->l3_len + + offsetof(struct tle_udp_dest, hdr)); + } + return rc; +} + +static int +netbe_add_ipv4_route(struct netbe_lcore *lc, const struct netbe_dest *dst, + uint8_t idx) +{ + int32_t rc; + uint32_t addr, depth; + char str[INET_ADDRSTRLEN]; + + depth = dst->prfx; + addr = rte_be_to_cpu_32(dst->ipv4.s_addr); + + inet_ntop(AF_INET, &dst->ipv4, str, sizeof(str)); + rc = rte_lpm_add(lc->lpm4, addr, depth, idx); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," + "ipv4=%s/%u,mtu=%u," + "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " + "returns %d;\n", + __func__, lc->id, dst->port, lc->dst4[idx].dev, + str, depth, lc->dst4[idx].mtu, + dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], + dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], + dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], + rc); + return rc; +} + +static int +netbe_add_ipv6_route(struct netbe_lcore *lc, const struct netbe_dest *dst, + uint8_t idx) +{ + int32_t rc; + uint32_t depth; + char str[INET6_ADDRSTRLEN]; + + depth = dst->prfx; + + rc = rte_lpm6_add(lc->lpm6, (uint8_t *)(uintptr_t)dst->ipv6.s6_addr, + depth, idx); + + inet_ntop(AF_INET6, &dst->ipv6, str, sizeof(str)); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," + "ipv6=%s/%u,mtu=%u," + "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " + "returns %d;\n", + __func__, lc->id, dst->port, lc->dst6[idx].dev, + str, depth, lc->dst4[idx].mtu, + dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], + dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], + dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], + rc); + return rc; +} + +static int +lcore_lpm_init(struct netbe_lcore *lc) +{ + int32_t sid; + char str[RTE_LPM_NAMESIZE]; + const struct rte_lpm_config lpm4_cfg = { + .max_rules = MAX_RULES, + .number_tbl8s = MAX_TBL8, + }; + const struct rte_lpm6_config lpm6_cfg = { + .max_rules = MAX_RULES, + .number_tbl8s = MAX_TBL8, + }; + + sid = rte_lcore_to_socket_id(lc->id); + + snprintf(str, sizeof(str), "LPM4%u\n", lc->id); + lc->lpm4 = rte_lpm_create(str, sid, &lpm4_cfg); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): lpm4=%p;\n", + __func__, lc->id, lc->lpm4); + if (lc->lpm4 == NULL) + return -ENOMEM; + + snprintf(str, sizeof(str), "LPM6%u\n", lc->id); + lc->lpm6 = rte_lpm6_create(str, sid, &lpm6_cfg); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): lpm6=%p;\n", + __func__, lc->id, lc->lpm6); + if (lc->lpm6 == NULL) + return -ENOMEM; + + return 0; +} + +static void +fill_dst(struct tle_udp_dest *dst, struct netbe_dev *bed, + const struct netbe_dest *bdp, uint16_t l3_type, int32_t sid) +{ + struct ether_hdr *eth; + struct ipv4_hdr *ip4h; + struct ipv6_hdr *ip6h; + + static const struct ipv4_hdr ipv4_tmpl = { + .version_ihl = 4 << 4 | sizeof(*ip4h) / IPV4_IHL_MULTIPLIER, + .time_to_live = 64, + .next_proto_id = IPPROTO_UDP, + }; + + static const struct ipv6_hdr ipv6_tmpl = { + .vtc_flow = 6 << 4, + .proto = IPPROTO_UDP, + .hop_limits = 64, + }; + + dst->dev = bed->dev; + dst->head_mp = frag_mpool[sid + 1]; + dst->mtu = RTE_MIN(bdp->mtu, bed->port.mtu); + dst->l2_len = sizeof(*eth); + + eth = (struct ether_hdr *)dst->hdr; + + ether_addr_copy(&bed->port.mac, ð->s_addr); + ether_addr_copy(&bdp->mac, ð->d_addr); + eth->ether_type = rte_cpu_to_be_16(l3_type); + + if (l3_type == ETHER_TYPE_IPv4) { + dst->l3_len = sizeof(*ip4h); + ip4h = (struct ipv4_hdr *)(eth + 1); + ip4h[0] = ipv4_tmpl; + } else if (l3_type == ETHER_TYPE_IPv6) { + dst->l3_len = sizeof(*ip6h); + ip6h = (struct ipv6_hdr *)(eth + 1); + ip6h[0] = ipv6_tmpl; + } +} + + +/* + * BE lcore setup routine. + */ +static int +lcore_init(struct netbe_lcore *lc, const struct tle_udp_ctx_param *ctx_prm, + const struct netbe_port prt[], uint32_t prt_num) +{ + int32_t rc, sid; + uint32_t i; + uint64_t frag_cycles; + struct tle_udp_ctx_param cprm; + struct tle_udp_dev_param dprm; + + lc->id = prt[0].lcore; + lc->prt_num = prt_num; + + sid = rte_lcore_to_socket_id(lc->id); + + rc = lcore_lpm_init(lc); + if (rc != 0) + return rc; + + cprm = *ctx_prm; + cprm.socket_id = sid; + cprm.lookup4 = lpm4_dst_lookup; + cprm.lookup4_data = lc; + cprm.lookup6 = lpm6_dst_lookup; + cprm.lookup6_data = lc; + + /* to facilitate both IPv4 and IPv6. */ + cprm.max_streams *= 2; + + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * FRAG_TTL; + lc->ftbl = rte_ip_frag_table_create(cprm.max_streams, + FRAG_TBL_BUCKET_ENTRIES, cprm.max_streams, frag_cycles, sid); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): frag_tbl=%p;\n", + __func__, lc->id, lc->ftbl); + + lc->ctx = tle_udp_create(&cprm); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): udp_ctx=%p;\n", + __func__, lc->id, lc->ctx); + + if (lc->ctx == NULL || lc->ftbl == NULL) + rc = ENOMEM; + + for (i = 0; i != prt_num && rc == 0; i++) { + + memset(&dprm, 0, sizeof(dprm)); + + lc->prt[i].rxqid = 0; + lc->prt[i].txqid = 0; + lc->prt[i].port = prt[i]; + + dprm.rx_offload = prt[i].rx_offload; + dprm.tx_offload = prt[i].tx_offload; + dprm.local_addr4.s_addr = prt[i].ipv4; + memcpy(&dprm.local_addr6, &prt[i].ipv6, sizeof(prt[i].ipv6)); + + lc->prt[i].dev = tle_udp_add_dev(lc->ctx, &dprm); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, port=%u), udp_dev: %p;\n", + __func__, lc->id, prt[i].id, lc->prt[i].dev); + if (lc->prt[i].dev == NULL) + rc = -rte_errno; + } + + if (rc != 0) { + RTE_LOG(ERR, USER1, "%s(lcore=%u) failed with error code: %d\n", + __func__, lc->id, rc); + tle_udp_destroy(lc->ctx); + rte_ip_frag_table_destroy(lc->ftbl); + rte_lpm_free(lc->lpm4); + rte_lpm6_free(lc->lpm6); + } + + return rc; +} + +static int +prt_lcore_cmp(const void *s1, const void *s2) +{ + const struct netbe_port *p1, *p2; + + p1 = s1; + p2 = s2; + return p1->lcore - p2->lcore; +} + +static void +netbe_lcore_init(struct netbe_cfg *cfg, const struct tle_udp_ctx_param *ctx_prm) +{ + int32_t rc; + uint32_t i, k, n, num; + struct netbe_port sp[RTE_DIM(cfg->prt)]; + + num = cfg->prt_num; + memcpy(sp, cfg->prt, sizeof(sp[0]) * num); + qsort(sp, num, sizeof(sp[0]), prt_lcore_cmp); + + /* Fill ports to be used by each lcore. */ + + k = 0; + n = 0; + rc = 0; + for (i = 0; i != num && rc == 0; i++) { + if (sp[n].lcore != sp[i].lcore) { + rc = lcore_init(cfg->cpu + k, ctx_prm, sp + n, i - n); + n = i; + k++; + } + } + + if (rc == 0 && i != n) { + rc = lcore_init(cfg->cpu + k, ctx_prm, sp + n, i - n); + k++; + } + + if (rc != 0) + rte_exit(EXIT_FAILURE, "%s: failed with error code: %d\n", + __func__, rc); + + cfg->cpu_num = k; +} + +static void +netbe_lcore_fini(struct netbe_cfg *cfg) +{ + uint32_t i; + + for (i = 0; i != cfg->cpu_num; i++) { + tle_udp_destroy(cfg->cpu[i].ctx); + rte_ip_frag_table_destroy(cfg->cpu[i].ftbl); + rte_lpm_free(cfg->cpu[i].lpm4); + rte_lpm6_free(cfg->cpu[i].lpm6); + } + + memset(cfg->cpu, 0, sizeof(cfg->cpu)); + cfg->cpu_num = 0; +} + +static int +netbe_add_dest(struct netbe_lcore *lc, uint32_t dev_idx, uint16_t family, + const struct netbe_dest *dst, uint32_t dnum) +{ + int32_t rc, sid; + uint16_t l3_type; + uint32_t i, n, m; + struct tle_udp_dest *dp; + + if (family == AF_INET) { + n = lc->dst4_num; + dp = lc->dst4 + n; + m = RTE_DIM(lc->dst4); + l3_type = ETHER_TYPE_IPv4; + } else { + n = lc->dst6_num; + dp = lc->dst6 + n; + m = RTE_DIM(lc->dst6); + l3_type = ETHER_TYPE_IPv6; + } + + if (n + dnum >= m) { + RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds " + "maximum allowed number of destinations(%u);\n", + __func__, lc->id, family, dnum, m); + return -ENOSPC; + } + + sid = rte_lcore_to_socket_id(lc->id); + rc = 0; + + for (i = 0; i != dnum && rc == 0; i++) { + fill_dst(dp + i, lc->prt + dev_idx, dst + i, l3_type, sid); + if (family == AF_INET) + rc = netbe_add_ipv4_route(lc, dst + i, n + i); + else + rc = netbe_add_ipv6_route(lc, dst + i, n + i); + } + + if (family == AF_INET) + lc->dst4_num = n + i; + else + lc->dst6_num = n + i; + + return rc; +} + +static int +netbe_port2lcore(struct netbe_cfg *cfg, uint32_t port, struct netbe_lcore **plc) +{ + uint32_t i, j; + struct netbe_lcore *lc; + + for (i = 0; i != cfg->cpu_num; i++) { + lc = cfg->cpu + i; + for (j = 0; j != cfg->prt_num; j++) { + if (lc->prt[j].port.id == port) { + *plc = lc; + return j; + } + } + } + + return -ENOENT; +} + +static int +netbe_dest_cmp(const void *s1, const void *s2) +{ + const struct netbe_dest *p1, *p2; + + p1 = s1; + p2 = s2; + if (p1->port == p2->port) + return p1->family - p2->family; + else + return p1->port - p2->port; +} + +static int +netbe_dest_init(const char *fname, struct netbe_cfg *cfg) +{ + int32_t rc; + uint32_t f, i, j, p; + struct netbe_lcore *lc; + struct netbe_dest_prm prm; + + rc = netbe_parse_dest(fname, &prm); + if (rc != 0) + return rc; + + qsort(prm.dest, prm.nb_dest, sizeof(prm.dest[0]), netbe_dest_cmp); + + rc = 0; + for (i = 0; i != prm.nb_dest; i = j) { + + p = prm.dest[i].port; + f = prm.dest[i].family; + for (j = i + 1; j != prm.nb_dest && p == prm.dest[j].port && + f == prm.dest[j].family; + j++) + ; + + rc = netbe_port2lcore(cfg, p, &lc); + if (rc < 0) { + RTE_LOG(ERR, USER1, "%s(%s) error at line %u: " + "port %u not managed by any lcore;\n", + __func__, fname, prm.dest[i].line, p); + break; + } + + rc = netbe_add_dest(lc, rc, f, prm.dest + i, j - i); + if (rc != 0) + break; + } + + free(prm.dest); + return rc; +} + +static void +netfe_stream_close(struct netfe_lcore *fe, uint32_t dec) +{ + uint32_t sidx; + + fe->sidx -= dec; + sidx = fe->sidx; + tle_event_free(fe->fs[sidx].txev); + tle_event_free(fe->fs[sidx].rxev); + tle_udp_stream_close(fe->fs[sidx].s); + memset(&fe->fs[sidx], 0, sizeof(fe->fs[sidx])); +} + +static void +netfe_stream_dump(const struct netfe_stream *fes) +{ + struct sockaddr_in *l4, *r4; + struct sockaddr_in6 *l6, *r6; + uint16_t lport, rport; + struct tle_udp_stream_param sprm; + char laddr[INET6_ADDRSTRLEN]; + char raddr[INET6_ADDRSTRLEN]; + + tle_udp_stream_get_param(fes->s, &sprm); + + if (sprm.local_addr.ss_family == AF_INET) { + + l4 = (struct sockaddr_in *)&sprm.local_addr; + r4 = (struct sockaddr_in *)&sprm.remote_addr; + + lport = l4->sin_port; + rport = r4->sin_port; + + } else if (sprm.local_addr.ss_family == AF_INET6) { + + l6 = (struct sockaddr_in6 *)&sprm.local_addr; + r6 = (struct sockaddr_in6 *)&sprm.remote_addr; + + lport = l6->sin6_port; + rport = r6->sin6_port; + + } else { + RTE_LOG(ERR, USER1, "stream@%p - unknown family=%hu\n", + fes->s, sprm.local_addr.ss_family); + return; + } + + format_addr(&sprm.local_addr, laddr, sizeof(laddr)); + format_addr(&sprm.remote_addr, raddr, sizeof(raddr)); + + RTE_LOG(INFO, USER1, + "stream@%p={" + "family=%hu,laddr=%s,lport=%hu,raddr=%s,rport=%hu," + "stats={" + "rxp=%" PRIu64 ",txp=%" PRIu64 ",drops=%" PRIu64 "," + "rxev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]," + "txev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]," + "}};\n", + fes->s, + sprm.local_addr.ss_family, + laddr, ntohs(lport), raddr, ntohs(rport), + fes->stat.rxp, fes->stat.txp, fes->stat.drops, + fes->stat.rxev[TLE_SEV_IDLE], + fes->stat.rxev[TLE_SEV_DOWN], + fes->stat.rxev[TLE_SEV_UP], + fes->stat.txev[TLE_SEV_IDLE], + fes->stat.txev[TLE_SEV_DOWN], + fes->stat.txev[TLE_SEV_UP]); +} + + +/* + * helper function: opens IPv4 and IPv6 streams for selected port. + */ +static struct netfe_stream * +netfe_stream_open(struct netfe_lcore *fe, struct tle_udp_stream_param *sprm, + uint32_t lcore, uint16_t op, uint32_t bidx) +{ + int32_t rc; + uint32_t sidx; + struct netfe_stream *fes; + + sidx = fe->sidx; + fes = fe->fs + sidx; + if (sidx >= fe->snum) { + rte_errno = ENOBUFS; + return NULL; + } + + fes->rxev = tle_event_alloc(fe->rxeq, &fe->fs[sidx]); + fes->txev = tle_event_alloc(fe->txeq, &fe->fs[sidx]); + sprm->recv_ev = fes->rxev; + if (op != FWD) + sprm->send_ev = fes->txev; + + RTE_LOG(ERR, USER1, "%s(%u) [%u]={op=%hu, rxev=%p, txev=%p}\n", + __func__, lcore, sidx, op, fes->rxev, fes->txev); + if (fes->rxev == NULL || fes->txev == NULL) { + netfe_stream_close(fe, 0); + rte_errno = ENOMEM; + return NULL; + } + + if (op == TXONLY || op == FWD) { + tle_event_active(fes->txev, TLE_SEV_DOWN); + fes->stat.txev[TLE_SEV_DOWN]++; + } + + if (op != TXONLY) { + tle_event_active(fes->rxev, TLE_SEV_DOWN); + fes->stat.rxev[TLE_SEV_DOWN]++; + } + + fes->s = tle_udp_stream_open(becfg.cpu[bidx].ctx, sprm); + if (fes->s == NULL) { + rc = rte_errno; + netfe_stream_close(fe, 0); + rte_errno = rc; + return NULL; + } + + fes->op = op; + fes->family = sprm->local_addr.ss_family; + + fe->sidx = sidx + 1; + return fes; + +} + +static inline int +netfe_addr_eq(struct sockaddr_storage *l, struct sockaddr_storage *r, + uint16_t family) +{ + struct sockaddr_in *l4, *r4; + struct sockaddr_in6 *l6, *r6; + + if (family == AF_INET) { + l4 = (struct sockaddr_in *)l; + r4 = (struct sockaddr_in *)r; + return (l4->sin_port == r4->sin_port && + l4->sin_addr.s_addr == r4->sin_addr.s_addr); + } else { + l6 = (struct sockaddr_in6 *)l; + r6 = (struct sockaddr_in6 *)r; + return (l6->sin6_port == r6->sin6_port && + memcmp(&l6->sin6_addr, &r6->sin6_addr, + sizeof(l6->sin6_addr))); + } +} + +static inline void +netfe_pkt_addr(const struct rte_mbuf *m, struct sockaddr_storage *ps, + uint16_t family) +{ + const struct ipv4_hdr *ip4h; + const struct ipv6_hdr *ip6h; + const struct udp_hdr *udph; + struct sockaddr_in *in4; + struct sockaddr_in6 *in6; + + NETFE_PKT_DUMP(m); + + udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len); + + if (family == AF_INET) { + in4 = (struct sockaddr_in *)ps; + ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + -(m->l4_len + m->l3_len)); + in4->sin_port = udph->src_port; + in4->sin_addr.s_addr = ip4h->src_addr; + } else { + in6 = (struct sockaddr_in6 *)ps; + ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + -(m->l4_len + m->l3_len)); + in6->sin6_port = udph->src_port; + rte_memcpy(&in6->sin6_addr, ip6h->src_addr, + sizeof(in6->sin6_addr)); + } +} + +static inline uint32_t +pkt_eq_addr(struct rte_mbuf *pkt[], uint32_t num, uint16_t family, + struct sockaddr_storage *cur, struct sockaddr_storage *nxt) +{ + uint32_t i; + + for (i = 0; i != num; i++) { + netfe_pkt_addr(pkt[i], nxt, family); + if (netfe_addr_eq(cur, nxt, family) == 0) + break; + } + + return i; +} + +static inline void +pkt_buf_empty(struct pkt_buf *pb) +{ + uint32_t i; + + for (i = 0; i != pb->num; i++) + rte_pktmbuf_free(pb->pkt[i]); + + pb->num = 0; +} + +static inline void +pkt_buf_fill(uint32_t lcore, struct pkt_buf *pb, uint32_t dlen) +{ + uint32_t i; + int32_t sid; + + sid = rte_lcore_to_socket_id(lcore) + 1; + + for (i = pb->num; i != RTE_DIM(pb->pkt); i++) { + pb->pkt[i] = rte_pktmbuf_alloc(mpool[sid]); + if (pb->pkt[i] == NULL) + break; + rte_pktmbuf_append(pb->pkt[i], dlen); + } + + pb->num = i; +} + +static struct netfe_stream * +find_fwd_dst(uint32_t lcore, struct netfe_stream *fes, + const struct sockaddr *sa) +{ + uint32_t rc; + struct netfe_stream *fed; + struct netfe_lcore *fe; + struct tle_udp_stream_param sprm; + + fe = RTE_PER_LCORE(_fe); + + fed = fwd_tbl_lkp(fe, fes->family, sa); + if (fed != NULL) + return fed; + + /* create a new stream and put it into the fwd table. */ + + sprm = fes->fwdprm.prm; + + /* open forward stream with wildcard remote addr. */ + memset(&sprm.remote_addr.ss_family + 1, 0, + sizeof(sprm.remote_addr) - sizeof(sprm.remote_addr.ss_family)); + fed = netfe_stream_open(fe, &sprm, lcore, FWD, fes->fwdprm.bidx); + if (fed == NULL) + return NULL; + + rc = fwd_tbl_add(fe, fes->family, sa, fed); + if (rc != 0) { + netfe_stream_close(fe, 1); + fed = NULL; + } + + fed->fwdprm.prm.remote_addr = *(const struct sockaddr_storage *)sa; + return fed; +} + +static inline void +netfe_tx_process(uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, k, n; + + /* refill with new mbufs. */ + pkt_buf_fill(lcore, &fes->pbuf, fes->txlen); + + n = fes->pbuf.num; + if (n == 0) + return; + + k = tle_udp_stream_send(fes->s, fes->pbuf.pkt, n, NULL); + NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) returns %u\n", + __func__, lcore, fes->s, n, k); + fes->stat.txp += k; + fes->stat.drops += n - k; + + if (k == 0) + return; + + /* adjust pbuf array. */ + fes->pbuf.num = n - k; + for (i = k; i != n; i++) + fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i]; +} + + +static inline void +netfe_fwd(uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, j, k, n, x; + uint16_t family; + void *pi0, *pi1, *pt; + struct rte_mbuf **pkt; + struct netfe_stream *fed; + struct sockaddr_storage in[2]; + + family = fes->family; + n = fes->pbuf.num; + pkt = fes->pbuf.pkt; + + if (n == 0) + return; + + in[0].ss_family = family; + in[1].ss_family = family; + pi0 = &in[0]; + pi1 = &in[1]; + + netfe_pkt_addr(pkt[0], pi0, family); + + x = 0; + for (i = 0; i != n; i = j) { + + j = i + pkt_eq_addr(&pkt[i + 1], + n - i - 1, family, pi0, pi1) + 1; + + fed = find_fwd_dst(lcore, fes, (const struct sockaddr *)pi0); + if (fed != NULL) { + + k = tle_udp_stream_send(fed->s, pkt + i, j - i, + (const struct sockaddr *) + &fes->fwdprm.prm.remote_addr); + + NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) " + "returns %u\n", + __func__, lcore, fed->s, j - i, k); + fed->stat.txp += k; + fed->stat.drops += j - i - k; + fes->stat.fwp += k; + + } else { + NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n", + __func__, lcore, fes->s, j - i); + for (k = i; k != j; k++) { + NETFE_TRACE("%s(%u, %p): free(%p);\n", + __func__, lcore, fes->s, pkt[k]); + rte_pktmbuf_free(pkt[j]); + } + fes->stat.drops += j - i; + } + + /* copy unforwarded mbufs. */ + for (i += k; i != j; i++, x++) + pkt[x] = pkt[i]; + + /* swap the pointers */ + pt = pi0; + pi0 = pi1; + pi1 = pt; + } + + fes->pbuf.num = x; + + if (x != 0) { + tle_event_raise(fes->txev); + fes->stat.txev[TLE_SEV_UP]++; + } + + if (n == RTE_DIM(fes->pbuf.pkt)) { + tle_event_active(fes->rxev, TLE_SEV_UP); + fes->stat.rxev[TLE_SEV_UP]++; + } +} + +static inline void +netfe_rx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t k, n; + + n = fes->pbuf.num; + k = RTE_DIM(fes->pbuf.pkt) - n; + + /* packet buffer is full, can't receive any new packets. */ + if (k == 0) { + tle_event_idle(fes->rxev); + fes->stat.rxev[TLE_SEV_IDLE]++; + return; + } + + n = tle_udp_stream_recv(fes->s, fes->pbuf.pkt + n, k); + if (n == 0) + return; + + NETFE_TRACE("%s(%u): tle_udp_stream_recv(%p, %u) returns %u\n", + __func__, lcore, fes->s, k, n); + + fes->pbuf.num += n; + fes->stat.rxp += n; + + /* free all received mbufs. */ + if (fes->op == RXONLY) + pkt_buf_empty(&fes->pbuf); + /* mark stream as writable */ + else if (k == RTE_DIM(fes->pbuf.pkt)) { + if (fes->op == RXTX) { + tle_event_active(fes->txev, TLE_SEV_UP); + fes->stat.txev[TLE_SEV_UP]++; + } else if (fes->op == FWD) { + tle_event_raise(fes->txev); + fes->stat.txev[TLE_SEV_UP]++; + } + } +} + +static inline void +netfe_rxtx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, j, k, n; + uint16_t family; + void *pi0, *pi1, *pt; + struct rte_mbuf **pkt; + struct sockaddr_storage in[2]; + + family = fes->family; + n = fes->pbuf.num; + pkt = fes->pbuf.pkt; + + /* there is nothing to send. */ + if (n == 0) { + tle_event_idle(fes->txev); + fes->stat.txev[TLE_SEV_IDLE]++; + return; + } + + in[0].ss_family = family; + in[1].ss_family = family; + pi0 = &in[0]; + pi1 = &in[1]; + + netfe_pkt_addr(pkt[0], pi0, family); + + for (i = 0; i != n; i = j) { + + j = i + pkt_eq_addr(&pkt[i + 1], + n - i - 1, family, pi0, pi1) + 1; + + k = tle_udp_stream_send(fes->s, pkt + i, j - i, + (const struct sockaddr *)pi0); + + NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) returns %u\n", + __func__, lcore, fes->s, j - i, k); + fes->stat.txp += k; + fes->stat.drops += j - i - k; + + i += k; + + /* stream send buffer is full */ + if (i != j) + break; + + /* swap the pointers */ + pt = pi0; + pi0 = pi1; + pi1 = pt; + } + + /* not able to send anything. */ + if (i == 0) + return; + + if (n == RTE_DIM(fes->pbuf.pkt)) { + /* mark stream as readable */ + tle_event_active(fes->rxev, TLE_SEV_UP); + fes->stat.rxev[TLE_SEV_UP]++; + } + + /* adjust pbuf array. */ + fes->pbuf.num = n - i; + for (j = i; j != n; j++) + pkt[j - i] = pkt[j]; +} + +static int +netfe_lcore(void *arg) +{ + size_t sz; + int32_t rc; + uint32_t i, j, n, lcore, snum; + const struct netfe_lcore_prm *prm; + struct netfe_lcore *fe; + struct tle_evq_param eprm; + struct tle_udp_stream_param sprm; + struct netfe_stream *fes, *fs[MAX_PKT_BURST]; + + lcore = rte_lcore_id(); + prm = arg; + + snum = prm->max_streams; + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n", + __func__, lcore, prm->nb_streams, snum); + + memset(&eprm, 0, sizeof(eprm)); + eprm.socket_id = rte_lcore_to_socket_id(lcore); + eprm.max_events = snum; + + sz = sizeof(*fe) + snum * sizeof(fe->fs[0]); + fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, + rte_lcore_to_socket_id(lcore)); + + if (fe == NULL) { + RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n", + __func__, __LINE__, sz); + return -ENOMEM; + } + + RTE_PER_LCORE(_fe) = fe; + + fe->snum = snum; + fe->fs = (struct netfe_stream *)(fe + 1); + + fe->rxeq = tle_evq_create(&eprm); + fe->txeq = tle_evq_create(&eprm); + + RTE_LOG(ERR, USER1, "%s(%u) rx evq=%p, tx evq=%p\n", + __func__, lcore, fe->rxeq, fe->txeq); + if (fe->rxeq == NULL || fe->txeq == NULL) + return -ENOMEM; + + rc = fwd_tbl_init(fe, AF_INET, lcore); + RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", + __func__, lcore, AF_INET, rc); + if (rc != 0) + return rc; + + rc = fwd_tbl_init(fe, AF_INET6, lcore); + RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", + __func__, lcore, AF_INET6, rc); + if (rc != 0) + return rc; + + /* open all requested streams. */ + for (i = 0; i != prm->nb_streams; i++) { + sprm = prm->stream[i].sprm.prm; + fes = netfe_stream_open(fe, &sprm, lcore, prm->stream[i].op, + prm->stream[i].sprm.bidx); + if (fes == NULL) { + rc = -rte_errno; + break; + } + + netfe_stream_dump(fes); + + if (prm->stream[i].op == FWD) { + fes->fwdprm = prm->stream[i].fprm; + rc = fwd_tbl_add(fe, + prm->stream[i].fprm.prm.remote_addr.ss_family, + (const struct sockaddr *) + &prm->stream[i].fprm.prm.remote_addr, + fes); + if (rc != 0) { + netfe_stream_close(fe, 1); + break; + } + } else if (prm->stream[i].op == TXONLY) { + fes->txlen = prm->stream[i].txlen; + fes->raddr = sprm.remote_addr; + } + } + + while (fe->sidx >= prm->nb_streams && force_quit == 0) { + + n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, + RTE_DIM(fs)); + + if (n != 0) { + NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) " + "returns %u\n", + __func__, lcore, fe->rxeq, n); + for (j = 0; j != n; j++) + netfe_rx_process(lcore, fs[j]); + } + + n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, + RTE_DIM(fs)); + + if (n != 0) { + NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) " + "returns %u\n", + __func__, lcore, fe->txeq, n); + for (j = 0; j != n; j++) { + if (fs[j]->op == RXTX) + netfe_rxtx_process(lcore, fs[j]); + else if (fs[j]->op == FWD) + netfe_fwd(lcore, fs[j]); + else if (fs[j]->op == TXONLY) + netfe_tx_process(lcore, fs[j]); + } + } + } + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n", + __func__, lcore); + + while (fe->sidx != 0) { + + i = fe->sidx - 1; + netfe_stream_dump(fe->fs + i); + netfe_stream_close(fe, 1); + } + + tle_evq_destroy(fe->txeq); + tle_evq_destroy(fe->rxeq); + rte_free(fe); + + return rc; +} + +static inline void +netbe_rx(struct netbe_lcore *lc, uint32_t pidx) +{ + uint32_t j, k, n; + struct rte_mbuf *pkt[MAX_PKT_BURST]; + struct rte_mbuf *rp[MAX_PKT_BURST]; + int32_t rc[MAX_PKT_BURST]; + + n = rte_eth_rx_burst(lc->prt[pidx].port.id, + lc->prt[pidx].rxqid, pkt, RTE_DIM(pkt)); + if (n == 0) + return; + + lc->prt[pidx].rx_stat.in += n; + NETBE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n", + __func__, lc->id, lc->prt[pidx].port.id, lc->prt[pidx].rxqid, + n); + + k = tle_udp_rx_bulk(lc->prt[pidx].dev, pkt, rp, rc, n); + + lc->prt[pidx].rx_stat.up += k; + lc->prt[pidx].rx_stat.drop += n - k; + NETBE_TRACE("%s(%u): tle_udp_rx_bulk(%p, %u) returns %u\n", + __func__, lc->id, lc->prt[pidx].dev, n, k); + + for (j = 0; j != n - k; j++) { + NETBE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n", + __func__, __LINE__, lc->prt[pidx].port.id, + j, rp[j], rc[j]); + rte_pktmbuf_free(rp[j]); + } +} + +static inline void +netbe_tx(struct netbe_lcore *lc, uint32_t pidx) +{ + uint32_t j, k, n; + struct rte_mbuf **mb; + + n = lc->prt[pidx].tx_buf.num; + k = RTE_DIM(lc->prt[pidx].tx_buf.pkt) - n; + mb = lc->prt[pidx].tx_buf.pkt; + + if (k >= RTE_DIM(lc->prt[pidx].tx_buf.pkt) / 2) { + j = tle_udp_tx_bulk(lc->prt[pidx].dev, mb + n, k); + n += j; + lc->prt[pidx].tx_stat.down += j; + } + + if (n == 0) + return; + + NETBE_TRACE("%s(%u): tle_udp_tx_bulk(%p) returns %u,\n" + "total pkts to send: %u\n", + __func__, lc->id, lc->prt[pidx].dev, j, n); + + for (j = 0; j != n; j++) + NETBE_PKT_DUMP(mb[j]); + + k = rte_eth_tx_burst(lc->prt[pidx].port.id, + lc->prt[pidx].txqid, mb, n); + + lc->prt[pidx].tx_stat.out += k; + lc->prt[pidx].tx_stat.drop += n - k; + NETBE_TRACE("%s(%u): rte_eth_tx_burst(%u, %u, %u) returns %u\n", + __func__, lc->id, lc->prt[pidx].port.id, lc->prt[pidx].txqid, + n, k); + + lc->prt[pidx].tx_buf.num = n - k; + if (k != 0) + for (j = k; j != n; j++) + mb[j - k] = mb[j]; +} + +static int +netbe_lcore(void *arg) +{ + uint32_t i, j; + int32_t rc; + struct netbe_lcore *lc; + + lc = arg; + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, udp_ctx: %p) start\n", + __func__, lc->id, lc->ctx); + + /* + * ??????? + * wait for FE lcores to start, so BE dont' drop any packets + * because corresponding streams not opened yet by FE. + * usefull when used with pcap PMDS. + * think better way, or should this timeout be a cmdlien parameter. + * ??????? + */ + rte_delay_ms(10); + + for (i = 0; i != lc->prt_num; i++) { + RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, udp_dev: %p)\n", + __func__, i, lc->prt[i].port.id, lc->prt[i].dev); + rc = setup_rx_cb(&lc->prt[i].port, lc); + if (rc < 0) + sig_handle(SIGQUIT); + } + + while (force_quit == 0) { + for (i = 0; i != lc->prt_num; i++) { + netbe_rx(lc, i); + netbe_tx(lc, i); + } + } + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, udp_ctx: %p) finish\n", + __func__, lc->id, lc->ctx); + for (i = 0; i != lc->prt_num; i++) { + RTE_LOG(NOTICE, USER1, "%s:%u(port=%u) " + "rx_stats={" + "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, " + "tx_stats={" + "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n", + __func__, i, lc->prt[i].port.id, + lc->prt[i].rx_stat.in, + lc->prt[i].rx_stat.up, + lc->prt[i].rx_stat.drop, + lc->prt[i].tx_stat.down, + lc->prt[i].tx_stat.out, + lc->prt[i].tx_stat.drop); + } + + + for (i = 0; i != lc->prt_num; i++) { + for (j = 0; j != lc->prt[i].tx_buf.num; j++) + rte_pktmbuf_free(lc->prt[i].tx_buf.pkt[j]); + } + + return 0; +} + +static int +netfe_lcore_cmp(const void *s1, const void *s2) +{ + const struct netfe_stream_prm *p1, *p2; + + p1 = s1; + p2 = s2; + return p1->lcore - p2->lcore; +} + +/* + * Helper functions, finds BE by given local and remote addresses. + */ +static int +netbe_find4(const struct in_addr *laddr, const struct in_addr *raddr) +{ + uint32_t i, j; + int32_t rc; + uint32_t idx; + struct netbe_lcore *bc; + + if (laddr->s_addr == INADDR_ANY) { + + /* we have exactly one BE, use it for all traffic */ + if (becfg.cpu_num == 1) + return 0; + + /* search by remote address. */ + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + rc = rte_lpm_lookup(bc->lpm4, + rte_be_to_cpu_32(raddr->s_addr), &idx); + if (rc == 0) + return i; + } + } else { + + /* search by local address */ + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + for (j = 0; j != bc->prt_num; j++) + if (laddr->s_addr == bc->prt[j].port.ipv4) + return i; + } + } + + return -ENOENT; +} + +static int +netbe_find6(const struct in6_addr *laddr, const struct in6_addr *raddr) +{ + uint32_t i, j; + int32_t rc; + uint8_t idx; + struct netbe_lcore *bc; + + if (memcmp(laddr, &in6addr_any, sizeof(*laddr)) == 0) { + + /* we have exactly one BE, use it for all traffic */ + if (becfg.cpu_num == 1) + return 0; + + /* search by remote address. */ + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + rc = rte_lpm6_lookup(bc->lpm6, + (uint8_t *)(uintptr_t)raddr->s6_addr, &idx); + if (rc == 0) + return i; + } + } else { + /* search by local address */ + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + for (j = 0; j != bc->prt_num; j++) + if (memcmp(laddr, &bc->prt[j].port.ipv6, + sizeof(*laddr)) == 0) + return i; + } + } + + return -ENOENT; +} + +static int +netbe_find(const struct tle_udp_stream_param *p) +{ + const struct sockaddr_in *l4, *r4; + const struct sockaddr_in6 *l6, *r6; + + if (p->local_addr.ss_family == AF_INET) { + l4 = (const struct sockaddr_in *)&p->local_addr; + r4 = (const struct sockaddr_in *)&p->remote_addr; + return netbe_find4(&l4->sin_addr, &r4->sin_addr); + } else if (p->local_addr.ss_family == AF_INET6) { + l6 = (const struct sockaddr_in6 *)&p->local_addr; + r6 = (const struct sockaddr_in6 *)&p->remote_addr; + return netbe_find6(&l6->sin6_addr, &r6->sin6_addr); + } + return -EINVAL; +} + +static int +netfe_sprm_flll_be(struct netfe_sprm *sp, uint32_t line) +{ + int32_t bidx; + + bidx = netbe_find(&sp->prm); + if (bidx < 0) { + RTE_LOG(ERR, USER1, "%s(line=%u): no BE for that stream\n", + __func__, line); + return -EINVAL; + } + sp->bidx = bidx; + return 0; +} + +/* start front-end processing. */ +static int +netfe_launch(struct netfe_lcore_prm *lprm) +{ + uint32_t i, j, k, lc, ln, mi; + struct netfe_lcore_prm feprm[RTE_MAX_LCORE]; + + /* determine on what BE each stream should be open. */ + for (i = 0; i != lprm->nb_streams; i++) { + + lc = lprm->stream[i].lcore; + ln = lprm->stream[i].line; + + if (netfe_sprm_flll_be(&lprm->stream[i].sprm, ln) != 0 || + (lprm->stream[i].op == FWD && + netfe_sprm_flll_be(&lprm->stream[i].fprm, + ln) != 0)) + return -EINVAL; + } + + /* group all fe parameters by lcore. */ + + memset(feprm, 0, sizeof(feprm)); + qsort(lprm->stream, lprm->nb_streams, sizeof(lprm->stream[0]), + netfe_lcore_cmp); + + k = 0; + mi = UINT32_MAX; + for (i = 0; i != lprm->nb_streams; i = j) { + + lc = lprm->stream[i].lcore; + ln = lprm->stream[i].line; + + if (rte_lcore_is_enabled(lc) == 0) { + RTE_LOG(ERR, USER1, + "%s(line=%u): lcore %u is not enabled\n", + __func__, ln, lc); + return -EINVAL; + } + + if (rte_get_master_lcore() == lc) + mi = k; + else if (rte_eal_get_lcore_state(lc) == RUNNING) { + RTE_LOG(ERR, USER1, + "%s(line=%u): lcore %u already in use\n", + __func__, ln, lc); + return -EINVAL; + } + + for (j = i + 1; j != lprm->nb_streams && + lc == lprm->stream[j].lcore; + j++) + ; + + feprm[k].max_streams = lprm->max_streams; + feprm[k].nb_streams = j - i; + feprm[k].stream = lprm->stream + i; + k++; + } + + /* launch all slave FE lcores. */ + for (i = 0; i != k; i++) { + if (i != mi) + rte_eal_remote_launch(netfe_lcore, feprm + i, + feprm[i].stream[0].lcore); + } + + /* launch FE at master lcore. */ + if (mi != UINT32_MAX) + netfe_lcore(feprm + mi); + + return 0; +} + +int +main(int argc, char *argv[]) +{ + int32_t opt, opt_idx, rc; + uint32_t i; + uint64_t v; + struct tle_udp_ctx_param ctx_prm; + struct netfe_lcore_prm feprm; + struct rte_eth_stats stats; + char fecfg_fname[PATH_MAX + 1]; + char becfg_fname[PATH_MAX + 1]; + + fecfg_fname[0] = 0; + becfg_fname[0] = 0; + + rc = rte_eal_init(argc, argv); + if (rc < 0) + rte_exit(EXIT_FAILURE, + "%s: rte_eal_init failed with error code: %d\n", + __func__, rc); + + argc -= rc; + argv += rc; + + optind = 0; + optarg = NULL; + while ((opt = getopt_long(argc, argv, "PR:S:b:f:s:", long_opt, + &opt_idx)) != EOF) { + if (opt == OPT_SHORT_PROMISC) { + becfg.promisc = 1; + } else if (opt == OPT_SHORT_RBUFS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm.max_stream_rbufs = v; + } else if (opt == OPT_SHORT_SBUFS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm.max_stream_sbufs = v; + } else if (opt == OPT_SHORT_STREAMS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm.max_streams = v; + } else if (opt == OPT_SHORT_BECFG) { + snprintf(becfg_fname, sizeof(becfg_fname), "%s", + optarg); + } else if (opt == OPT_SHORT_FECFG) { + snprintf(fecfg_fname, sizeof(fecfg_fname), "%s", + optarg); + } else { + rte_exit(EXIT_FAILURE, + "%s: unknown option: \'%c\'\n", + __func__, opt); + } + } + + signal(SIGINT, sig_handle); + + netbe_port_init(&becfg, argc - optind, argv + optind); + netbe_lcore_init(&becfg, &ctx_prm); + + if ((rc = netbe_dest_init(becfg_fname, &becfg)) != 0) + sig_handle(SIGQUIT); + + for (i = 0; i != becfg.prt_num && rc == 0; i++) { + RTE_LOG(NOTICE, USER1, "%s: starting port %u\n", + __func__, becfg.prt[i].id); + rc = rte_eth_dev_start(becfg.prt[i].id); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: rte_eth_dev_start(%u) returned " + "error code: %d\n", + __func__, becfg.prt[i].id, rc); + sig_handle(SIGQUIT); + } + } + + feprm.max_streams = ctx_prm.max_streams * becfg.cpu_num; + if (rc == 0 && (rc = netfe_parse_cfg(fecfg_fname, &feprm)) != 0) + sig_handle(SIGQUIT); + + for (i = 0; rc == 0 && i != becfg.cpu_num; i++) { + rte_eal_remote_launch(netbe_lcore, becfg.cpu + i, + becfg.cpu[i].id); + } + + if (rc == 0 && (rc = netfe_launch(&feprm)) != 0) + sig_handle(SIGQUIT); + + rte_eal_mp_wait_lcore(); + + for (i = 0; i != becfg.prt_num; i++) { + RTE_LOG(NOTICE, USER1, "%s: stoping port %u\n", + __func__, becfg.prt[i].id); + rte_eth_stats_get(becfg.prt[i].id, &stats); + RTE_LOG(NOTICE, USER1, "port %u stats={\n" + "ipackets=%" PRIu64 ";" + "ibytes=%" PRIu64 ";" + "ierrors=%" PRIu64 ";\n" + "opackets=%" PRIu64 ";" + "obytes=%" PRIu64 ";" + "oerrors=%" PRIu64 ";\n" + "}\n", + becfg.prt[i].id, + stats.ipackets, + stats.ibytes, + stats.ierrors, + stats.opackets, + stats.obytes, + stats.oerrors); + rte_eth_dev_stop(becfg.prt[i].id); + } + + netbe_lcore_fini(&becfg); + + return 0; +} diff --git a/examples/udpfwd/netbe.h b/examples/udpfwd/netbe.h new file mode 100644 index 0000000..41bd452 --- /dev/null +++ b/examples/udpfwd/netbe.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NETBE_H__ +#define __NETBE_H__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_PKT_BURST 0x20 + +/* + * BE related structures. + */ + +struct netbe_port { + uint32_t id; + uint32_t lcore; + uint32_t mtu; + uint32_t rx_offload; + uint32_t tx_offload; + uint32_t ipv4; + struct in6_addr ipv6; + struct ether_addr mac; +}; + +struct netbe_dest { + uint32_t line; + uint32_t port; + uint32_t mtu; + uint32_t prfx; + uint16_t family; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; + struct ether_addr mac; +}; + +struct netbe_dest_prm { + uint32_t nb_dest; + struct netbe_dest *dest; +}; + +struct pkt_buf { + uint32_t num; + struct rte_mbuf *pkt[2 * MAX_PKT_BURST]; +}; + +struct netbe_dev { + uint16_t rxqid; + uint16_t txqid; + struct netbe_port port; + struct tle_udp_dev *dev; + struct { + uint64_t in; + uint64_t up; + uint64_t drop; + } rx_stat; + struct { + uint64_t down; + uint64_t out; + uint64_t drop; + } tx_stat; + struct pkt_buf tx_buf; +}; + +/* 8 bit LPM user data. */ +#define LCORE_MAX_DST (UINT8_MAX + 1) + +struct netbe_lcore { + uint32_t id; + struct rte_lpm *lpm4; + struct rte_lpm6 *lpm6; + struct rte_ip_frag_tbl *ftbl; + struct tle_udp_ctx *ctx; + uint32_t prt_num; + uint32_t dst4_num; + uint32_t dst6_num; + struct netbe_dev prt[RTE_MAX_ETHPORTS]; + struct tle_udp_dest dst4[LCORE_MAX_DST]; + struct tle_udp_dest dst6[LCORE_MAX_DST]; + struct rte_ip_frag_death_row death_row; +}; + +struct netbe_cfg { + uint32_t promisc; + uint32_t prt_num; + uint32_t cpu_num; + struct netbe_port prt[RTE_MAX_ETHPORTS]; + struct netbe_lcore cpu[RTE_MAX_LCORE]; +}; + +/* + * FE related structures. + */ + +enum { + RXONLY, + TXONLY, + RXTX, + FWD, +}; + +struct netfe_sprm { + uint32_t bidx; /* BE index to use. */ + struct tle_udp_stream_param prm; +}; + +struct netfe_stream_prm { + uint32_t lcore; + uint32_t line; + uint16_t op; + uint16_t txlen; /* valid/used only for TXONLY op. */ + struct netfe_sprm sprm; + struct netfe_sprm fprm; /* valid/used only for FWD op. */ +}; + +struct netfe_lcore_prm { + uint32_t max_streams; + uint32_t nb_streams; + struct netfe_stream_prm *stream; +}; + +struct netfe_stream { + struct tle_udp_stream *s; + struct tle_event *rxev; + struct tle_event *txev; + uint16_t op; + uint16_t family; + uint16_t txlen; + struct { + uint64_t rxp; + uint64_t txp; + uint64_t fwp; + uint64_t drops; + uint64_t rxev[TLE_SEV_NUM]; + uint64_t txev[TLE_SEV_NUM]; + } stat; + struct pkt_buf pbuf; + struct sockaddr_storage raddr; + struct netfe_sprm fwdprm; +}; + +struct netfe_lcore { + uint32_t snum; /* max number of streams */ + uint32_t sidx; /* last open stream index */ + struct tle_evq *rxeq; + struct tle_evq *txeq; + struct rte_hash *fw4h; + struct rte_hash *fw6h; + struct netfe_stream *fs; +}; + +/* + * debug/trace macros. + */ + +#define DUMMY_MACRO do {} while (0) + +#ifdef NETFE_DEBUG +#define NETFE_TRACE(fmt, arg...) printf(fmt, ##arg) +#define NETFE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 64) +#else +#define NETFE_TRACE(fmt, arg...) DUMMY_MACRO +#define NETFE_PKT_DUMP(p) DUMMY_MACRO +#endif + +#ifdef NETBE_DEBUG +#define NETBE_TRACE(fmt, arg...) printf(fmt, ##arg) +#define NETBE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 64) +#else +#define NETBE_TRACE(fmt, arg...) DUMMY_MACRO +#define NETBE_PKT_DUMP(p) DUMMY_MACRO +#endif + +#define FUNC_STAT(v, c) do { \ + static uint64_t nb_call, nb_data; \ + nb_call++; \ + nb_data += (v); \ + if ((nb_call & ((c) - 1)) == 0) { \ + printf("%s#%d@%u: nb_call=%lu, avg(" #v ")=%#Lf\n", \ + __func__, __LINE__, rte_lcore_id(), nb_call, \ + (long double)nb_data / nb_call); \ + nb_call = 0; \ + nb_data = 0; \ + } \ +} while (0) + +#define FUNC_TM_STAT(v, c) do { \ + static uint64_t nb_call, nb_data; \ + static uint64_t cts, pts, sts; \ + cts = rte_rdtsc(); \ + if (pts != 0) \ + sts += cts - pts; \ + pts = cts; \ + nb_call++; \ + nb_data += (v); \ + if ((nb_call & ((c) - 1)) == 0) { \ + printf("%s#%d@%u: nb_call=%lu, " \ + "avg(" #v ")=%#Lf, " \ + "avg(cycles)=%#Lf, " \ + "avg(cycles/" #v ")=%#Lf\n", \ + __func__, __LINE__, rte_lcore_id(), nb_call, \ + (long double)nb_data / nb_call, \ + (long double)sts / nb_call, \ + (long double)sts / nb_data); \ + nb_call = 0; \ + nb_data = 0; \ + sts = 0; \ + } \ +} while (0) + +int setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc); + +#endif /* __NETBE_H__ */ diff --git a/examples/udpfwd/parse.c b/examples/udpfwd/parse.c new file mode 100644 index 0000000..979145c --- /dev/null +++ b/examples/udpfwd/parse.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "netbe.h" +#include "parse.h" + +#define DEF_LINE_NUM 0x400 + +static const struct { + const char *name; + uint16_t op; +} name2feop[] = { + { .name = "rx", .op = RXONLY,}, + { .name = "tx", .op = TXONLY,}, + { .name = "echo", .op = RXTX,}, + { .name = "fwd", .op = FWD,}, +}; + +static int +parse_ipv4_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + + rv = prm; + if (inet_pton(AF_INET, val, &rv->in.addr4) != 1) + return -EINVAL; + rv->in.family = AF_INET; + return 0; +} + +static int +parse_ipv6_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + + rv = prm; + if (inet_pton(AF_INET6, val, &rv->in.addr6) != 1) + return -EINVAL; + rv->in.family = AF_INET6; + return 0; +} + +static int +parse_ip_val(__rte_unused const char *key, const char *val, void *prm) +{ + if (parse_ipv6_val(key, val, prm) != 0 && + parse_ipv4_val(key, val, prm) != 0) + return -EINVAL; + return 0; +} + + +#define PARSE_UINT8x16(s, v, l) \ +do { \ + char *end; \ + unsigned long t; \ + errno = 0; \ + t = strtoul((s), &end, 16); \ + if (errno != 0 || end[0] != (l) || t > UINT8_MAX) \ + return -EINVAL; \ + (s) = end + 1; \ + (v) = t; \ +} while (0) + +static int +parse_mac_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + const char *s; + + rv = prm; + s = val; + + PARSE_UINT8x16(s, rv->mac.addr_bytes[0], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[1], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[2], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[3], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[4], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[5], 0); + return 0; +} + +static int +parse_feop_val(__rte_unused const char *key, const char *val, void *prm) +{ + uint32_t i; + union parse_val *rv; + + rv = prm; + for (i = 0; i != RTE_DIM(name2feop); i++) { + if (strcmp(val, name2feop[i].name) == 0) { + rv->u64 = name2feop[i].op; + return 0; + } + } + + return -EINVAL; +} + +static int +parse_kvargs(const char *arg, const char *keys_man[], uint32_t nb_man, + const char *keys_opt[], uint32_t nb_opt, + const arg_handler_t hndl[], union parse_val val[]) +{ + uint32_t j, k; + struct rte_kvargs *kvl; + + kvl = rte_kvargs_parse(arg, NULL); + if (kvl == NULL) { + RTE_LOG(ERR, USER1, + "%s: invalid parameter: %s\n", + __func__, arg); + return -EINVAL; + } + + for (j = 0; j != nb_man; j++) { + if (rte_kvargs_count(kvl, keys_man[j]) == 0) { + RTE_LOG(ERR, USER1, + "%s: %s missing mandatory key: %s\n", + __func__, arg, keys_man[j]); + rte_kvargs_free(kvl); + return -EINVAL; + } + } + + for (j = 0; j != nb_man; j++) { + if (rte_kvargs_process(kvl, keys_man[j], hndl[j], + val + j) != 0) { + RTE_LOG(ERR, USER1, + "%s: %s invalid value for key: %s\n", + __func__, arg, keys_man[j]); + rte_kvargs_free(kvl); + return -EINVAL; + } + } + + for (j = 0; j != nb_opt; j++) { + k = j + nb_man; + if (rte_kvargs_process(kvl, keys_opt[j], hndl[k], + val + k) != 0) { + RTE_LOG(ERR, USER1, + "%s: %s invalid value for key: %s\n", + __func__, arg, keys_opt[j]); + rte_kvargs_free(kvl); + return -EINVAL; + } + } + + rte_kvargs_free(kvl); + return 0; +} + +int +parse_netbe_arg(struct netbe_port *prt, const char *arg) +{ + int32_t rc; + + static const char *keys_man[] = { + "port", + "lcore", + }; + + static const char *keys_opt[] = { + "mtu", + "rx_offload", + "tx_offload", + "ipv4", + "ipv6", + }; + + static const arg_handler_t hndl[] = { + parse_uint_val, + parse_uint_val, + parse_uint_val, + parse_uint_val, + parse_uint_val, + parse_ipv4_val, + parse_ipv6_val, + }; + + union parse_val val[RTE_DIM(hndl)]; + + memset(val, 0, sizeof(val)); + val[2].u64 = ETHER_MAX_VLAN_FRAME_LEN - ETHER_CRC_LEN; + + rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), + keys_opt, RTE_DIM(keys_opt), hndl, val); + if (rc != 0) + return rc; + + prt->id = val[0].u64; + prt->lcore = val[1].u64; + prt->mtu = val[2].u64; + prt->rx_offload = val[3].u64; + prt->tx_offload = val[4].u64; + prt->ipv4 = val[5].in.addr4.s_addr; + prt->ipv6 = val[6].in.addr6; + + return 0; +} +static int +check_netbe_dest(const struct netbe_dest *dst) +{ + if (dst->port >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, USER1, "%s(line=%u) invalid port=%u", + __func__, dst->line, dst->port); + return -EINVAL; + } else if ((dst->family == AF_INET && + dst->prfx > sizeof(struct in_addr) * CHAR_BIT) || + (dst->family == AF_INET6 && + dst->prfx > sizeof(struct in6_addr) * CHAR_BIT)) { + RTE_LOG(ERR, USER1, "%s(line=%u) invalid masklen=%u", + __func__, dst->line, dst->prfx); + return -EINVAL; + } else if (dst->mtu > ETHER_MAX_JUMBO_FRAME_LEN - ETHER_CRC_LEN) { + RTE_LOG(ERR, USER1, "%s(line=%u) invalid mtu=%u", + __func__, dst->line, dst->mtu); + return -EINVAL; + } + return 0; +} + +static int +parse_netbe_dest(struct netbe_dest *dst, const char *arg) +{ + int32_t rc; + + static const char *keys_man[] = { + "port", + "addr", + "masklen", + "mac", + }; + + static const char *keys_opt[] = { + "mtu", + }; + + static const arg_handler_t hndl[] = { + parse_uint_val, + parse_ip_val, + parse_uint_val, + parse_mac_val, + parse_uint_val, + }; + + union parse_val val[RTE_DIM(hndl)]; + + /* set default values. */ + memset(val, 0, sizeof(val)); + val[4].u64 = ETHER_MAX_JUMBO_FRAME_LEN - ETHER_CRC_LEN; + + rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), + keys_opt, RTE_DIM(keys_opt), hndl, val); + if (rc != 0) + return rc; + + dst->port = val[0].u64; + dst->family = val[1].in.family; + if (val[1].in.family == AF_INET) + dst->ipv4 = val[1].in.addr4; + else + dst->ipv6 = val[1].in.addr6; + dst->prfx = val[2].u64; + memcpy(&dst->mac, &val[3].mac, sizeof(dst->mac)); + dst->mtu = val[4].u64; + + return 0; +} + +int +netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm) +{ + uint32_t i, ln, n, num; + int32_t rc; + size_t sz; + char *s; + FILE *f; + struct netbe_dest *dp; + char line[LINE_MAX]; + + f = fopen(fname, "r"); + if (f == NULL) { + RTE_LOG(ERR, USER1, "%s failed to open file \"%s\"\n", + __func__, fname); + return -EINVAL; + } + + n = 0; + num = 0; + dp = NULL; + + for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) { + + /* skip spaces at the start. */ + for (s = line; isspace(s[0]); s++) + ; + + /* skip comment line. */ + if (s[0] == '#' || s[0] == 0) + continue; + + /* skip spaces at the end. */ + for (i = strlen(s); i-- != 0 && isspace(s[i]); s[i] = 0) + ; + + if (n == num) { + num += DEF_LINE_NUM; + sz = sizeof(dp[0]) * num; + dp = realloc(dp, sizeof(dp[0]) * num); + if (dp == NULL) { + RTE_LOG(ERR, USER1, + "%s(%s) allocation of %zu bytes " + "failed\n", + __func__, fname, sz); + rc = -ENOMEM; + break; + } + } + + dp[n].line = ln + 1; + if ((rc = parse_netbe_dest(dp + n, s)) != 0 || + (rc = check_netbe_dest(dp + n)) != 0) { + RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n", + __func__, fname, dp[n].line); + break; + } + n++; + } + + fclose(f); + + if (rc != 0) { + free(dp); + dp = NULL; + n = 0; + } + + prm->dest = dp; + prm->nb_dest = n; + return rc; +} + +static void +pv2saddr(struct sockaddr_storage *ss, const union parse_val *pva, + const union parse_val *pvp) +{ + ss->ss_family = pva->in.family; + if (pva->in.family == AF_INET) { + struct sockaddr_in *si = (struct sockaddr_in *)ss; + si->sin_addr = pva->in.addr4; + si->sin_port = rte_cpu_to_be_16((uint16_t)pvp->u64); + } else { + struct sockaddr_in6 *si = (struct sockaddr_in6 *)ss; + si->sin6_addr = pva->in.addr6; + si->sin6_port = rte_cpu_to_be_16((uint16_t)pvp->u64); + } +} + +static int +parse_netfe_arg(struct netfe_stream_prm *sp, const char *arg) +{ + int32_t rc; + + static const char *keys_man[] = { + "lcore", + "op", + "laddr", + "lport", + "raddr", + "rport", + }; + + static const char *keys_opt[] = { + "txlen", + "fwladdr", + "fwlport", + "fwraddr", + "fwrport", + }; + + static const arg_handler_t hndl[] = { + parse_uint_val, + parse_feop_val, + parse_ip_val, + parse_uint_val, + parse_ip_val, + parse_uint_val, + parse_uint_val, + parse_ip_val, + parse_uint_val, + parse_ip_val, + parse_uint_val, + }; + + union parse_val val[RTE_DIM(hndl)]; + + memset(val, 0, sizeof(val)); + rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), + keys_opt, RTE_DIM(keys_opt), hndl, val); + if (rc != 0) + return rc; + + sp->lcore = val[0].u64; + sp->op = val[1].u64; + pv2saddr(&sp->sprm.prm.local_addr, val + 2, val + 3); + pv2saddr(&sp->sprm.prm.remote_addr, val + 4, val + 5); + sp->txlen = val[6].u64; + pv2saddr(&sp->fprm.prm.local_addr, val + 7, val + 8); + pv2saddr(&sp->fprm.prm.remote_addr, val + 9, val + 10); + + return 0; +} + +static const char * +format_feop(uint16_t op) +{ + uint32_t i; + + for (i = 0; i != RTE_DIM(name2feop); i++) { + if (name2feop[i].op == op) + return name2feop[i].name; + } + + return NULL; +} + +static int +is_addr_wc(const struct sockaddr_storage *sp) +{ + const struct sockaddr_in *i4; + const struct sockaddr_in6 *i6; + + if (sp->ss_family == AF_INET) { + i4 = (const struct sockaddr_in *)sp; + return (i4->sin_addr.s_addr == INADDR_ANY); + } else if (sp->ss_family == AF_INET6) { + i6 = (const struct sockaddr_in6 *)sp; + return (memcmp(&i6->sin6_addr, &in6addr_any, + sizeof(i6->sin6_addr)) == 0); + } + return 0; +} + +static int +check_netfe_arg(const struct netfe_stream_prm *sp) +{ + char buf[INET6_ADDRSTRLEN]; + + if (sp->sprm.prm.local_addr.ss_family != + sp->sprm.prm.remote_addr.ss_family) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: " + "laddr and raddr for different protocols\n", + sp->line); + return -EINVAL; + } + + if (sp->op == TXONLY) { + if (sp->txlen > RTE_MBUF_DEFAULT_DATAROOM || sp->txlen == 0) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: txlen=%u " + "exceeds allowed values: (0, %u]\n", + sp->line, sp->txlen, RTE_MBUF_DEFAULT_DATAROOM); + return -EINVAL; + } else if (is_addr_wc(&sp->sprm.prm.remote_addr)) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: " + "raddr=%s are not allowed for op=%s;\n", + sp->line, + format_addr(&sp->sprm.prm.remote_addr, + buf, sizeof(buf)), + format_feop(sp->op)); + return -EINVAL; + } + } else if (sp->op == FWD) { + if (sp->fprm.prm.local_addr.ss_family != + sp->fprm.prm.remote_addr.ss_family) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: " + "fwladdr and fwraddr for different protocols\n", + sp->line); + return -EINVAL; + } else if (is_addr_wc(&sp->fprm.prm.remote_addr)) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: " + "fwaddr=%s are not allowed for op=%s;\n", + sp->line, + format_addr(&sp->fprm.prm.remote_addr, + buf, sizeof(buf)), + format_feop(sp->op)); + return -EINVAL; + } + } + + return 0; +} + +int +netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp) +{ + uint32_t i, ln, n, num; + int32_t rc; + size_t sz; + char *s; + FILE *f; + struct netfe_stream_prm *sp; + char line[LINE_MAX]; + + f = fopen(fname, "r"); + if (f == NULL) { + RTE_LOG(ERR, USER1, "%s failed to open file \"%s\"\n", + __func__, fname); + return -EINVAL; + } + + n = 0; + num = 0; + sp = NULL; + + for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) { + + /* skip spaces at the start. */ + for (s = line; isspace(s[0]); s++) + ; + + /* skip comment line. */ + if (s[0] == '#' || s[0] == 0) + continue; + + /* skip spaces at the end. */ + for (i = strlen(s); i-- != 0 && isspace(s[i]); s[i] = 0) + ; + + if (n == lp->max_streams) { + RTE_LOG(ERR, USER1, + "%s(%s) number of entries exceed max streams " + "value: %u\n", + __func__, fname, n); + rc = -EINVAL; + break; + } + + if (n == num) { + num += DEF_LINE_NUM; + sz = sizeof(sp[0]) * num; + sp = realloc(sp, sizeof(sp[0]) * num); + if (sp == NULL) { + RTE_LOG(ERR, USER1, + "%s(%s) allocation of %zu bytes " + "failed\n", + __func__, fname, sz); + rc = -ENOMEM; + break; + } + } + + sp[n].line = ln + 1; + if ((rc = parse_netfe_arg(sp + n, s)) != 0 || + (rc = check_netfe_arg(sp + n)) != 0) { + RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n", + __func__, fname, sp[n].line); + break; + } + n++; + } + + fclose(f); + + if (rc != 0) { + free(sp); + sp = NULL; + n = 0; + } + + lp->stream = sp; + lp->nb_streams = n; + return rc; +} diff --git a/examples/udpfwd/parse.h b/examples/udpfwd/parse.h new file mode 100644 index 0000000..911c874 --- /dev/null +++ b/examples/udpfwd/parse.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __PARSE_H__ +#define __PARSE_H__ + +union parse_val { + uint64_t u64; + struct { + uint16_t family; + union { + struct in_addr addr4; + struct in6_addr addr6; + }; + } in; + struct ether_addr mac; +}; + +static int +parse_uint_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + unsigned long v; + char *end; + + rv = prm; + errno = 0; + v = strtoul(val, &end, 0); + if (errno != 0 || end[0] != 0 || v > UINT32_MAX) + return -EINVAL; + + rv->u64 = v; + return 0; +} + +static const char * +format_addr(const struct sockaddr_storage *sp, char buf[], size_t len) +{ + const struct sockaddr_in *i4; + const struct sockaddr_in6 *i6; + const void *addr; + + if (sp->ss_family == AF_INET) { + i4 = (const struct sockaddr_in *)sp; + addr = &i4->sin_addr; + } else if (sp->ss_family == AF_INET6) { + i6 = (const struct sockaddr_in6 *)sp; + addr = &i6->sin6_addr; + } else + return NULL; + + + return inet_ntop(sp->ss_family, addr, buf, len); +} + +int parse_netbe_arg(struct netbe_port *prt, const char *arg); + +int netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm); + +int netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp); + +#endif /* __PARSE_H__ */ + diff --git a/examples/udpfwd/pkt.c b/examples/udpfwd/pkt.c new file mode 100644 index 0000000..b0d4452 --- /dev/null +++ b/examples/udpfwd/pkt.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "netbe.h" +#include + +static inline void +fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4) +{ + m->l2_len = l2; + m->l3_len = l3; + m->l4_len = l4; +} + +static inline int +is_ipv4_frag(const struct ipv4_hdr *iph) +{ + const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG); + + return ((mask & iph->fragment_offset) != 0); +} + +static inline void +fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, + uint32_t frag) +{ + const struct ipv4_hdr *iph; + int32_t dlen, len; + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2 + sizeof(struct udp_hdr); + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2); + len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; + + if (frag != 0 && is_ipv4_frag(iph)) { + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + } + + if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto)) + m->packet_type = RTE_PTYPE_UNKNOWN; + else + fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr)); +} + +static inline int +ipv6x_hdr(uint32_t proto) +{ + return (proto == IPPROTO_HOPOPTS || + proto == IPPROTO_ROUTING || + proto == IPPROTO_FRAGMENT || + proto == IPPROTO_AH || + proto == IPPROTO_NONE || + proto == IPPROTO_DSTOPTS); +} + +static inline void +fill_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto, + uint32_t fproto) +{ + const struct ip6_ext *ipx; + int32_t dlen, len, ofs; + + len = sizeof(struct ipv6_hdr); + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2 + sizeof(struct udp_hdr); + + ofs = l2 + len; + ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs); + + while (ofs > 0 && len < dlen) { + + switch (nproto) { + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + ofs = (ipx->ip6e_len + 1) << 3; + break; + case IPPROTO_AH: + ofs = (ipx->ip6e_len + 2) << 2; + break; + case IPPROTO_FRAGMENT: + /* + * tso_segsz is not used by RX, so suse it as temporary + * buffer to store the fragment offset. + */ + m->tso_segsz = ofs; + ofs = sizeof(struct ip6_frag); + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + break; + default: + ofs = 0; + } + + if (ofs > 0) { + nproto = ipx->ip6e_nxt; + len += ofs; + ipx += ofs / sizeof(*ipx); + } + } + + /* undercognised or invalid packet. */ + if ((ofs == 0 && nproto != fproto) || len > dlen) + m->packet_type = RTE_PTYPE_UNKNOWN; + else + fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr)); +} + +static inline void +fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto) +{ + const struct ipv6_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + if (iph->proto == fproto) + fill_pkt_hdr_len(m, l2, sizeof(struct ipv6_hdr), + sizeof(struct udp_hdr)); + else if (ipv6x_hdr(iph->proto) != 0) + fill_ipv6x_hdr_len(m, l2, iph->proto, fproto); +} + +static inline void +fill_eth_hdr_len(struct rte_mbuf *m) +{ + uint32_t dlen, l2; + uint16_t etp; + const struct ether_hdr *eth; + + dlen = rte_pktmbuf_data_len(m); + + /* check that first segment is at least 42B long. */ + if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct udp_hdr)) { + m->packet_type = RTE_PTYPE_UNKNOWN; + return; + } + + l2 = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) + l2 += sizeof(struct vlan_hdr); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + m->packet_type = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + fill_ipv4_hdr_len(m, l2, IPPROTO_UDP, 1); + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2 + sizeof(struct ipv6_hdr) + + sizeof(struct udp_hdr)) { + m->packet_type = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + fill_ipv6_hdr_len(m, l2, IPPROTO_UDP); + } else + m->packet_type = RTE_PTYPE_UNKNOWN; +} + +static inline void +fix_reassembled(struct rte_mbuf *m) +{ + /* update packet type. */ + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_UDP; + + /* fix reassemble setting TX flags. */ + m->ol_flags &= ~PKT_TX_IP_CKSUM; + + /* fix l3_len after reassemble. */ + if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) + m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment); +} + +static struct rte_mbuf * +reassemble(struct rte_mbuf *m, struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, uint64_t tms) +{ + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + + struct ipv4_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + + /* process this fragment. */ + m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph); + + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + + struct ipv6_hdr *iph; + struct ipv6_extension_fragment *fhdr; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len); + + /* + * we store fragment header offset in tso_segsz before + * temporary, just to avoid another scan of ipv6 header. + */ + fhdr = rte_pktmbuf_mtod_offset(m, + struct ipv6_extension_fragment *, m->tso_segsz); + m->tso_segsz = 0; + + /* process this fragment. */ + m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr); + + } else { + rte_pktmbuf_free(m); + m = NULL; + } + + /* got reassembled packet. */ + if (m != NULL) + fix_reassembled(m); + + return m; +} + +/* exclude NULLs from the final list of packets. */ +static inline uint32_t +compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero) +{ + uint32_t i, j, k, l; + + for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) { + + /* found a hole. */ + if (pkt[j] == NULL) { + + /* find how big is it. */ + for (i = j; i-- != 0 && pkt[i] == NULL; ) + ; + /* fill the hole. */ + for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++) + pkt[l] = pkt[k]; + + nb_pkt -= j - i; + nb_zero -= j - i; + } + } + + return nb_pkt; +} + +/* + * HW can recognise L2/L3 with/without extentions/L4 (ixgbe/igb/fm10k) + */ +static uint16_t +type0_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp, x; + uint64_t cts; + struct netbe_lcore *lc; + + lc = user_param; + cts = 0; + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + /* non fragmented udp packets. */ + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L2_ETHER): + fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr), + sizeof(struct ipv4_hdr), + sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 | + RTE_PTYPE_L2_ETHER): + fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr), + sizeof(struct ipv6_hdr), + sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), + UINT32_MAX, 0); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP); + break; + /* possibly fragmented udp packets. */ + case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP, 1); + break; + case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + /* + * if it is a fragment, try to reassemble it, + * if by some reason it can't be done, then + * set pkt[] entry to NULL. + */ + if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == + RTE_PTYPE_L4_FRAG) { + cts = (cts == 0) ? rte_rdtsc() : cts; + pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row, + cts); + x += (pkt[j] == NULL); + } + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +/* + * HW can recognise L2/L3/L4 and fragments (i40e). + */ +static uint16_t +type1_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp, x; + uint64_t cts; + struct netbe_lcore *lc; + + lc = user_param; + cts = 0; + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), + UINT32_MAX, 0); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP); + break; + case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP, 0); + break; + case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + /* + * if it is a fragment, try to reassemble it, + * if by some reason it can't be done, then + * set pkt[] entry to NULL. + */ + if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == + RTE_PTYPE_L4_FRAG) { + cts = (cts == 0) ? rte_rdtsc() : cts; + pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row, + cts); + x += (pkt[j] == NULL); + } + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +/* + * generic, assumes HW doesn't recognise any packet type. + */ +static uint16_t +typen_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, x; + uint64_t cts; + struct netbe_lcore *lc; + + lc = user_param; + cts = 0; + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + fill_eth_hdr_len(pkt[j]); + + /* + * if it is a fragment, try to reassemble it, + * if by some reason it can't be done, then + * set pkt[] entry to NULL. + */ + if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == + RTE_PTYPE_L4_FRAG) { + cts = (cts == 0) ? rte_rdtsc() : cts; + pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row, + cts); + x += (pkt[j] == NULL); + } + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +int +setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc) +{ + int32_t i, rc; + uint32_t smask; + void *cb; + + const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK; + + enum { + ETHER_PTYPE = 0x1, + IPV4_PTYPE = 0x2, + IPV4_EXT_PTYPE = 0x4, + IPV6_PTYPE = 0x8, + IPV6_EXT_PTYPE = 0x10, + UDP_PTYPE = 0x20, + }; + + static const struct { + uint32_t mask; + const char *name; + rte_rx_callback_fn fn; + } ptype2cb[] = { + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE | + IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE, + .name = "HW l2/l3x/l4 ptype", + .fn = type0_rx_callback, + }, + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | + UDP_PTYPE, + .name = "HW l2/l3/l4 ptype", + .fn = type1_rx_callback, + }, + { + .mask = 0, + .name = "no HW ptype", + .fn = typen_rx_callback, + }, + }; + + rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0); + if (rc < 0) { + RTE_LOG(ERR, USER1, + "%s(port=%u) failed to get supported ptypes;\n", + __func__, uprt->id); + return rc; + } + + uint32_t ptype[rc]; + rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, ptype, rc); + + smask = 0; + for (i = 0; i != rc; i++) { + switch (ptype[i]) { + case RTE_PTYPE_L2_ETHER: + smask |= ETHER_PTYPE; + break; + case RTE_PTYPE_L3_IPV4: + case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: + smask |= IPV4_PTYPE; + break; + case RTE_PTYPE_L3_IPV4_EXT: + smask |= IPV4_EXT_PTYPE; + break; + case RTE_PTYPE_L3_IPV6: + case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: + smask |= IPV6_PTYPE; + break; + case RTE_PTYPE_L3_IPV6_EXT: + smask |= IPV6_EXT_PTYPE; + break; + case RTE_PTYPE_L4_UDP: + smask |= UDP_PTYPE; + break; + } + } + + for (i = 0; i != RTE_DIM(ptype2cb); i++) { + if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) { + cb = rte_eth_add_rx_callback(uprt->id, 0, + ptype2cb[i].fn, lc); + rc = -rte_errno; + RTE_LOG(ERR, USER1, + "%s(port=%u), setup RX callback \"%s\" " + "returns %p;\n", + __func__, uprt->id, ptype2cb[i].name, cb); + return ((cb == NULL) ? rc : 0); + } + } + + /* no proper callback found. */ + RTE_LOG(ERR, USER1, + "%s(port=%u) failed to find an appropriate callback;\n", + __func__, uprt->id); + return -ENOENT; +} -- cgit 1.2.3-korg